kullm-mistral-S / trainer_state.json
heavytail's picture
Upload folder using huggingface_hub
08b01a1 verified
raw
history blame contribute delete
No virus
136 kB
{
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 1.913265306122449,
"eval_steps": 800,
"global_step": 4500,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.0,
"learning_rate": 0,
"loss": 2.7231,
"step": 4
},
{
"epoch": 0.0,
"learning_rate": 0,
"loss": 2.8688,
"step": 8
},
{
"epoch": 0.01,
"learning_rate": 0,
"loss": 3.0604,
"step": 12
},
{
"epoch": 0.01,
"learning_rate": 0,
"loss": 3.3619,
"step": 16
},
{
"epoch": 0.01,
"learning_rate": 0,
"loss": 3.1263,
"step": 20
},
{
"epoch": 0.01,
"learning_rate": 0.0,
"loss": 3.1305,
"step": 24
},
{
"epoch": 0.01,
"learning_rate": 1.5188198451414044e-06,
"loss": 1.2543,
"step": 28
},
{
"epoch": 0.01,
"learning_rate": 2.0735116692035353e-06,
"loss": 1.0489,
"step": 32
},
{
"epoch": 0.02,
"learning_rate": 2.420532010460384e-06,
"loss": 0.8238,
"step": 36
},
{
"epoch": 0.02,
"learning_rate": 2.6736916157045096e-06,
"loss": 0.6357,
"step": 40
},
{
"epoch": 0.02,
"learning_rate": 2.873103126046782e-06,
"loss": 0.9375,
"step": 44
},
{
"epoch": 0.02,
"learning_rate": 3.0376396902828088e-06,
"loss": 1.0042,
"step": 48
},
{
"epoch": 0.02,
"learning_rate": 3.177703030066258e-06,
"loss": 0.7668,
"step": 52
},
{
"epoch": 0.02,
"learning_rate": 3.299639602133279e-06,
"loss": 0.9168,
"step": 56
},
{
"epoch": 0.03,
"learning_rate": 3.407608173344298e-06,
"loss": 0.8997,
"step": 60
},
{
"epoch": 0.03,
"learning_rate": 3.5044824703694353e-06,
"loss": 1.0025,
"step": 64
},
{
"epoch": 0.03,
"learning_rate": 3.5923315143449394e-06,
"loss": 0.6651,
"step": 68
},
{
"epoch": 0.03,
"learning_rate": 3.6726945828900284e-06,
"loss": 0.699,
"step": 72
},
{
"epoch": 0.03,
"learning_rate": 3.746747919084026e-06,
"loss": 0.886,
"step": 76
},
{
"epoch": 0.03,
"learning_rate": 3.8154106182475455e-06,
"loss": 0.8937,
"step": 80
},
{
"epoch": 0.04,
"learning_rate": 3.8794145195304064e-06,
"loss": 0.6582,
"step": 84
},
{
"epoch": 0.04,
"learning_rate": 3.9393518556017876e-06,
"loss": 0.7974,
"step": 88
},
{
"epoch": 0.04,
"learning_rate": 3.995708648306083e-06,
"loss": 0.9472,
"step": 92
},
{
"epoch": 0.04,
"learning_rate": 4.0488886794862905e-06,
"loss": 0.8867,
"step": 96
},
{
"epoch": 0.04,
"learning_rate": 4.099231058976525e-06,
"loss": 1.061,
"step": 100
},
{
"epoch": 0.04,
"learning_rate": 4.1470233384070705e-06,
"loss": 0.7292,
"step": 104
},
{
"epoch": 0.05,
"learning_rate": 4.192511460845915e-06,
"loss": 0.9262,
"step": 108
},
{
"epoch": 0.05,
"learning_rate": 4.235907420412398e-06,
"loss": 0.7092,
"step": 112
},
{
"epoch": 0.05,
"learning_rate": 4.27739523661862e-06,
"loss": 0.7174,
"step": 116
},
{
"epoch": 0.05,
"learning_rate": 4.317135669700268e-06,
"loss": 0.8711,
"step": 120
},
{
"epoch": 0.05,
"learning_rate": 4.355269982485126e-06,
"loss": 0.9286,
"step": 124
},
{
"epoch": 0.05,
"learning_rate": 4.391922971188186e-06,
"loss": 0.9947,
"step": 128
},
{
"epoch": 0.06,
"learning_rate": 4.427205429264097e-06,
"loss": 0.8179,
"step": 132
},
{
"epoch": 0.06,
"learning_rate": 4.461216167003915e-06,
"loss": 0.6159,
"step": 136
},
{
"epoch": 0.06,
"learning_rate": 4.494043679663919e-06,
"loss": 0.8981,
"step": 140
},
{
"epoch": 0.06,
"learning_rate": 4.525767535063022e-06,
"loss": 0.9206,
"step": 144
},
{
"epoch": 0.06,
"learning_rate": 4.556459535424214e-06,
"loss": 0.7768,
"step": 148
},
{
"epoch": 0.06,
"learning_rate": 4.5861846961499975e-06,
"loss": 0.8515,
"step": 152
},
{
"epoch": 0.07,
"learning_rate": 4.6150020750907925e-06,
"loss": 0.8131,
"step": 156
},
{
"epoch": 0.07,
"learning_rate": 4.642965478900328e-06,
"loss": 1.1253,
"step": 160
},
{
"epoch": 0.07,
"learning_rate": 4.670124067711698e-06,
"loss": 0.9236,
"step": 164
},
{
"epoch": 0.07,
"learning_rate": 4.689992082159791e-06,
"loss": 0.8775,
"step": 168
},
{
"epoch": 0.07,
"learning_rate": 4.715848379822425e-06,
"loss": 0.8848,
"step": 172
},
{
"epoch": 0.07,
"learning_rate": 4.74101509336297e-06,
"loss": 0.829,
"step": 176
},
{
"epoch": 0.08,
"learning_rate": 4.76552805154028e-06,
"loss": 0.9367,
"step": 180
},
{
"epoch": 0.08,
"learning_rate": 4.789420361336724e-06,
"loss": 0.6566,
"step": 184
},
{
"epoch": 0.08,
"learning_rate": 4.812722676847563e-06,
"loss": 0.8104,
"step": 188
},
{
"epoch": 0.08,
"learning_rate": 4.835463435763974e-06,
"loss": 0.74,
"step": 192
},
{
"epoch": 0.08,
"learning_rate": 4.857669068026358e-06,
"loss": 0.6171,
"step": 196
},
{
"epoch": 0.09,
"learning_rate": 4.879364180487766e-06,
"loss": 1.0545,
"step": 200
},
{
"epoch": 0.09,
"learning_rate": 4.900571720823068e-06,
"loss": 0.9191,
"step": 204
},
{
"epoch": 0.09,
"learning_rate": 4.921313123421507e-06,
"loss": 0.6995,
"step": 208
},
{
"epoch": 0.09,
"learning_rate": 4.941608439588058e-06,
"loss": 0.8479,
"step": 212
},
{
"epoch": 0.09,
"learning_rate": 4.9614764540361516e-06,
"loss": 0.8976,
"step": 216
},
{
"epoch": 0.09,
"learning_rate": 4.980934789368156e-06,
"loss": 0.851,
"step": 220
},
{
"epoch": 0.1,
"learning_rate": 5e-06,
"loss": 0.7368,
"step": 224
},
{
"epoch": 0.1,
"learning_rate": 4.997812135355893e-06,
"loss": 1.0125,
"step": 228
},
{
"epoch": 0.1,
"learning_rate": 4.994894982497083e-06,
"loss": 0.9607,
"step": 232
},
{
"epoch": 0.1,
"learning_rate": 4.991977829638274e-06,
"loss": 0.6321,
"step": 236
},
{
"epoch": 0.1,
"learning_rate": 4.989060676779464e-06,
"loss": 0.7108,
"step": 240
},
{
"epoch": 0.1,
"learning_rate": 4.986143523920654e-06,
"loss": 0.7881,
"step": 244
},
{
"epoch": 0.11,
"learning_rate": 4.983226371061844e-06,
"loss": 0.7147,
"step": 248
},
{
"epoch": 0.11,
"learning_rate": 4.980309218203034e-06,
"loss": 0.9225,
"step": 252
},
{
"epoch": 0.11,
"learning_rate": 4.977392065344224e-06,
"loss": 0.8849,
"step": 256
},
{
"epoch": 0.11,
"learning_rate": 4.974474912485414e-06,
"loss": 0.942,
"step": 260
},
{
"epoch": 0.11,
"learning_rate": 4.971557759626604e-06,
"loss": 1.1629,
"step": 264
},
{
"epoch": 0.11,
"learning_rate": 4.968640606767795e-06,
"loss": 0.8053,
"step": 268
},
{
"epoch": 0.12,
"learning_rate": 4.965723453908986e-06,
"loss": 0.6558,
"step": 272
},
{
"epoch": 0.12,
"learning_rate": 4.962806301050176e-06,
"loss": 0.9606,
"step": 276
},
{
"epoch": 0.12,
"learning_rate": 4.959889148191366e-06,
"loss": 0.9533,
"step": 280
},
{
"epoch": 0.12,
"learning_rate": 4.956971995332556e-06,
"loss": 0.8758,
"step": 284
},
{
"epoch": 0.12,
"learning_rate": 4.954054842473746e-06,
"loss": 0.5825,
"step": 288
},
{
"epoch": 0.12,
"learning_rate": 4.951137689614936e-06,
"loss": 0.8221,
"step": 292
},
{
"epoch": 0.13,
"learning_rate": 4.948220536756126e-06,
"loss": 0.8954,
"step": 296
},
{
"epoch": 0.13,
"learning_rate": 4.945303383897317e-06,
"loss": 0.9549,
"step": 300
},
{
"epoch": 0.13,
"learning_rate": 4.942386231038507e-06,
"loss": 0.8821,
"step": 304
},
{
"epoch": 0.13,
"learning_rate": 4.939469078179697e-06,
"loss": 0.9861,
"step": 308
},
{
"epoch": 0.13,
"learning_rate": 4.936551925320887e-06,
"loss": 0.9681,
"step": 312
},
{
"epoch": 0.13,
"learning_rate": 4.933634772462078e-06,
"loss": 0.647,
"step": 316
},
{
"epoch": 0.14,
"learning_rate": 4.930717619603268e-06,
"loss": 0.8151,
"step": 320
},
{
"epoch": 0.14,
"learning_rate": 4.9278004667444575e-06,
"loss": 1.0373,
"step": 324
},
{
"epoch": 0.14,
"learning_rate": 4.9248833138856475e-06,
"loss": 0.8858,
"step": 328
},
{
"epoch": 0.14,
"learning_rate": 4.921966161026838e-06,
"loss": 0.8392,
"step": 332
},
{
"epoch": 0.14,
"learning_rate": 4.919049008168029e-06,
"loss": 1.0194,
"step": 336
},
{
"epoch": 0.14,
"learning_rate": 4.916131855309218e-06,
"loss": 0.7719,
"step": 340
},
{
"epoch": 0.15,
"learning_rate": 4.913214702450409e-06,
"loss": 0.6672,
"step": 344
},
{
"epoch": 0.15,
"learning_rate": 4.910297549591599e-06,
"loss": 0.8939,
"step": 348
},
{
"epoch": 0.15,
"learning_rate": 4.90738039673279e-06,
"loss": 0.6781,
"step": 352
},
{
"epoch": 0.15,
"learning_rate": 4.90446324387398e-06,
"loss": 0.7391,
"step": 356
},
{
"epoch": 0.15,
"learning_rate": 4.9015460910151695e-06,
"loss": 0.9476,
"step": 360
},
{
"epoch": 0.15,
"learning_rate": 4.8986289381563595e-06,
"loss": 0.6729,
"step": 364
},
{
"epoch": 0.16,
"learning_rate": 4.89571178529755e-06,
"loss": 0.8046,
"step": 368
},
{
"epoch": 0.16,
"learning_rate": 4.89279463243874e-06,
"loss": 0.8521,
"step": 372
},
{
"epoch": 0.16,
"learning_rate": 4.88987747957993e-06,
"loss": 0.8688,
"step": 376
},
{
"epoch": 0.16,
"learning_rate": 4.886960326721121e-06,
"loss": 0.875,
"step": 380
},
{
"epoch": 0.16,
"learning_rate": 4.884043173862311e-06,
"loss": 0.6733,
"step": 384
},
{
"epoch": 0.16,
"learning_rate": 4.881126021003501e-06,
"loss": 0.7755,
"step": 388
},
{
"epoch": 0.17,
"learning_rate": 4.878208868144691e-06,
"loss": 0.7047,
"step": 392
},
{
"epoch": 0.17,
"learning_rate": 4.8752917152858815e-06,
"loss": 0.6979,
"step": 396
},
{
"epoch": 0.17,
"learning_rate": 4.8723745624270714e-06,
"loss": 0.8033,
"step": 400
},
{
"epoch": 0.17,
"learning_rate": 4.869457409568261e-06,
"loss": 0.8039,
"step": 404
},
{
"epoch": 0.17,
"learning_rate": 4.866540256709452e-06,
"loss": 0.9366,
"step": 408
},
{
"epoch": 0.18,
"learning_rate": 4.863623103850642e-06,
"loss": 0.609,
"step": 412
},
{
"epoch": 0.18,
"learning_rate": 4.860705950991833e-06,
"loss": 0.7258,
"step": 416
},
{
"epoch": 0.18,
"learning_rate": 4.857788798133022e-06,
"loss": 1.018,
"step": 420
},
{
"epoch": 0.18,
"learning_rate": 4.854871645274213e-06,
"loss": 0.7338,
"step": 424
},
{
"epoch": 0.18,
"learning_rate": 4.851954492415403e-06,
"loss": 0.6595,
"step": 428
},
{
"epoch": 0.18,
"learning_rate": 4.8490373395565935e-06,
"loss": 0.7456,
"step": 432
},
{
"epoch": 0.19,
"learning_rate": 4.8461201866977834e-06,
"loss": 0.637,
"step": 436
},
{
"epoch": 0.19,
"learning_rate": 4.843203033838973e-06,
"loss": 0.8026,
"step": 440
},
{
"epoch": 0.19,
"learning_rate": 4.840285880980164e-06,
"loss": 0.8439,
"step": 444
},
{
"epoch": 0.19,
"learning_rate": 4.837368728121354e-06,
"loss": 0.646,
"step": 448
},
{
"epoch": 0.19,
"learning_rate": 4.834451575262544e-06,
"loss": 0.8644,
"step": 452
},
{
"epoch": 0.19,
"learning_rate": 4.831534422403734e-06,
"loss": 0.8595,
"step": 456
},
{
"epoch": 0.2,
"learning_rate": 4.828617269544925e-06,
"loss": 1.023,
"step": 460
},
{
"epoch": 0.2,
"learning_rate": 4.825700116686115e-06,
"loss": 0.7745,
"step": 464
},
{
"epoch": 0.2,
"learning_rate": 4.822782963827305e-06,
"loss": 0.9293,
"step": 468
},
{
"epoch": 0.2,
"learning_rate": 4.8198658109684954e-06,
"loss": 0.7542,
"step": 472
},
{
"epoch": 0.2,
"learning_rate": 4.816948658109685e-06,
"loss": 1.0059,
"step": 476
},
{
"epoch": 0.2,
"learning_rate": 4.814031505250875e-06,
"loss": 0.8302,
"step": 480
},
{
"epoch": 0.21,
"learning_rate": 4.811114352392065e-06,
"loss": 0.809,
"step": 484
},
{
"epoch": 0.21,
"learning_rate": 4.808197199533256e-06,
"loss": 0.7628,
"step": 488
},
{
"epoch": 0.21,
"learning_rate": 4.805280046674446e-06,
"loss": 0.6753,
"step": 492
},
{
"epoch": 0.21,
"learning_rate": 4.802362893815637e-06,
"loss": 0.8719,
"step": 496
},
{
"epoch": 0.21,
"learning_rate": 4.799445740956827e-06,
"loss": 0.7059,
"step": 500
},
{
"epoch": 0.21,
"learning_rate": 4.796528588098017e-06,
"loss": 0.6902,
"step": 504
},
{
"epoch": 0.22,
"learning_rate": 4.793611435239207e-06,
"loss": 1.104,
"step": 508
},
{
"epoch": 0.22,
"learning_rate": 4.790694282380397e-06,
"loss": 0.837,
"step": 512
},
{
"epoch": 0.22,
"learning_rate": 4.787777129521587e-06,
"loss": 0.794,
"step": 516
},
{
"epoch": 0.22,
"learning_rate": 4.784859976662777e-06,
"loss": 0.7997,
"step": 520
},
{
"epoch": 0.22,
"learning_rate": 4.781942823803968e-06,
"loss": 0.6738,
"step": 524
},
{
"epoch": 0.22,
"learning_rate": 4.779025670945158e-06,
"loss": 0.963,
"step": 528
},
{
"epoch": 0.23,
"learning_rate": 4.776108518086348e-06,
"loss": 0.6323,
"step": 532
},
{
"epoch": 0.23,
"learning_rate": 4.773191365227539e-06,
"loss": 0.7243,
"step": 536
},
{
"epoch": 0.23,
"learning_rate": 4.770274212368729e-06,
"loss": 0.8873,
"step": 540
},
{
"epoch": 0.23,
"learning_rate": 4.7673570595099186e-06,
"loss": 0.7435,
"step": 544
},
{
"epoch": 0.23,
"learning_rate": 4.7644399066511085e-06,
"loss": 0.8006,
"step": 548
},
{
"epoch": 0.23,
"learning_rate": 4.761522753792299e-06,
"loss": 0.9557,
"step": 552
},
{
"epoch": 0.24,
"learning_rate": 4.758605600933489e-06,
"loss": 0.7131,
"step": 556
},
{
"epoch": 0.24,
"learning_rate": 4.755688448074679e-06,
"loss": 0.8007,
"step": 560
},
{
"epoch": 0.24,
"learning_rate": 4.752771295215869e-06,
"loss": 0.8634,
"step": 564
},
{
"epoch": 0.24,
"learning_rate": 4.74985414235706e-06,
"loss": 0.7848,
"step": 568
},
{
"epoch": 0.24,
"learning_rate": 4.746936989498251e-06,
"loss": 0.6159,
"step": 572
},
{
"epoch": 0.24,
"learning_rate": 4.744019836639441e-06,
"loss": 0.7327,
"step": 576
},
{
"epoch": 0.25,
"learning_rate": 4.7411026837806305e-06,
"loss": 0.733,
"step": 580
},
{
"epoch": 0.25,
"learning_rate": 4.7381855309218205e-06,
"loss": 0.8081,
"step": 584
},
{
"epoch": 0.25,
"learning_rate": 4.735268378063011e-06,
"loss": 0.9809,
"step": 588
},
{
"epoch": 0.25,
"learning_rate": 4.732351225204201e-06,
"loss": 0.7101,
"step": 592
},
{
"epoch": 0.25,
"learning_rate": 4.729434072345391e-06,
"loss": 0.6195,
"step": 596
},
{
"epoch": 0.26,
"learning_rate": 4.726516919486582e-06,
"loss": 0.6483,
"step": 600
},
{
"epoch": 0.26,
"learning_rate": 4.723599766627772e-06,
"loss": 0.819,
"step": 604
},
{
"epoch": 0.26,
"learning_rate": 4.720682613768962e-06,
"loss": 0.7032,
"step": 608
},
{
"epoch": 0.26,
"learning_rate": 4.717765460910152e-06,
"loss": 0.7933,
"step": 612
},
{
"epoch": 0.26,
"learning_rate": 4.7148483080513425e-06,
"loss": 0.9605,
"step": 616
},
{
"epoch": 0.26,
"learning_rate": 4.7119311551925325e-06,
"loss": 0.7783,
"step": 620
},
{
"epoch": 0.27,
"learning_rate": 4.709014002333722e-06,
"loss": 0.7616,
"step": 624
},
{
"epoch": 0.27,
"learning_rate": 4.706096849474912e-06,
"loss": 0.6611,
"step": 628
},
{
"epoch": 0.27,
"learning_rate": 4.703179696616103e-06,
"loss": 0.656,
"step": 632
},
{
"epoch": 0.27,
"learning_rate": 4.700262543757294e-06,
"loss": 0.731,
"step": 636
},
{
"epoch": 0.27,
"learning_rate": 4.697345390898483e-06,
"loss": 0.6204,
"step": 640
},
{
"epoch": 0.27,
"learning_rate": 4.694428238039674e-06,
"loss": 0.695,
"step": 644
},
{
"epoch": 0.28,
"learning_rate": 4.691511085180864e-06,
"loss": 0.9691,
"step": 648
},
{
"epoch": 0.28,
"learning_rate": 4.6885939323220545e-06,
"loss": 0.5099,
"step": 652
},
{
"epoch": 0.28,
"learning_rate": 4.6856767794632445e-06,
"loss": 0.6427,
"step": 656
},
{
"epoch": 0.28,
"learning_rate": 4.682759626604434e-06,
"loss": 0.9209,
"step": 660
},
{
"epoch": 0.28,
"learning_rate": 4.679842473745624e-06,
"loss": 0.891,
"step": 664
},
{
"epoch": 0.28,
"learning_rate": 4.676925320886815e-06,
"loss": 0.7068,
"step": 668
},
{
"epoch": 0.29,
"learning_rate": 4.674008168028005e-06,
"loss": 0.4388,
"step": 672
},
{
"epoch": 0.29,
"learning_rate": 4.671091015169195e-06,
"loss": 0.7769,
"step": 676
},
{
"epoch": 0.29,
"learning_rate": 4.668173862310386e-06,
"loss": 1.03,
"step": 680
},
{
"epoch": 0.29,
"learning_rate": 4.665256709451576e-06,
"loss": 0.9345,
"step": 684
},
{
"epoch": 0.29,
"learning_rate": 4.662339556592766e-06,
"loss": 0.8261,
"step": 688
},
{
"epoch": 0.29,
"learning_rate": 4.659422403733956e-06,
"loss": 0.5926,
"step": 692
},
{
"epoch": 0.3,
"learning_rate": 4.656505250875146e-06,
"loss": 0.618,
"step": 696
},
{
"epoch": 0.3,
"learning_rate": 4.653588098016336e-06,
"loss": 0.8133,
"step": 700
},
{
"epoch": 0.3,
"learning_rate": 4.650670945157526e-06,
"loss": 0.7822,
"step": 704
},
{
"epoch": 0.3,
"learning_rate": 4.647753792298717e-06,
"loss": 0.6367,
"step": 708
},
{
"epoch": 0.3,
"learning_rate": 4.644836639439907e-06,
"loss": 0.7457,
"step": 712
},
{
"epoch": 0.3,
"learning_rate": 4.641919486581098e-06,
"loss": 0.5508,
"step": 716
},
{
"epoch": 0.31,
"learning_rate": 4.639002333722287e-06,
"loss": 0.8247,
"step": 720
},
{
"epoch": 0.31,
"learning_rate": 4.636085180863478e-06,
"loss": 0.8024,
"step": 724
},
{
"epoch": 0.31,
"learning_rate": 4.633168028004668e-06,
"loss": 0.5489,
"step": 728
},
{
"epoch": 0.31,
"learning_rate": 4.630250875145858e-06,
"loss": 0.5789,
"step": 732
},
{
"epoch": 0.31,
"learning_rate": 4.627333722287048e-06,
"loss": 0.7541,
"step": 736
},
{
"epoch": 0.31,
"learning_rate": 4.624416569428238e-06,
"loss": 0.6148,
"step": 740
},
{
"epoch": 0.32,
"learning_rate": 4.621499416569429e-06,
"loss": 0.98,
"step": 744
},
{
"epoch": 0.32,
"learning_rate": 4.618582263710619e-06,
"loss": 0.635,
"step": 748
},
{
"epoch": 0.32,
"learning_rate": 4.615665110851809e-06,
"loss": 0.9664,
"step": 752
},
{
"epoch": 0.32,
"learning_rate": 4.612747957992999e-06,
"loss": 0.8736,
"step": 756
},
{
"epoch": 0.32,
"learning_rate": 4.60983080513419e-06,
"loss": 0.6281,
"step": 760
},
{
"epoch": 0.32,
"learning_rate": 4.60691365227538e-06,
"loss": 0.9843,
"step": 764
},
{
"epoch": 0.33,
"learning_rate": 4.6039964994165695e-06,
"loss": 0.7795,
"step": 768
},
{
"epoch": 0.33,
"learning_rate": 4.60107934655776e-06,
"loss": 0.8437,
"step": 772
},
{
"epoch": 0.33,
"learning_rate": 4.59816219369895e-06,
"loss": 0.6626,
"step": 776
},
{
"epoch": 0.33,
"learning_rate": 4.59524504084014e-06,
"loss": 0.8164,
"step": 780
},
{
"epoch": 0.33,
"learning_rate": 4.59232788798133e-06,
"loss": 0.7359,
"step": 784
},
{
"epoch": 0.34,
"learning_rate": 4.589410735122521e-06,
"loss": 0.7709,
"step": 788
},
{
"epoch": 0.34,
"learning_rate": 4.586493582263711e-06,
"loss": 0.8953,
"step": 792
},
{
"epoch": 0.34,
"learning_rate": 4.583576429404902e-06,
"loss": 0.57,
"step": 796
},
{
"epoch": 0.34,
"learning_rate": 4.5806592765460916e-06,
"loss": 0.9151,
"step": 800
},
{
"epoch": 0.34,
"learning_rate": 4.5777421236872815e-06,
"loss": 0.7838,
"step": 804
},
{
"epoch": 0.34,
"learning_rate": 4.574824970828472e-06,
"loss": 0.8183,
"step": 808
},
{
"epoch": 0.35,
"learning_rate": 4.571907817969662e-06,
"loss": 0.9169,
"step": 812
},
{
"epoch": 0.35,
"learning_rate": 4.568990665110852e-06,
"loss": 0.6786,
"step": 816
},
{
"epoch": 0.35,
"learning_rate": 4.566073512252042e-06,
"loss": 0.7783,
"step": 820
},
{
"epoch": 0.35,
"learning_rate": 4.563156359393233e-06,
"loss": 0.811,
"step": 824
},
{
"epoch": 0.35,
"learning_rate": 4.560239206534423e-06,
"loss": 0.8965,
"step": 828
},
{
"epoch": 0.35,
"learning_rate": 4.557322053675613e-06,
"loss": 0.7526,
"step": 832
},
{
"epoch": 0.36,
"learning_rate": 4.5544049008168036e-06,
"loss": 0.6549,
"step": 836
},
{
"epoch": 0.36,
"learning_rate": 4.5514877479579935e-06,
"loss": 0.77,
"step": 840
},
{
"epoch": 0.36,
"learning_rate": 4.5485705950991834e-06,
"loss": 0.5399,
"step": 844
},
{
"epoch": 0.36,
"learning_rate": 4.545653442240373e-06,
"loss": 0.7214,
"step": 848
},
{
"epoch": 0.36,
"learning_rate": 4.542736289381564e-06,
"loss": 0.7454,
"step": 852
},
{
"epoch": 0.36,
"learning_rate": 4.539819136522754e-06,
"loss": 0.6774,
"step": 856
},
{
"epoch": 0.37,
"learning_rate": 4.536901983663944e-06,
"loss": 0.855,
"step": 860
},
{
"epoch": 0.37,
"learning_rate": 4.533984830805134e-06,
"loss": 0.8038,
"step": 864
},
{
"epoch": 0.37,
"learning_rate": 4.531067677946325e-06,
"loss": 0.6897,
"step": 868
},
{
"epoch": 0.37,
"learning_rate": 4.5281505250875156e-06,
"loss": 0.774,
"step": 872
},
{
"epoch": 0.37,
"learning_rate": 4.525233372228705e-06,
"loss": 0.9166,
"step": 876
},
{
"epoch": 0.37,
"learning_rate": 4.5223162193698954e-06,
"loss": 0.5643,
"step": 880
},
{
"epoch": 0.38,
"learning_rate": 4.519399066511085e-06,
"loss": 0.5427,
"step": 884
},
{
"epoch": 0.38,
"learning_rate": 4.516481913652276e-06,
"loss": 0.6834,
"step": 888
},
{
"epoch": 0.38,
"learning_rate": 4.513564760793466e-06,
"loss": 0.8979,
"step": 892
},
{
"epoch": 0.38,
"learning_rate": 4.510647607934656e-06,
"loss": 0.8804,
"step": 896
},
{
"epoch": 0.38,
"learning_rate": 4.507730455075847e-06,
"loss": 0.6715,
"step": 900
},
{
"epoch": 0.38,
"learning_rate": 4.504813302217037e-06,
"loss": 0.8331,
"step": 904
},
{
"epoch": 0.39,
"learning_rate": 4.501896149358227e-06,
"loss": 0.9623,
"step": 908
},
{
"epoch": 0.39,
"learning_rate": 4.498978996499417e-06,
"loss": 0.7293,
"step": 912
},
{
"epoch": 0.39,
"learning_rate": 4.496061843640607e-06,
"loss": 0.6704,
"step": 916
},
{
"epoch": 0.39,
"learning_rate": 4.493144690781797e-06,
"loss": 0.6452,
"step": 920
},
{
"epoch": 0.39,
"learning_rate": 4.490227537922987e-06,
"loss": 0.7556,
"step": 924
},
{
"epoch": 0.39,
"learning_rate": 4.487310385064177e-06,
"loss": 0.6235,
"step": 928
},
{
"epoch": 0.4,
"learning_rate": 4.484393232205368e-06,
"loss": 0.789,
"step": 932
},
{
"epoch": 0.4,
"learning_rate": 4.481476079346558e-06,
"loss": 0.7959,
"step": 936
},
{
"epoch": 0.4,
"learning_rate": 4.478558926487748e-06,
"loss": 0.8286,
"step": 940
},
{
"epoch": 0.4,
"learning_rate": 4.475641773628939e-06,
"loss": 0.8604,
"step": 944
},
{
"epoch": 0.4,
"learning_rate": 4.472724620770129e-06,
"loss": 0.6135,
"step": 948
},
{
"epoch": 0.4,
"learning_rate": 4.469807467911319e-06,
"loss": 0.4793,
"step": 952
},
{
"epoch": 0.41,
"learning_rate": 4.4668903150525085e-06,
"loss": 0.7225,
"step": 956
},
{
"epoch": 0.41,
"learning_rate": 4.463973162193699e-06,
"loss": 0.5255,
"step": 960
},
{
"epoch": 0.41,
"learning_rate": 4.461056009334889e-06,
"loss": 0.4695,
"step": 964
},
{
"epoch": 0.41,
"learning_rate": 4.45813885647608e-06,
"loss": 0.8741,
"step": 968
},
{
"epoch": 0.41,
"learning_rate": 4.45522170361727e-06,
"loss": 0.6311,
"step": 972
},
{
"epoch": 0.41,
"learning_rate": 4.45230455075846e-06,
"loss": 0.6423,
"step": 976
},
{
"epoch": 0.42,
"learning_rate": 4.449387397899651e-06,
"loss": 0.7879,
"step": 980
},
{
"epoch": 0.42,
"learning_rate": 4.446470245040841e-06,
"loss": 0.9635,
"step": 984
},
{
"epoch": 0.42,
"learning_rate": 4.4435530921820306e-06,
"loss": 0.6497,
"step": 988
},
{
"epoch": 0.42,
"learning_rate": 4.4406359393232205e-06,
"loss": 0.673,
"step": 992
},
{
"epoch": 0.42,
"learning_rate": 4.437718786464411e-06,
"loss": 0.7035,
"step": 996
},
{
"epoch": 0.43,
"learning_rate": 4.434801633605601e-06,
"loss": 0.7664,
"step": 1000
},
{
"epoch": 0.43,
"learning_rate": 4.431884480746791e-06,
"loss": 0.6817,
"step": 1004
},
{
"epoch": 0.43,
"learning_rate": 4.428967327887982e-06,
"loss": 0.7443,
"step": 1008
},
{
"epoch": 0.43,
"learning_rate": 4.426050175029172e-06,
"loss": 0.7205,
"step": 1012
},
{
"epoch": 0.43,
"learning_rate": 4.423133022170362e-06,
"loss": 0.6672,
"step": 1016
},
{
"epoch": 0.43,
"learning_rate": 4.420215869311552e-06,
"loss": 0.7427,
"step": 1020
},
{
"epoch": 0.44,
"learning_rate": 4.4172987164527425e-06,
"loss": 0.659,
"step": 1024
},
{
"epoch": 0.44,
"learning_rate": 4.4143815635939325e-06,
"loss": 0.832,
"step": 1028
},
{
"epoch": 0.44,
"learning_rate": 4.411464410735123e-06,
"loss": 0.7823,
"step": 1032
},
{
"epoch": 0.44,
"learning_rate": 4.408547257876313e-06,
"loss": 0.712,
"step": 1036
},
{
"epoch": 0.44,
"learning_rate": 4.405630105017503e-06,
"loss": 0.8165,
"step": 1040
},
{
"epoch": 0.44,
"learning_rate": 4.402712952158694e-06,
"loss": 0.6221,
"step": 1044
},
{
"epoch": 0.45,
"learning_rate": 4.399795799299884e-06,
"loss": 0.684,
"step": 1048
},
{
"epoch": 0.45,
"learning_rate": 4.396878646441074e-06,
"loss": 0.6845,
"step": 1052
},
{
"epoch": 0.45,
"learning_rate": 4.393961493582264e-06,
"loss": 0.8462,
"step": 1056
},
{
"epoch": 0.45,
"learning_rate": 4.3910443407234545e-06,
"loss": 0.6242,
"step": 1060
},
{
"epoch": 0.45,
"learning_rate": 4.3881271878646445e-06,
"loss": 0.5025,
"step": 1064
},
{
"epoch": 0.45,
"learning_rate": 4.385210035005834e-06,
"loss": 0.6651,
"step": 1068
},
{
"epoch": 0.46,
"learning_rate": 4.382292882147025e-06,
"loss": 0.7653,
"step": 1072
},
{
"epoch": 0.46,
"learning_rate": 4.379375729288215e-06,
"loss": 0.9375,
"step": 1076
},
{
"epoch": 0.46,
"learning_rate": 4.376458576429405e-06,
"loss": 0.6936,
"step": 1080
},
{
"epoch": 0.46,
"learning_rate": 4.373541423570595e-06,
"loss": 0.6709,
"step": 1084
},
{
"epoch": 0.46,
"learning_rate": 4.370624270711786e-06,
"loss": 0.7321,
"step": 1088
},
{
"epoch": 0.46,
"learning_rate": 4.367707117852976e-06,
"loss": 1.0432,
"step": 1092
},
{
"epoch": 0.47,
"learning_rate": 4.364789964994166e-06,
"loss": 0.6687,
"step": 1096
},
{
"epoch": 0.47,
"learning_rate": 4.3618728121353565e-06,
"loss": 0.9193,
"step": 1100
},
{
"epoch": 0.47,
"learning_rate": 4.358955659276546e-06,
"loss": 0.5975,
"step": 1104
},
{
"epoch": 0.47,
"learning_rate": 4.356038506417737e-06,
"loss": 0.7527,
"step": 1108
},
{
"epoch": 0.47,
"learning_rate": 4.353121353558927e-06,
"loss": 0.9754,
"step": 1112
},
{
"epoch": 0.47,
"learning_rate": 4.350204200700117e-06,
"loss": 0.8299,
"step": 1116
},
{
"epoch": 0.48,
"learning_rate": 4.347287047841307e-06,
"loss": 0.8524,
"step": 1120
},
{
"epoch": 0.48,
"learning_rate": 4.344369894982498e-06,
"loss": 0.8139,
"step": 1124
},
{
"epoch": 0.48,
"learning_rate": 4.341452742123688e-06,
"loss": 0.6144,
"step": 1128
},
{
"epoch": 0.48,
"learning_rate": 4.338535589264878e-06,
"loss": 0.8328,
"step": 1132
},
{
"epoch": 0.48,
"learning_rate": 4.3356184364060684e-06,
"loss": 0.5855,
"step": 1136
},
{
"epoch": 0.48,
"learning_rate": 4.332701283547258e-06,
"loss": 0.8227,
"step": 1140
},
{
"epoch": 0.49,
"learning_rate": 4.329784130688448e-06,
"loss": 0.6442,
"step": 1144
},
{
"epoch": 0.49,
"learning_rate": 4.326866977829638e-06,
"loss": 0.5148,
"step": 1148
},
{
"epoch": 0.49,
"learning_rate": 4.323949824970829e-06,
"loss": 0.8127,
"step": 1152
},
{
"epoch": 0.49,
"learning_rate": 4.321032672112019e-06,
"loss": 0.8909,
"step": 1156
},
{
"epoch": 0.49,
"learning_rate": 4.318115519253209e-06,
"loss": 0.6379,
"step": 1160
},
{
"epoch": 0.49,
"learning_rate": 4.315198366394399e-06,
"loss": 0.5912,
"step": 1164
},
{
"epoch": 0.5,
"learning_rate": 4.31228121353559e-06,
"loss": 0.6885,
"step": 1168
},
{
"epoch": 0.5,
"learning_rate": 4.3093640606767804e-06,
"loss": 0.8584,
"step": 1172
},
{
"epoch": 0.5,
"learning_rate": 4.3064469078179695e-06,
"loss": 0.6441,
"step": 1176
},
{
"epoch": 0.5,
"learning_rate": 4.30352975495916e-06,
"loss": 0.6933,
"step": 1180
},
{
"epoch": 0.5,
"learning_rate": 4.30061260210035e-06,
"loss": 0.6647,
"step": 1184
},
{
"epoch": 0.51,
"learning_rate": 4.297695449241541e-06,
"loss": 0.7737,
"step": 1188
},
{
"epoch": 0.51,
"learning_rate": 4.294778296382731e-06,
"loss": 0.8209,
"step": 1192
},
{
"epoch": 0.51,
"learning_rate": 4.291861143523921e-06,
"loss": 0.9695,
"step": 1196
},
{
"epoch": 0.51,
"learning_rate": 4.288943990665112e-06,
"loss": 0.855,
"step": 1200
},
{
"epoch": 0.51,
"learning_rate": 4.286026837806302e-06,
"loss": 0.7051,
"step": 1204
},
{
"epoch": 0.51,
"learning_rate": 4.283109684947492e-06,
"loss": 0.4676,
"step": 1208
},
{
"epoch": 0.52,
"learning_rate": 4.2801925320886815e-06,
"loss": 0.6563,
"step": 1212
},
{
"epoch": 0.52,
"learning_rate": 4.277275379229872e-06,
"loss": 0.6791,
"step": 1216
},
{
"epoch": 0.52,
"learning_rate": 4.274358226371062e-06,
"loss": 0.6531,
"step": 1220
},
{
"epoch": 0.52,
"learning_rate": 4.271441073512252e-06,
"loss": 0.7638,
"step": 1224
},
{
"epoch": 0.52,
"learning_rate": 4.268523920653442e-06,
"loss": 0.8035,
"step": 1228
},
{
"epoch": 0.52,
"learning_rate": 4.265606767794633e-06,
"loss": 0.6947,
"step": 1232
},
{
"epoch": 0.53,
"learning_rate": 4.262689614935823e-06,
"loss": 0.4111,
"step": 1236
},
{
"epoch": 0.53,
"learning_rate": 4.259772462077013e-06,
"loss": 0.787,
"step": 1240
},
{
"epoch": 0.53,
"learning_rate": 4.2568553092182036e-06,
"loss": 0.7891,
"step": 1244
},
{
"epoch": 0.53,
"learning_rate": 4.2539381563593935e-06,
"loss": 0.6001,
"step": 1248
},
{
"epoch": 0.53,
"learning_rate": 4.251021003500584e-06,
"loss": 0.5617,
"step": 1252
},
{
"epoch": 0.53,
"learning_rate": 4.248103850641773e-06,
"loss": 0.9523,
"step": 1256
},
{
"epoch": 0.54,
"learning_rate": 4.245186697782964e-06,
"loss": 0.8455,
"step": 1260
},
{
"epoch": 0.54,
"learning_rate": 4.242269544924154e-06,
"loss": 0.7674,
"step": 1264
},
{
"epoch": 0.54,
"learning_rate": 4.239352392065345e-06,
"loss": 0.5396,
"step": 1268
},
{
"epoch": 0.54,
"learning_rate": 4.236435239206535e-06,
"loss": 0.5736,
"step": 1272
},
{
"epoch": 0.54,
"learning_rate": 4.233518086347725e-06,
"loss": 0.5877,
"step": 1276
},
{
"epoch": 0.54,
"learning_rate": 4.2306009334889156e-06,
"loss": 0.6466,
"step": 1280
},
{
"epoch": 0.55,
"learning_rate": 4.2276837806301055e-06,
"loss": 0.4551,
"step": 1284
},
{
"epoch": 0.55,
"learning_rate": 4.2247666277712954e-06,
"loss": 0.5959,
"step": 1288
},
{
"epoch": 0.55,
"learning_rate": 4.221849474912485e-06,
"loss": 0.6903,
"step": 1292
},
{
"epoch": 0.55,
"learning_rate": 4.218932322053676e-06,
"loss": 0.7372,
"step": 1296
},
{
"epoch": 0.55,
"learning_rate": 4.216015169194866e-06,
"loss": 0.7741,
"step": 1300
},
{
"epoch": 0.55,
"learning_rate": 4.213098016336056e-06,
"loss": 0.6861,
"step": 1304
},
{
"epoch": 0.56,
"learning_rate": 4.210180863477247e-06,
"loss": 0.7325,
"step": 1308
},
{
"epoch": 0.56,
"learning_rate": 4.207263710618437e-06,
"loss": 0.6446,
"step": 1312
},
{
"epoch": 0.56,
"learning_rate": 4.204346557759627e-06,
"loss": 0.61,
"step": 1316
},
{
"epoch": 0.56,
"learning_rate": 4.201429404900817e-06,
"loss": 0.6286,
"step": 1320
},
{
"epoch": 0.56,
"learning_rate": 4.1985122520420074e-06,
"loss": 0.7191,
"step": 1324
},
{
"epoch": 0.56,
"learning_rate": 4.195595099183197e-06,
"loss": 0.8621,
"step": 1328
},
{
"epoch": 0.57,
"learning_rate": 4.192677946324388e-06,
"loss": 0.7634,
"step": 1332
},
{
"epoch": 0.57,
"learning_rate": 4.189760793465578e-06,
"loss": 0.7067,
"step": 1336
},
{
"epoch": 0.57,
"learning_rate": 4.186843640606768e-06,
"loss": 0.6049,
"step": 1340
},
{
"epoch": 0.57,
"learning_rate": 4.183926487747959e-06,
"loss": 0.3731,
"step": 1344
},
{
"epoch": 0.57,
"learning_rate": 4.181009334889149e-06,
"loss": 0.5955,
"step": 1348
},
{
"epoch": 0.57,
"learning_rate": 4.178092182030339e-06,
"loss": 0.5576,
"step": 1352
},
{
"epoch": 0.58,
"learning_rate": 4.175175029171529e-06,
"loss": 0.6731,
"step": 1356
},
{
"epoch": 0.58,
"learning_rate": 4.172257876312719e-06,
"loss": 0.8847,
"step": 1360
},
{
"epoch": 0.58,
"learning_rate": 4.169340723453909e-06,
"loss": 0.6158,
"step": 1364
},
{
"epoch": 0.58,
"learning_rate": 4.166423570595099e-06,
"loss": 0.7721,
"step": 1368
},
{
"epoch": 0.58,
"learning_rate": 4.16350641773629e-06,
"loss": 0.6517,
"step": 1372
},
{
"epoch": 0.59,
"learning_rate": 4.16058926487748e-06,
"loss": 0.6014,
"step": 1376
},
{
"epoch": 0.59,
"learning_rate": 4.15767211201867e-06,
"loss": 0.8049,
"step": 1380
},
{
"epoch": 0.59,
"learning_rate": 4.15475495915986e-06,
"loss": 0.7245,
"step": 1384
},
{
"epoch": 0.59,
"learning_rate": 4.151837806301051e-06,
"loss": 0.8411,
"step": 1388
},
{
"epoch": 0.59,
"learning_rate": 4.148920653442241e-06,
"loss": 0.6463,
"step": 1392
},
{
"epoch": 0.59,
"learning_rate": 4.1460035005834306e-06,
"loss": 0.5053,
"step": 1396
},
{
"epoch": 0.6,
"learning_rate": 4.143086347724621e-06,
"loss": 0.4919,
"step": 1400
},
{
"epoch": 0.6,
"learning_rate": 4.140169194865811e-06,
"loss": 0.7352,
"step": 1404
},
{
"epoch": 0.6,
"learning_rate": 4.137252042007002e-06,
"loss": 0.5577,
"step": 1408
},
{
"epoch": 0.6,
"learning_rate": 4.134334889148192e-06,
"loss": 0.7224,
"step": 1412
},
{
"epoch": 0.6,
"learning_rate": 4.131417736289382e-06,
"loss": 0.9195,
"step": 1416
},
{
"epoch": 0.6,
"learning_rate": 4.128500583430572e-06,
"loss": 0.7652,
"step": 1420
},
{
"epoch": 0.61,
"learning_rate": 4.125583430571763e-06,
"loss": 0.756,
"step": 1424
},
{
"epoch": 0.61,
"learning_rate": 4.122666277712953e-06,
"loss": 0.7378,
"step": 1428
},
{
"epoch": 0.61,
"learning_rate": 4.1197491248541425e-06,
"loss": 0.7264,
"step": 1432
},
{
"epoch": 0.61,
"learning_rate": 4.116831971995333e-06,
"loss": 0.7489,
"step": 1436
},
{
"epoch": 0.61,
"learning_rate": 4.113914819136523e-06,
"loss": 0.661,
"step": 1440
},
{
"epoch": 0.61,
"learning_rate": 4.110997666277713e-06,
"loss": 0.6761,
"step": 1444
},
{
"epoch": 0.62,
"learning_rate": 4.108080513418903e-06,
"loss": 0.62,
"step": 1448
},
{
"epoch": 0.62,
"learning_rate": 4.105163360560094e-06,
"loss": 0.5477,
"step": 1452
},
{
"epoch": 0.62,
"learning_rate": 4.102246207701284e-06,
"loss": 0.7107,
"step": 1456
},
{
"epoch": 0.62,
"learning_rate": 4.099329054842474e-06,
"loss": 0.9508,
"step": 1460
},
{
"epoch": 0.62,
"learning_rate": 4.096411901983664e-06,
"loss": 0.7668,
"step": 1464
},
{
"epoch": 0.62,
"learning_rate": 4.0934947491248545e-06,
"loss": 0.7971,
"step": 1468
},
{
"epoch": 0.63,
"learning_rate": 4.0905775962660445e-06,
"loss": 0.7532,
"step": 1472
},
{
"epoch": 0.63,
"learning_rate": 4.087660443407234e-06,
"loss": 0.6721,
"step": 1476
},
{
"epoch": 0.63,
"learning_rate": 4.084743290548425e-06,
"loss": 0.702,
"step": 1480
},
{
"epoch": 0.63,
"learning_rate": 4.081826137689615e-06,
"loss": 0.5931,
"step": 1484
},
{
"epoch": 0.63,
"learning_rate": 4.078908984830806e-06,
"loss": 0.8941,
"step": 1488
},
{
"epoch": 0.63,
"learning_rate": 4.075991831971996e-06,
"loss": 0.4728,
"step": 1492
},
{
"epoch": 0.64,
"learning_rate": 4.073074679113186e-06,
"loss": 0.5366,
"step": 1496
},
{
"epoch": 0.64,
"learning_rate": 4.070157526254377e-06,
"loss": 0.6567,
"step": 1500
},
{
"epoch": 0.64,
"learning_rate": 4.0672403733955665e-06,
"loss": 0.6594,
"step": 1504
},
{
"epoch": 0.64,
"learning_rate": 4.0643232205367565e-06,
"loss": 0.5096,
"step": 1508
},
{
"epoch": 0.64,
"learning_rate": 4.061406067677946e-06,
"loss": 0.6852,
"step": 1512
},
{
"epoch": 0.64,
"learning_rate": 4.058488914819137e-06,
"loss": 0.7667,
"step": 1516
},
{
"epoch": 0.65,
"learning_rate": 4.055571761960327e-06,
"loss": 0.662,
"step": 1520
},
{
"epoch": 0.65,
"learning_rate": 4.052654609101517e-06,
"loss": 0.6275,
"step": 1524
},
{
"epoch": 0.65,
"learning_rate": 4.049737456242707e-06,
"loss": 0.715,
"step": 1528
},
{
"epoch": 0.65,
"learning_rate": 4.046820303383898e-06,
"loss": 0.4791,
"step": 1532
},
{
"epoch": 0.65,
"learning_rate": 4.043903150525088e-06,
"loss": 0.6893,
"step": 1536
},
{
"epoch": 0.65,
"learning_rate": 4.040985997666278e-06,
"loss": 0.4941,
"step": 1540
},
{
"epoch": 0.66,
"learning_rate": 4.0380688448074685e-06,
"loss": 0.5576,
"step": 1544
},
{
"epoch": 0.66,
"learning_rate": 4.035151691948658e-06,
"loss": 0.7089,
"step": 1548
},
{
"epoch": 0.66,
"learning_rate": 4.032234539089848e-06,
"loss": 0.4944,
"step": 1552
},
{
"epoch": 0.66,
"learning_rate": 4.029317386231038e-06,
"loss": 0.4799,
"step": 1556
},
{
"epoch": 0.66,
"learning_rate": 4.026400233372229e-06,
"loss": 0.6483,
"step": 1560
},
{
"epoch": 0.66,
"learning_rate": 4.023483080513419e-06,
"loss": 0.7882,
"step": 1564
},
{
"epoch": 0.67,
"learning_rate": 4.02056592765461e-06,
"loss": 0.6979,
"step": 1568
},
{
"epoch": 0.67,
"learning_rate": 4.0176487747958e-06,
"loss": 0.6079,
"step": 1572
},
{
"epoch": 0.67,
"learning_rate": 4.01473162193699e-06,
"loss": 0.7046,
"step": 1576
},
{
"epoch": 0.67,
"learning_rate": 4.0118144690781804e-06,
"loss": 0.5994,
"step": 1580
},
{
"epoch": 0.67,
"learning_rate": 4.00889731621937e-06,
"loss": 0.628,
"step": 1584
},
{
"epoch": 0.68,
"learning_rate": 4.00598016336056e-06,
"loss": 0.4581,
"step": 1588
},
{
"epoch": 0.68,
"learning_rate": 4.00306301050175e-06,
"loss": 0.6283,
"step": 1592
},
{
"epoch": 0.68,
"learning_rate": 4.000145857642941e-06,
"loss": 0.6736,
"step": 1596
},
{
"epoch": 0.68,
"learning_rate": 3.997228704784131e-06,
"loss": 0.6118,
"step": 1600
},
{
"epoch": 0.68,
"learning_rate": 3.994311551925321e-06,
"loss": 0.8692,
"step": 1604
},
{
"epoch": 0.68,
"learning_rate": 3.991394399066512e-06,
"loss": 0.7431,
"step": 1608
},
{
"epoch": 0.69,
"learning_rate": 3.988477246207702e-06,
"loss": 0.6712,
"step": 1612
},
{
"epoch": 0.69,
"learning_rate": 3.985560093348892e-06,
"loss": 0.5141,
"step": 1616
},
{
"epoch": 0.69,
"learning_rate": 3.9826429404900815e-06,
"loss": 0.4037,
"step": 1620
},
{
"epoch": 0.69,
"learning_rate": 3.979725787631272e-06,
"loss": 0.5442,
"step": 1624
},
{
"epoch": 0.69,
"learning_rate": 3.976808634772462e-06,
"loss": 0.7256,
"step": 1628
},
{
"epoch": 0.69,
"learning_rate": 3.973891481913652e-06,
"loss": 0.7292,
"step": 1632
},
{
"epoch": 0.7,
"learning_rate": 3.970974329054843e-06,
"loss": 0.7739,
"step": 1636
},
{
"epoch": 0.7,
"learning_rate": 3.968057176196033e-06,
"loss": 0.6673,
"step": 1640
},
{
"epoch": 0.7,
"learning_rate": 3.965140023337224e-06,
"loss": 0.8099,
"step": 1644
},
{
"epoch": 0.7,
"learning_rate": 3.962222870478414e-06,
"loss": 0.5271,
"step": 1648
},
{
"epoch": 0.7,
"learning_rate": 3.9593057176196036e-06,
"loss": 0.7032,
"step": 1652
},
{
"epoch": 0.7,
"learning_rate": 3.9563885647607935e-06,
"loss": 0.6766,
"step": 1656
},
{
"epoch": 0.71,
"learning_rate": 3.953471411901984e-06,
"loss": 0.6286,
"step": 1660
},
{
"epoch": 0.71,
"learning_rate": 3.950554259043174e-06,
"loss": 0.6236,
"step": 1664
},
{
"epoch": 0.71,
"learning_rate": 3.947637106184364e-06,
"loss": 0.7438,
"step": 1668
},
{
"epoch": 0.71,
"learning_rate": 3.944719953325555e-06,
"loss": 0.6499,
"step": 1672
},
{
"epoch": 0.71,
"learning_rate": 3.941802800466745e-06,
"loss": 0.7669,
"step": 1676
},
{
"epoch": 0.71,
"learning_rate": 3.938885647607935e-06,
"loss": 0.4944,
"step": 1680
},
{
"epoch": 0.72,
"learning_rate": 3.935968494749125e-06,
"loss": 0.4765,
"step": 1684
},
{
"epoch": 0.72,
"learning_rate": 3.9330513418903156e-06,
"loss": 0.7785,
"step": 1688
},
{
"epoch": 0.72,
"learning_rate": 3.9301341890315055e-06,
"loss": 0.6521,
"step": 1692
},
{
"epoch": 0.72,
"learning_rate": 3.9272170361726954e-06,
"loss": 0.6744,
"step": 1696
},
{
"epoch": 0.72,
"learning_rate": 3.924299883313886e-06,
"loss": 0.6819,
"step": 1700
},
{
"epoch": 0.72,
"learning_rate": 3.921382730455076e-06,
"loss": 0.6566,
"step": 1704
},
{
"epoch": 0.73,
"learning_rate": 3.918465577596267e-06,
"loss": 0.7201,
"step": 1708
},
{
"epoch": 0.73,
"learning_rate": 3.915548424737456e-06,
"loss": 0.373,
"step": 1712
},
{
"epoch": 0.73,
"learning_rate": 3.912631271878647e-06,
"loss": 0.8481,
"step": 1716
},
{
"epoch": 0.73,
"learning_rate": 3.909714119019837e-06,
"loss": 0.6479,
"step": 1720
},
{
"epoch": 0.73,
"learning_rate": 3.9067969661610276e-06,
"loss": 0.5682,
"step": 1724
},
{
"epoch": 0.73,
"learning_rate": 3.9038798133022175e-06,
"loss": 0.651,
"step": 1728
},
{
"epoch": 0.74,
"learning_rate": 3.9009626604434074e-06,
"loss": 0.637,
"step": 1732
},
{
"epoch": 0.74,
"learning_rate": 3.898045507584598e-06,
"loss": 0.5119,
"step": 1736
},
{
"epoch": 0.74,
"learning_rate": 3.895128354725788e-06,
"loss": 0.5434,
"step": 1740
},
{
"epoch": 0.74,
"learning_rate": 3.892211201866978e-06,
"loss": 0.7295,
"step": 1744
},
{
"epoch": 0.74,
"learning_rate": 3.889294049008168e-06,
"loss": 0.7214,
"step": 1748
},
{
"epoch": 0.74,
"learning_rate": 3.886376896149359e-06,
"loss": 0.5946,
"step": 1752
},
{
"epoch": 0.75,
"learning_rate": 3.883459743290549e-06,
"loss": 0.6668,
"step": 1756
},
{
"epoch": 0.75,
"learning_rate": 3.880542590431739e-06,
"loss": 0.6054,
"step": 1760
},
{
"epoch": 0.75,
"learning_rate": 3.877625437572929e-06,
"loss": 0.6904,
"step": 1764
},
{
"epoch": 0.75,
"learning_rate": 3.874708284714119e-06,
"loss": 0.7803,
"step": 1768
},
{
"epoch": 0.75,
"learning_rate": 3.871791131855309e-06,
"loss": 0.6766,
"step": 1772
},
{
"epoch": 0.76,
"learning_rate": 3.868873978996499e-06,
"loss": 0.6957,
"step": 1776
},
{
"epoch": 0.76,
"learning_rate": 3.86595682613769e-06,
"loss": 0.8129,
"step": 1780
},
{
"epoch": 0.76,
"learning_rate": 3.86303967327888e-06,
"loss": 0.7652,
"step": 1784
},
{
"epoch": 0.76,
"learning_rate": 3.860122520420071e-06,
"loss": 0.4505,
"step": 1788
},
{
"epoch": 0.76,
"learning_rate": 3.85720536756126e-06,
"loss": 0.6318,
"step": 1792
},
{
"epoch": 0.76,
"learning_rate": 3.854288214702451e-06,
"loss": 0.7018,
"step": 1796
},
{
"epoch": 0.77,
"learning_rate": 3.8513710618436415e-06,
"loss": 0.8348,
"step": 1800
},
{
"epoch": 0.77,
"learning_rate": 3.848453908984831e-06,
"loss": 0.5523,
"step": 1804
},
{
"epoch": 0.77,
"learning_rate": 3.845536756126021e-06,
"loss": 0.5858,
"step": 1808
},
{
"epoch": 0.77,
"learning_rate": 3.842619603267211e-06,
"loss": 0.3905,
"step": 1812
},
{
"epoch": 0.77,
"learning_rate": 3.839702450408402e-06,
"loss": 0.7015,
"step": 1816
},
{
"epoch": 0.77,
"learning_rate": 3.836785297549592e-06,
"loss": 0.4228,
"step": 1820
},
{
"epoch": 0.78,
"learning_rate": 3.833868144690782e-06,
"loss": 0.7709,
"step": 1824
},
{
"epoch": 0.78,
"learning_rate": 3.830950991831972e-06,
"loss": 0.5833,
"step": 1828
},
{
"epoch": 0.78,
"learning_rate": 3.828033838973163e-06,
"loss": 0.6342,
"step": 1832
},
{
"epoch": 0.78,
"learning_rate": 3.825116686114353e-06,
"loss": 0.843,
"step": 1836
},
{
"epoch": 0.78,
"learning_rate": 3.8221995332555425e-06,
"loss": 0.5876,
"step": 1840
},
{
"epoch": 0.78,
"learning_rate": 3.819282380396733e-06,
"loss": 0.5432,
"step": 1844
},
{
"epoch": 0.79,
"learning_rate": 3.816365227537923e-06,
"loss": 0.7204,
"step": 1848
},
{
"epoch": 0.79,
"learning_rate": 3.8134480746791136e-06,
"loss": 0.4466,
"step": 1852
},
{
"epoch": 0.79,
"learning_rate": 3.8105309218203036e-06,
"loss": 0.623,
"step": 1856
},
{
"epoch": 0.79,
"learning_rate": 3.807613768961494e-06,
"loss": 0.3623,
"step": 1860
},
{
"epoch": 0.79,
"learning_rate": 3.804696616102684e-06,
"loss": 0.6491,
"step": 1864
},
{
"epoch": 0.79,
"learning_rate": 3.8017794632438742e-06,
"loss": 0.6469,
"step": 1868
},
{
"epoch": 0.8,
"learning_rate": 3.7988623103850646e-06,
"loss": 0.5412,
"step": 1872
},
{
"epoch": 0.8,
"learning_rate": 3.7959451575262545e-06,
"loss": 0.995,
"step": 1876
},
{
"epoch": 0.8,
"learning_rate": 3.793028004667445e-06,
"loss": 0.6152,
"step": 1880
},
{
"epoch": 0.8,
"learning_rate": 3.790110851808635e-06,
"loss": 0.7216,
"step": 1884
},
{
"epoch": 0.8,
"learning_rate": 3.787193698949825e-06,
"loss": 0.5137,
"step": 1888
},
{
"epoch": 0.8,
"learning_rate": 3.784276546091015e-06,
"loss": 0.6703,
"step": 1892
},
{
"epoch": 0.81,
"learning_rate": 3.7813593932322055e-06,
"loss": 0.5394,
"step": 1896
},
{
"epoch": 0.81,
"learning_rate": 3.7784422403733963e-06,
"loss": 0.6228,
"step": 1900
},
{
"epoch": 0.81,
"learning_rate": 3.7755250875145862e-06,
"loss": 0.6231,
"step": 1904
},
{
"epoch": 0.81,
"learning_rate": 3.7726079346557766e-06,
"loss": 0.6658,
"step": 1908
},
{
"epoch": 0.81,
"learning_rate": 3.7696907817969665e-06,
"loss": 0.5279,
"step": 1912
},
{
"epoch": 0.81,
"learning_rate": 3.766773628938157e-06,
"loss": 0.4709,
"step": 1916
},
{
"epoch": 0.82,
"learning_rate": 3.763856476079347e-06,
"loss": 0.6442,
"step": 1920
},
{
"epoch": 0.82,
"learning_rate": 3.760939323220537e-06,
"loss": 0.6787,
"step": 1924
},
{
"epoch": 0.82,
"learning_rate": 3.758022170361727e-06,
"loss": 0.6453,
"step": 1928
},
{
"epoch": 0.82,
"learning_rate": 3.7551050175029175e-06,
"loss": 0.7487,
"step": 1932
},
{
"epoch": 0.82,
"learning_rate": 3.752187864644108e-06,
"loss": 0.6078,
"step": 1936
},
{
"epoch": 0.82,
"learning_rate": 3.749270711785298e-06,
"loss": 0.5252,
"step": 1940
},
{
"epoch": 0.83,
"learning_rate": 3.746353558926488e-06,
"loss": 0.4936,
"step": 1944
},
{
"epoch": 0.83,
"learning_rate": 3.743436406067678e-06,
"loss": 0.4545,
"step": 1948
},
{
"epoch": 0.83,
"learning_rate": 3.7405192532088685e-06,
"loss": 0.7937,
"step": 1952
},
{
"epoch": 0.83,
"learning_rate": 3.7376021003500584e-06,
"loss": 0.8007,
"step": 1956
},
{
"epoch": 0.83,
"learning_rate": 3.7346849474912488e-06,
"loss": 0.4401,
"step": 1960
},
{
"epoch": 0.84,
"learning_rate": 3.731767794632439e-06,
"loss": 0.8051,
"step": 1964
},
{
"epoch": 0.84,
"learning_rate": 3.728850641773629e-06,
"loss": 0.7178,
"step": 1968
},
{
"epoch": 0.84,
"learning_rate": 3.72593348891482e-06,
"loss": 0.5673,
"step": 1972
},
{
"epoch": 0.84,
"learning_rate": 3.7230163360560094e-06,
"loss": 0.8238,
"step": 1976
},
{
"epoch": 0.84,
"learning_rate": 3.7200991831972e-06,
"loss": 0.646,
"step": 1980
},
{
"epoch": 0.84,
"learning_rate": 3.7171820303383897e-06,
"loss": 0.5166,
"step": 1984
},
{
"epoch": 0.85,
"learning_rate": 3.7142648774795804e-06,
"loss": 0.5725,
"step": 1988
},
{
"epoch": 0.85,
"learning_rate": 3.7113477246207704e-06,
"loss": 0.8298,
"step": 1992
},
{
"epoch": 0.85,
"learning_rate": 3.7084305717619607e-06,
"loss": 0.5858,
"step": 1996
},
{
"epoch": 0.85,
"learning_rate": 3.705513418903151e-06,
"loss": 0.5432,
"step": 2000
},
{
"epoch": 0.85,
"learning_rate": 3.702596266044341e-06,
"loss": 0.5454,
"step": 2004
},
{
"epoch": 0.85,
"learning_rate": 3.6996791131855314e-06,
"loss": 0.6,
"step": 2008
},
{
"epoch": 0.86,
"learning_rate": 3.6967619603267213e-06,
"loss": 0.5889,
"step": 2012
},
{
"epoch": 0.86,
"learning_rate": 3.6938448074679117e-06,
"loss": 0.7542,
"step": 2016
},
{
"epoch": 0.86,
"learning_rate": 3.6909276546091016e-06,
"loss": 0.7573,
"step": 2020
},
{
"epoch": 0.86,
"learning_rate": 3.688010501750292e-06,
"loss": 0.7429,
"step": 2024
},
{
"epoch": 0.86,
"learning_rate": 3.685093348891482e-06,
"loss": 0.6015,
"step": 2028
},
{
"epoch": 0.86,
"learning_rate": 3.6821761960326723e-06,
"loss": 0.6009,
"step": 2032
},
{
"epoch": 0.87,
"learning_rate": 3.6792590431738627e-06,
"loss": 0.4447,
"step": 2036
},
{
"epoch": 0.87,
"learning_rate": 3.6763418903150526e-06,
"loss": 0.667,
"step": 2040
},
{
"epoch": 0.87,
"learning_rate": 3.673424737456243e-06,
"loss": 0.594,
"step": 2044
},
{
"epoch": 0.87,
"learning_rate": 3.670507584597433e-06,
"loss": 0.6368,
"step": 2048
},
{
"epoch": 0.87,
"learning_rate": 3.6675904317386237e-06,
"loss": 0.5883,
"step": 2052
},
{
"epoch": 0.87,
"learning_rate": 3.6646732788798132e-06,
"loss": 0.7004,
"step": 2056
},
{
"epoch": 0.88,
"learning_rate": 3.661756126021004e-06,
"loss": 0.6169,
"step": 2060
},
{
"epoch": 0.88,
"learning_rate": 3.6588389731621944e-06,
"loss": 0.6757,
"step": 2064
},
{
"epoch": 0.88,
"learning_rate": 3.6559218203033843e-06,
"loss": 0.6134,
"step": 2068
},
{
"epoch": 0.88,
"learning_rate": 3.6530046674445747e-06,
"loss": 0.6381,
"step": 2072
},
{
"epoch": 0.88,
"learning_rate": 3.6500875145857646e-06,
"loss": 0.715,
"step": 2076
},
{
"epoch": 0.88,
"learning_rate": 3.647170361726955e-06,
"loss": 0.6319,
"step": 2080
},
{
"epoch": 0.89,
"learning_rate": 3.644253208868145e-06,
"loss": 0.4187,
"step": 2084
},
{
"epoch": 0.89,
"learning_rate": 3.6413360560093353e-06,
"loss": 0.5733,
"step": 2088
},
{
"epoch": 0.89,
"learning_rate": 3.638418903150525e-06,
"loss": 0.6943,
"step": 2092
},
{
"epoch": 0.89,
"learning_rate": 3.6355017502917156e-06,
"loss": 0.4808,
"step": 2096
},
{
"epoch": 0.89,
"learning_rate": 3.632584597432906e-06,
"loss": 0.4813,
"step": 2100
},
{
"epoch": 0.89,
"learning_rate": 3.629667444574096e-06,
"loss": 0.5777,
"step": 2104
},
{
"epoch": 0.9,
"learning_rate": 3.6267502917152862e-06,
"loss": 0.5967,
"step": 2108
},
{
"epoch": 0.9,
"learning_rate": 3.623833138856476e-06,
"loss": 0.5644,
"step": 2112
},
{
"epoch": 0.9,
"learning_rate": 3.6209159859976665e-06,
"loss": 0.6584,
"step": 2116
},
{
"epoch": 0.9,
"learning_rate": 3.6179988331388565e-06,
"loss": 0.5162,
"step": 2120
},
{
"epoch": 0.9,
"learning_rate": 3.615081680280047e-06,
"loss": 0.6585,
"step": 2124
},
{
"epoch": 0.9,
"learning_rate": 3.6121645274212368e-06,
"loss": 0.7716,
"step": 2128
},
{
"epoch": 0.91,
"learning_rate": 3.6092473745624276e-06,
"loss": 0.5249,
"step": 2132
},
{
"epoch": 0.91,
"learning_rate": 3.606330221703618e-06,
"loss": 0.722,
"step": 2136
},
{
"epoch": 0.91,
"learning_rate": 3.603413068844808e-06,
"loss": 0.6634,
"step": 2140
},
{
"epoch": 0.91,
"learning_rate": 3.6004959159859982e-06,
"loss": 0.6547,
"step": 2144
},
{
"epoch": 0.91,
"learning_rate": 3.597578763127188e-06,
"loss": 0.7804,
"step": 2148
},
{
"epoch": 0.91,
"learning_rate": 3.5946616102683785e-06,
"loss": 0.5373,
"step": 2152
},
{
"epoch": 0.92,
"learning_rate": 3.5917444574095685e-06,
"loss": 0.6122,
"step": 2156
},
{
"epoch": 0.92,
"learning_rate": 3.588827304550759e-06,
"loss": 0.5015,
"step": 2160
},
{
"epoch": 0.92,
"learning_rate": 3.585910151691949e-06,
"loss": 0.6872,
"step": 2164
},
{
"epoch": 0.92,
"learning_rate": 3.582992998833139e-06,
"loss": 0.3367,
"step": 2168
},
{
"epoch": 0.92,
"learning_rate": 3.5800758459743295e-06,
"loss": 0.4729,
"step": 2172
},
{
"epoch": 0.93,
"learning_rate": 3.5771586931155194e-06,
"loss": 0.7766,
"step": 2176
},
{
"epoch": 0.93,
"learning_rate": 3.5742415402567098e-06,
"loss": 0.7324,
"step": 2180
},
{
"epoch": 0.93,
"learning_rate": 3.5713243873978997e-06,
"loss": 0.5244,
"step": 2184
},
{
"epoch": 0.93,
"learning_rate": 3.56840723453909e-06,
"loss": 0.6321,
"step": 2188
},
{
"epoch": 0.93,
"learning_rate": 3.56549008168028e-06,
"loss": 0.6109,
"step": 2192
},
{
"epoch": 0.93,
"learning_rate": 3.5625729288214704e-06,
"loss": 0.5534,
"step": 2196
},
{
"epoch": 0.94,
"learning_rate": 3.559655775962661e-06,
"loss": 0.5453,
"step": 2200
},
{
"epoch": 0.94,
"learning_rate": 3.5567386231038507e-06,
"loss": 0.4379,
"step": 2204
},
{
"epoch": 0.94,
"learning_rate": 3.5538214702450415e-06,
"loss": 0.5268,
"step": 2208
},
{
"epoch": 0.94,
"learning_rate": 3.5509043173862314e-06,
"loss": 0.7081,
"step": 2212
},
{
"epoch": 0.94,
"learning_rate": 3.5479871645274218e-06,
"loss": 0.5149,
"step": 2216
},
{
"epoch": 0.94,
"learning_rate": 3.5450700116686117e-06,
"loss": 0.4048,
"step": 2220
},
{
"epoch": 0.95,
"learning_rate": 3.542152858809802e-06,
"loss": 0.7552,
"step": 2224
},
{
"epoch": 0.95,
"learning_rate": 3.539235705950992e-06,
"loss": 0.4624,
"step": 2228
},
{
"epoch": 0.95,
"learning_rate": 3.5363185530921824e-06,
"loss": 0.5872,
"step": 2232
},
{
"epoch": 0.95,
"learning_rate": 3.5334014002333727e-06,
"loss": 0.5265,
"step": 2236
},
{
"epoch": 0.95,
"learning_rate": 3.5304842473745627e-06,
"loss": 0.4802,
"step": 2240
},
{
"epoch": 0.95,
"learning_rate": 3.527567094515753e-06,
"loss": 0.6172,
"step": 2244
},
{
"epoch": 0.96,
"learning_rate": 3.524649941656943e-06,
"loss": 0.5788,
"step": 2248
},
{
"epoch": 0.96,
"learning_rate": 3.5217327887981333e-06,
"loss": 0.5426,
"step": 2252
},
{
"epoch": 0.96,
"learning_rate": 3.5188156359393233e-06,
"loss": 0.6427,
"step": 2256
},
{
"epoch": 0.96,
"learning_rate": 3.5158984830805136e-06,
"loss": 0.6474,
"step": 2260
},
{
"epoch": 0.96,
"learning_rate": 3.512981330221704e-06,
"loss": 0.5518,
"step": 2264
},
{
"epoch": 0.96,
"learning_rate": 3.510064177362894e-06,
"loss": 0.6028,
"step": 2268
},
{
"epoch": 0.97,
"learning_rate": 3.5071470245040843e-06,
"loss": 0.4178,
"step": 2272
},
{
"epoch": 0.97,
"learning_rate": 3.5042298716452742e-06,
"loss": 0.761,
"step": 2276
},
{
"epoch": 0.97,
"learning_rate": 3.501312718786465e-06,
"loss": 0.7076,
"step": 2280
},
{
"epoch": 0.97,
"learning_rate": 3.4983955659276545e-06,
"loss": 0.5466,
"step": 2284
},
{
"epoch": 0.97,
"learning_rate": 3.4954784130688453e-06,
"loss": 0.4539,
"step": 2288
},
{
"epoch": 0.97,
"learning_rate": 3.4925612602100353e-06,
"loss": 0.6436,
"step": 2292
},
{
"epoch": 0.98,
"learning_rate": 3.4896441073512256e-06,
"loss": 0.471,
"step": 2296
},
{
"epoch": 0.98,
"learning_rate": 3.486726954492416e-06,
"loss": 0.6848,
"step": 2300
},
{
"epoch": 0.98,
"learning_rate": 3.483809801633606e-06,
"loss": 0.6043,
"step": 2304
},
{
"epoch": 0.98,
"learning_rate": 3.4808926487747963e-06,
"loss": 0.6458,
"step": 2308
},
{
"epoch": 0.98,
"learning_rate": 3.4779754959159862e-06,
"loss": 0.6336,
"step": 2312
},
{
"epoch": 0.98,
"learning_rate": 3.4750583430571766e-06,
"loss": 0.5843,
"step": 2316
},
{
"epoch": 0.99,
"learning_rate": 3.4721411901983665e-06,
"loss": 0.4364,
"step": 2320
},
{
"epoch": 0.99,
"learning_rate": 3.469224037339557e-06,
"loss": 0.5796,
"step": 2324
},
{
"epoch": 0.99,
"learning_rate": 3.466306884480747e-06,
"loss": 0.5754,
"step": 2328
},
{
"epoch": 0.99,
"learning_rate": 3.463389731621937e-06,
"loss": 0.6848,
"step": 2332
},
{
"epoch": 0.99,
"learning_rate": 3.4604725787631276e-06,
"loss": 0.6489,
"step": 2336
},
{
"epoch": 0.99,
"learning_rate": 3.4575554259043175e-06,
"loss": 0.6255,
"step": 2340
},
{
"epoch": 1.0,
"learning_rate": 3.454638273045508e-06,
"loss": 0.4827,
"step": 2344
},
{
"epoch": 1.0,
"learning_rate": 3.451721120186698e-06,
"loss": 0.5178,
"step": 2348
},
{
"epoch": 1.0,
"learning_rate": 3.448803967327888e-06,
"loss": 0.5294,
"step": 2352
},
{
"epoch": 1.0,
"learning_rate": 3.445886814469078e-06,
"loss": 0.4968,
"step": 2356
},
{
"epoch": 1.0,
"learning_rate": 3.442969661610269e-06,
"loss": 0.7301,
"step": 2360
},
{
"epoch": 1.01,
"learning_rate": 3.4400525087514592e-06,
"loss": 0.5503,
"step": 2364
},
{
"epoch": 1.01,
"learning_rate": 3.437135355892649e-06,
"loss": 0.4724,
"step": 2368
},
{
"epoch": 1.01,
"learning_rate": 3.4342182030338395e-06,
"loss": 0.5471,
"step": 2372
},
{
"epoch": 1.01,
"learning_rate": 3.4313010501750295e-06,
"loss": 0.5414,
"step": 2376
},
{
"epoch": 1.01,
"learning_rate": 3.42838389731622e-06,
"loss": 0.4228,
"step": 2380
},
{
"epoch": 1.01,
"learning_rate": 3.4254667444574098e-06,
"loss": 0.6098,
"step": 2384
},
{
"epoch": 1.02,
"learning_rate": 3.4225495915986e-06,
"loss": 0.5935,
"step": 2388
},
{
"epoch": 1.02,
"learning_rate": 3.41963243873979e-06,
"loss": 0.5567,
"step": 2392
},
{
"epoch": 1.02,
"learning_rate": 3.4167152858809804e-06,
"loss": 0.4576,
"step": 2396
},
{
"epoch": 1.02,
"learning_rate": 3.413798133022171e-06,
"loss": 0.5125,
"step": 2400
},
{
"epoch": 1.02,
"learning_rate": 3.4108809801633607e-06,
"loss": 0.4188,
"step": 2404
},
{
"epoch": 1.02,
"learning_rate": 3.407963827304551e-06,
"loss": 0.6133,
"step": 2408
},
{
"epoch": 1.03,
"learning_rate": 3.405046674445741e-06,
"loss": 0.5203,
"step": 2412
},
{
"epoch": 1.03,
"learning_rate": 3.4021295215869314e-06,
"loss": 0.5936,
"step": 2416
},
{
"epoch": 1.03,
"learning_rate": 3.3992123687281213e-06,
"loss": 0.5783,
"step": 2420
},
{
"epoch": 1.03,
"learning_rate": 3.3962952158693117e-06,
"loss": 0.5501,
"step": 2424
},
{
"epoch": 1.03,
"learning_rate": 3.3933780630105017e-06,
"loss": 0.6859,
"step": 2428
},
{
"epoch": 1.03,
"learning_rate": 3.390460910151692e-06,
"loss": 0.5361,
"step": 2432
},
{
"epoch": 1.04,
"learning_rate": 3.387543757292883e-06,
"loss": 0.3481,
"step": 2436
},
{
"epoch": 1.04,
"learning_rate": 3.3846266044340727e-06,
"loss": 0.5475,
"step": 2440
},
{
"epoch": 1.04,
"learning_rate": 3.381709451575263e-06,
"loss": 0.4606,
"step": 2444
},
{
"epoch": 1.04,
"learning_rate": 3.378792298716453e-06,
"loss": 0.3753,
"step": 2448
},
{
"epoch": 1.04,
"learning_rate": 3.3758751458576434e-06,
"loss": 0.5286,
"step": 2452
},
{
"epoch": 1.04,
"learning_rate": 3.3729579929988333e-06,
"loss": 0.5214,
"step": 2456
},
{
"epoch": 1.05,
"learning_rate": 3.3700408401400237e-06,
"loss": 0.4971,
"step": 2460
},
{
"epoch": 1.05,
"learning_rate": 3.367123687281214e-06,
"loss": 0.5731,
"step": 2464
},
{
"epoch": 1.05,
"learning_rate": 3.364206534422404e-06,
"loss": 0.6563,
"step": 2468
},
{
"epoch": 1.05,
"learning_rate": 3.3612893815635944e-06,
"loss": 0.3885,
"step": 2472
},
{
"epoch": 1.05,
"learning_rate": 3.3583722287047843e-06,
"loss": 0.7304,
"step": 2476
},
{
"epoch": 1.05,
"learning_rate": 3.3554550758459747e-06,
"loss": 0.6601,
"step": 2480
},
{
"epoch": 1.06,
"learning_rate": 3.3525379229871646e-06,
"loss": 0.4629,
"step": 2484
},
{
"epoch": 1.06,
"learning_rate": 3.349620770128355e-06,
"loss": 0.5769,
"step": 2488
},
{
"epoch": 1.06,
"learning_rate": 3.346703617269545e-06,
"loss": 0.5616,
"step": 2492
},
{
"epoch": 1.06,
"learning_rate": 3.3437864644107353e-06,
"loss": 0.4976,
"step": 2496
},
{
"epoch": 1.06,
"learning_rate": 3.340869311551926e-06,
"loss": 0.6104,
"step": 2500
},
{
"epoch": 1.06,
"learning_rate": 3.3379521586931156e-06,
"loss": 0.4875,
"step": 2504
},
{
"epoch": 1.07,
"learning_rate": 3.3350350058343064e-06,
"loss": 0.3093,
"step": 2508
},
{
"epoch": 1.07,
"learning_rate": 3.332117852975496e-06,
"loss": 0.3887,
"step": 2512
},
{
"epoch": 1.07,
"learning_rate": 3.3292007001166867e-06,
"loss": 0.6688,
"step": 2516
},
{
"epoch": 1.07,
"learning_rate": 3.3262835472578766e-06,
"loss": 0.479,
"step": 2520
},
{
"epoch": 1.07,
"learning_rate": 3.323366394399067e-06,
"loss": 0.616,
"step": 2524
},
{
"epoch": 1.07,
"learning_rate": 3.320449241540257e-06,
"loss": 0.8513,
"step": 2528
},
{
"epoch": 1.08,
"learning_rate": 3.3175320886814473e-06,
"loss": 0.4224,
"step": 2532
},
{
"epoch": 1.08,
"learning_rate": 3.3146149358226376e-06,
"loss": 0.4577,
"step": 2536
},
{
"epoch": 1.08,
"learning_rate": 3.3116977829638276e-06,
"loss": 0.4286,
"step": 2540
},
{
"epoch": 1.08,
"learning_rate": 3.308780630105018e-06,
"loss": 0.5298,
"step": 2544
},
{
"epoch": 1.08,
"learning_rate": 3.305863477246208e-06,
"loss": 0.4531,
"step": 2548
},
{
"epoch": 1.09,
"learning_rate": 3.3029463243873982e-06,
"loss": 0.561,
"step": 2552
},
{
"epoch": 1.09,
"learning_rate": 3.300029171528588e-06,
"loss": 0.4931,
"step": 2556
},
{
"epoch": 1.09,
"learning_rate": 3.2971120186697785e-06,
"loss": 0.6341,
"step": 2560
},
{
"epoch": 1.09,
"learning_rate": 3.294194865810969e-06,
"loss": 0.5096,
"step": 2564
},
{
"epoch": 1.09,
"learning_rate": 3.291277712952159e-06,
"loss": 0.3863,
"step": 2568
},
{
"epoch": 1.09,
"learning_rate": 3.288360560093349e-06,
"loss": 0.6633,
"step": 2572
},
{
"epoch": 1.1,
"learning_rate": 3.285443407234539e-06,
"loss": 0.7305,
"step": 2576
},
{
"epoch": 1.1,
"learning_rate": 3.2825262543757295e-06,
"loss": 0.627,
"step": 2580
},
{
"epoch": 1.1,
"learning_rate": 3.2796091015169194e-06,
"loss": 0.4865,
"step": 2584
},
{
"epoch": 1.1,
"learning_rate": 3.27669194865811e-06,
"loss": 0.507,
"step": 2588
},
{
"epoch": 1.1,
"learning_rate": 3.2737747957992997e-06,
"loss": 0.5226,
"step": 2592
},
{
"epoch": 1.1,
"learning_rate": 3.2708576429404905e-06,
"loss": 0.5523,
"step": 2596
},
{
"epoch": 1.11,
"learning_rate": 3.267940490081681e-06,
"loss": 0.5195,
"step": 2600
},
{
"epoch": 1.11,
"learning_rate": 3.265023337222871e-06,
"loss": 0.3034,
"step": 2604
},
{
"epoch": 1.11,
"learning_rate": 3.262106184364061e-06,
"loss": 0.7056,
"step": 2608
},
{
"epoch": 1.11,
"learning_rate": 3.259189031505251e-06,
"loss": 0.6256,
"step": 2612
},
{
"epoch": 1.11,
"learning_rate": 3.2562718786464415e-06,
"loss": 0.4434,
"step": 2616
},
{
"epoch": 1.11,
"learning_rate": 3.2533547257876314e-06,
"loss": 0.3871,
"step": 2620
},
{
"epoch": 1.12,
"learning_rate": 3.2504375729288218e-06,
"loss": 0.6902,
"step": 2624
},
{
"epoch": 1.12,
"learning_rate": 3.2475204200700117e-06,
"loss": 0.3734,
"step": 2628
},
{
"epoch": 1.12,
"learning_rate": 3.244603267211202e-06,
"loss": 0.4771,
"step": 2632
},
{
"epoch": 1.12,
"learning_rate": 3.2416861143523924e-06,
"loss": 0.5457,
"step": 2636
},
{
"epoch": 1.12,
"learning_rate": 3.2387689614935824e-06,
"loss": 0.5787,
"step": 2640
},
{
"epoch": 1.12,
"learning_rate": 3.2358518086347727e-06,
"loss": 0.7326,
"step": 2644
},
{
"epoch": 1.13,
"learning_rate": 3.2329346557759627e-06,
"loss": 0.4606,
"step": 2648
},
{
"epoch": 1.13,
"learning_rate": 3.230017502917153e-06,
"loss": 0.5832,
"step": 2652
},
{
"epoch": 1.13,
"learning_rate": 3.227100350058343e-06,
"loss": 0.4674,
"step": 2656
},
{
"epoch": 1.13,
"learning_rate": 3.2241831971995333e-06,
"loss": 0.3263,
"step": 2660
},
{
"epoch": 1.13,
"learning_rate": 3.221266044340724e-06,
"loss": 0.433,
"step": 2664
},
{
"epoch": 1.13,
"learning_rate": 3.218348891481914e-06,
"loss": 0.562,
"step": 2668
},
{
"epoch": 1.14,
"learning_rate": 3.2154317386231044e-06,
"loss": 0.5465,
"step": 2672
},
{
"epoch": 1.14,
"learning_rate": 3.2125145857642944e-06,
"loss": 0.5806,
"step": 2676
},
{
"epoch": 1.14,
"learning_rate": 3.2095974329054847e-06,
"loss": 0.5941,
"step": 2680
},
{
"epoch": 1.14,
"learning_rate": 3.2066802800466747e-06,
"loss": 0.5672,
"step": 2684
},
{
"epoch": 1.14,
"learning_rate": 3.203763127187865e-06,
"loss": 0.4044,
"step": 2688
},
{
"epoch": 1.14,
"learning_rate": 3.200845974329055e-06,
"loss": 0.6372,
"step": 2692
},
{
"epoch": 1.15,
"learning_rate": 3.1979288214702453e-06,
"loss": 0.5306,
"step": 2696
},
{
"epoch": 1.15,
"learning_rate": 3.1950116686114357e-06,
"loss": 0.6751,
"step": 2700
},
{
"epoch": 1.15,
"learning_rate": 3.1920945157526256e-06,
"loss": 0.3999,
"step": 2704
},
{
"epoch": 1.15,
"learning_rate": 3.189177362893816e-06,
"loss": 0.6431,
"step": 2708
},
{
"epoch": 1.15,
"learning_rate": 3.186260210035006e-06,
"loss": 0.7637,
"step": 2712
},
{
"epoch": 1.15,
"learning_rate": 3.1833430571761963e-06,
"loss": 0.4698,
"step": 2716
},
{
"epoch": 1.16,
"learning_rate": 3.1804259043173862e-06,
"loss": 0.5558,
"step": 2720
},
{
"epoch": 1.16,
"learning_rate": 3.1775087514585766e-06,
"loss": 0.5202,
"step": 2724
},
{
"epoch": 1.16,
"learning_rate": 3.1745915985997665e-06,
"loss": 0.4455,
"step": 2728
},
{
"epoch": 1.16,
"learning_rate": 3.171674445740957e-06,
"loss": 0.6568,
"step": 2732
},
{
"epoch": 1.16,
"learning_rate": 3.1687572928821477e-06,
"loss": 0.6879,
"step": 2736
},
{
"epoch": 1.16,
"learning_rate": 3.165840140023337e-06,
"loss": 0.474,
"step": 2740
},
{
"epoch": 1.17,
"learning_rate": 3.162922987164528e-06,
"loss": 0.5385,
"step": 2744
},
{
"epoch": 1.17,
"learning_rate": 3.160005834305718e-06,
"loss": 0.6955,
"step": 2748
},
{
"epoch": 1.17,
"learning_rate": 3.1570886814469083e-06,
"loss": 0.501,
"step": 2752
},
{
"epoch": 1.17,
"learning_rate": 3.1541715285880982e-06,
"loss": 0.5093,
"step": 2756
},
{
"epoch": 1.17,
"learning_rate": 3.1512543757292886e-06,
"loss": 0.3698,
"step": 2760
},
{
"epoch": 1.18,
"learning_rate": 3.148337222870479e-06,
"loss": 0.4271,
"step": 2764
},
{
"epoch": 1.18,
"learning_rate": 3.145420070011669e-06,
"loss": 0.5814,
"step": 2768
},
{
"epoch": 1.18,
"learning_rate": 3.1425029171528592e-06,
"loss": 0.3312,
"step": 2772
},
{
"epoch": 1.18,
"learning_rate": 3.139585764294049e-06,
"loss": 0.5727,
"step": 2776
},
{
"epoch": 1.18,
"learning_rate": 3.1366686114352395e-06,
"loss": 0.6723,
"step": 2780
},
{
"epoch": 1.18,
"learning_rate": 3.1337514585764295e-06,
"loss": 0.5772,
"step": 2784
},
{
"epoch": 1.19,
"learning_rate": 3.13083430571762e-06,
"loss": 0.2121,
"step": 2788
},
{
"epoch": 1.19,
"learning_rate": 3.1279171528588098e-06,
"loss": 0.5148,
"step": 2792
},
{
"epoch": 1.19,
"learning_rate": 3.125e-06,
"loss": 0.402,
"step": 2796
},
{
"epoch": 1.19,
"learning_rate": 3.1220828471411905e-06,
"loss": 0.4757,
"step": 2800
},
{
"epoch": 1.19,
"learning_rate": 3.1191656942823805e-06,
"loss": 0.5157,
"step": 2804
},
{
"epoch": 1.19,
"learning_rate": 3.1162485414235712e-06,
"loss": 0.5417,
"step": 2808
},
{
"epoch": 1.2,
"learning_rate": 3.1133313885647608e-06,
"loss": 0.3401,
"step": 2812
},
{
"epoch": 1.2,
"learning_rate": 3.1104142357059515e-06,
"loss": 0.6862,
"step": 2816
},
{
"epoch": 1.2,
"learning_rate": 3.107497082847141e-06,
"loss": 0.516,
"step": 2820
},
{
"epoch": 1.2,
"learning_rate": 3.104579929988332e-06,
"loss": 0.5201,
"step": 2824
},
{
"epoch": 1.2,
"learning_rate": 3.1016627771295218e-06,
"loss": 0.4436,
"step": 2828
},
{
"epoch": 1.2,
"learning_rate": 3.098745624270712e-06,
"loss": 0.5983,
"step": 2832
},
{
"epoch": 1.21,
"learning_rate": 3.0958284714119025e-06,
"loss": 0.5076,
"step": 2836
},
{
"epoch": 1.21,
"learning_rate": 3.0929113185530924e-06,
"loss": 0.4357,
"step": 2840
},
{
"epoch": 1.21,
"learning_rate": 3.089994165694283e-06,
"loss": 0.5015,
"step": 2844
},
{
"epoch": 1.21,
"learning_rate": 3.0870770128354727e-06,
"loss": 0.5186,
"step": 2848
},
{
"epoch": 1.21,
"learning_rate": 3.084159859976663e-06,
"loss": 0.5653,
"step": 2852
},
{
"epoch": 1.21,
"learning_rate": 3.081242707117853e-06,
"loss": 0.6339,
"step": 2856
},
{
"epoch": 1.22,
"learning_rate": 3.0783255542590434e-06,
"loss": 0.5534,
"step": 2860
},
{
"epoch": 1.22,
"learning_rate": 3.0754084014002338e-06,
"loss": 0.5144,
"step": 2864
},
{
"epoch": 1.22,
"learning_rate": 3.0724912485414237e-06,
"loss": 0.393,
"step": 2868
},
{
"epoch": 1.22,
"learning_rate": 3.069574095682614e-06,
"loss": 0.4453,
"step": 2872
},
{
"epoch": 1.22,
"learning_rate": 3.066656942823804e-06,
"loss": 0.5278,
"step": 2876
},
{
"epoch": 1.22,
"learning_rate": 3.0637397899649944e-06,
"loss": 0.7833,
"step": 2880
},
{
"epoch": 1.23,
"learning_rate": 3.0608226371061843e-06,
"loss": 0.4581,
"step": 2884
},
{
"epoch": 1.23,
"learning_rate": 3.0579054842473747e-06,
"loss": 0.3942,
"step": 2888
},
{
"epoch": 1.23,
"learning_rate": 3.0549883313885646e-06,
"loss": 0.6298,
"step": 2892
},
{
"epoch": 1.23,
"learning_rate": 3.0520711785297554e-06,
"loss": 0.6276,
"step": 2896
},
{
"epoch": 1.23,
"learning_rate": 3.0491540256709458e-06,
"loss": 0.737,
"step": 2900
},
{
"epoch": 1.23,
"learning_rate": 3.0462368728121357e-06,
"loss": 0.5208,
"step": 2904
},
{
"epoch": 1.24,
"learning_rate": 3.043319719953326e-06,
"loss": 0.5953,
"step": 2908
},
{
"epoch": 1.24,
"learning_rate": 3.040402567094516e-06,
"loss": 0.5373,
"step": 2912
},
{
"epoch": 1.24,
"learning_rate": 3.0374854142357064e-06,
"loss": 0.3865,
"step": 2916
},
{
"epoch": 1.24,
"learning_rate": 3.0345682613768963e-06,
"loss": 0.5451,
"step": 2920
},
{
"epoch": 1.24,
"learning_rate": 3.0316511085180867e-06,
"loss": 0.4434,
"step": 2924
},
{
"epoch": 1.24,
"learning_rate": 3.0287339556592766e-06,
"loss": 0.6256,
"step": 2928
},
{
"epoch": 1.25,
"learning_rate": 3.025816802800467e-06,
"loss": 0.4372,
"step": 2932
},
{
"epoch": 1.25,
"learning_rate": 3.0228996499416573e-06,
"loss": 0.4225,
"step": 2936
},
{
"epoch": 1.25,
"learning_rate": 3.0199824970828473e-06,
"loss": 0.6176,
"step": 2940
},
{
"epoch": 1.25,
"learning_rate": 3.0170653442240376e-06,
"loss": 0.5814,
"step": 2944
},
{
"epoch": 1.25,
"learning_rate": 3.0141481913652276e-06,
"loss": 0.6453,
"step": 2948
},
{
"epoch": 1.26,
"learning_rate": 3.011231038506418e-06,
"loss": 0.599,
"step": 2952
},
{
"epoch": 1.26,
"learning_rate": 3.008313885647608e-06,
"loss": 0.4661,
"step": 2956
},
{
"epoch": 1.26,
"learning_rate": 3.0053967327887982e-06,
"loss": 0.3218,
"step": 2960
},
{
"epoch": 1.26,
"learning_rate": 3.003208868144691e-06,
"loss": 0.6456,
"step": 2964
},
{
"epoch": 1.26,
"learning_rate": 3.0002917152858813e-06,
"loss": 0.2891,
"step": 2968
},
{
"epoch": 1.26,
"learning_rate": 2.9973745624270716e-06,
"loss": 0.4821,
"step": 2972
},
{
"epoch": 1.27,
"learning_rate": 2.9944574095682616e-06,
"loss": 0.733,
"step": 2976
},
{
"epoch": 1.27,
"learning_rate": 2.991540256709452e-06,
"loss": 0.2193,
"step": 2980
},
{
"epoch": 1.27,
"learning_rate": 2.988623103850642e-06,
"loss": 0.4148,
"step": 2984
},
{
"epoch": 1.27,
"learning_rate": 2.9857059509918322e-06,
"loss": 0.5474,
"step": 2988
},
{
"epoch": 1.27,
"learning_rate": 2.982788798133022e-06,
"loss": 0.4005,
"step": 2992
},
{
"epoch": 1.27,
"learning_rate": 2.9798716452742125e-06,
"loss": 0.5799,
"step": 2996
},
{
"epoch": 1.28,
"learning_rate": 2.9769544924154025e-06,
"loss": 0.4967,
"step": 3000
},
{
"epoch": 1.28,
"learning_rate": 2.974037339556593e-06,
"loss": 0.4672,
"step": 3004
},
{
"epoch": 1.28,
"learning_rate": 2.9711201866977836e-06,
"loss": 0.4892,
"step": 3008
},
{
"epoch": 1.28,
"learning_rate": 2.9682030338389735e-06,
"loss": 0.554,
"step": 3012
},
{
"epoch": 1.28,
"learning_rate": 2.965285880980164e-06,
"loss": 0.3553,
"step": 3016
},
{
"epoch": 1.28,
"learning_rate": 2.962368728121354e-06,
"loss": 0.5259,
"step": 3020
},
{
"epoch": 1.29,
"learning_rate": 2.959451575262544e-06,
"loss": 0.4461,
"step": 3024
},
{
"epoch": 1.29,
"learning_rate": 2.956534422403734e-06,
"loss": 0.4768,
"step": 3028
},
{
"epoch": 1.29,
"learning_rate": 2.9536172695449245e-06,
"loss": 0.4537,
"step": 3032
},
{
"epoch": 1.29,
"learning_rate": 2.9507001166861144e-06,
"loss": 0.5611,
"step": 3036
},
{
"epoch": 1.29,
"learning_rate": 2.947782963827305e-06,
"loss": 0.5684,
"step": 3040
},
{
"epoch": 1.29,
"learning_rate": 2.944865810968495e-06,
"loss": 0.2357,
"step": 3044
},
{
"epoch": 1.3,
"learning_rate": 2.941948658109685e-06,
"loss": 0.4909,
"step": 3048
},
{
"epoch": 1.3,
"learning_rate": 2.9390315052508755e-06,
"loss": 0.5225,
"step": 3052
},
{
"epoch": 1.3,
"learning_rate": 2.9361143523920654e-06,
"loss": 0.391,
"step": 3056
},
{
"epoch": 1.3,
"learning_rate": 2.9331971995332558e-06,
"loss": 0.4058,
"step": 3060
},
{
"epoch": 1.3,
"learning_rate": 2.9302800466744457e-06,
"loss": 0.446,
"step": 3064
},
{
"epoch": 1.3,
"learning_rate": 2.927362893815636e-06,
"loss": 0.3136,
"step": 3068
},
{
"epoch": 1.31,
"learning_rate": 2.924445740956827e-06,
"loss": 0.4259,
"step": 3072
},
{
"epoch": 1.31,
"learning_rate": 2.9215285880980164e-06,
"loss": 0.4293,
"step": 3076
},
{
"epoch": 1.31,
"learning_rate": 2.918611435239207e-06,
"loss": 0.6366,
"step": 3080
},
{
"epoch": 1.31,
"learning_rate": 2.9156942823803967e-06,
"loss": 0.5103,
"step": 3084
},
{
"epoch": 1.31,
"learning_rate": 2.9127771295215875e-06,
"loss": 0.4994,
"step": 3088
},
{
"epoch": 1.31,
"learning_rate": 2.9098599766627774e-06,
"loss": 0.5369,
"step": 3092
},
{
"epoch": 1.32,
"learning_rate": 2.9069428238039678e-06,
"loss": 0.4632,
"step": 3096
},
{
"epoch": 1.32,
"learning_rate": 2.9040256709451577e-06,
"loss": 0.3315,
"step": 3100
},
{
"epoch": 1.32,
"learning_rate": 2.901108518086348e-06,
"loss": 0.5776,
"step": 3104
},
{
"epoch": 1.32,
"learning_rate": 2.8981913652275384e-06,
"loss": 0.4545,
"step": 3108
},
{
"epoch": 1.32,
"learning_rate": 2.8952742123687284e-06,
"loss": 0.6122,
"step": 3112
},
{
"epoch": 1.32,
"learning_rate": 2.8923570595099187e-06,
"loss": 0.2894,
"step": 3116
},
{
"epoch": 1.33,
"learning_rate": 2.8894399066511087e-06,
"loss": 0.5543,
"step": 3120
},
{
"epoch": 1.33,
"learning_rate": 2.886522753792299e-06,
"loss": 0.524,
"step": 3124
},
{
"epoch": 1.33,
"learning_rate": 2.883605600933489e-06,
"loss": 0.5279,
"step": 3128
},
{
"epoch": 1.33,
"learning_rate": 2.8806884480746793e-06,
"loss": 0.3323,
"step": 3132
},
{
"epoch": 1.33,
"learning_rate": 2.8777712952158693e-06,
"loss": 0.5169,
"step": 3136
},
{
"epoch": 1.34,
"learning_rate": 2.8748541423570596e-06,
"loss": 0.4117,
"step": 3140
},
{
"epoch": 1.34,
"learning_rate": 2.87193698949825e-06,
"loss": 0.2958,
"step": 3144
},
{
"epoch": 1.34,
"learning_rate": 2.86901983663944e-06,
"loss": 0.5434,
"step": 3148
},
{
"epoch": 1.34,
"learning_rate": 2.8661026837806307e-06,
"loss": 0.3813,
"step": 3152
},
{
"epoch": 1.34,
"learning_rate": 2.8631855309218202e-06,
"loss": 0.4985,
"step": 3156
},
{
"epoch": 1.34,
"learning_rate": 2.860268378063011e-06,
"loss": 0.5145,
"step": 3160
},
{
"epoch": 1.35,
"learning_rate": 2.8573512252042005e-06,
"loss": 0.4176,
"step": 3164
},
{
"epoch": 1.35,
"learning_rate": 2.8544340723453913e-06,
"loss": 0.4034,
"step": 3168
},
{
"epoch": 1.35,
"learning_rate": 2.8515169194865817e-06,
"loss": 0.514,
"step": 3172
},
{
"epoch": 1.35,
"learning_rate": 2.8485997666277716e-06,
"loss": 0.4951,
"step": 3176
},
{
"epoch": 1.35,
"learning_rate": 2.845682613768962e-06,
"loss": 0.3946,
"step": 3180
},
{
"epoch": 1.35,
"learning_rate": 2.842765460910152e-06,
"loss": 0.4,
"step": 3184
},
{
"epoch": 1.36,
"learning_rate": 2.8398483080513423e-06,
"loss": 0.5394,
"step": 3188
},
{
"epoch": 1.36,
"learning_rate": 2.8369311551925322e-06,
"loss": 0.6328,
"step": 3192
},
{
"epoch": 1.36,
"learning_rate": 2.8340140023337226e-06,
"loss": 0.4449,
"step": 3196
},
{
"epoch": 1.36,
"learning_rate": 2.8310968494749125e-06,
"loss": 0.4787,
"step": 3200
},
{
"epoch": 1.36,
"learning_rate": 2.828179696616103e-06,
"loss": 0.3408,
"step": 3204
},
{
"epoch": 1.36,
"learning_rate": 2.8252625437572932e-06,
"loss": 0.4688,
"step": 3208
},
{
"epoch": 1.37,
"learning_rate": 2.822345390898483e-06,
"loss": 0.557,
"step": 3212
},
{
"epoch": 1.37,
"learning_rate": 2.8194282380396735e-06,
"loss": 0.5299,
"step": 3216
},
{
"epoch": 1.37,
"learning_rate": 2.8165110851808635e-06,
"loss": 0.468,
"step": 3220
},
{
"epoch": 1.37,
"learning_rate": 2.813593932322054e-06,
"loss": 0.3655,
"step": 3224
},
{
"epoch": 1.37,
"learning_rate": 2.8106767794632438e-06,
"loss": 0.2575,
"step": 3228
},
{
"epoch": 1.37,
"learning_rate": 2.807759626604434e-06,
"loss": 0.5705,
"step": 3232
},
{
"epoch": 1.38,
"learning_rate": 2.804842473745624e-06,
"loss": 0.4812,
"step": 3236
},
{
"epoch": 1.38,
"learning_rate": 2.801925320886815e-06,
"loss": 0.5761,
"step": 3240
},
{
"epoch": 1.38,
"learning_rate": 2.7990081680280052e-06,
"loss": 0.6039,
"step": 3244
},
{
"epoch": 1.38,
"learning_rate": 2.796091015169195e-06,
"loss": 0.3454,
"step": 3248
},
{
"epoch": 1.38,
"learning_rate": 2.7931738623103855e-06,
"loss": 0.5554,
"step": 3252
},
{
"epoch": 1.38,
"learning_rate": 2.7902567094515755e-06,
"loss": 0.4531,
"step": 3256
},
{
"epoch": 1.39,
"learning_rate": 2.787339556592766e-06,
"loss": 0.3332,
"step": 3260
},
{
"epoch": 1.39,
"learning_rate": 2.7844224037339558e-06,
"loss": 0.3159,
"step": 3264
},
{
"epoch": 1.39,
"learning_rate": 2.781505250875146e-06,
"loss": 0.498,
"step": 3268
},
{
"epoch": 1.39,
"learning_rate": 2.7785880980163365e-06,
"loss": 0.4386,
"step": 3272
},
{
"epoch": 1.39,
"learning_rate": 2.7756709451575264e-06,
"loss": 0.4108,
"step": 3276
},
{
"epoch": 1.39,
"learning_rate": 2.772753792298717e-06,
"loss": 0.3739,
"step": 3280
},
{
"epoch": 1.4,
"learning_rate": 2.7698366394399067e-06,
"loss": 0.3242,
"step": 3284
},
{
"epoch": 1.4,
"learning_rate": 2.766919486581097e-06,
"loss": 0.5226,
"step": 3288
},
{
"epoch": 1.4,
"learning_rate": 2.764002333722287e-06,
"loss": 0.655,
"step": 3292
},
{
"epoch": 1.4,
"learning_rate": 2.7610851808634774e-06,
"loss": 0.3379,
"step": 3296
},
{
"epoch": 1.4,
"learning_rate": 2.7581680280046673e-06,
"loss": 0.5461,
"step": 3300
},
{
"epoch": 1.4,
"learning_rate": 2.7552508751458577e-06,
"loss": 0.4614,
"step": 3304
},
{
"epoch": 1.41,
"learning_rate": 2.7523337222870485e-06,
"loss": 0.6258,
"step": 3308
},
{
"epoch": 1.41,
"learning_rate": 2.749416569428238e-06,
"loss": 0.3237,
"step": 3312
},
{
"epoch": 1.41,
"learning_rate": 2.7464994165694288e-06,
"loss": 0.4256,
"step": 3316
},
{
"epoch": 1.41,
"learning_rate": 2.7435822637106187e-06,
"loss": 0.4114,
"step": 3320
},
{
"epoch": 1.41,
"learning_rate": 2.740665110851809e-06,
"loss": 0.7195,
"step": 3324
},
{
"epoch": 1.41,
"learning_rate": 2.737747957992999e-06,
"loss": 0.5899,
"step": 3328
},
{
"epoch": 1.42,
"learning_rate": 2.7348308051341894e-06,
"loss": 0.4853,
"step": 3332
},
{
"epoch": 1.42,
"learning_rate": 2.7319136522753793e-06,
"loss": 0.4043,
"step": 3336
},
{
"epoch": 1.42,
"learning_rate": 2.7289964994165697e-06,
"loss": 0.5738,
"step": 3340
},
{
"epoch": 1.42,
"learning_rate": 2.72607934655776e-06,
"loss": 0.5704,
"step": 3344
},
{
"epoch": 1.42,
"learning_rate": 2.72316219369895e-06,
"loss": 0.6312,
"step": 3348
},
{
"epoch": 1.43,
"learning_rate": 2.7202450408401404e-06,
"loss": 0.3233,
"step": 3352
},
{
"epoch": 1.43,
"learning_rate": 2.7173278879813303e-06,
"loss": 0.5598,
"step": 3356
},
{
"epoch": 1.43,
"learning_rate": 2.7144107351225207e-06,
"loss": 0.3501,
"step": 3360
},
{
"epoch": 1.43,
"learning_rate": 2.7114935822637106e-06,
"loss": 0.3945,
"step": 3364
},
{
"epoch": 1.43,
"learning_rate": 2.708576429404901e-06,
"loss": 0.4801,
"step": 3368
},
{
"epoch": 1.43,
"learning_rate": 2.7056592765460913e-06,
"loss": 0.5775,
"step": 3372
},
{
"epoch": 1.44,
"learning_rate": 2.7027421236872813e-06,
"loss": 0.3939,
"step": 3376
},
{
"epoch": 1.44,
"learning_rate": 2.699824970828472e-06,
"loss": 0.4818,
"step": 3380
},
{
"epoch": 1.44,
"learning_rate": 2.6969078179696616e-06,
"loss": 0.4698,
"step": 3384
},
{
"epoch": 1.44,
"learning_rate": 2.6939906651108523e-06,
"loss": 0.3852,
"step": 3388
},
{
"epoch": 1.44,
"learning_rate": 2.691073512252042e-06,
"loss": 0.4516,
"step": 3392
},
{
"epoch": 1.44,
"learning_rate": 2.6881563593932326e-06,
"loss": 0.3766,
"step": 3396
},
{
"epoch": 1.45,
"learning_rate": 2.6852392065344226e-06,
"loss": 0.315,
"step": 3400
},
{
"epoch": 1.45,
"learning_rate": 2.682322053675613e-06,
"loss": 0.4293,
"step": 3404
},
{
"epoch": 1.45,
"learning_rate": 2.6794049008168033e-06,
"loss": 0.5251,
"step": 3408
},
{
"epoch": 1.45,
"learning_rate": 2.6764877479579932e-06,
"loss": 0.4517,
"step": 3412
},
{
"epoch": 1.45,
"learning_rate": 2.6735705950991836e-06,
"loss": 0.568,
"step": 3416
},
{
"epoch": 1.45,
"learning_rate": 2.6706534422403735e-06,
"loss": 0.5349,
"step": 3420
},
{
"epoch": 1.46,
"learning_rate": 2.667736289381564e-06,
"loss": 0.4316,
"step": 3424
},
{
"epoch": 1.46,
"learning_rate": 2.664819136522754e-06,
"loss": 0.372,
"step": 3428
},
{
"epoch": 1.46,
"learning_rate": 2.661901983663944e-06,
"loss": 0.4546,
"step": 3432
},
{
"epoch": 1.46,
"learning_rate": 2.658984830805134e-06,
"loss": 0.3791,
"step": 3436
},
{
"epoch": 1.46,
"learning_rate": 2.6560676779463245e-06,
"loss": 0.4281,
"step": 3440
},
{
"epoch": 1.46,
"learning_rate": 2.653150525087515e-06,
"loss": 0.3564,
"step": 3444
},
{
"epoch": 1.47,
"learning_rate": 2.650233372228705e-06,
"loss": 0.443,
"step": 3448
},
{
"epoch": 1.47,
"learning_rate": 2.647316219369895e-06,
"loss": 0.6713,
"step": 3452
},
{
"epoch": 1.47,
"learning_rate": 2.644399066511085e-06,
"loss": 0.2832,
"step": 3456
},
{
"epoch": 1.47,
"learning_rate": 2.641481913652276e-06,
"loss": 0.5267,
"step": 3460
},
{
"epoch": 1.47,
"learning_rate": 2.6385647607934654e-06,
"loss": 0.688,
"step": 3464
},
{
"epoch": 1.47,
"learning_rate": 2.635647607934656e-06,
"loss": 0.6487,
"step": 3468
},
{
"epoch": 1.48,
"learning_rate": 2.6327304550758466e-06,
"loss": 0.6653,
"step": 3472
},
{
"epoch": 1.48,
"learning_rate": 2.6298133022170365e-06,
"loss": 0.4218,
"step": 3476
},
{
"epoch": 1.48,
"learning_rate": 2.626896149358227e-06,
"loss": 0.5679,
"step": 3480
},
{
"epoch": 1.48,
"learning_rate": 2.623978996499417e-06,
"loss": 0.3429,
"step": 3484
},
{
"epoch": 1.48,
"learning_rate": 2.621061843640607e-06,
"loss": 0.4428,
"step": 3488
},
{
"epoch": 1.48,
"learning_rate": 2.618144690781797e-06,
"loss": 0.4073,
"step": 3492
},
{
"epoch": 1.49,
"learning_rate": 2.6152275379229875e-06,
"loss": 0.476,
"step": 3496
},
{
"epoch": 1.49,
"learning_rate": 2.6123103850641774e-06,
"loss": 0.3964,
"step": 3500
},
{
"epoch": 1.49,
"learning_rate": 2.6093932322053678e-06,
"loss": 0.4971,
"step": 3504
},
{
"epoch": 1.49,
"learning_rate": 2.606476079346558e-06,
"loss": 0.5309,
"step": 3508
},
{
"epoch": 1.49,
"learning_rate": 2.603558926487748e-06,
"loss": 0.7094,
"step": 3512
},
{
"epoch": 1.49,
"learning_rate": 2.6006417736289384e-06,
"loss": 0.4345,
"step": 3516
},
{
"epoch": 1.5,
"learning_rate": 2.5977246207701284e-06,
"loss": 0.5559,
"step": 3520
},
{
"epoch": 1.5,
"learning_rate": 2.5948074679113187e-06,
"loss": 0.519,
"step": 3524
},
{
"epoch": 1.5,
"learning_rate": 2.5918903150525087e-06,
"loss": 0.4054,
"step": 3528
},
{
"epoch": 1.5,
"learning_rate": 2.588973162193699e-06,
"loss": 0.4334,
"step": 3532
},
{
"epoch": 1.5,
"learning_rate": 2.586056009334889e-06,
"loss": 0.3535,
"step": 3536
},
{
"epoch": 1.51,
"learning_rate": 2.5831388564760793e-06,
"loss": 0.6168,
"step": 3540
},
{
"epoch": 1.51,
"learning_rate": 2.58022170361727e-06,
"loss": 0.3484,
"step": 3544
},
{
"epoch": 1.51,
"learning_rate": 2.57730455075846e-06,
"loss": 0.3373,
"step": 3548
},
{
"epoch": 1.51,
"learning_rate": 2.5743873978996504e-06,
"loss": 0.5002,
"step": 3552
},
{
"epoch": 1.51,
"learning_rate": 2.5714702450408404e-06,
"loss": 0.4713,
"step": 3556
},
{
"epoch": 1.51,
"learning_rate": 2.5685530921820307e-06,
"loss": 0.4236,
"step": 3560
},
{
"epoch": 1.52,
"learning_rate": 2.5656359393232207e-06,
"loss": 0.49,
"step": 3564
},
{
"epoch": 1.52,
"learning_rate": 2.562718786464411e-06,
"loss": 0.3107,
"step": 3568
},
{
"epoch": 1.52,
"learning_rate": 2.5598016336056014e-06,
"loss": 0.6111,
"step": 3572
},
{
"epoch": 1.52,
"learning_rate": 2.5568844807467913e-06,
"loss": 0.4386,
"step": 3576
},
{
"epoch": 1.52,
"learning_rate": 2.5539673278879817e-06,
"loss": 0.434,
"step": 3580
},
{
"epoch": 1.52,
"learning_rate": 2.5510501750291716e-06,
"loss": 0.3085,
"step": 3584
},
{
"epoch": 1.53,
"learning_rate": 2.548133022170362e-06,
"loss": 0.6368,
"step": 3588
},
{
"epoch": 1.53,
"learning_rate": 2.545215869311552e-06,
"loss": 0.3797,
"step": 3592
},
{
"epoch": 1.53,
"learning_rate": 2.5422987164527423e-06,
"loss": 0.5329,
"step": 3596
},
{
"epoch": 1.53,
"learning_rate": 2.5393815635939322e-06,
"loss": 0.5015,
"step": 3600
},
{
"epoch": 1.53,
"learning_rate": 2.5364644107351226e-06,
"loss": 0.333,
"step": 3604
},
{
"epoch": 1.53,
"learning_rate": 2.5335472578763134e-06,
"loss": 0.349,
"step": 3608
},
{
"epoch": 1.54,
"learning_rate": 2.530630105017503e-06,
"loss": 0.5365,
"step": 3612
},
{
"epoch": 1.54,
"learning_rate": 2.5277129521586937e-06,
"loss": 0.6777,
"step": 3616
},
{
"epoch": 1.54,
"learning_rate": 2.524795799299883e-06,
"loss": 0.5432,
"step": 3620
},
{
"epoch": 1.54,
"learning_rate": 2.521878646441074e-06,
"loss": 0.2763,
"step": 3624
},
{
"epoch": 1.54,
"learning_rate": 2.518961493582264e-06,
"loss": 0.5183,
"step": 3628
},
{
"epoch": 1.54,
"learning_rate": 2.5160443407234543e-06,
"loss": 0.4486,
"step": 3632
},
{
"epoch": 1.55,
"learning_rate": 2.513127187864644e-06,
"loss": 0.3521,
"step": 3636
},
{
"epoch": 1.55,
"learning_rate": 2.5102100350058346e-06,
"loss": 0.5112,
"step": 3640
},
{
"epoch": 1.55,
"learning_rate": 2.507292882147025e-06,
"loss": 0.378,
"step": 3644
},
{
"epoch": 1.55,
"learning_rate": 2.504375729288215e-06,
"loss": 0.4282,
"step": 3648
},
{
"epoch": 1.55,
"learning_rate": 2.5014585764294052e-06,
"loss": 0.5283,
"step": 3652
},
{
"epoch": 1.55,
"learning_rate": 2.4985414235705956e-06,
"loss": 0.5269,
"step": 3656
},
{
"epoch": 1.56,
"learning_rate": 2.4956242707117855e-06,
"loss": 0.3568,
"step": 3660
},
{
"epoch": 1.56,
"learning_rate": 2.492707117852976e-06,
"loss": 0.3911,
"step": 3664
},
{
"epoch": 1.56,
"learning_rate": 2.489789964994166e-06,
"loss": 0.554,
"step": 3668
},
{
"epoch": 1.56,
"learning_rate": 2.486872812135356e-06,
"loss": 0.6132,
"step": 3672
},
{
"epoch": 1.56,
"learning_rate": 2.483955659276546e-06,
"loss": 0.5473,
"step": 3676
},
{
"epoch": 1.56,
"learning_rate": 2.4810385064177365e-06,
"loss": 0.472,
"step": 3680
},
{
"epoch": 1.57,
"learning_rate": 2.4781213535589264e-06,
"loss": 0.4616,
"step": 3684
},
{
"epoch": 1.57,
"learning_rate": 2.4752042007001172e-06,
"loss": 0.5357,
"step": 3688
},
{
"epoch": 1.57,
"learning_rate": 2.472287047841307e-06,
"loss": 0.5629,
"step": 3692
},
{
"epoch": 1.57,
"learning_rate": 2.4693698949824975e-06,
"loss": 0.5284,
"step": 3696
},
{
"epoch": 1.57,
"learning_rate": 2.4664527421236875e-06,
"loss": 0.3988,
"step": 3700
},
{
"epoch": 1.57,
"learning_rate": 2.463535589264878e-06,
"loss": 0.7007,
"step": 3704
},
{
"epoch": 1.58,
"learning_rate": 2.4606184364060678e-06,
"loss": 0.2642,
"step": 3708
},
{
"epoch": 1.58,
"learning_rate": 2.457701283547258e-06,
"loss": 0.6179,
"step": 3712
},
{
"epoch": 1.58,
"learning_rate": 2.454784130688448e-06,
"loss": 0.5595,
"step": 3716
},
{
"epoch": 1.58,
"learning_rate": 2.4518669778296384e-06,
"loss": 0.4074,
"step": 3720
},
{
"epoch": 1.58,
"learning_rate": 2.448949824970829e-06,
"loss": 0.3635,
"step": 3724
},
{
"epoch": 1.59,
"learning_rate": 2.446032672112019e-06,
"loss": 0.3888,
"step": 3728
},
{
"epoch": 1.59,
"learning_rate": 2.443115519253209e-06,
"loss": 0.2755,
"step": 3732
},
{
"epoch": 1.59,
"learning_rate": 2.4401983663943995e-06,
"loss": 0.4511,
"step": 3736
},
{
"epoch": 1.59,
"learning_rate": 2.4372812135355894e-06,
"loss": 0.5516,
"step": 3740
},
{
"epoch": 1.59,
"learning_rate": 2.4343640606767798e-06,
"loss": 0.4041,
"step": 3744
},
{
"epoch": 1.59,
"learning_rate": 2.4314469078179697e-06,
"loss": 0.1592,
"step": 3748
},
{
"epoch": 1.6,
"learning_rate": 2.42852975495916e-06,
"loss": 0.5583,
"step": 3752
},
{
"epoch": 1.6,
"learning_rate": 2.4256126021003504e-06,
"loss": 0.445,
"step": 3756
},
{
"epoch": 1.6,
"learning_rate": 2.4226954492415404e-06,
"loss": 0.3201,
"step": 3760
},
{
"epoch": 1.6,
"learning_rate": 2.4197782963827307e-06,
"loss": 0.521,
"step": 3764
},
{
"epoch": 1.6,
"learning_rate": 2.416861143523921e-06,
"loss": 0.4229,
"step": 3768
},
{
"epoch": 1.6,
"learning_rate": 2.413943990665111e-06,
"loss": 0.4319,
"step": 3772
},
{
"epoch": 1.61,
"learning_rate": 2.4110268378063014e-06,
"loss": 0.4708,
"step": 3776
},
{
"epoch": 1.61,
"learning_rate": 2.4081096849474913e-06,
"loss": 0.5419,
"step": 3780
},
{
"epoch": 1.61,
"learning_rate": 2.4051925320886817e-06,
"loss": 0.4454,
"step": 3784
},
{
"epoch": 1.61,
"learning_rate": 2.402275379229872e-06,
"loss": 0.4804,
"step": 3788
},
{
"epoch": 1.61,
"learning_rate": 2.399358226371062e-06,
"loss": 0.5686,
"step": 3792
},
{
"epoch": 1.61,
"learning_rate": 2.3964410735122523e-06,
"loss": 0.5354,
"step": 3796
},
{
"epoch": 1.62,
"learning_rate": 2.3935239206534423e-06,
"loss": 0.5101,
"step": 3800
},
{
"epoch": 1.62,
"learning_rate": 2.3906067677946326e-06,
"loss": 0.3946,
"step": 3804
},
{
"epoch": 1.62,
"learning_rate": 2.387689614935823e-06,
"loss": 0.4951,
"step": 3808
},
{
"epoch": 1.62,
"learning_rate": 2.384772462077013e-06,
"loss": 0.4952,
"step": 3812
},
{
"epoch": 1.62,
"learning_rate": 2.3818553092182033e-06,
"loss": 0.5854,
"step": 3816
},
{
"epoch": 1.62,
"learning_rate": 2.3789381563593932e-06,
"loss": 0.4074,
"step": 3820
},
{
"epoch": 1.63,
"learning_rate": 2.3760210035005836e-06,
"loss": 0.4058,
"step": 3824
},
{
"epoch": 1.63,
"learning_rate": 2.373103850641774e-06,
"loss": 0.4952,
"step": 3828
},
{
"epoch": 1.63,
"learning_rate": 2.370186697782964e-06,
"loss": 0.5502,
"step": 3832
},
{
"epoch": 1.63,
"learning_rate": 2.3672695449241543e-06,
"loss": 0.4379,
"step": 3836
},
{
"epoch": 1.63,
"learning_rate": 2.364352392065344e-06,
"loss": 0.4695,
"step": 3840
},
{
"epoch": 1.63,
"learning_rate": 2.3614352392065346e-06,
"loss": 0.5127,
"step": 3844
},
{
"epoch": 1.64,
"learning_rate": 2.3585180863477245e-06,
"loss": 0.4037,
"step": 3848
},
{
"epoch": 1.64,
"learning_rate": 2.355600933488915e-06,
"loss": 0.3387,
"step": 3852
},
{
"epoch": 1.64,
"learning_rate": 2.3526837806301052e-06,
"loss": 0.5302,
"step": 3856
},
{
"epoch": 1.64,
"learning_rate": 2.3497666277712956e-06,
"loss": 0.4549,
"step": 3860
},
{
"epoch": 1.64,
"learning_rate": 2.3468494749124855e-06,
"loss": 0.4871,
"step": 3864
},
{
"epoch": 1.64,
"learning_rate": 2.343932322053676e-06,
"loss": 0.4167,
"step": 3868
},
{
"epoch": 1.65,
"learning_rate": 2.341015169194866e-06,
"loss": 0.4498,
"step": 3872
},
{
"epoch": 1.65,
"learning_rate": 2.338098016336056e-06,
"loss": 0.3591,
"step": 3876
},
{
"epoch": 1.65,
"learning_rate": 2.335180863477246e-06,
"loss": 0.3389,
"step": 3880
},
{
"epoch": 1.65,
"learning_rate": 2.3322637106184365e-06,
"loss": 0.4688,
"step": 3884
},
{
"epoch": 1.65,
"learning_rate": 2.329346557759627e-06,
"loss": 0.4151,
"step": 3888
},
{
"epoch": 1.65,
"learning_rate": 2.3264294049008172e-06,
"loss": 0.5132,
"step": 3892
},
{
"epoch": 1.66,
"learning_rate": 2.323512252042007e-06,
"loss": 0.3682,
"step": 3896
},
{
"epoch": 1.66,
"learning_rate": 2.3205950991831975e-06,
"loss": 0.3668,
"step": 3900
},
{
"epoch": 1.66,
"learning_rate": 2.3176779463243875e-06,
"loss": 0.2637,
"step": 3904
},
{
"epoch": 1.66,
"learning_rate": 2.314760793465578e-06,
"loss": 0.5291,
"step": 3908
},
{
"epoch": 1.66,
"learning_rate": 2.3118436406067678e-06,
"loss": 0.5459,
"step": 3912
},
{
"epoch": 1.66,
"learning_rate": 2.308926487747958e-06,
"loss": 0.5774,
"step": 3916
},
{
"epoch": 1.67,
"learning_rate": 2.306009334889148e-06,
"loss": 0.5955,
"step": 3920
},
{
"epoch": 1.67,
"learning_rate": 2.303092182030339e-06,
"loss": 0.2941,
"step": 3924
},
{
"epoch": 1.67,
"learning_rate": 2.300175029171529e-06,
"loss": 0.4735,
"step": 3928
},
{
"epoch": 1.67,
"learning_rate": 2.297257876312719e-06,
"loss": 0.27,
"step": 3932
},
{
"epoch": 1.67,
"learning_rate": 2.294340723453909e-06,
"loss": 0.496,
"step": 3936
},
{
"epoch": 1.68,
"learning_rate": 2.2914235705950995e-06,
"loss": 0.279,
"step": 3940
},
{
"epoch": 1.68,
"learning_rate": 2.2885064177362894e-06,
"loss": 0.3848,
"step": 3944
},
{
"epoch": 1.68,
"learning_rate": 2.2855892648774798e-06,
"loss": 0.4868,
"step": 3948
},
{
"epoch": 1.68,
"learning_rate": 2.2826721120186697e-06,
"loss": 0.5328,
"step": 3952
},
{
"epoch": 1.68,
"learning_rate": 2.2797549591598605e-06,
"loss": 0.3838,
"step": 3956
},
{
"epoch": 1.68,
"learning_rate": 2.2768378063010504e-06,
"loss": 0.4603,
"step": 3960
},
{
"epoch": 1.69,
"learning_rate": 2.2739206534422408e-06,
"loss": 0.515,
"step": 3964
},
{
"epoch": 1.69,
"learning_rate": 2.2710035005834307e-06,
"loss": 0.4371,
"step": 3968
},
{
"epoch": 1.69,
"learning_rate": 2.268086347724621e-06,
"loss": 0.5561,
"step": 3972
},
{
"epoch": 1.69,
"learning_rate": 2.265169194865811e-06,
"loss": 0.3882,
"step": 3976
},
{
"epoch": 1.69,
"learning_rate": 2.2622520420070014e-06,
"loss": 0.4774,
"step": 3980
},
{
"epoch": 1.69,
"learning_rate": 2.2593348891481913e-06,
"loss": 0.6546,
"step": 3984
},
{
"epoch": 1.7,
"learning_rate": 2.2564177362893817e-06,
"loss": 0.4335,
"step": 3988
},
{
"epoch": 1.7,
"learning_rate": 2.253500583430572e-06,
"loss": 0.3352,
"step": 3992
},
{
"epoch": 1.7,
"learning_rate": 2.2505834305717624e-06,
"loss": 0.2946,
"step": 3996
},
{
"epoch": 1.7,
"learning_rate": 2.2476662777129523e-06,
"loss": 0.2657,
"step": 4000
},
{
"epoch": 1.7,
"learning_rate": 2.2447491248541427e-06,
"loss": 0.4859,
"step": 4004
},
{
"epoch": 1.7,
"learning_rate": 2.2418319719953326e-06,
"loss": 0.3047,
"step": 4008
},
{
"epoch": 1.71,
"learning_rate": 2.238914819136523e-06,
"loss": 0.5828,
"step": 4012
},
{
"epoch": 1.71,
"learning_rate": 2.235997666277713e-06,
"loss": 0.4141,
"step": 4016
},
{
"epoch": 1.71,
"learning_rate": 2.2330805134189033e-06,
"loss": 0.4643,
"step": 4020
},
{
"epoch": 1.71,
"learning_rate": 2.2301633605600937e-06,
"loss": 0.5517,
"step": 4024
},
{
"epoch": 1.71,
"learning_rate": 2.2272462077012836e-06,
"loss": 0.551,
"step": 4028
},
{
"epoch": 1.71,
"learning_rate": 2.224329054842474e-06,
"loss": 0.4237,
"step": 4032
},
{
"epoch": 1.72,
"learning_rate": 2.2214119019836643e-06,
"loss": 0.4345,
"step": 4036
},
{
"epoch": 1.72,
"learning_rate": 2.2184947491248543e-06,
"loss": 0.428,
"step": 4040
},
{
"epoch": 1.72,
"learning_rate": 2.2155775962660446e-06,
"loss": 0.3503,
"step": 4044
},
{
"epoch": 1.72,
"learning_rate": 2.2126604434072346e-06,
"loss": 0.4281,
"step": 4048
},
{
"epoch": 1.72,
"learning_rate": 2.209743290548425e-06,
"loss": 0.5451,
"step": 4052
},
{
"epoch": 1.72,
"learning_rate": 2.2068261376896153e-06,
"loss": 0.6199,
"step": 4056
},
{
"epoch": 1.73,
"learning_rate": 2.2039089848308052e-06,
"loss": 0.6851,
"step": 4060
},
{
"epoch": 1.73,
"learning_rate": 2.2009918319719956e-06,
"loss": 0.3541,
"step": 4064
},
{
"epoch": 1.73,
"learning_rate": 2.1980746791131855e-06,
"loss": 0.4333,
"step": 4068
},
{
"epoch": 1.73,
"learning_rate": 2.195157526254376e-06,
"loss": 0.413,
"step": 4072
},
{
"epoch": 1.73,
"learning_rate": 2.1922403733955663e-06,
"loss": 0.4385,
"step": 4076
},
{
"epoch": 1.73,
"learning_rate": 2.189323220536756e-06,
"loss": 0.3163,
"step": 4080
},
{
"epoch": 1.74,
"learning_rate": 2.1864060676779466e-06,
"loss": 0.4678,
"step": 4084
},
{
"epoch": 1.74,
"learning_rate": 2.183488914819137e-06,
"loss": 0.2889,
"step": 4088
},
{
"epoch": 1.74,
"learning_rate": 2.180571761960327e-06,
"loss": 0.3339,
"step": 4092
},
{
"epoch": 1.74,
"learning_rate": 2.1776546091015172e-06,
"loss": 0.4381,
"step": 4096
},
{
"epoch": 1.74,
"learning_rate": 2.174737456242707e-06,
"loss": 0.4926,
"step": 4100
},
{
"epoch": 1.74,
"learning_rate": 2.1718203033838975e-06,
"loss": 0.4147,
"step": 4104
},
{
"epoch": 1.75,
"learning_rate": 2.1689031505250875e-06,
"loss": 0.525,
"step": 4108
},
{
"epoch": 1.75,
"learning_rate": 2.165985997666278e-06,
"loss": 0.6573,
"step": 4112
},
{
"epoch": 1.75,
"learning_rate": 2.163068844807468e-06,
"loss": 0.4188,
"step": 4116
},
{
"epoch": 1.75,
"learning_rate": 2.1601516919486586e-06,
"loss": 0.2622,
"step": 4120
},
{
"epoch": 1.75,
"learning_rate": 2.1572345390898485e-06,
"loss": 0.4533,
"step": 4124
},
{
"epoch": 1.76,
"learning_rate": 2.154317386231039e-06,
"loss": 0.5007,
"step": 4128
},
{
"epoch": 1.76,
"learning_rate": 2.151400233372229e-06,
"loss": 0.5307,
"step": 4132
},
{
"epoch": 1.76,
"learning_rate": 2.148483080513419e-06,
"loss": 0.4071,
"step": 4136
},
{
"epoch": 1.76,
"learning_rate": 2.145565927654609e-06,
"loss": 0.5252,
"step": 4140
},
{
"epoch": 1.76,
"learning_rate": 2.1426487747957995e-06,
"loss": 0.3672,
"step": 4144
},
{
"epoch": 1.76,
"learning_rate": 2.1397316219369894e-06,
"loss": 0.3608,
"step": 4148
},
{
"epoch": 1.77,
"learning_rate": 2.1368144690781798e-06,
"loss": 0.4581,
"step": 4152
},
{
"epoch": 1.77,
"learning_rate": 2.13389731621937e-06,
"loss": 0.4953,
"step": 4156
},
{
"epoch": 1.77,
"learning_rate": 2.1309801633605605e-06,
"loss": 0.515,
"step": 4160
},
{
"epoch": 1.77,
"learning_rate": 2.1280630105017504e-06,
"loss": 0.4272,
"step": 4164
},
{
"epoch": 1.77,
"learning_rate": 2.1251458576429408e-06,
"loss": 0.5713,
"step": 4168
},
{
"epoch": 1.77,
"learning_rate": 2.1222287047841307e-06,
"loss": 0.3837,
"step": 4172
},
{
"epoch": 1.78,
"learning_rate": 2.119311551925321e-06,
"loss": 0.4367,
"step": 4176
},
{
"epoch": 1.78,
"learning_rate": 2.116394399066511e-06,
"loss": 0.3561,
"step": 4180
},
{
"epoch": 1.78,
"learning_rate": 2.1134772462077014e-06,
"loss": 0.2825,
"step": 4184
},
{
"epoch": 1.78,
"learning_rate": 2.1105600933488917e-06,
"loss": 0.2891,
"step": 4188
},
{
"epoch": 1.78,
"learning_rate": 2.107642940490082e-06,
"loss": 0.4617,
"step": 4192
},
{
"epoch": 1.78,
"learning_rate": 2.104725787631272e-06,
"loss": 0.4423,
"step": 4196
},
{
"epoch": 1.79,
"learning_rate": 2.1018086347724624e-06,
"loss": 0.2344,
"step": 4200
},
{
"epoch": 1.79,
"learning_rate": 2.0988914819136523e-06,
"loss": 0.5355,
"step": 4204
},
{
"epoch": 1.79,
"learning_rate": 2.0959743290548427e-06,
"loss": 0.427,
"step": 4208
},
{
"epoch": 1.79,
"learning_rate": 2.0930571761960326e-06,
"loss": 0.3997,
"step": 4212
},
{
"epoch": 1.79,
"learning_rate": 2.090140023337223e-06,
"loss": 0.3945,
"step": 4216
},
{
"epoch": 1.79,
"learning_rate": 2.0872228704784134e-06,
"loss": 0.3998,
"step": 4220
},
{
"epoch": 1.8,
"learning_rate": 2.0843057176196037e-06,
"loss": 0.4695,
"step": 4224
},
{
"epoch": 1.8,
"learning_rate": 2.0813885647607937e-06,
"loss": 0.3051,
"step": 4228
},
{
"epoch": 1.8,
"learning_rate": 2.078471411901984e-06,
"loss": 0.4195,
"step": 4232
},
{
"epoch": 1.8,
"learning_rate": 2.075554259043174e-06,
"loss": 0.4064,
"step": 4236
},
{
"epoch": 1.8,
"learning_rate": 2.0726371061843643e-06,
"loss": 0.4709,
"step": 4240
},
{
"epoch": 1.8,
"learning_rate": 2.0697199533255543e-06,
"loss": 0.6638,
"step": 4244
},
{
"epoch": 1.81,
"learning_rate": 2.0668028004667446e-06,
"loss": 0.4436,
"step": 4248
},
{
"epoch": 1.81,
"learning_rate": 2.0638856476079346e-06,
"loss": 0.3434,
"step": 4252
},
{
"epoch": 1.81,
"learning_rate": 2.060968494749125e-06,
"loss": 0.2871,
"step": 4256
},
{
"epoch": 1.81,
"learning_rate": 2.0580513418903153e-06,
"loss": 0.2696,
"step": 4260
},
{
"epoch": 1.81,
"learning_rate": 2.0551341890315057e-06,
"loss": 0.373,
"step": 4264
},
{
"epoch": 1.81,
"learning_rate": 2.0522170361726956e-06,
"loss": 0.4656,
"step": 4268
},
{
"epoch": 1.82,
"learning_rate": 2.049299883313886e-06,
"loss": 0.3027,
"step": 4272
},
{
"epoch": 1.82,
"learning_rate": 2.046382730455076e-06,
"loss": 0.7287,
"step": 4276
},
{
"epoch": 1.82,
"learning_rate": 2.0434655775962663e-06,
"loss": 0.3299,
"step": 4280
},
{
"epoch": 1.82,
"learning_rate": 2.040548424737456e-06,
"loss": 0.3556,
"step": 4284
},
{
"epoch": 1.82,
"learning_rate": 2.0376312718786466e-06,
"loss": 0.3275,
"step": 4288
},
{
"epoch": 1.82,
"learning_rate": 2.034714119019837e-06,
"loss": 0.334,
"step": 4292
},
{
"epoch": 1.83,
"learning_rate": 2.031796966161027e-06,
"loss": 0.4437,
"step": 4296
},
{
"epoch": 1.83,
"learning_rate": 2.0288798133022172e-06,
"loss": 0.4776,
"step": 4300
},
{
"epoch": 1.83,
"learning_rate": 2.0259626604434076e-06,
"loss": 0.497,
"step": 4304
},
{
"epoch": 1.83,
"learning_rate": 2.0230455075845975e-06,
"loss": 0.6054,
"step": 4308
},
{
"epoch": 1.83,
"learning_rate": 2.020128354725788e-06,
"loss": 0.3877,
"step": 4312
},
{
"epoch": 1.84,
"learning_rate": 2.017211201866978e-06,
"loss": 0.3442,
"step": 4316
},
{
"epoch": 1.84,
"learning_rate": 2.014294049008168e-06,
"loss": 0.5557,
"step": 4320
},
{
"epoch": 1.84,
"learning_rate": 2.0113768961493586e-06,
"loss": 0.5137,
"step": 4324
},
{
"epoch": 1.84,
"learning_rate": 2.0084597432905485e-06,
"loss": 0.6162,
"step": 4328
},
{
"epoch": 1.84,
"learning_rate": 2.005542590431739e-06,
"loss": 0.4181,
"step": 4332
},
{
"epoch": 1.84,
"learning_rate": 2.002625437572929e-06,
"loss": 0.3354,
"step": 4336
},
{
"epoch": 1.85,
"learning_rate": 1.999708284714119e-06,
"loss": 0.3924,
"step": 4340
},
{
"epoch": 1.85,
"learning_rate": 1.9967911318553095e-06,
"loss": 0.3781,
"step": 4344
},
{
"epoch": 1.85,
"learning_rate": 1.9938739789964995e-06,
"loss": 0.4388,
"step": 4348
},
{
"epoch": 1.85,
"learning_rate": 1.99095682613769e-06,
"loss": 0.4512,
"step": 4352
},
{
"epoch": 1.85,
"learning_rate": 1.98803967327888e-06,
"loss": 0.4171,
"step": 4356
},
{
"epoch": 1.85,
"learning_rate": 1.98512252042007e-06,
"loss": 0.3768,
"step": 4360
},
{
"epoch": 1.86,
"learning_rate": 1.9822053675612605e-06,
"loss": 0.3517,
"step": 4364
},
{
"epoch": 1.86,
"learning_rate": 1.9792882147024504e-06,
"loss": 0.412,
"step": 4368
},
{
"epoch": 1.86,
"learning_rate": 1.9763710618436408e-06,
"loss": 0.2887,
"step": 4372
},
{
"epoch": 1.86,
"learning_rate": 1.9734539089848307e-06,
"loss": 0.3032,
"step": 4376
},
{
"epoch": 1.86,
"learning_rate": 1.970536756126021e-06,
"loss": 0.3046,
"step": 4380
},
{
"epoch": 1.86,
"learning_rate": 1.9676196032672114e-06,
"loss": 0.3144,
"step": 4384
},
{
"epoch": 1.87,
"learning_rate": 1.964702450408402e-06,
"loss": 0.533,
"step": 4388
},
{
"epoch": 1.87,
"learning_rate": 1.9617852975495917e-06,
"loss": 0.3993,
"step": 4392
},
{
"epoch": 1.87,
"learning_rate": 1.958868144690782e-06,
"loss": 0.3253,
"step": 4396
},
{
"epoch": 1.87,
"learning_rate": 1.955950991831972e-06,
"loss": 0.4888,
"step": 4400
},
{
"epoch": 1.87,
"learning_rate": 1.9530338389731624e-06,
"loss": 0.5357,
"step": 4404
},
{
"epoch": 1.87,
"learning_rate": 1.9501166861143524e-06,
"loss": 0.3057,
"step": 4408
},
{
"epoch": 1.88,
"learning_rate": 1.9471995332555427e-06,
"loss": 0.3553,
"step": 4412
},
{
"epoch": 1.88,
"learning_rate": 1.9442823803967327e-06,
"loss": 0.3878,
"step": 4416
},
{
"epoch": 1.88,
"learning_rate": 1.9413652275379234e-06,
"loss": 0.315,
"step": 4420
},
{
"epoch": 1.88,
"learning_rate": 1.9384480746791134e-06,
"loss": 0.3791,
"step": 4424
},
{
"epoch": 1.88,
"learning_rate": 1.9355309218203037e-06,
"loss": 0.4092,
"step": 4428
},
{
"epoch": 1.88,
"learning_rate": 1.9326137689614937e-06,
"loss": 0.3911,
"step": 4432
},
{
"epoch": 1.89,
"learning_rate": 1.929696616102684e-06,
"loss": 0.3135,
"step": 4436
},
{
"epoch": 1.89,
"learning_rate": 1.926779463243874e-06,
"loss": 0.3574,
"step": 4440
},
{
"epoch": 1.89,
"learning_rate": 1.9238623103850643e-06,
"loss": 0.3147,
"step": 4444
},
{
"epoch": 1.89,
"learning_rate": 1.9209451575262543e-06,
"loss": 0.5802,
"step": 4448
},
{
"epoch": 1.89,
"learning_rate": 1.9180280046674446e-06,
"loss": 0.5169,
"step": 4452
},
{
"epoch": 1.89,
"learning_rate": 1.915110851808635e-06,
"loss": 0.4599,
"step": 4456
},
{
"epoch": 1.9,
"learning_rate": 1.9121936989498254e-06,
"loss": 0.3763,
"step": 4460
},
{
"epoch": 1.9,
"learning_rate": 1.9092765460910153e-06,
"loss": 0.4633,
"step": 4464
},
{
"epoch": 1.9,
"learning_rate": 1.9063593932322055e-06,
"loss": 0.462,
"step": 4468
},
{
"epoch": 1.9,
"learning_rate": 1.9034422403733956e-06,
"loss": 0.4908,
"step": 4472
},
{
"epoch": 1.9,
"learning_rate": 1.900525087514586e-06,
"loss": 0.3367,
"step": 4476
},
{
"epoch": 1.9,
"learning_rate": 1.8976079346557761e-06,
"loss": 0.4497,
"step": 4480
},
{
"epoch": 1.91,
"learning_rate": 1.8946907817969663e-06,
"loss": 0.3253,
"step": 4484
},
{
"epoch": 1.91,
"learning_rate": 1.8917736289381566e-06,
"loss": 0.5464,
"step": 4488
},
{
"epoch": 1.91,
"learning_rate": 1.8888564760793468e-06,
"loss": 0.3004,
"step": 4492
},
{
"epoch": 1.91,
"learning_rate": 1.885939323220537e-06,
"loss": 0.3876,
"step": 4496
},
{
"epoch": 1.91,
"learning_rate": 1.883022170361727e-06,
"loss": 0.4443,
"step": 4500
}
],
"logging_steps": 4,
"max_steps": 7056,
"num_input_tokens_seen": 0,
"num_train_epochs": 3,
"save_steps": 100,
"total_flos": 34569317253120.0,
"train_batch_size": 2,
"trial_name": null,
"trial_params": null
}