Nhanvi282's picture
Model save
79d09c0 verified
{
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 3.998925020155872,
"eval_steps": 500,
"global_step": 3720,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.0,
"learning_rate": 3.571428571428571e-08,
"loss": 1.7752,
"step": 2
},
{
"epoch": 0.0,
"learning_rate": 7.142857142857142e-08,
"loss": 1.7952,
"step": 4
},
{
"epoch": 0.01,
"learning_rate": 1.0714285714285713e-07,
"loss": 1.7784,
"step": 6
},
{
"epoch": 0.01,
"learning_rate": 1.4285714285714285e-07,
"loss": 1.7848,
"step": 8
},
{
"epoch": 0.01,
"learning_rate": 1.7857142857142858e-07,
"loss": 1.7954,
"step": 10
},
{
"epoch": 0.01,
"learning_rate": 2.1428571428571426e-07,
"loss": 1.7604,
"step": 12
},
{
"epoch": 0.02,
"learning_rate": 2.5e-07,
"loss": 1.8112,
"step": 14
},
{
"epoch": 0.02,
"learning_rate": 2.857142857142857e-07,
"loss": 1.7883,
"step": 16
},
{
"epoch": 0.02,
"learning_rate": 3.2142857142857145e-07,
"loss": 1.8288,
"step": 18
},
{
"epoch": 0.02,
"learning_rate": 3.5714285714285716e-07,
"loss": 1.838,
"step": 20
},
{
"epoch": 0.02,
"learning_rate": 3.928571428571428e-07,
"loss": 1.7953,
"step": 22
},
{
"epoch": 0.03,
"learning_rate": 4.285714285714285e-07,
"loss": 1.751,
"step": 24
},
{
"epoch": 0.03,
"learning_rate": 4.6428571428571427e-07,
"loss": 1.8237,
"step": 26
},
{
"epoch": 0.03,
"learning_rate": 5e-07,
"loss": 1.8142,
"step": 28
},
{
"epoch": 0.03,
"learning_rate": 5.357142857142857e-07,
"loss": 1.8103,
"step": 30
},
{
"epoch": 0.03,
"learning_rate": 5.714285714285714e-07,
"loss": 1.7695,
"step": 32
},
{
"epoch": 0.04,
"learning_rate": 6.071428571428571e-07,
"loss": 1.8141,
"step": 34
},
{
"epoch": 0.04,
"learning_rate": 6.428571428571429e-07,
"loss": 1.8245,
"step": 36
},
{
"epoch": 0.04,
"learning_rate": 6.785714285714286e-07,
"loss": 1.8322,
"step": 38
},
{
"epoch": 0.04,
"learning_rate": 7.142857142857143e-07,
"loss": 1.8062,
"step": 40
},
{
"epoch": 0.05,
"learning_rate": 7.5e-07,
"loss": 1.8008,
"step": 42
},
{
"epoch": 0.05,
"learning_rate": 7.857142857142856e-07,
"loss": 1.8011,
"step": 44
},
{
"epoch": 0.05,
"learning_rate": 8.214285714285713e-07,
"loss": 1.7714,
"step": 46
},
{
"epoch": 0.05,
"learning_rate": 8.57142857142857e-07,
"loss": 1.7806,
"step": 48
},
{
"epoch": 0.05,
"learning_rate": 8.928571428571428e-07,
"loss": 1.8029,
"step": 50
},
{
"epoch": 0.06,
"learning_rate": 9.285714285714285e-07,
"loss": 1.7956,
"step": 52
},
{
"epoch": 0.06,
"learning_rate": 9.642857142857142e-07,
"loss": 1.759,
"step": 54
},
{
"epoch": 0.06,
"learning_rate": 1e-06,
"loss": 1.8218,
"step": 56
},
{
"epoch": 0.06,
"learning_rate": 1.0357142857142857e-06,
"loss": 1.7866,
"step": 58
},
{
"epoch": 0.06,
"learning_rate": 1.0714285714285714e-06,
"loss": 1.7776,
"step": 60
},
{
"epoch": 0.07,
"learning_rate": 1.107142857142857e-06,
"loss": 1.7683,
"step": 62
},
{
"epoch": 0.07,
"learning_rate": 1.1428571428571428e-06,
"loss": 1.7662,
"step": 64
},
{
"epoch": 0.07,
"learning_rate": 1.1785714285714285e-06,
"loss": 1.844,
"step": 66
},
{
"epoch": 0.07,
"learning_rate": 1.2142857142857142e-06,
"loss": 1.8085,
"step": 68
},
{
"epoch": 0.08,
"learning_rate": 1.2499999999999999e-06,
"loss": 1.7617,
"step": 70
},
{
"epoch": 0.08,
"learning_rate": 1.2857142857142858e-06,
"loss": 1.7718,
"step": 72
},
{
"epoch": 0.08,
"learning_rate": 1.3214285714285713e-06,
"loss": 1.8005,
"step": 74
},
{
"epoch": 0.08,
"learning_rate": 1.3571428571428572e-06,
"loss": 1.8495,
"step": 76
},
{
"epoch": 0.08,
"learning_rate": 1.3928571428571427e-06,
"loss": 1.77,
"step": 78
},
{
"epoch": 0.09,
"learning_rate": 1.4285714285714286e-06,
"loss": 1.8365,
"step": 80
},
{
"epoch": 0.09,
"learning_rate": 1.4642857142857141e-06,
"loss": 1.8015,
"step": 82
},
{
"epoch": 0.09,
"learning_rate": 1.5e-06,
"loss": 1.7715,
"step": 84
},
{
"epoch": 0.09,
"learning_rate": 1.5357142857142857e-06,
"loss": 1.7716,
"step": 86
},
{
"epoch": 0.09,
"learning_rate": 1.5714285714285712e-06,
"loss": 1.8093,
"step": 88
},
{
"epoch": 0.1,
"learning_rate": 1.6071428571428572e-06,
"loss": 1.7822,
"step": 90
},
{
"epoch": 0.1,
"learning_rate": 1.6428571428571426e-06,
"loss": 1.8427,
"step": 92
},
{
"epoch": 0.1,
"learning_rate": 1.6785714285714286e-06,
"loss": 1.7447,
"step": 94
},
{
"epoch": 0.1,
"learning_rate": 1.714285714285714e-06,
"loss": 1.8466,
"step": 96
},
{
"epoch": 0.11,
"learning_rate": 1.75e-06,
"loss": 1.7943,
"step": 98
},
{
"epoch": 0.11,
"learning_rate": 1.7857142857142857e-06,
"loss": 1.7368,
"step": 100
},
{
"epoch": 0.11,
"learning_rate": 1.8214285714285714e-06,
"loss": 1.7748,
"step": 102
},
{
"epoch": 0.11,
"learning_rate": 1.857142857142857e-06,
"loss": 1.7385,
"step": 104
},
{
"epoch": 0.11,
"learning_rate": 1.8928571428571428e-06,
"loss": 1.7912,
"step": 106
},
{
"epoch": 0.12,
"learning_rate": 1.9285714285714285e-06,
"loss": 1.8207,
"step": 108
},
{
"epoch": 0.12,
"learning_rate": 1.964285714285714e-06,
"loss": 1.8504,
"step": 110
},
{
"epoch": 0.12,
"learning_rate": 2e-06,
"loss": 1.7297,
"step": 112
},
{
"epoch": 0.12,
"learning_rate": 1.9999984836600627e-06,
"loss": 1.804,
"step": 114
},
{
"epoch": 0.12,
"learning_rate": 1.9999939346448493e-06,
"loss": 1.7596,
"step": 116
},
{
"epoch": 0.13,
"learning_rate": 1.9999863529681556e-06,
"loss": 1.7427,
"step": 118
},
{
"epoch": 0.13,
"learning_rate": 1.9999757386529745e-06,
"loss": 1.7642,
"step": 120
},
{
"epoch": 0.13,
"learning_rate": 1.9999620917314953e-06,
"loss": 1.7923,
"step": 122
},
{
"epoch": 0.13,
"learning_rate": 1.9999454122451056e-06,
"loss": 1.8078,
"step": 124
},
{
"epoch": 0.14,
"learning_rate": 1.9999257002443882e-06,
"loss": 1.7529,
"step": 126
},
{
"epoch": 0.14,
"learning_rate": 1.9999029557891238e-06,
"loss": 1.8209,
"step": 128
},
{
"epoch": 0.14,
"learning_rate": 1.9998771789482887e-06,
"loss": 1.7966,
"step": 130
},
{
"epoch": 0.14,
"learning_rate": 1.999848369800056e-06,
"loss": 1.7624,
"step": 132
},
{
"epoch": 0.14,
"learning_rate": 1.9998165284317942e-06,
"loss": 1.7955,
"step": 134
},
{
"epoch": 0.15,
"learning_rate": 1.9997816549400686e-06,
"loss": 1.7967,
"step": 136
},
{
"epoch": 0.15,
"learning_rate": 1.999743749430639e-06,
"loss": 1.812,
"step": 138
},
{
"epoch": 0.15,
"learning_rate": 1.999702812018461e-06,
"loss": 1.7924,
"step": 140
},
{
"epoch": 0.15,
"learning_rate": 1.999658842827684e-06,
"loss": 1.8119,
"step": 142
},
{
"epoch": 0.15,
"learning_rate": 1.999611841991653e-06,
"loss": 1.7845,
"step": 144
},
{
"epoch": 0.16,
"learning_rate": 1.9995618096529063e-06,
"loss": 1.7908,
"step": 146
},
{
"epoch": 0.16,
"learning_rate": 1.999508745963176e-06,
"loss": 1.7558,
"step": 148
},
{
"epoch": 0.16,
"learning_rate": 1.999452651083388e-06,
"loss": 1.7613,
"step": 150
},
{
"epoch": 0.16,
"learning_rate": 1.9993935251836588e-06,
"loss": 1.7936,
"step": 152
},
{
"epoch": 0.17,
"learning_rate": 1.999331368443299e-06,
"loss": 1.8252,
"step": 154
},
{
"epoch": 0.17,
"learning_rate": 1.99926618105081e-06,
"loss": 1.7885,
"step": 156
},
{
"epoch": 0.17,
"learning_rate": 1.9991979632038844e-06,
"loss": 1.7688,
"step": 158
},
{
"epoch": 0.17,
"learning_rate": 1.999126715109405e-06,
"loss": 1.7978,
"step": 160
},
{
"epoch": 0.17,
"learning_rate": 1.9990524369834445e-06,
"loss": 1.7914,
"step": 162
},
{
"epoch": 0.18,
"learning_rate": 1.9989751290512647e-06,
"loss": 1.7627,
"step": 164
},
{
"epoch": 0.18,
"learning_rate": 1.998894791547316e-06,
"loss": 1.7832,
"step": 166
},
{
"epoch": 0.18,
"learning_rate": 1.998811424715236e-06,
"loss": 1.7679,
"step": 168
},
{
"epoch": 0.18,
"learning_rate": 1.99872502880785e-06,
"loss": 1.7889,
"step": 170
},
{
"epoch": 0.18,
"learning_rate": 1.9986356040871684e-06,
"loss": 1.755,
"step": 172
},
{
"epoch": 0.19,
"learning_rate": 1.9985431508243884e-06,
"loss": 1.8065,
"step": 174
},
{
"epoch": 0.19,
"learning_rate": 1.998447669299891e-06,
"loss": 1.7217,
"step": 176
},
{
"epoch": 0.19,
"learning_rate": 1.998349159803241e-06,
"loss": 1.79,
"step": 178
},
{
"epoch": 0.19,
"learning_rate": 1.9982476226331863e-06,
"loss": 1.7365,
"step": 180
},
{
"epoch": 0.2,
"learning_rate": 1.9981430580976567e-06,
"loss": 1.7904,
"step": 182
},
{
"epoch": 0.2,
"learning_rate": 1.9980354665137626e-06,
"loss": 1.7985,
"step": 184
},
{
"epoch": 0.2,
"learning_rate": 1.997924848207795e-06,
"loss": 1.7578,
"step": 186
},
{
"epoch": 0.2,
"learning_rate": 1.997811203515224e-06,
"loss": 1.7971,
"step": 188
},
{
"epoch": 0.2,
"learning_rate": 1.997694532780697e-06,
"loss": 1.7861,
"step": 190
},
{
"epoch": 0.21,
"learning_rate": 1.9975748363580403e-06,
"loss": 1.8196,
"step": 192
},
{
"epoch": 0.21,
"learning_rate": 1.9974521146102534e-06,
"loss": 1.7824,
"step": 194
},
{
"epoch": 0.21,
"learning_rate": 1.9973263679095126e-06,
"loss": 1.7837,
"step": 196
},
{
"epoch": 0.21,
"learning_rate": 1.9971975966371677e-06,
"loss": 1.7168,
"step": 198
},
{
"epoch": 0.21,
"learning_rate": 1.9970658011837403e-06,
"loss": 1.786,
"step": 200
},
{
"epoch": 0.22,
"learning_rate": 1.996930981948924e-06,
"loss": 1.7517,
"step": 202
},
{
"epoch": 0.22,
"learning_rate": 1.9967931393415824e-06,
"loss": 1.7935,
"step": 204
},
{
"epoch": 0.22,
"learning_rate": 1.996652273779748e-06,
"loss": 1.7614,
"step": 206
},
{
"epoch": 0.22,
"learning_rate": 1.996508385690621e-06,
"loss": 1.7422,
"step": 208
},
{
"epoch": 0.23,
"learning_rate": 1.9963614755105675e-06,
"loss": 1.7906,
"step": 210
},
{
"epoch": 0.23,
"learning_rate": 1.9962115436851197e-06,
"loss": 1.7935,
"step": 212
},
{
"epoch": 0.23,
"learning_rate": 1.9960585906689724e-06,
"loss": 1.8088,
"step": 214
},
{
"epoch": 0.23,
"learning_rate": 1.995902616925983e-06,
"loss": 1.7836,
"step": 216
},
{
"epoch": 0.23,
"learning_rate": 1.995743622929171e-06,
"loss": 1.8082,
"step": 218
},
{
"epoch": 0.24,
"learning_rate": 1.9955816091607123e-06,
"loss": 1.7421,
"step": 220
},
{
"epoch": 0.24,
"learning_rate": 1.9954165761119447e-06,
"loss": 1.7582,
"step": 222
},
{
"epoch": 0.24,
"learning_rate": 1.99524852428336e-06,
"loss": 1.7798,
"step": 224
},
{
"epoch": 0.24,
"learning_rate": 1.9950774541846052e-06,
"loss": 1.7439,
"step": 226
},
{
"epoch": 0.25,
"learning_rate": 1.9949033663344813e-06,
"loss": 1.8031,
"step": 228
},
{
"epoch": 0.25,
"learning_rate": 1.9947262612609412e-06,
"loss": 1.8042,
"step": 230
},
{
"epoch": 0.25,
"learning_rate": 1.994546139501088e-06,
"loss": 1.723,
"step": 232
},
{
"epoch": 0.25,
"learning_rate": 1.994363001601173e-06,
"loss": 1.7698,
"step": 234
},
{
"epoch": 0.25,
"learning_rate": 1.994176848116595e-06,
"loss": 1.8292,
"step": 236
},
{
"epoch": 0.26,
"learning_rate": 1.993987679611898e-06,
"loss": 1.7419,
"step": 238
},
{
"epoch": 0.26,
"learning_rate": 1.993795496660769e-06,
"loss": 1.7854,
"step": 240
},
{
"epoch": 0.26,
"learning_rate": 1.9936002998460383e-06,
"loss": 1.7535,
"step": 242
},
{
"epoch": 0.26,
"learning_rate": 1.9934020897596747e-06,
"loss": 1.7828,
"step": 244
},
{
"epoch": 0.26,
"learning_rate": 1.9932008670027864e-06,
"loss": 1.7446,
"step": 246
},
{
"epoch": 0.27,
"learning_rate": 1.992996632185617e-06,
"loss": 1.7538,
"step": 248
},
{
"epoch": 0.27,
"learning_rate": 1.9927893859275457e-06,
"loss": 1.7395,
"step": 250
},
{
"epoch": 0.27,
"learning_rate": 1.9925791288570837e-06,
"loss": 1.8124,
"step": 252
},
{
"epoch": 0.27,
"learning_rate": 1.992365861611874e-06,
"loss": 1.8087,
"step": 254
},
{
"epoch": 0.28,
"learning_rate": 1.9921495848386874e-06,
"loss": 1.8488,
"step": 256
},
{
"epoch": 0.28,
"learning_rate": 1.9919302991934224e-06,
"loss": 1.7739,
"step": 258
},
{
"epoch": 0.28,
"learning_rate": 1.991708005341102e-06,
"loss": 1.8297,
"step": 260
},
{
"epoch": 0.28,
"learning_rate": 1.991482703955872e-06,
"loss": 1.718,
"step": 262
},
{
"epoch": 0.28,
"learning_rate": 1.9912543957209997e-06,
"loss": 1.8038,
"step": 264
},
{
"epoch": 0.29,
"learning_rate": 1.991023081328871e-06,
"loss": 1.7552,
"step": 266
},
{
"epoch": 0.29,
"learning_rate": 1.9907887614809888e-06,
"loss": 1.7854,
"step": 268
},
{
"epoch": 0.29,
"learning_rate": 1.990551436887969e-06,
"loss": 1.7622,
"step": 270
},
{
"epoch": 0.29,
"learning_rate": 1.990311108269542e-06,
"loss": 1.7976,
"step": 272
},
{
"epoch": 0.29,
"learning_rate": 1.990067776354547e-06,
"loss": 1.7512,
"step": 274
},
{
"epoch": 0.3,
"learning_rate": 1.9898214418809326e-06,
"loss": 1.7963,
"step": 276
},
{
"epoch": 0.3,
"learning_rate": 1.989572105595752e-06,
"loss": 1.7836,
"step": 278
},
{
"epoch": 0.3,
"learning_rate": 1.989319768255162e-06,
"loss": 1.8183,
"step": 280
},
{
"epoch": 0.3,
"learning_rate": 1.9890644306244213e-06,
"loss": 1.7846,
"step": 282
},
{
"epoch": 0.31,
"learning_rate": 1.9888060934778874e-06,
"loss": 1.7769,
"step": 284
},
{
"epoch": 0.31,
"learning_rate": 1.988544757599014e-06,
"loss": 1.7932,
"step": 286
},
{
"epoch": 0.31,
"learning_rate": 1.9882804237803485e-06,
"loss": 1.8283,
"step": 288
},
{
"epoch": 0.31,
"learning_rate": 1.9880130928235315e-06,
"loss": 1.7194,
"step": 290
},
{
"epoch": 0.31,
"learning_rate": 1.9877427655392924e-06,
"loss": 1.7643,
"step": 292
},
{
"epoch": 0.32,
"learning_rate": 1.9874694427474464e-06,
"loss": 1.8056,
"step": 294
},
{
"epoch": 0.32,
"learning_rate": 1.9871931252768952e-06,
"loss": 1.8192,
"step": 296
},
{
"epoch": 0.32,
"learning_rate": 1.98691381396562e-06,
"loss": 1.7687,
"step": 298
},
{
"epoch": 0.32,
"learning_rate": 1.986631509660684e-06,
"loss": 1.7932,
"step": 300
},
{
"epoch": 0.32,
"learning_rate": 1.9863462132182247e-06,
"loss": 1.8684,
"step": 302
},
{
"epoch": 0.33,
"learning_rate": 1.986057925503455e-06,
"loss": 1.8455,
"step": 304
},
{
"epoch": 0.33,
"learning_rate": 1.985766647390659e-06,
"loss": 1.7187,
"step": 306
},
{
"epoch": 0.33,
"learning_rate": 1.9854723797631912e-06,
"loss": 1.7546,
"step": 308
},
{
"epoch": 0.33,
"learning_rate": 1.98517512351347e-06,
"loss": 1.7731,
"step": 310
},
{
"epoch": 0.34,
"learning_rate": 1.9848748795429785e-06,
"loss": 1.7852,
"step": 312
},
{
"epoch": 0.34,
"learning_rate": 1.984571648762261e-06,
"loss": 1.7534,
"step": 314
},
{
"epoch": 0.34,
"learning_rate": 1.9842654320909194e-06,
"loss": 1.8198,
"step": 316
},
{
"epoch": 0.34,
"learning_rate": 1.98395623045761e-06,
"loss": 1.7499,
"step": 318
},
{
"epoch": 0.34,
"learning_rate": 1.983644044800044e-06,
"loss": 1.741,
"step": 320
},
{
"epoch": 0.35,
"learning_rate": 1.9833288760649787e-06,
"loss": 1.7557,
"step": 322
},
{
"epoch": 0.35,
"learning_rate": 1.983010725208221e-06,
"loss": 1.7607,
"step": 324
},
{
"epoch": 0.35,
"learning_rate": 1.9826895931946206e-06,
"loss": 1.7521,
"step": 326
},
{
"epoch": 0.35,
"learning_rate": 1.9823654809980682e-06,
"loss": 1.8065,
"step": 328
},
{
"epoch": 0.35,
"learning_rate": 1.9820383896014917e-06,
"loss": 1.7385,
"step": 330
},
{
"epoch": 0.36,
"learning_rate": 1.981708319996855e-06,
"loss": 1.866,
"step": 332
},
{
"epoch": 0.36,
"learning_rate": 1.9813752731851535e-06,
"loss": 1.7722,
"step": 334
},
{
"epoch": 0.36,
"learning_rate": 1.9810392501764116e-06,
"loss": 1.7947,
"step": 336
},
{
"epoch": 0.36,
"learning_rate": 1.9807002519896793e-06,
"loss": 1.7951,
"step": 338
},
{
"epoch": 0.37,
"learning_rate": 1.98035827965303e-06,
"loss": 1.735,
"step": 340
},
{
"epoch": 0.37,
"learning_rate": 1.980013334203556e-06,
"loss": 1.7485,
"step": 342
},
{
"epoch": 0.37,
"learning_rate": 1.9796654166873666e-06,
"loss": 1.7587,
"step": 344
},
{
"epoch": 0.37,
"learning_rate": 1.979314528159584e-06,
"loss": 1.8017,
"step": 346
},
{
"epoch": 0.37,
"learning_rate": 1.978960669684341e-06,
"loss": 1.7886,
"step": 348
},
{
"epoch": 0.38,
"learning_rate": 1.9786038423347772e-06,
"loss": 1.8133,
"step": 350
},
{
"epoch": 0.38,
"learning_rate": 1.978244047193035e-06,
"loss": 1.8114,
"step": 352
},
{
"epoch": 0.38,
"learning_rate": 1.977881285350259e-06,
"loss": 1.7753,
"step": 354
},
{
"epoch": 0.38,
"learning_rate": 1.9775155579065892e-06,
"loss": 1.8068,
"step": 356
},
{
"epoch": 0.38,
"learning_rate": 1.9771468659711594e-06,
"loss": 1.7784,
"step": 358
},
{
"epoch": 0.39,
"learning_rate": 1.9767752106620947e-06,
"loss": 1.8183,
"step": 360
},
{
"epoch": 0.39,
"learning_rate": 1.976400593106507e-06,
"loss": 1.7066,
"step": 362
},
{
"epoch": 0.39,
"learning_rate": 1.9760230144404907e-06,
"loss": 1.7062,
"step": 364
},
{
"epoch": 0.39,
"learning_rate": 1.9756424758091217e-06,
"loss": 1.7605,
"step": 366
},
{
"epoch": 0.4,
"learning_rate": 1.975258978366451e-06,
"loss": 1.7559,
"step": 368
},
{
"epoch": 0.4,
"learning_rate": 1.974872523275504e-06,
"loss": 1.8098,
"step": 370
},
{
"epoch": 0.4,
"learning_rate": 1.9744831117082755e-06,
"loss": 1.772,
"step": 372
},
{
"epoch": 0.4,
"learning_rate": 1.974090744845726e-06,
"loss": 1.8103,
"step": 374
},
{
"epoch": 0.4,
"learning_rate": 1.973695423877779e-06,
"loss": 1.7503,
"step": 376
},
{
"epoch": 0.41,
"learning_rate": 1.9732971500033156e-06,
"loss": 1.7739,
"step": 378
},
{
"epoch": 0.41,
"learning_rate": 1.9728959244301735e-06,
"loss": 1.7637,
"step": 380
},
{
"epoch": 0.41,
"learning_rate": 1.972491748375141e-06,
"loss": 1.8106,
"step": 382
},
{
"epoch": 0.41,
"learning_rate": 1.9720846230639556e-06,
"loss": 1.743,
"step": 384
},
{
"epoch": 0.41,
"learning_rate": 1.971674549731297e-06,
"loss": 1.7469,
"step": 386
},
{
"epoch": 0.42,
"learning_rate": 1.971261529620787e-06,
"loss": 1.7519,
"step": 388
},
{
"epoch": 0.42,
"learning_rate": 1.9708455639849825e-06,
"loss": 1.7682,
"step": 390
},
{
"epoch": 0.42,
"learning_rate": 1.970426654085375e-06,
"loss": 1.7515,
"step": 392
},
{
"epoch": 0.42,
"learning_rate": 1.970004801192384e-06,
"loss": 1.7138,
"step": 394
},
{
"epoch": 0.43,
"learning_rate": 1.9695800065853547e-06,
"loss": 1.8249,
"step": 396
},
{
"epoch": 0.43,
"learning_rate": 1.9691522715525517e-06,
"loss": 1.7805,
"step": 398
},
{
"epoch": 0.43,
"learning_rate": 1.9687215973911596e-06,
"loss": 1.7202,
"step": 400
},
{
"epoch": 0.43,
"learning_rate": 1.9682879854072753e-06,
"loss": 1.7721,
"step": 402
},
{
"epoch": 0.43,
"learning_rate": 1.9678514369159046e-06,
"loss": 1.7675,
"step": 404
},
{
"epoch": 0.44,
"learning_rate": 1.9674119532409598e-06,
"loss": 1.779,
"step": 406
},
{
"epoch": 0.44,
"learning_rate": 1.9669695357152538e-06,
"loss": 1.8394,
"step": 408
},
{
"epoch": 0.44,
"learning_rate": 1.9665241856804975e-06,
"loss": 1.748,
"step": 410
},
{
"epoch": 0.44,
"learning_rate": 1.9660759044872946e-06,
"loss": 1.7679,
"step": 412
},
{
"epoch": 0.45,
"learning_rate": 1.965624693495139e-06,
"loss": 1.7867,
"step": 414
},
{
"epoch": 0.45,
"learning_rate": 1.965170554072409e-06,
"loss": 1.7682,
"step": 416
},
{
"epoch": 0.45,
"learning_rate": 1.964713487596364e-06,
"loss": 1.7262,
"step": 418
},
{
"epoch": 0.45,
"learning_rate": 1.964253495453141e-06,
"loss": 1.7713,
"step": 420
},
{
"epoch": 0.45,
"learning_rate": 1.963790579037748e-06,
"loss": 1.7083,
"step": 422
},
{
"epoch": 0.46,
"learning_rate": 1.9633247397540626e-06,
"loss": 1.8342,
"step": 424
},
{
"epoch": 0.46,
"learning_rate": 1.962855979014826e-06,
"loss": 1.7704,
"step": 426
},
{
"epoch": 0.46,
"learning_rate": 1.96238429824164e-06,
"loss": 1.7844,
"step": 428
},
{
"epoch": 0.46,
"learning_rate": 1.961909698864961e-06,
"loss": 1.7696,
"step": 430
},
{
"epoch": 0.46,
"learning_rate": 1.961432182324097e-06,
"loss": 1.7241,
"step": 432
},
{
"epoch": 0.47,
"learning_rate": 1.960951750067203e-06,
"loss": 1.7886,
"step": 434
},
{
"epoch": 0.47,
"learning_rate": 1.9604684035512757e-06,
"loss": 1.7559,
"step": 436
},
{
"epoch": 0.47,
"learning_rate": 1.9599821442421505e-06,
"loss": 1.7642,
"step": 438
},
{
"epoch": 0.47,
"learning_rate": 1.9594929736144973e-06,
"loss": 1.7509,
"step": 440
},
{
"epoch": 0.48,
"learning_rate": 1.959000893151813e-06,
"loss": 1.8134,
"step": 442
},
{
"epoch": 0.48,
"learning_rate": 1.95850590434642e-06,
"loss": 1.6866,
"step": 444
},
{
"epoch": 0.48,
"learning_rate": 1.9580080086994616e-06,
"loss": 1.8187,
"step": 446
},
{
"epoch": 0.48,
"learning_rate": 1.9575072077208952e-06,
"loss": 1.7784,
"step": 448
},
{
"epoch": 0.48,
"learning_rate": 1.95700350292949e-06,
"loss": 1.761,
"step": 450
},
{
"epoch": 0.49,
"learning_rate": 1.9564968958528217e-06,
"loss": 1.7806,
"step": 452
},
{
"epoch": 0.49,
"learning_rate": 1.9559873880272677e-06,
"loss": 1.7697,
"step": 454
},
{
"epoch": 0.49,
"learning_rate": 1.955474980998001e-06,
"loss": 1.7629,
"step": 456
},
{
"epoch": 0.49,
"learning_rate": 1.954959676318989e-06,
"loss": 1.8016,
"step": 458
},
{
"epoch": 0.49,
"learning_rate": 1.9544414755529855e-06,
"loss": 1.7795,
"step": 460
},
{
"epoch": 0.5,
"learning_rate": 1.9539203802715277e-06,
"loss": 1.7224,
"step": 462
},
{
"epoch": 0.5,
"learning_rate": 1.9533963920549303e-06,
"loss": 1.7114,
"step": 464
},
{
"epoch": 0.5,
"learning_rate": 1.9528695124922823e-06,
"loss": 1.7711,
"step": 466
},
{
"epoch": 0.5,
"learning_rate": 1.952339743181441e-06,
"loss": 1.7241,
"step": 468
},
{
"epoch": 0.51,
"learning_rate": 1.9518070857290267e-06,
"loss": 1.7445,
"step": 470
},
{
"epoch": 0.51,
"learning_rate": 1.951271541750419e-06,
"loss": 1.7339,
"step": 472
},
{
"epoch": 0.51,
"learning_rate": 1.9507331128697513e-06,
"loss": 1.7487,
"step": 474
},
{
"epoch": 0.51,
"learning_rate": 1.950191800719906e-06,
"loss": 1.7585,
"step": 476
},
{
"epoch": 0.51,
"learning_rate": 1.9496476069425093e-06,
"loss": 1.7481,
"step": 478
},
{
"epoch": 0.52,
"learning_rate": 1.9491005331879276e-06,
"loss": 1.7295,
"step": 480
},
{
"epoch": 0.52,
"learning_rate": 1.9485505811152596e-06,
"loss": 1.802,
"step": 482
},
{
"epoch": 0.52,
"learning_rate": 1.9479977523923344e-06,
"loss": 1.7432,
"step": 484
},
{
"epoch": 0.52,
"learning_rate": 1.947442048695704e-06,
"loss": 1.7524,
"step": 486
},
{
"epoch": 0.52,
"learning_rate": 1.9468834717106405e-06,
"loss": 1.7055,
"step": 488
},
{
"epoch": 0.53,
"learning_rate": 1.946322023131129e-06,
"loss": 1.7729,
"step": 490
},
{
"epoch": 0.53,
"learning_rate": 1.9457577046598623e-06,
"loss": 1.7301,
"step": 492
},
{
"epoch": 0.53,
"learning_rate": 1.9451905180082392e-06,
"loss": 1.7957,
"step": 494
},
{
"epoch": 0.53,
"learning_rate": 1.9446204648963537e-06,
"loss": 1.7317,
"step": 496
},
{
"epoch": 0.54,
"learning_rate": 1.9440475470529956e-06,
"loss": 1.8001,
"step": 498
},
{
"epoch": 0.54,
"learning_rate": 1.9434717662156406e-06,
"loss": 1.794,
"step": 500
},
{
"epoch": 0.54,
"learning_rate": 1.9428931241304487e-06,
"loss": 1.7253,
"step": 502
},
{
"epoch": 0.54,
"learning_rate": 1.9423116225522545e-06,
"loss": 1.7507,
"step": 504
},
{
"epoch": 0.54,
"learning_rate": 1.9417272632445675e-06,
"loss": 1.7617,
"step": 506
},
{
"epoch": 0.55,
"learning_rate": 1.9411400479795615e-06,
"loss": 1.7473,
"step": 508
},
{
"epoch": 0.55,
"learning_rate": 1.940549978538073e-06,
"loss": 1.7441,
"step": 510
},
{
"epoch": 0.55,
"learning_rate": 1.9399570567095935e-06,
"loss": 1.7785,
"step": 512
},
{
"epoch": 0.55,
"learning_rate": 1.939361284292265e-06,
"loss": 1.7945,
"step": 514
},
{
"epoch": 0.55,
"learning_rate": 1.9387626630928748e-06,
"loss": 1.7766,
"step": 516
},
{
"epoch": 0.56,
"learning_rate": 1.9381611949268493e-06,
"loss": 1.7683,
"step": 518
},
{
"epoch": 0.56,
"learning_rate": 1.9375568816182486e-06,
"loss": 1.7753,
"step": 520
},
{
"epoch": 0.56,
"learning_rate": 1.936949724999762e-06,
"loss": 1.7494,
"step": 522
},
{
"epoch": 0.56,
"learning_rate": 1.9363397269127003e-06,
"loss": 1.7311,
"step": 524
},
{
"epoch": 0.57,
"learning_rate": 1.9357268892069932e-06,
"loss": 1.7668,
"step": 526
},
{
"epoch": 0.57,
"learning_rate": 1.935111213741181e-06,
"loss": 1.7202,
"step": 528
},
{
"epoch": 0.57,
"learning_rate": 1.934492702382411e-06,
"loss": 1.7074,
"step": 530
},
{
"epoch": 0.57,
"learning_rate": 1.933871357006429e-06,
"loss": 1.7274,
"step": 532
},
{
"epoch": 0.57,
"learning_rate": 1.9332471794975773e-06,
"loss": 1.7251,
"step": 534
},
{
"epoch": 0.58,
"learning_rate": 1.9326201717487864e-06,
"loss": 1.7558,
"step": 536
},
{
"epoch": 0.58,
"learning_rate": 1.9319903356615692e-06,
"loss": 1.7585,
"step": 538
},
{
"epoch": 0.58,
"learning_rate": 1.9313576731460187e-06,
"loss": 1.7493,
"step": 540
},
{
"epoch": 0.58,
"learning_rate": 1.9307221861207964e-06,
"loss": 1.7689,
"step": 542
},
{
"epoch": 0.58,
"learning_rate": 1.930083876513131e-06,
"loss": 1.7846,
"step": 544
},
{
"epoch": 0.59,
"learning_rate": 1.929442746258812e-06,
"loss": 1.7653,
"step": 546
},
{
"epoch": 0.59,
"learning_rate": 1.928798797302182e-06,
"loss": 1.7985,
"step": 548
},
{
"epoch": 0.59,
"learning_rate": 1.928152031596132e-06,
"loss": 1.7336,
"step": 550
},
{
"epoch": 0.59,
"learning_rate": 1.927502451102095e-06,
"loss": 1.7383,
"step": 552
},
{
"epoch": 0.6,
"learning_rate": 1.92685005779004e-06,
"loss": 1.7086,
"step": 554
},
{
"epoch": 0.6,
"learning_rate": 1.926194853638469e-06,
"loss": 1.6971,
"step": 556
},
{
"epoch": 0.6,
"learning_rate": 1.925536840634405e-06,
"loss": 1.752,
"step": 558
},
{
"epoch": 0.6,
"learning_rate": 1.9248760207733917e-06,
"loss": 1.7625,
"step": 560
},
{
"epoch": 0.6,
"learning_rate": 1.924212396059483e-06,
"loss": 1.7666,
"step": 562
},
{
"epoch": 0.61,
"learning_rate": 1.9235459685052414e-06,
"loss": 1.7895,
"step": 564
},
{
"epoch": 0.61,
"learning_rate": 1.9228767401317273e-06,
"loss": 1.7431,
"step": 566
},
{
"epoch": 0.61,
"learning_rate": 1.922204712968497e-06,
"loss": 1.7777,
"step": 568
},
{
"epoch": 0.61,
"learning_rate": 1.9215298890535935e-06,
"loss": 1.8083,
"step": 570
},
{
"epoch": 0.61,
"learning_rate": 1.9208522704335415e-06,
"loss": 1.7462,
"step": 572
},
{
"epoch": 0.62,
"learning_rate": 1.9201718591633418e-06,
"loss": 1.789,
"step": 574
},
{
"epoch": 0.62,
"learning_rate": 1.919488657306463e-06,
"loss": 1.7564,
"step": 576
},
{
"epoch": 0.62,
"learning_rate": 1.918802666934839e-06,
"loss": 1.796,
"step": 578
},
{
"epoch": 0.62,
"learning_rate": 1.9181138901288575e-06,
"loss": 1.8037,
"step": 580
},
{
"epoch": 0.63,
"learning_rate": 1.9174223289773593e-06,
"loss": 1.7606,
"step": 582
},
{
"epoch": 0.63,
"learning_rate": 1.9167279855776273e-06,
"loss": 1.7393,
"step": 584
},
{
"epoch": 0.63,
"learning_rate": 1.916030862035383e-06,
"loss": 1.782,
"step": 586
},
{
"epoch": 0.63,
"learning_rate": 1.9153309604647786e-06,
"loss": 1.7636,
"step": 588
},
{
"epoch": 0.63,
"learning_rate": 1.9146282829883923e-06,
"loss": 1.8191,
"step": 590
},
{
"epoch": 0.64,
"learning_rate": 1.9139228317372193e-06,
"loss": 1.75,
"step": 592
},
{
"epoch": 0.64,
"learning_rate": 1.913214608850667e-06,
"loss": 1.7738,
"step": 594
},
{
"epoch": 0.64,
"learning_rate": 1.91250361647655e-06,
"loss": 1.7473,
"step": 596
},
{
"epoch": 0.64,
"learning_rate": 1.9117898567710796e-06,
"loss": 1.7738,
"step": 598
},
{
"epoch": 0.64,
"learning_rate": 1.9110733318988605e-06,
"loss": 1.7436,
"step": 600
},
{
"epoch": 0.65,
"learning_rate": 1.910354044032883e-06,
"loss": 1.7629,
"step": 602
},
{
"epoch": 0.65,
"learning_rate": 1.9096319953545185e-06,
"loss": 1.7607,
"step": 604
},
{
"epoch": 0.65,
"learning_rate": 1.9089071880535074e-06,
"loss": 1.7382,
"step": 606
},
{
"epoch": 0.65,
"learning_rate": 1.9081796243279597e-06,
"loss": 1.7465,
"step": 608
},
{
"epoch": 0.66,
"learning_rate": 1.907449306384343e-06,
"loss": 1.7443,
"step": 610
},
{
"epoch": 0.66,
"learning_rate": 1.906716236437477e-06,
"loss": 1.7323,
"step": 612
},
{
"epoch": 0.66,
"learning_rate": 1.905980416710529e-06,
"loss": 1.7313,
"step": 614
},
{
"epoch": 0.66,
"learning_rate": 1.9052418494350046e-06,
"loss": 1.7307,
"step": 616
},
{
"epoch": 0.66,
"learning_rate": 1.9045005368507417e-06,
"loss": 1.7547,
"step": 618
},
{
"epoch": 0.67,
"learning_rate": 1.9037564812059039e-06,
"loss": 1.7434,
"step": 620
},
{
"epoch": 0.67,
"learning_rate": 1.903009684756974e-06,
"loss": 1.8041,
"step": 622
},
{
"epoch": 0.67,
"learning_rate": 1.902260149768747e-06,
"loss": 1.7097,
"step": 624
},
{
"epoch": 0.67,
"learning_rate": 1.9015078785143217e-06,
"loss": 1.7547,
"step": 626
},
{
"epoch": 0.68,
"learning_rate": 1.9007528732750967e-06,
"loss": 1.7094,
"step": 628
},
{
"epoch": 0.68,
"learning_rate": 1.8999951363407609e-06,
"loss": 1.7195,
"step": 630
},
{
"epoch": 0.68,
"learning_rate": 1.8992346700092879e-06,
"loss": 1.7275,
"step": 632
},
{
"epoch": 0.68,
"learning_rate": 1.8984714765869284e-06,
"loss": 1.6978,
"step": 634
},
{
"epoch": 0.68,
"learning_rate": 1.897705558388204e-06,
"loss": 1.7924,
"step": 636
},
{
"epoch": 0.69,
"learning_rate": 1.8969369177358994e-06,
"loss": 1.7121,
"step": 638
},
{
"epoch": 0.69,
"learning_rate": 1.8961655569610556e-06,
"loss": 1.714,
"step": 640
},
{
"epoch": 0.69,
"learning_rate": 1.8953914784029627e-06,
"loss": 1.7649,
"step": 642
},
{
"epoch": 0.69,
"learning_rate": 1.8946146844091535e-06,
"loss": 1.7804,
"step": 644
},
{
"epoch": 0.69,
"learning_rate": 1.8938351773353954e-06,
"loss": 1.7319,
"step": 646
},
{
"epoch": 0.7,
"learning_rate": 1.8930529595456837e-06,
"loss": 1.7672,
"step": 648
},
{
"epoch": 0.7,
"learning_rate": 1.8922680334122347e-06,
"loss": 1.7306,
"step": 650
},
{
"epoch": 0.7,
"learning_rate": 1.8914804013154782e-06,
"loss": 1.8346,
"step": 652
},
{
"epoch": 0.7,
"learning_rate": 1.8906900656440498e-06,
"loss": 1.7535,
"step": 654
},
{
"epoch": 0.71,
"learning_rate": 1.8898970287947847e-06,
"loss": 1.7585,
"step": 656
},
{
"epoch": 0.71,
"learning_rate": 1.8891012931727102e-06,
"loss": 1.7482,
"step": 658
},
{
"epoch": 0.71,
"learning_rate": 1.888302861191037e-06,
"loss": 1.7485,
"step": 660
},
{
"epoch": 0.71,
"learning_rate": 1.8875017352711545e-06,
"loss": 1.7369,
"step": 662
},
{
"epoch": 0.71,
"learning_rate": 1.8866979178426204e-06,
"loss": 1.7972,
"step": 664
},
{
"epoch": 0.72,
"learning_rate": 1.8858914113431562e-06,
"loss": 1.7787,
"step": 666
},
{
"epoch": 0.72,
"learning_rate": 1.8850822182186379e-06,
"loss": 1.7233,
"step": 668
},
{
"epoch": 0.72,
"learning_rate": 1.8842703409230888e-06,
"loss": 1.8037,
"step": 670
},
{
"epoch": 0.72,
"learning_rate": 1.883455781918673e-06,
"loss": 1.7835,
"step": 672
},
{
"epoch": 0.72,
"learning_rate": 1.8826385436756874e-06,
"loss": 1.7171,
"step": 674
},
{
"epoch": 0.73,
"learning_rate": 1.8818186286725538e-06,
"loss": 1.7468,
"step": 676
},
{
"epoch": 0.73,
"learning_rate": 1.880996039395812e-06,
"loss": 1.752,
"step": 678
},
{
"epoch": 0.73,
"learning_rate": 1.880170778340112e-06,
"loss": 1.7464,
"step": 680
},
{
"epoch": 0.73,
"learning_rate": 1.879342848008206e-06,
"loss": 1.7679,
"step": 682
},
{
"epoch": 0.74,
"learning_rate": 1.8785122509109423e-06,
"loss": 1.733,
"step": 684
},
{
"epoch": 0.74,
"learning_rate": 1.8776789895672556e-06,
"loss": 1.7939,
"step": 686
},
{
"epoch": 0.74,
"learning_rate": 1.8768430665041607e-06,
"loss": 1.7427,
"step": 688
},
{
"epoch": 0.74,
"learning_rate": 1.8760044842567449e-06,
"loss": 1.7692,
"step": 690
},
{
"epoch": 0.74,
"learning_rate": 1.8751632453681595e-06,
"loss": 1.7502,
"step": 692
},
{
"epoch": 0.75,
"learning_rate": 1.8743193523896132e-06,
"loss": 1.7305,
"step": 694
},
{
"epoch": 0.75,
"learning_rate": 1.8734728078803627e-06,
"loss": 1.7461,
"step": 696
},
{
"epoch": 0.75,
"learning_rate": 1.8726236144077068e-06,
"loss": 1.7059,
"step": 698
},
{
"epoch": 0.75,
"learning_rate": 1.8717717745469774e-06,
"loss": 1.7649,
"step": 700
},
{
"epoch": 0.75,
"learning_rate": 1.870917290881532e-06,
"loss": 1.7414,
"step": 702
},
{
"epoch": 0.76,
"learning_rate": 1.870060166002746e-06,
"loss": 1.7796,
"step": 704
},
{
"epoch": 0.76,
"learning_rate": 1.8692004025100051e-06,
"loss": 1.7181,
"step": 706
},
{
"epoch": 0.76,
"learning_rate": 1.8683380030106966e-06,
"loss": 1.7578,
"step": 708
},
{
"epoch": 0.76,
"learning_rate": 1.8674729701202017e-06,
"loss": 1.721,
"step": 710
},
{
"epoch": 0.77,
"learning_rate": 1.8666053064618886e-06,
"loss": 1.7622,
"step": 712
},
{
"epoch": 0.77,
"learning_rate": 1.8657350146671034e-06,
"loss": 1.7699,
"step": 714
},
{
"epoch": 0.77,
"learning_rate": 1.8648620973751625e-06,
"loss": 1.7665,
"step": 716
},
{
"epoch": 0.77,
"learning_rate": 1.8639865572333446e-06,
"loss": 1.7573,
"step": 718
},
{
"epoch": 0.77,
"learning_rate": 1.8631083968968825e-06,
"loss": 1.7599,
"step": 720
},
{
"epoch": 0.78,
"learning_rate": 1.8622276190289559e-06,
"loss": 1.7141,
"step": 722
},
{
"epoch": 0.78,
"learning_rate": 1.8613442263006812e-06,
"loss": 1.8069,
"step": 724
},
{
"epoch": 0.78,
"learning_rate": 1.8604582213911066e-06,
"loss": 1.7608,
"step": 726
},
{
"epoch": 0.78,
"learning_rate": 1.859569606987201e-06,
"loss": 1.7121,
"step": 728
},
{
"epoch": 0.78,
"learning_rate": 1.8586783857838476e-06,
"loss": 1.7228,
"step": 730
},
{
"epoch": 0.79,
"learning_rate": 1.8577845604838347e-06,
"loss": 1.7938,
"step": 732
},
{
"epoch": 0.79,
"learning_rate": 1.8568881337978483e-06,
"loss": 1.7717,
"step": 734
},
{
"epoch": 0.79,
"learning_rate": 1.855989108444464e-06,
"loss": 1.7354,
"step": 736
},
{
"epoch": 0.79,
"learning_rate": 1.8550874871501377e-06,
"loss": 1.7209,
"step": 738
},
{
"epoch": 0.8,
"learning_rate": 1.8541832726491979e-06,
"loss": 1.7432,
"step": 740
},
{
"epoch": 0.8,
"learning_rate": 1.8532764676838378e-06,
"loss": 1.7413,
"step": 742
},
{
"epoch": 0.8,
"learning_rate": 1.852367075004107e-06,
"loss": 1.724,
"step": 744
},
{
"epoch": 0.8,
"learning_rate": 1.8514550973679022e-06,
"loss": 1.7774,
"step": 746
},
{
"epoch": 0.8,
"learning_rate": 1.8505405375409587e-06,
"loss": 1.7286,
"step": 748
},
{
"epoch": 0.81,
"learning_rate": 1.8496233982968455e-06,
"loss": 1.7957,
"step": 750
},
{
"epoch": 0.81,
"learning_rate": 1.8487036824169505e-06,
"loss": 1.6974,
"step": 752
},
{
"epoch": 0.81,
"learning_rate": 1.8477813926904786e-06,
"loss": 1.7072,
"step": 754
},
{
"epoch": 0.81,
"learning_rate": 1.846856531914439e-06,
"loss": 1.7808,
"step": 756
},
{
"epoch": 0.81,
"learning_rate": 1.8459291028936383e-06,
"loss": 1.7283,
"step": 758
},
{
"epoch": 0.82,
"learning_rate": 1.844999108440672e-06,
"loss": 1.7755,
"step": 760
},
{
"epoch": 0.82,
"learning_rate": 1.8440665513759153e-06,
"loss": 1.7294,
"step": 762
},
{
"epoch": 0.82,
"learning_rate": 1.8431314345275157e-06,
"loss": 1.7471,
"step": 764
},
{
"epoch": 0.82,
"learning_rate": 1.8421937607313826e-06,
"loss": 1.732,
"step": 766
},
{
"epoch": 0.83,
"learning_rate": 1.8412535328311812e-06,
"loss": 1.7594,
"step": 768
},
{
"epoch": 0.83,
"learning_rate": 1.840310753678321e-06,
"loss": 1.7588,
"step": 770
},
{
"epoch": 0.83,
"learning_rate": 1.83936542613195e-06,
"loss": 1.7023,
"step": 772
},
{
"epoch": 0.83,
"learning_rate": 1.8384175530589434e-06,
"loss": 1.7175,
"step": 774
},
{
"epoch": 0.83,
"learning_rate": 1.8374671373338973e-06,
"loss": 1.7447,
"step": 776
},
{
"epoch": 0.84,
"learning_rate": 1.836514181839118e-06,
"loss": 1.7687,
"step": 778
},
{
"epoch": 0.84,
"learning_rate": 1.835558689464615e-06,
"loss": 1.7562,
"step": 780
},
{
"epoch": 0.84,
"learning_rate": 1.8346006631080902e-06,
"loss": 1.7792,
"step": 782
},
{
"epoch": 0.84,
"learning_rate": 1.833640105674931e-06,
"loss": 1.7616,
"step": 784
},
{
"epoch": 0.84,
"learning_rate": 1.8326770200782007e-06,
"loss": 1.7163,
"step": 786
},
{
"epoch": 0.85,
"learning_rate": 1.8317114092386295e-06,
"loss": 1.7233,
"step": 788
},
{
"epoch": 0.85,
"learning_rate": 1.830743276084606e-06,
"loss": 1.7349,
"step": 790
},
{
"epoch": 0.85,
"learning_rate": 1.8297726235521682e-06,
"loss": 1.7296,
"step": 792
},
{
"epoch": 0.85,
"learning_rate": 1.8287994545849945e-06,
"loss": 1.7412,
"step": 794
},
{
"epoch": 0.86,
"learning_rate": 1.8278237721343946e-06,
"loss": 1.7284,
"step": 796
},
{
"epoch": 0.86,
"learning_rate": 1.8268455791593014e-06,
"loss": 1.7835,
"step": 798
},
{
"epoch": 0.86,
"learning_rate": 1.8258648786262608e-06,
"loss": 1.7264,
"step": 800
},
{
"epoch": 0.86,
"learning_rate": 1.8248816735094236e-06,
"loss": 1.7499,
"step": 802
},
{
"epoch": 0.86,
"learning_rate": 1.8238959667905365e-06,
"loss": 1.6927,
"step": 804
},
{
"epoch": 0.87,
"learning_rate": 1.8229077614589318e-06,
"loss": 1.75,
"step": 806
},
{
"epoch": 0.87,
"learning_rate": 1.8219170605115206e-06,
"loss": 1.7551,
"step": 808
},
{
"epoch": 0.87,
"learning_rate": 1.8209238669527812e-06,
"loss": 1.7534,
"step": 810
},
{
"epoch": 0.87,
"learning_rate": 1.8199281837947517e-06,
"loss": 1.7627,
"step": 812
},
{
"epoch": 0.88,
"learning_rate": 1.8189300140570207e-06,
"loss": 1.7738,
"step": 814
},
{
"epoch": 0.88,
"learning_rate": 1.8179293607667177e-06,
"loss": 1.7273,
"step": 816
},
{
"epoch": 0.88,
"learning_rate": 1.816926226958503e-06,
"loss": 1.7357,
"step": 818
},
{
"epoch": 0.88,
"learning_rate": 1.815920615674561e-06,
"loss": 1.7591,
"step": 820
},
{
"epoch": 0.88,
"learning_rate": 1.8149125299645886e-06,
"loss": 1.7488,
"step": 822
},
{
"epoch": 0.89,
"learning_rate": 1.8139019728857869e-06,
"loss": 1.7682,
"step": 824
},
{
"epoch": 0.89,
"learning_rate": 1.8128889475028522e-06,
"loss": 1.7349,
"step": 826
},
{
"epoch": 0.89,
"learning_rate": 1.8118734568879658e-06,
"loss": 1.6782,
"step": 828
},
{
"epoch": 0.89,
"learning_rate": 1.8108555041207865e-06,
"loss": 1.7851,
"step": 830
},
{
"epoch": 0.89,
"learning_rate": 1.8098350922884383e-06,
"loss": 1.7103,
"step": 832
},
{
"epoch": 0.9,
"learning_rate": 1.808812224485504e-06,
"loss": 1.7015,
"step": 834
},
{
"epoch": 0.9,
"learning_rate": 1.807786903814014e-06,
"loss": 1.8041,
"step": 836
},
{
"epoch": 0.9,
"learning_rate": 1.806759133383438e-06,
"loss": 1.701,
"step": 838
},
{
"epoch": 0.9,
"learning_rate": 1.8057289163106745e-06,
"loss": 1.7549,
"step": 840
},
{
"epoch": 0.91,
"learning_rate": 1.8046962557200423e-06,
"loss": 1.8104,
"step": 842
},
{
"epoch": 0.91,
"learning_rate": 1.80366115474327e-06,
"loss": 1.7538,
"step": 844
},
{
"epoch": 0.91,
"learning_rate": 1.8026236165194879e-06,
"loss": 1.7609,
"step": 846
},
{
"epoch": 0.91,
"learning_rate": 1.801583644195217e-06,
"loss": 1.7476,
"step": 848
},
{
"epoch": 0.91,
"learning_rate": 1.8005412409243603e-06,
"loss": 1.7166,
"step": 850
},
{
"epoch": 0.92,
"learning_rate": 1.7994964098681936e-06,
"loss": 1.734,
"step": 852
},
{
"epoch": 0.92,
"learning_rate": 1.7984491541953548e-06,
"loss": 1.756,
"step": 854
},
{
"epoch": 0.92,
"learning_rate": 1.7973994770818355e-06,
"loss": 1.7508,
"step": 856
},
{
"epoch": 0.92,
"learning_rate": 1.7963473817109697e-06,
"loss": 1.7495,
"step": 858
},
{
"epoch": 0.92,
"learning_rate": 1.7952928712734265e-06,
"loss": 1.7661,
"step": 860
},
{
"epoch": 0.93,
"learning_rate": 1.7942359489671976e-06,
"loss": 1.74,
"step": 862
},
{
"epoch": 0.93,
"learning_rate": 1.7931766179975912e-06,
"loss": 1.7775,
"step": 864
},
{
"epoch": 0.93,
"learning_rate": 1.792114881577218e-06,
"loss": 1.7375,
"step": 866
},
{
"epoch": 0.93,
"learning_rate": 1.7910507429259854e-06,
"loss": 1.7299,
"step": 868
},
{
"epoch": 0.94,
"learning_rate": 1.7899842052710844e-06,
"loss": 1.708,
"step": 870
},
{
"epoch": 0.94,
"learning_rate": 1.7889152718469833e-06,
"loss": 1.7349,
"step": 872
},
{
"epoch": 0.94,
"learning_rate": 1.7878439458954145e-06,
"loss": 1.7186,
"step": 874
},
{
"epoch": 0.94,
"learning_rate": 1.7867702306653664e-06,
"loss": 1.7884,
"step": 876
},
{
"epoch": 0.94,
"learning_rate": 1.785694129413074e-06,
"loss": 1.7809,
"step": 878
},
{
"epoch": 0.95,
"learning_rate": 1.7846156454020073e-06,
"loss": 1.6939,
"step": 880
},
{
"epoch": 0.95,
"learning_rate": 1.783534781902864e-06,
"loss": 1.7329,
"step": 882
},
{
"epoch": 0.95,
"learning_rate": 1.7824515421935564e-06,
"loss": 1.7574,
"step": 884
},
{
"epoch": 0.95,
"learning_rate": 1.781365929559204e-06,
"loss": 1.7442,
"step": 886
},
{
"epoch": 0.95,
"learning_rate": 1.780277947292122e-06,
"loss": 1.7395,
"step": 888
},
{
"epoch": 0.96,
"learning_rate": 1.779187598691813e-06,
"loss": 1.7084,
"step": 890
},
{
"epoch": 0.96,
"learning_rate": 1.7780948870649549e-06,
"loss": 1.7761,
"step": 892
},
{
"epoch": 0.96,
"learning_rate": 1.776999815725392e-06,
"loss": 1.7553,
"step": 894
},
{
"epoch": 0.96,
"learning_rate": 1.7759023879941256e-06,
"loss": 1.7694,
"step": 896
},
{
"epoch": 0.97,
"learning_rate": 1.7748026071993026e-06,
"loss": 1.7368,
"step": 898
},
{
"epoch": 0.97,
"learning_rate": 1.7737004766762053e-06,
"loss": 1.724,
"step": 900
},
{
"epoch": 0.97,
"learning_rate": 1.772595999767244e-06,
"loss": 1.7354,
"step": 902
},
{
"epoch": 0.97,
"learning_rate": 1.771489179821943e-06,
"loss": 1.7073,
"step": 904
},
{
"epoch": 0.97,
"learning_rate": 1.7703800201969326e-06,
"loss": 1.7193,
"step": 906
},
{
"epoch": 0.98,
"learning_rate": 1.7692685242559394e-06,
"loss": 1.782,
"step": 908
},
{
"epoch": 0.98,
"learning_rate": 1.768154695369774e-06,
"loss": 1.7302,
"step": 910
},
{
"epoch": 0.98,
"learning_rate": 1.767038536916324e-06,
"loss": 1.815,
"step": 912
},
{
"epoch": 0.98,
"learning_rate": 1.7659200522805399e-06,
"loss": 1.7186,
"step": 914
},
{
"epoch": 0.98,
"learning_rate": 1.7647992448544274e-06,
"loss": 1.7699,
"step": 916
},
{
"epoch": 0.99,
"learning_rate": 1.7636761180370373e-06,
"loss": 1.7206,
"step": 918
},
{
"epoch": 0.99,
"learning_rate": 1.762550675234453e-06,
"loss": 1.7246,
"step": 920
},
{
"epoch": 0.99,
"learning_rate": 1.7614229198597825e-06,
"loss": 1.7262,
"step": 922
},
{
"epoch": 0.99,
"learning_rate": 1.760292855333147e-06,
"loss": 1.7184,
"step": 924
},
{
"epoch": 1.0,
"learning_rate": 1.7591604850816704e-06,
"loss": 1.7639,
"step": 926
},
{
"epoch": 1.0,
"learning_rate": 1.7580258125394691e-06,
"loss": 1.715,
"step": 928
},
{
"epoch": 1.0,
"learning_rate": 1.7568888411476416e-06,
"loss": 1.7317,
"step": 930
},
{
"epoch": 1.0,
"learning_rate": 1.7557495743542582e-06,
"loss": 1.7356,
"step": 932
},
{
"epoch": 1.0,
"learning_rate": 1.7546080156143503e-06,
"loss": 1.6646,
"step": 934
},
{
"epoch": 1.01,
"learning_rate": 1.7534641683899006e-06,
"loss": 1.6957,
"step": 936
},
{
"epoch": 1.01,
"learning_rate": 1.752318036149831e-06,
"loss": 1.6826,
"step": 938
},
{
"epoch": 1.01,
"learning_rate": 1.7511696223699937e-06,
"loss": 1.7156,
"step": 940
},
{
"epoch": 1.01,
"learning_rate": 1.7500189305331605e-06,
"loss": 1.7372,
"step": 942
},
{
"epoch": 1.01,
"learning_rate": 1.7488659641290108e-06,
"loss": 1.7314,
"step": 944
},
{
"epoch": 1.02,
"learning_rate": 1.747710726654123e-06,
"loss": 1.6861,
"step": 946
},
{
"epoch": 1.02,
"learning_rate": 1.7465532216119624e-06,
"loss": 1.7126,
"step": 948
},
{
"epoch": 1.02,
"learning_rate": 1.7453934525128715e-06,
"loss": 1.7377,
"step": 950
},
{
"epoch": 1.02,
"learning_rate": 1.7442314228740584e-06,
"loss": 1.7103,
"step": 952
},
{
"epoch": 1.03,
"learning_rate": 1.743067136219587e-06,
"loss": 1.7215,
"step": 954
},
{
"epoch": 1.03,
"learning_rate": 1.7419005960803663e-06,
"loss": 1.71,
"step": 956
},
{
"epoch": 1.03,
"learning_rate": 1.7407318059941386e-06,
"loss": 1.6762,
"step": 958
},
{
"epoch": 1.03,
"learning_rate": 1.7395607695054709e-06,
"loss": 1.663,
"step": 960
},
{
"epoch": 1.03,
"learning_rate": 1.7383874901657412e-06,
"loss": 1.7282,
"step": 962
},
{
"epoch": 1.04,
"learning_rate": 1.7372119715331301e-06,
"loss": 1.6706,
"step": 964
},
{
"epoch": 1.04,
"learning_rate": 1.7360342171726102e-06,
"loss": 1.6852,
"step": 966
},
{
"epoch": 1.04,
"learning_rate": 1.7348542306559325e-06,
"loss": 1.7062,
"step": 968
},
{
"epoch": 1.04,
"learning_rate": 1.7336720155616185e-06,
"loss": 1.716,
"step": 970
},
{
"epoch": 1.04,
"learning_rate": 1.7324875754749484e-06,
"loss": 1.7045,
"step": 972
},
{
"epoch": 1.05,
"learning_rate": 1.7313009139879503e-06,
"loss": 1.7872,
"step": 974
},
{
"epoch": 1.05,
"learning_rate": 1.7301120346993875e-06,
"loss": 1.7124,
"step": 976
},
{
"epoch": 1.05,
"learning_rate": 1.728920941214751e-06,
"loss": 1.6671,
"step": 978
},
{
"epoch": 1.05,
"learning_rate": 1.727727637146246e-06,
"loss": 1.7266,
"step": 980
},
{
"epoch": 1.06,
"learning_rate": 1.7265321261127816e-06,
"loss": 1.7231,
"step": 982
},
{
"epoch": 1.06,
"learning_rate": 1.72533441173996e-06,
"loss": 1.7419,
"step": 984
},
{
"epoch": 1.06,
"learning_rate": 1.7241344976600655e-06,
"loss": 1.7027,
"step": 986
},
{
"epoch": 1.06,
"learning_rate": 1.7229323875120536e-06,
"loss": 1.6814,
"step": 988
},
{
"epoch": 1.06,
"learning_rate": 1.7217280849415392e-06,
"loss": 1.7554,
"step": 990
},
{
"epoch": 1.07,
"learning_rate": 1.7205215936007869e-06,
"loss": 1.7154,
"step": 992
},
{
"epoch": 1.07,
"learning_rate": 1.7193129171486985e-06,
"loss": 1.7149,
"step": 994
},
{
"epoch": 1.07,
"learning_rate": 1.7181020592508025e-06,
"loss": 1.6895,
"step": 996
},
{
"epoch": 1.07,
"learning_rate": 1.7168890235792434e-06,
"loss": 1.7698,
"step": 998
},
{
"epoch": 1.07,
"learning_rate": 1.7156738138127704e-06,
"loss": 1.7474,
"step": 1000
},
{
"epoch": 1.08,
"learning_rate": 1.7144564336367254e-06,
"loss": 1.7258,
"step": 1002
},
{
"epoch": 1.08,
"learning_rate": 1.713236886743033e-06,
"loss": 1.7011,
"step": 1004
},
{
"epoch": 1.08,
"learning_rate": 1.712015176830188e-06,
"loss": 1.7504,
"step": 1006
},
{
"epoch": 1.08,
"learning_rate": 1.7107913076032458e-06,
"loss": 1.7323,
"step": 1008
},
{
"epoch": 1.09,
"learning_rate": 1.7095652827738103e-06,
"loss": 1.7371,
"step": 1010
},
{
"epoch": 1.09,
"learning_rate": 1.7083371060600218e-06,
"loss": 1.7088,
"step": 1012
},
{
"epoch": 1.09,
"learning_rate": 1.7071067811865474e-06,
"loss": 1.6849,
"step": 1014
},
{
"epoch": 1.09,
"learning_rate": 1.7058743118845685e-06,
"loss": 1.7234,
"step": 1016
},
{
"epoch": 1.09,
"learning_rate": 1.70463970189177e-06,
"loss": 1.7471,
"step": 1018
},
{
"epoch": 1.1,
"learning_rate": 1.7034029549523284e-06,
"loss": 1.749,
"step": 1020
},
{
"epoch": 1.1,
"learning_rate": 1.7021640748169022e-06,
"loss": 1.7243,
"step": 1022
},
{
"epoch": 1.1,
"learning_rate": 1.700923065242617e-06,
"loss": 1.6974,
"step": 1024
},
{
"epoch": 1.1,
"learning_rate": 1.6996799299930586e-06,
"loss": 1.7965,
"step": 1026
},
{
"epoch": 1.11,
"learning_rate": 1.6984346728382574e-06,
"loss": 1.7427,
"step": 1028
},
{
"epoch": 1.11,
"learning_rate": 1.6971872975546804e-06,
"loss": 1.709,
"step": 1030
},
{
"epoch": 1.11,
"learning_rate": 1.6959378079252174e-06,
"loss": 1.759,
"step": 1032
},
{
"epoch": 1.11,
"learning_rate": 1.6946862077391702e-06,
"loss": 1.7407,
"step": 1034
},
{
"epoch": 1.11,
"learning_rate": 1.6934325007922417e-06,
"loss": 1.7409,
"step": 1036
},
{
"epoch": 1.12,
"learning_rate": 1.6921766908865235e-06,
"loss": 1.708,
"step": 1038
},
{
"epoch": 1.12,
"learning_rate": 1.6909187818304853e-06,
"loss": 1.7044,
"step": 1040
},
{
"epoch": 1.12,
"learning_rate": 1.6896587774389625e-06,
"loss": 1.694,
"step": 1042
},
{
"epoch": 1.12,
"learning_rate": 1.688396681533145e-06,
"loss": 1.6885,
"step": 1044
},
{
"epoch": 1.12,
"learning_rate": 1.6871324979405654e-06,
"loss": 1.7031,
"step": 1046
},
{
"epoch": 1.13,
"learning_rate": 1.6858662304950884e-06,
"loss": 1.7578,
"step": 1048
},
{
"epoch": 1.13,
"learning_rate": 1.6845978830368974e-06,
"loss": 1.7447,
"step": 1050
},
{
"epoch": 1.13,
"learning_rate": 1.6833274594124843e-06,
"loss": 1.7132,
"step": 1052
},
{
"epoch": 1.13,
"learning_rate": 1.6820549634746372e-06,
"loss": 1.7471,
"step": 1054
},
{
"epoch": 1.14,
"learning_rate": 1.6807803990824292e-06,
"loss": 1.7291,
"step": 1056
},
{
"epoch": 1.14,
"learning_rate": 1.6795037701012055e-06,
"loss": 1.7702,
"step": 1058
},
{
"epoch": 1.14,
"learning_rate": 1.6782250804025738e-06,
"loss": 1.6608,
"step": 1060
},
{
"epoch": 1.14,
"learning_rate": 1.6769443338643903e-06,
"loss": 1.7129,
"step": 1062
},
{
"epoch": 1.14,
"learning_rate": 1.6756615343707492e-06,
"loss": 1.7026,
"step": 1064
},
{
"epoch": 1.15,
"learning_rate": 1.6743766858119707e-06,
"loss": 1.6642,
"step": 1066
},
{
"epoch": 1.15,
"learning_rate": 1.6730897920845895e-06,
"loss": 1.737,
"step": 1068
},
{
"epoch": 1.15,
"learning_rate": 1.6718008570913418e-06,
"loss": 1.7233,
"step": 1070
},
{
"epoch": 1.15,
"learning_rate": 1.6705098847411549e-06,
"loss": 1.6943,
"step": 1072
},
{
"epoch": 1.15,
"learning_rate": 1.6692168789491352e-06,
"loss": 1.7018,
"step": 1074
},
{
"epoch": 1.16,
"learning_rate": 1.6679218436365545e-06,
"loss": 1.7279,
"step": 1076
},
{
"epoch": 1.16,
"learning_rate": 1.6666247827308412e-06,
"loss": 1.7212,
"step": 1078
},
{
"epoch": 1.16,
"learning_rate": 1.665325700165565e-06,
"loss": 1.718,
"step": 1080
},
{
"epoch": 1.16,
"learning_rate": 1.6640245998804283e-06,
"loss": 1.7408,
"step": 1082
},
{
"epoch": 1.17,
"learning_rate": 1.6627214858212513e-06,
"loss": 1.726,
"step": 1084
},
{
"epoch": 1.17,
"learning_rate": 1.6614163619399614e-06,
"loss": 1.7318,
"step": 1086
},
{
"epoch": 1.17,
"learning_rate": 1.660109232194582e-06,
"loss": 1.7315,
"step": 1088
},
{
"epoch": 1.17,
"learning_rate": 1.6588001005492194e-06,
"loss": 1.7284,
"step": 1090
},
{
"epoch": 1.17,
"learning_rate": 1.6574889709740502e-06,
"loss": 1.6915,
"step": 1092
},
{
"epoch": 1.18,
"learning_rate": 1.656175847445311e-06,
"loss": 1.687,
"step": 1094
},
{
"epoch": 1.18,
"learning_rate": 1.6548607339452852e-06,
"loss": 1.7136,
"step": 1096
},
{
"epoch": 1.18,
"learning_rate": 1.6535436344622907e-06,
"loss": 1.6719,
"step": 1098
},
{
"epoch": 1.18,
"learning_rate": 1.6522245529906687e-06,
"loss": 1.7783,
"step": 1100
},
{
"epoch": 1.18,
"learning_rate": 1.6509034935307714e-06,
"loss": 1.7075,
"step": 1102
},
{
"epoch": 1.19,
"learning_rate": 1.6495804600889485e-06,
"loss": 1.7239,
"step": 1104
},
{
"epoch": 1.19,
"learning_rate": 1.6482554566775378e-06,
"loss": 1.6832,
"step": 1106
},
{
"epoch": 1.19,
"learning_rate": 1.6469284873148497e-06,
"loss": 1.698,
"step": 1108
},
{
"epoch": 1.19,
"learning_rate": 1.6455995560251582e-06,
"loss": 1.7136,
"step": 1110
},
{
"epoch": 1.2,
"learning_rate": 1.6442686668386858e-06,
"loss": 1.7593,
"step": 1112
},
{
"epoch": 1.2,
"learning_rate": 1.6429358237915936e-06,
"loss": 1.7345,
"step": 1114
},
{
"epoch": 1.2,
"learning_rate": 1.641601030925968e-06,
"loss": 1.752,
"step": 1116
},
{
"epoch": 1.2,
"learning_rate": 1.6402642922898084e-06,
"loss": 1.7478,
"step": 1118
},
{
"epoch": 1.2,
"learning_rate": 1.638925611937015e-06,
"loss": 1.6655,
"step": 1120
},
{
"epoch": 1.21,
"learning_rate": 1.637584993927377e-06,
"loss": 1.7483,
"step": 1122
},
{
"epoch": 1.21,
"learning_rate": 1.6362424423265597e-06,
"loss": 1.744,
"step": 1124
},
{
"epoch": 1.21,
"learning_rate": 1.634897961206092e-06,
"loss": 1.7287,
"step": 1126
},
{
"epoch": 1.21,
"learning_rate": 1.6335515546433551e-06,
"loss": 1.6842,
"step": 1128
},
{
"epoch": 1.21,
"learning_rate": 1.6322032267215688e-06,
"loss": 1.7137,
"step": 1130
},
{
"epoch": 1.22,
"learning_rate": 1.6308529815297803e-06,
"loss": 1.6969,
"step": 1132
},
{
"epoch": 1.22,
"learning_rate": 1.6295008231628507e-06,
"loss": 1.7182,
"step": 1134
},
{
"epoch": 1.22,
"learning_rate": 1.6281467557214436e-06,
"loss": 1.7306,
"step": 1136
},
{
"epoch": 1.22,
"learning_rate": 1.6267907833120122e-06,
"loss": 1.6912,
"step": 1138
},
{
"epoch": 1.23,
"learning_rate": 1.6254329100467868e-06,
"loss": 1.7322,
"step": 1140
},
{
"epoch": 1.23,
"learning_rate": 1.624073140043762e-06,
"loss": 1.7041,
"step": 1142
},
{
"epoch": 1.23,
"learning_rate": 1.6227114774266852e-06,
"loss": 1.7464,
"step": 1144
},
{
"epoch": 1.23,
"learning_rate": 1.6213479263250432e-06,
"loss": 1.6828,
"step": 1146
},
{
"epoch": 1.23,
"learning_rate": 1.6199824908740497e-06,
"loss": 1.71,
"step": 1148
},
{
"epoch": 1.24,
"learning_rate": 1.6186151752146334e-06,
"loss": 1.7388,
"step": 1150
},
{
"epoch": 1.24,
"learning_rate": 1.6172459834934253e-06,
"loss": 1.7104,
"step": 1152
},
{
"epoch": 1.24,
"learning_rate": 1.6158749198627454e-06,
"loss": 1.7244,
"step": 1154
},
{
"epoch": 1.24,
"learning_rate": 1.6145019884805908e-06,
"loss": 1.7424,
"step": 1156
},
{
"epoch": 1.24,
"learning_rate": 1.6131271935106227e-06,
"loss": 1.7338,
"step": 1158
},
{
"epoch": 1.25,
"learning_rate": 1.6117505391221542e-06,
"loss": 1.7144,
"step": 1160
},
{
"epoch": 1.25,
"learning_rate": 1.6103720294901377e-06,
"loss": 1.7206,
"step": 1162
},
{
"epoch": 1.25,
"learning_rate": 1.6089916687951511e-06,
"loss": 1.721,
"step": 1164
},
{
"epoch": 1.25,
"learning_rate": 1.6076094612233871e-06,
"loss": 1.7459,
"step": 1166
},
{
"epoch": 1.26,
"learning_rate": 1.606225410966638e-06,
"loss": 1.6682,
"step": 1168
},
{
"epoch": 1.26,
"learning_rate": 1.6048395222222859e-06,
"loss": 1.6769,
"step": 1170
},
{
"epoch": 1.26,
"learning_rate": 1.6034517991932871e-06,
"loss": 1.6905,
"step": 1172
},
{
"epoch": 1.26,
"learning_rate": 1.6020622460881614e-06,
"loss": 1.7261,
"step": 1174
},
{
"epoch": 1.26,
"learning_rate": 1.6006708671209792e-06,
"loss": 1.7343,
"step": 1176
},
{
"epoch": 1.27,
"learning_rate": 1.5992776665113468e-06,
"loss": 1.726,
"step": 1178
},
{
"epoch": 1.27,
"learning_rate": 1.5978826484843958e-06,
"loss": 1.755,
"step": 1180
},
{
"epoch": 1.27,
"learning_rate": 1.5964858172707695e-06,
"loss": 1.7536,
"step": 1182
},
{
"epoch": 1.27,
"learning_rate": 1.5950871771066096e-06,
"loss": 1.7137,
"step": 1184
},
{
"epoch": 1.27,
"learning_rate": 1.5936867322335444e-06,
"loss": 1.7038,
"step": 1186
},
{
"epoch": 1.28,
"learning_rate": 1.5922844868986743e-06,
"loss": 1.7289,
"step": 1188
},
{
"epoch": 1.28,
"learning_rate": 1.5908804453545606e-06,
"loss": 1.667,
"step": 1190
},
{
"epoch": 1.28,
"learning_rate": 1.5894746118592121e-06,
"loss": 1.7183,
"step": 1192
},
{
"epoch": 1.28,
"learning_rate": 1.5880669906760714e-06,
"loss": 1.712,
"step": 1194
},
{
"epoch": 1.29,
"learning_rate": 1.5866575860740034e-06,
"loss": 1.7129,
"step": 1196
},
{
"epoch": 1.29,
"learning_rate": 1.5852464023272807e-06,
"loss": 1.7167,
"step": 1198
},
{
"epoch": 1.29,
"learning_rate": 1.583833443715572e-06,
"loss": 1.7159,
"step": 1200
},
{
"epoch": 1.29,
"learning_rate": 1.5824187145239284e-06,
"loss": 1.7041,
"step": 1202
},
{
"epoch": 1.29,
"learning_rate": 1.5810022190427708e-06,
"loss": 1.7068,
"step": 1204
},
{
"epoch": 1.3,
"learning_rate": 1.5795839615678763e-06,
"loss": 1.7819,
"step": 1206
},
{
"epoch": 1.3,
"learning_rate": 1.578163946400366e-06,
"loss": 1.7145,
"step": 1208
},
{
"epoch": 1.3,
"learning_rate": 1.576742177846691e-06,
"loss": 1.7034,
"step": 1210
},
{
"epoch": 1.3,
"learning_rate": 1.5753186602186206e-06,
"loss": 1.721,
"step": 1212
},
{
"epoch": 1.31,
"learning_rate": 1.5738933978332277e-06,
"loss": 1.6848,
"step": 1214
},
{
"epoch": 1.31,
"learning_rate": 1.5724663950128774e-06,
"loss": 1.6854,
"step": 1216
},
{
"epoch": 1.31,
"learning_rate": 1.5710376560852116e-06,
"loss": 1.7422,
"step": 1218
},
{
"epoch": 1.31,
"learning_rate": 1.5696071853831387e-06,
"loss": 1.7145,
"step": 1220
},
{
"epoch": 1.31,
"learning_rate": 1.5681749872448182e-06,
"loss": 1.6522,
"step": 1222
},
{
"epoch": 1.32,
"learning_rate": 1.5667410660136487e-06,
"loss": 1.708,
"step": 1224
},
{
"epoch": 1.32,
"learning_rate": 1.5653054260382544e-06,
"loss": 1.7109,
"step": 1226
},
{
"epoch": 1.32,
"learning_rate": 1.5638680716724712e-06,
"loss": 1.722,
"step": 1228
},
{
"epoch": 1.32,
"learning_rate": 1.5624290072753352e-06,
"loss": 1.6766,
"step": 1230
},
{
"epoch": 1.32,
"learning_rate": 1.560988237211068e-06,
"loss": 1.7269,
"step": 1232
},
{
"epoch": 1.33,
"learning_rate": 1.559545765849064e-06,
"loss": 1.7164,
"step": 1234
},
{
"epoch": 1.33,
"learning_rate": 1.5581015975638767e-06,
"loss": 1.7223,
"step": 1236
},
{
"epoch": 1.33,
"learning_rate": 1.5566557367352068e-06,
"loss": 1.6917,
"step": 1238
},
{
"epoch": 1.33,
"learning_rate": 1.5552081877478868e-06,
"loss": 1.733,
"step": 1240
},
{
"epoch": 1.34,
"learning_rate": 1.5537589549918699e-06,
"loss": 1.7121,
"step": 1242
},
{
"epoch": 1.34,
"learning_rate": 1.5523080428622146e-06,
"loss": 1.748,
"step": 1244
},
{
"epoch": 1.34,
"learning_rate": 1.550855455759073e-06,
"loss": 1.7165,
"step": 1246
},
{
"epoch": 1.34,
"learning_rate": 1.5494011980876769e-06,
"loss": 1.6621,
"step": 1248
},
{
"epoch": 1.34,
"learning_rate": 1.5479452742583245e-06,
"loss": 1.7292,
"step": 1250
},
{
"epoch": 1.35,
"learning_rate": 1.5464876886863664e-06,
"loss": 1.7089,
"step": 1252
},
{
"epoch": 1.35,
"learning_rate": 1.545028445792193e-06,
"loss": 1.7536,
"step": 1254
},
{
"epoch": 1.35,
"learning_rate": 1.5435675500012212e-06,
"loss": 1.7184,
"step": 1256
},
{
"epoch": 1.35,
"learning_rate": 1.5421050057438799e-06,
"loss": 1.7835,
"step": 1258
},
{
"epoch": 1.35,
"learning_rate": 1.5406408174555977e-06,
"loss": 1.7032,
"step": 1260
},
{
"epoch": 1.36,
"learning_rate": 1.539174989576789e-06,
"loss": 1.6932,
"step": 1262
},
{
"epoch": 1.36,
"learning_rate": 1.5377075265528405e-06,
"loss": 1.7308,
"step": 1264
},
{
"epoch": 1.36,
"learning_rate": 1.5362384328340978e-06,
"loss": 1.7066,
"step": 1266
},
{
"epoch": 1.36,
"learning_rate": 1.5347677128758516e-06,
"loss": 1.6998,
"step": 1268
},
{
"epoch": 1.37,
"learning_rate": 1.5332953711383252e-06,
"loss": 1.6986,
"step": 1270
},
{
"epoch": 1.37,
"learning_rate": 1.5318214120866598e-06,
"loss": 1.7444,
"step": 1272
},
{
"epoch": 1.37,
"learning_rate": 1.530345840190901e-06,
"loss": 1.7171,
"step": 1274
},
{
"epoch": 1.37,
"learning_rate": 1.5288686599259855e-06,
"loss": 1.6862,
"step": 1276
},
{
"epoch": 1.37,
"learning_rate": 1.5273898757717292e-06,
"loss": 1.7048,
"step": 1278
},
{
"epoch": 1.38,
"learning_rate": 1.5259094922128107e-06,
"loss": 1.6982,
"step": 1280
},
{
"epoch": 1.38,
"learning_rate": 1.5244275137387592e-06,
"loss": 1.6649,
"step": 1282
},
{
"epoch": 1.38,
"learning_rate": 1.5229439448439409e-06,
"loss": 1.6761,
"step": 1284
},
{
"epoch": 1.38,
"learning_rate": 1.5214587900275455e-06,
"loss": 1.7277,
"step": 1286
},
{
"epoch": 1.38,
"learning_rate": 1.5199720537935725e-06,
"loss": 1.754,
"step": 1288
},
{
"epoch": 1.39,
"learning_rate": 1.5184837406508163e-06,
"loss": 1.7415,
"step": 1290
},
{
"epoch": 1.39,
"learning_rate": 1.5169938551128545e-06,
"loss": 1.7301,
"step": 1292
},
{
"epoch": 1.39,
"learning_rate": 1.5155024016980331e-06,
"loss": 1.7108,
"step": 1294
},
{
"epoch": 1.39,
"learning_rate": 1.5140093849294528e-06,
"loss": 1.6945,
"step": 1296
},
{
"epoch": 1.4,
"learning_rate": 1.5125148093349553e-06,
"loss": 1.6618,
"step": 1298
},
{
"epoch": 1.4,
"learning_rate": 1.5110186794471103e-06,
"loss": 1.7243,
"step": 1300
},
{
"epoch": 1.4,
"learning_rate": 1.5095209998032004e-06,
"loss": 1.7369,
"step": 1302
},
{
"epoch": 1.4,
"learning_rate": 1.5080217749452092e-06,
"loss": 1.7114,
"step": 1304
},
{
"epoch": 1.4,
"learning_rate": 1.5065210094198047e-06,
"loss": 1.6971,
"step": 1306
},
{
"epoch": 1.41,
"learning_rate": 1.505018707778329e-06,
"loss": 1.7366,
"step": 1308
},
{
"epoch": 1.41,
"learning_rate": 1.503514874576782e-06,
"loss": 1.7264,
"step": 1310
},
{
"epoch": 1.41,
"learning_rate": 1.5020095143758082e-06,
"loss": 1.784,
"step": 1312
},
{
"epoch": 1.41,
"learning_rate": 1.5005026317406833e-06,
"loss": 1.7189,
"step": 1314
},
{
"epoch": 1.41,
"learning_rate": 1.4989942312412999e-06,
"loss": 1.6925,
"step": 1316
},
{
"epoch": 1.42,
"learning_rate": 1.497484317452154e-06,
"loss": 1.6767,
"step": 1318
},
{
"epoch": 1.42,
"learning_rate": 1.4959728949523305e-06,
"loss": 1.7302,
"step": 1320
},
{
"epoch": 1.42,
"learning_rate": 1.49445996832549e-06,
"loss": 1.7876,
"step": 1322
},
{
"epoch": 1.42,
"learning_rate": 1.4929455421598552e-06,
"loss": 1.735,
"step": 1324
},
{
"epoch": 1.43,
"learning_rate": 1.4914296210481951e-06,
"loss": 1.6793,
"step": 1326
},
{
"epoch": 1.43,
"learning_rate": 1.4899122095878136e-06,
"loss": 1.7335,
"step": 1328
},
{
"epoch": 1.43,
"learning_rate": 1.4883933123805337e-06,
"loss": 1.7311,
"step": 1330
},
{
"epoch": 1.43,
"learning_rate": 1.4868729340326844e-06,
"loss": 1.7139,
"step": 1332
},
{
"epoch": 1.43,
"learning_rate": 1.4853510791550865e-06,
"loss": 1.7346,
"step": 1334
},
{
"epoch": 1.44,
"learning_rate": 1.4838277523630387e-06,
"loss": 1.7138,
"step": 1336
},
{
"epoch": 1.44,
"learning_rate": 1.4823029582763038e-06,
"loss": 1.7414,
"step": 1338
},
{
"epoch": 1.44,
"learning_rate": 1.480776701519094e-06,
"loss": 1.6534,
"step": 1340
},
{
"epoch": 1.44,
"learning_rate": 1.4792489867200568e-06,
"loss": 1.685,
"step": 1342
},
{
"epoch": 1.44,
"learning_rate": 1.4777198185122628e-06,
"loss": 1.7148,
"step": 1344
},
{
"epoch": 1.45,
"learning_rate": 1.4761892015331895e-06,
"loss": 1.6957,
"step": 1346
},
{
"epoch": 1.45,
"learning_rate": 1.4746571404247082e-06,
"loss": 1.704,
"step": 1348
},
{
"epoch": 1.45,
"learning_rate": 1.4731236398330703e-06,
"loss": 1.7824,
"step": 1350
},
{
"epoch": 1.45,
"learning_rate": 1.471588704408891e-06,
"loss": 1.7159,
"step": 1352
},
{
"epoch": 1.46,
"learning_rate": 1.470052338807139e-06,
"loss": 1.7172,
"step": 1354
},
{
"epoch": 1.46,
"learning_rate": 1.4685145476871192e-06,
"loss": 1.7338,
"step": 1356
},
{
"epoch": 1.46,
"learning_rate": 1.4669753357124596e-06,
"loss": 1.7265,
"step": 1358
},
{
"epoch": 1.46,
"learning_rate": 1.4654347075510974e-06,
"loss": 1.7153,
"step": 1360
},
{
"epoch": 1.46,
"learning_rate": 1.4638926678752648e-06,
"loss": 1.6877,
"step": 1362
},
{
"epoch": 1.47,
"learning_rate": 1.4623492213614742e-06,
"loss": 1.7213,
"step": 1364
},
{
"epoch": 1.47,
"learning_rate": 1.4608043726905049e-06,
"loss": 1.7088,
"step": 1366
},
{
"epoch": 1.47,
"learning_rate": 1.4592581265473881e-06,
"loss": 1.7151,
"step": 1368
},
{
"epoch": 1.47,
"learning_rate": 1.4577104876213944e-06,
"loss": 1.7175,
"step": 1370
},
{
"epoch": 1.47,
"learning_rate": 1.456161460606016e-06,
"loss": 1.727,
"step": 1372
},
{
"epoch": 1.48,
"learning_rate": 1.4546110501989569e-06,
"loss": 1.7406,
"step": 1374
},
{
"epoch": 1.48,
"learning_rate": 1.4530592611021143e-06,
"loss": 1.6604,
"step": 1376
},
{
"epoch": 1.48,
"learning_rate": 1.4515060980215692e-06,
"loss": 1.7018,
"step": 1378
},
{
"epoch": 1.48,
"learning_rate": 1.4499515656675675e-06,
"loss": 1.6778,
"step": 1380
},
{
"epoch": 1.49,
"learning_rate": 1.4483956687545074e-06,
"loss": 1.7269,
"step": 1382
},
{
"epoch": 1.49,
"learning_rate": 1.4468384120009271e-06,
"loss": 1.7276,
"step": 1384
},
{
"epoch": 1.49,
"learning_rate": 1.4452798001294878e-06,
"loss": 1.7092,
"step": 1386
},
{
"epoch": 1.49,
"learning_rate": 1.4437198378669597e-06,
"loss": 1.7161,
"step": 1388
},
{
"epoch": 1.49,
"learning_rate": 1.4421585299442094e-06,
"loss": 1.7091,
"step": 1390
},
{
"epoch": 1.5,
"learning_rate": 1.440595881096184e-06,
"loss": 1.6838,
"step": 1392
},
{
"epoch": 1.5,
"learning_rate": 1.4390318960618971e-06,
"loss": 1.6945,
"step": 1394
},
{
"epoch": 1.5,
"learning_rate": 1.437466579584415e-06,
"loss": 1.6958,
"step": 1396
},
{
"epoch": 1.5,
"learning_rate": 1.435899936410841e-06,
"loss": 1.6436,
"step": 1398
},
{
"epoch": 1.5,
"learning_rate": 1.4343319712923024e-06,
"loss": 1.6958,
"step": 1400
},
{
"epoch": 1.51,
"learning_rate": 1.4327626889839355e-06,
"loss": 1.7065,
"step": 1402
},
{
"epoch": 1.51,
"learning_rate": 1.4311920942448716e-06,
"loss": 1.6859,
"step": 1404
},
{
"epoch": 1.51,
"learning_rate": 1.429620191838221e-06,
"loss": 1.7051,
"step": 1406
},
{
"epoch": 1.51,
"learning_rate": 1.4280469865310612e-06,
"loss": 1.7125,
"step": 1408
},
{
"epoch": 1.52,
"learning_rate": 1.4264724830944197e-06,
"loss": 1.7075,
"step": 1410
},
{
"epoch": 1.52,
"learning_rate": 1.4248966863032617e-06,
"loss": 1.6968,
"step": 1412
},
{
"epoch": 1.52,
"learning_rate": 1.4233196009364745e-06,
"loss": 1.7106,
"step": 1414
},
{
"epoch": 1.52,
"learning_rate": 1.421741231776853e-06,
"loss": 1.7062,
"step": 1416
},
{
"epoch": 1.52,
"learning_rate": 1.4201615836110854e-06,
"loss": 1.7371,
"step": 1418
},
{
"epoch": 1.53,
"learning_rate": 1.4185806612297394e-06,
"loss": 1.7413,
"step": 1420
},
{
"epoch": 1.53,
"learning_rate": 1.4169984694272457e-06,
"loss": 1.6971,
"step": 1422
},
{
"epoch": 1.53,
"learning_rate": 1.4154150130018865e-06,
"loss": 1.6919,
"step": 1424
},
{
"epoch": 1.53,
"learning_rate": 1.4138302967557776e-06,
"loss": 1.6432,
"step": 1426
},
{
"epoch": 1.54,
"learning_rate": 1.4122443254948559e-06,
"loss": 1.6771,
"step": 1428
},
{
"epoch": 1.54,
"learning_rate": 1.4106571040288653e-06,
"loss": 1.7331,
"step": 1430
},
{
"epoch": 1.54,
"learning_rate": 1.40906863717134e-06,
"loss": 1.6976,
"step": 1432
},
{
"epoch": 1.54,
"learning_rate": 1.4074789297395912e-06,
"loss": 1.756,
"step": 1434
},
{
"epoch": 1.54,
"learning_rate": 1.4058879865546929e-06,
"loss": 1.6803,
"step": 1436
},
{
"epoch": 1.55,
"learning_rate": 1.4042958124414663e-06,
"loss": 1.7093,
"step": 1438
},
{
"epoch": 1.55,
"learning_rate": 1.4027024122284662e-06,
"loss": 1.6884,
"step": 1440
},
{
"epoch": 1.55,
"learning_rate": 1.4011077907479647e-06,
"loss": 1.7701,
"step": 1442
},
{
"epoch": 1.55,
"learning_rate": 1.3995119528359388e-06,
"loss": 1.7824,
"step": 1444
},
{
"epoch": 1.55,
"learning_rate": 1.3979149033320538e-06,
"loss": 1.6869,
"step": 1446
},
{
"epoch": 1.56,
"learning_rate": 1.39631664707965e-06,
"loss": 1.6744,
"step": 1448
},
{
"epoch": 1.56,
"learning_rate": 1.3947171889257266e-06,
"loss": 1.6944,
"step": 1450
},
{
"epoch": 1.56,
"learning_rate": 1.3931165337209277e-06,
"loss": 1.6467,
"step": 1452
},
{
"epoch": 1.56,
"learning_rate": 1.391514686319529e-06,
"loss": 1.7144,
"step": 1454
},
{
"epoch": 1.57,
"learning_rate": 1.3899116515794203e-06,
"loss": 1.7199,
"step": 1456
},
{
"epoch": 1.57,
"learning_rate": 1.388307434362093e-06,
"loss": 1.7072,
"step": 1458
},
{
"epoch": 1.57,
"learning_rate": 1.3867020395326246e-06,
"loss": 1.7341,
"step": 1460
},
{
"epoch": 1.57,
"learning_rate": 1.3850954719596632e-06,
"loss": 1.6839,
"step": 1462
},
{
"epoch": 1.57,
"learning_rate": 1.3834877365154142e-06,
"loss": 1.7171,
"step": 1464
},
{
"epoch": 1.58,
"learning_rate": 1.3818788380756243e-06,
"loss": 1.7735,
"step": 1466
},
{
"epoch": 1.58,
"learning_rate": 1.380268781519568e-06,
"loss": 1.707,
"step": 1468
},
{
"epoch": 1.58,
"learning_rate": 1.3786575717300308e-06,
"loss": 1.7312,
"step": 1470
},
{
"epoch": 1.58,
"learning_rate": 1.3770452135932967e-06,
"loss": 1.6706,
"step": 1472
},
{
"epoch": 1.58,
"learning_rate": 1.3754317119991312e-06,
"loss": 1.6678,
"step": 1474
},
{
"epoch": 1.59,
"learning_rate": 1.3738170718407686e-06,
"loss": 1.766,
"step": 1476
},
{
"epoch": 1.59,
"learning_rate": 1.3722012980148955e-06,
"loss": 1.7033,
"step": 1478
},
{
"epoch": 1.59,
"learning_rate": 1.3705843954216366e-06,
"loss": 1.741,
"step": 1480
},
{
"epoch": 1.59,
"learning_rate": 1.3689663689645398e-06,
"loss": 1.7144,
"step": 1482
},
{
"epoch": 1.6,
"learning_rate": 1.3673472235505616e-06,
"loss": 1.7407,
"step": 1484
},
{
"epoch": 1.6,
"learning_rate": 1.3657269640900516e-06,
"loss": 1.6924,
"step": 1486
},
{
"epoch": 1.6,
"learning_rate": 1.3641055954967375e-06,
"loss": 1.7044,
"step": 1488
},
{
"epoch": 1.6,
"learning_rate": 1.3624831226877118e-06,
"loss": 1.7388,
"step": 1490
},
{
"epoch": 1.6,
"learning_rate": 1.3608595505834153e-06,
"loss": 1.7409,
"step": 1492
},
{
"epoch": 1.61,
"learning_rate": 1.3592348841076223e-06,
"loss": 1.7766,
"step": 1494
},
{
"epoch": 1.61,
"learning_rate": 1.3576091281874255e-06,
"loss": 1.695,
"step": 1496
},
{
"epoch": 1.61,
"learning_rate": 1.3559822877532232e-06,
"loss": 1.7264,
"step": 1498
},
{
"epoch": 1.61,
"learning_rate": 1.354354367738701e-06,
"loss": 1.6905,
"step": 1500
},
{
"epoch": 1.61,
"learning_rate": 1.3527253730808192e-06,
"loss": 1.6954,
"step": 1502
},
{
"epoch": 1.62,
"learning_rate": 1.3510953087197972e-06,
"loss": 1.7274,
"step": 1504
},
{
"epoch": 1.62,
"learning_rate": 1.3494641795990985e-06,
"loss": 1.648,
"step": 1506
},
{
"epoch": 1.62,
"learning_rate": 1.3478319906654151e-06,
"loss": 1.6577,
"step": 1508
},
{
"epoch": 1.62,
"learning_rate": 1.346198746868654e-06,
"loss": 1.6769,
"step": 1510
},
{
"epoch": 1.63,
"learning_rate": 1.3445644531619209e-06,
"loss": 1.6664,
"step": 1512
},
{
"epoch": 1.63,
"learning_rate": 1.3429291145015047e-06,
"loss": 1.7119,
"step": 1514
},
{
"epoch": 1.63,
"learning_rate": 1.3412927358468648e-06,
"loss": 1.6691,
"step": 1516
},
{
"epoch": 1.63,
"learning_rate": 1.3396553221606137e-06,
"loss": 1.7531,
"step": 1518
},
{
"epoch": 1.63,
"learning_rate": 1.3380168784085026e-06,
"loss": 1.7171,
"step": 1520
},
{
"epoch": 1.64,
"learning_rate": 1.3363774095594074e-06,
"loss": 1.6915,
"step": 1522
},
{
"epoch": 1.64,
"learning_rate": 1.3347369205853116e-06,
"loss": 1.7239,
"step": 1524
},
{
"epoch": 1.64,
"learning_rate": 1.3330954164612936e-06,
"loss": 1.7342,
"step": 1526
},
{
"epoch": 1.64,
"learning_rate": 1.3314529021655097e-06,
"loss": 1.7195,
"step": 1528
},
{
"epoch": 1.64,
"learning_rate": 1.32980938267918e-06,
"loss": 1.6779,
"step": 1530
},
{
"epoch": 1.65,
"learning_rate": 1.3281648629865732e-06,
"loss": 1.7145,
"step": 1532
},
{
"epoch": 1.65,
"learning_rate": 1.3265193480749904e-06,
"loss": 1.6962,
"step": 1534
},
{
"epoch": 1.65,
"learning_rate": 1.3248728429347525e-06,
"loss": 1.6629,
"step": 1536
},
{
"epoch": 1.65,
"learning_rate": 1.3232253525591819e-06,
"loss": 1.7328,
"step": 1538
},
{
"epoch": 1.66,
"learning_rate": 1.3215768819445894e-06,
"loss": 1.7226,
"step": 1540
},
{
"epoch": 1.66,
"learning_rate": 1.3199274360902588e-06,
"loss": 1.7535,
"step": 1542
},
{
"epoch": 1.66,
"learning_rate": 1.318277019998432e-06,
"loss": 1.7136,
"step": 1544
},
{
"epoch": 1.66,
"learning_rate": 1.3166256386742919e-06,
"loss": 1.7045,
"step": 1546
},
{
"epoch": 1.66,
"learning_rate": 1.3149732971259493e-06,
"loss": 1.7004,
"step": 1548
},
{
"epoch": 1.67,
"learning_rate": 1.3133200003644276e-06,
"loss": 1.7544,
"step": 1550
},
{
"epoch": 1.67,
"learning_rate": 1.3116657534036466e-06,
"loss": 1.6561,
"step": 1552
},
{
"epoch": 1.67,
"learning_rate": 1.3100105612604076e-06,
"loss": 1.7337,
"step": 1554
},
{
"epoch": 1.67,
"learning_rate": 1.3083544289543784e-06,
"loss": 1.6645,
"step": 1556
},
{
"epoch": 1.67,
"learning_rate": 1.3066973615080785e-06,
"loss": 1.7252,
"step": 1558
},
{
"epoch": 1.68,
"learning_rate": 1.3050393639468627e-06,
"loss": 1.7016,
"step": 1560
},
{
"epoch": 1.68,
"learning_rate": 1.3033804412989069e-06,
"loss": 1.6807,
"step": 1562
},
{
"epoch": 1.68,
"learning_rate": 1.3017205985951924e-06,
"loss": 1.6845,
"step": 1564
},
{
"epoch": 1.68,
"learning_rate": 1.3000598408694904e-06,
"loss": 1.7144,
"step": 1566
},
{
"epoch": 1.69,
"learning_rate": 1.2983981731583483e-06,
"loss": 1.717,
"step": 1568
},
{
"epoch": 1.69,
"learning_rate": 1.2967356005010718e-06,
"loss": 1.7302,
"step": 1570
},
{
"epoch": 1.69,
"learning_rate": 1.2950721279397114e-06,
"loss": 1.6868,
"step": 1572
},
{
"epoch": 1.69,
"learning_rate": 1.2934077605190471e-06,
"loss": 1.6902,
"step": 1574
},
{
"epoch": 1.69,
"learning_rate": 1.2917425032865728e-06,
"loss": 1.7324,
"step": 1576
},
{
"epoch": 1.7,
"learning_rate": 1.29007636129248e-06,
"loss": 1.6848,
"step": 1578
},
{
"epoch": 1.7,
"learning_rate": 1.288409339589644e-06,
"loss": 1.714,
"step": 1580
},
{
"epoch": 1.7,
"learning_rate": 1.286741443233608e-06,
"loss": 1.6321,
"step": 1582
},
{
"epoch": 1.7,
"learning_rate": 1.2850726772825684e-06,
"loss": 1.682,
"step": 1584
},
{
"epoch": 1.7,
"learning_rate": 1.2834030467973571e-06,
"loss": 1.7173,
"step": 1586
},
{
"epoch": 1.71,
"learning_rate": 1.2817325568414297e-06,
"loss": 1.7706,
"step": 1588
},
{
"epoch": 1.71,
"learning_rate": 1.280061212480847e-06,
"loss": 1.7157,
"step": 1590
},
{
"epoch": 1.71,
"learning_rate": 1.2783890187842615e-06,
"loss": 1.7145,
"step": 1592
},
{
"epoch": 1.71,
"learning_rate": 1.2767159808229018e-06,
"loss": 1.6997,
"step": 1594
},
{
"epoch": 1.72,
"learning_rate": 1.2750421036705556e-06,
"loss": 1.7341,
"step": 1596
},
{
"epoch": 1.72,
"learning_rate": 1.2733673924035572e-06,
"loss": 1.7162,
"step": 1598
},
{
"epoch": 1.72,
"learning_rate": 1.2716918521007695e-06,
"loss": 1.7477,
"step": 1600
},
{
"epoch": 1.72,
"learning_rate": 1.2700154878435697e-06,
"loss": 1.7039,
"step": 1602
},
{
"epoch": 1.72,
"learning_rate": 1.2683383047158343e-06,
"loss": 1.7734,
"step": 1604
},
{
"epoch": 1.73,
"learning_rate": 1.2666603078039223e-06,
"loss": 1.7188,
"step": 1606
},
{
"epoch": 1.73,
"learning_rate": 1.264981502196662e-06,
"loss": 1.6747,
"step": 1608
},
{
"epoch": 1.73,
"learning_rate": 1.2633018929853322e-06,
"loss": 1.6853,
"step": 1610
},
{
"epoch": 1.73,
"learning_rate": 1.2616214852636507e-06,
"loss": 1.696,
"step": 1612
},
{
"epoch": 1.74,
"learning_rate": 1.2599402841277563e-06,
"loss": 1.7188,
"step": 1614
},
{
"epoch": 1.74,
"learning_rate": 1.2582582946761938e-06,
"loss": 1.7015,
"step": 1616
},
{
"epoch": 1.74,
"learning_rate": 1.2565755220098981e-06,
"loss": 1.709,
"step": 1618
},
{
"epoch": 1.74,
"learning_rate": 1.2548919712321807e-06,
"loss": 1.7432,
"step": 1620
},
{
"epoch": 1.74,
"learning_rate": 1.2532076474487121e-06,
"loss": 1.666,
"step": 1622
},
{
"epoch": 1.75,
"learning_rate": 1.251522555767507e-06,
"loss": 1.7293,
"step": 1624
},
{
"epoch": 1.75,
"learning_rate": 1.2498367012989085e-06,
"loss": 1.67,
"step": 1626
},
{
"epoch": 1.75,
"learning_rate": 1.2481500891555746e-06,
"loss": 1.7527,
"step": 1628
},
{
"epoch": 1.75,
"learning_rate": 1.2464627244524593e-06,
"loss": 1.7247,
"step": 1630
},
{
"epoch": 1.75,
"learning_rate": 1.2447746123067995e-06,
"loss": 1.7901,
"step": 1632
},
{
"epoch": 1.76,
"learning_rate": 1.2430857578380994e-06,
"loss": 1.7128,
"step": 1634
},
{
"epoch": 1.76,
"learning_rate": 1.2413961661681133e-06,
"loss": 1.745,
"step": 1636
},
{
"epoch": 1.76,
"learning_rate": 1.2397058424208326e-06,
"loss": 1.7129,
"step": 1638
},
{
"epoch": 1.76,
"learning_rate": 1.2380147917224677e-06,
"loss": 1.682,
"step": 1640
},
{
"epoch": 1.77,
"learning_rate": 1.2363230192014343e-06,
"loss": 1.7325,
"step": 1642
},
{
"epoch": 1.77,
"learning_rate": 1.2346305299883364e-06,
"loss": 1.7165,
"step": 1644
},
{
"epoch": 1.77,
"learning_rate": 1.2329373292159524e-06,
"loss": 1.7265,
"step": 1646
},
{
"epoch": 1.77,
"learning_rate": 1.2312434220192176e-06,
"loss": 1.711,
"step": 1648
},
{
"epoch": 1.77,
"learning_rate": 1.2295488135352113e-06,
"loss": 1.6986,
"step": 1650
},
{
"epoch": 1.78,
"learning_rate": 1.2278535089031377e-06,
"loss": 1.7186,
"step": 1652
},
{
"epoch": 1.78,
"learning_rate": 1.2261575132643134e-06,
"loss": 1.693,
"step": 1654
},
{
"epoch": 1.78,
"learning_rate": 1.2244608317621499e-06,
"loss": 1.6866,
"step": 1656
},
{
"epoch": 1.78,
"learning_rate": 1.2227634695421393e-06,
"loss": 1.7142,
"step": 1658
},
{
"epoch": 1.78,
"learning_rate": 1.221065431751838e-06,
"loss": 1.747,
"step": 1660
},
{
"epoch": 1.79,
"learning_rate": 1.2193667235408507e-06,
"loss": 1.6544,
"step": 1662
},
{
"epoch": 1.79,
"learning_rate": 1.2176673500608154e-06,
"loss": 1.687,
"step": 1664
},
{
"epoch": 1.79,
"learning_rate": 1.215967316465389e-06,
"loss": 1.7248,
"step": 1666
},
{
"epoch": 1.79,
"learning_rate": 1.214266627910228e-06,
"loss": 1.7385,
"step": 1668
},
{
"epoch": 1.8,
"learning_rate": 1.2125652895529766e-06,
"loss": 1.722,
"step": 1670
},
{
"epoch": 1.8,
"learning_rate": 1.2108633065532497e-06,
"loss": 1.7037,
"step": 1672
},
{
"epoch": 1.8,
"learning_rate": 1.2091606840726167e-06,
"loss": 1.7116,
"step": 1674
},
{
"epoch": 1.8,
"learning_rate": 1.2074574272745868e-06,
"loss": 1.6718,
"step": 1676
},
{
"epoch": 1.8,
"learning_rate": 1.2057535413245918e-06,
"loss": 1.6715,
"step": 1678
},
{
"epoch": 1.81,
"learning_rate": 1.2040490313899735e-06,
"loss": 1.6836,
"step": 1680
},
{
"epoch": 1.81,
"learning_rate": 1.202343902639964e-06,
"loss": 1.6968,
"step": 1682
},
{
"epoch": 1.81,
"learning_rate": 1.2006381602456732e-06,
"loss": 1.6733,
"step": 1684
},
{
"epoch": 1.81,
"learning_rate": 1.1989318093800713e-06,
"loss": 1.6851,
"step": 1686
},
{
"epoch": 1.81,
"learning_rate": 1.1972248552179753e-06,
"loss": 1.7461,
"step": 1688
},
{
"epoch": 1.82,
"learning_rate": 1.19551730293603e-06,
"loss": 1.6481,
"step": 1690
},
{
"epoch": 1.82,
"learning_rate": 1.193809157712695e-06,
"loss": 1.6965,
"step": 1692
},
{
"epoch": 1.82,
"learning_rate": 1.1921004247282275e-06,
"loss": 1.6584,
"step": 1694
},
{
"epoch": 1.82,
"learning_rate": 1.1903911091646684e-06,
"loss": 1.7731,
"step": 1696
},
{
"epoch": 1.83,
"learning_rate": 1.1886812162058241e-06,
"loss": 1.7779,
"step": 1698
},
{
"epoch": 1.83,
"learning_rate": 1.1869707510372526e-06,
"loss": 1.7142,
"step": 1700
},
{
"epoch": 1.83,
"learning_rate": 1.1852597188462474e-06,
"loss": 1.6581,
"step": 1702
},
{
"epoch": 1.83,
"learning_rate": 1.1835481248218213e-06,
"loss": 1.6806,
"step": 1704
},
{
"epoch": 1.83,
"learning_rate": 1.1818359741546912e-06,
"loss": 1.7324,
"step": 1706
},
{
"epoch": 1.84,
"learning_rate": 1.1801232720372617e-06,
"loss": 1.7549,
"step": 1708
},
{
"epoch": 1.84,
"learning_rate": 1.1784100236636097e-06,
"loss": 1.7423,
"step": 1710
},
{
"epoch": 1.84,
"learning_rate": 1.17669623422947e-06,
"loss": 1.7045,
"step": 1712
},
{
"epoch": 1.84,
"learning_rate": 1.1749819089322165e-06,
"loss": 1.7012,
"step": 1714
},
{
"epoch": 1.84,
"learning_rate": 1.1732670529708494e-06,
"loss": 1.6738,
"step": 1716
},
{
"epoch": 1.85,
"learning_rate": 1.1715516715459784e-06,
"loss": 1.7019,
"step": 1718
},
{
"epoch": 1.85,
"learning_rate": 1.1698357698598052e-06,
"loss": 1.6911,
"step": 1720
},
{
"epoch": 1.85,
"learning_rate": 1.168119353116111e-06,
"loss": 1.7288,
"step": 1722
},
{
"epoch": 1.85,
"learning_rate": 1.1664024265202376e-06,
"loss": 1.696,
"step": 1724
},
{
"epoch": 1.86,
"learning_rate": 1.1646849952790744e-06,
"loss": 1.676,
"step": 1726
},
{
"epoch": 1.86,
"learning_rate": 1.1629670646010405e-06,
"loss": 1.6942,
"step": 1728
},
{
"epoch": 1.86,
"learning_rate": 1.1612486396960694e-06,
"loss": 1.6838,
"step": 1730
},
{
"epoch": 1.86,
"learning_rate": 1.159529725775594e-06,
"loss": 1.7286,
"step": 1732
},
{
"epoch": 1.86,
"learning_rate": 1.1578103280525295e-06,
"loss": 1.7121,
"step": 1734
},
{
"epoch": 1.87,
"learning_rate": 1.1560904517412597e-06,
"loss": 1.7135,
"step": 1736
},
{
"epoch": 1.87,
"learning_rate": 1.154370102057618e-06,
"loss": 1.69,
"step": 1738
},
{
"epoch": 1.87,
"learning_rate": 1.1526492842188744e-06,
"loss": 1.7195,
"step": 1740
},
{
"epoch": 1.87,
"learning_rate": 1.150928003443719e-06,
"loss": 1.6468,
"step": 1742
},
{
"epoch": 1.87,
"learning_rate": 1.149206264952245e-06,
"loss": 1.7329,
"step": 1744
},
{
"epoch": 1.88,
"learning_rate": 1.1474840739659337e-06,
"loss": 1.6914,
"step": 1746
},
{
"epoch": 1.88,
"learning_rate": 1.1457614357076398e-06,
"loss": 1.6938,
"step": 1748
},
{
"epoch": 1.88,
"learning_rate": 1.1440383554015733e-06,
"loss": 1.6753,
"step": 1750
},
{
"epoch": 1.88,
"learning_rate": 1.1423148382732853e-06,
"loss": 1.6927,
"step": 1752
},
{
"epoch": 1.89,
"learning_rate": 1.1405908895496511e-06,
"loss": 1.7203,
"step": 1754
},
{
"epoch": 1.89,
"learning_rate": 1.1388665144588558e-06,
"loss": 1.7106,
"step": 1756
},
{
"epoch": 1.89,
"learning_rate": 1.1371417182303769e-06,
"loss": 1.7114,
"step": 1758
},
{
"epoch": 1.89,
"learning_rate": 1.135416506094969e-06,
"loss": 1.6941,
"step": 1760
},
{
"epoch": 1.89,
"learning_rate": 1.1336908832846483e-06,
"loss": 1.6957,
"step": 1762
},
{
"epoch": 1.9,
"learning_rate": 1.1319648550326769e-06,
"loss": 1.7461,
"step": 1764
},
{
"epoch": 1.9,
"learning_rate": 1.1302384265735451e-06,
"loss": 1.7403,
"step": 1766
},
{
"epoch": 1.9,
"learning_rate": 1.1285116031429583e-06,
"loss": 1.7075,
"step": 1768
},
{
"epoch": 1.9,
"learning_rate": 1.1267843899778188e-06,
"loss": 1.691,
"step": 1770
},
{
"epoch": 1.9,
"learning_rate": 1.1250567923162116e-06,
"loss": 1.6923,
"step": 1772
},
{
"epoch": 1.91,
"learning_rate": 1.1233288153973871e-06,
"loss": 1.6711,
"step": 1774
},
{
"epoch": 1.91,
"learning_rate": 1.121600464461746e-06,
"loss": 1.7347,
"step": 1776
},
{
"epoch": 1.91,
"learning_rate": 1.1198717447508238e-06,
"loss": 1.7163,
"step": 1778
},
{
"epoch": 1.91,
"learning_rate": 1.1181426615072738e-06,
"loss": 1.7165,
"step": 1780
},
{
"epoch": 1.92,
"learning_rate": 1.1164132199748517e-06,
"loss": 1.7139,
"step": 1782
},
{
"epoch": 1.92,
"learning_rate": 1.1146834253984005e-06,
"loss": 1.6898,
"step": 1784
},
{
"epoch": 1.92,
"learning_rate": 1.1129532830238328e-06,
"loss": 1.6665,
"step": 1786
},
{
"epoch": 1.92,
"learning_rate": 1.1112227980981173e-06,
"loss": 1.7527,
"step": 1788
},
{
"epoch": 1.92,
"learning_rate": 1.1094919758692603e-06,
"loss": 1.773,
"step": 1790
},
{
"epoch": 1.93,
"learning_rate": 1.1077608215862913e-06,
"loss": 1.6996,
"step": 1792
},
{
"epoch": 1.93,
"learning_rate": 1.1060293404992478e-06,
"loss": 1.7005,
"step": 1794
},
{
"epoch": 1.93,
"learning_rate": 1.1042975378591572e-06,
"loss": 1.6702,
"step": 1796
},
{
"epoch": 1.93,
"learning_rate": 1.1025654189180225e-06,
"loss": 1.6519,
"step": 1798
},
{
"epoch": 1.93,
"learning_rate": 1.1008329889288059e-06,
"loss": 1.7702,
"step": 1800
},
{
"epoch": 1.94,
"learning_rate": 1.0991002531454133e-06,
"loss": 1.6652,
"step": 1802
},
{
"epoch": 1.94,
"learning_rate": 1.0973672168226773e-06,
"loss": 1.6596,
"step": 1804
},
{
"epoch": 1.94,
"learning_rate": 1.0956338852163423e-06,
"loss": 1.6831,
"step": 1806
},
{
"epoch": 1.94,
"learning_rate": 1.0939002635830484e-06,
"loss": 1.6836,
"step": 1808
},
{
"epoch": 1.95,
"learning_rate": 1.0921663571803148e-06,
"loss": 1.7197,
"step": 1810
},
{
"epoch": 1.95,
"learning_rate": 1.0904321712665247e-06,
"loss": 1.617,
"step": 1812
},
{
"epoch": 1.95,
"learning_rate": 1.0886977111009088e-06,
"loss": 1.7049,
"step": 1814
},
{
"epoch": 1.95,
"learning_rate": 1.0869629819435295e-06,
"loss": 1.7344,
"step": 1816
},
{
"epoch": 1.95,
"learning_rate": 1.085227989055265e-06,
"loss": 1.715,
"step": 1818
},
{
"epoch": 1.96,
"learning_rate": 1.0834927376977937e-06,
"loss": 1.7326,
"step": 1820
},
{
"epoch": 1.96,
"learning_rate": 1.0817572331335766e-06,
"loss": 1.7108,
"step": 1822
},
{
"epoch": 1.96,
"learning_rate": 1.0800214806258443e-06,
"loss": 1.6798,
"step": 1824
},
{
"epoch": 1.96,
"learning_rate": 1.078285485438578e-06,
"loss": 1.7543,
"step": 1826
},
{
"epoch": 1.97,
"learning_rate": 1.076549252836496e-06,
"loss": 1.7059,
"step": 1828
},
{
"epoch": 1.97,
"learning_rate": 1.0748127880850348e-06,
"loss": 1.7489,
"step": 1830
},
{
"epoch": 1.97,
"learning_rate": 1.073076096450337e-06,
"loss": 1.7203,
"step": 1832
},
{
"epoch": 1.97,
"learning_rate": 1.0713391831992323e-06,
"loss": 1.6616,
"step": 1834
},
{
"epoch": 1.97,
"learning_rate": 1.0696020535992225e-06,
"loss": 1.681,
"step": 1836
},
{
"epoch": 1.98,
"learning_rate": 1.0678647129184652e-06,
"loss": 1.6962,
"step": 1838
},
{
"epoch": 1.98,
"learning_rate": 1.0661271664257591e-06,
"loss": 1.6594,
"step": 1840
},
{
"epoch": 1.98,
"learning_rate": 1.0643894193905264e-06,
"loss": 1.7364,
"step": 1842
},
{
"epoch": 1.98,
"learning_rate": 1.0626514770827971e-06,
"loss": 1.7061,
"step": 1844
},
{
"epoch": 1.98,
"learning_rate": 1.0609133447731941e-06,
"loss": 1.7167,
"step": 1846
},
{
"epoch": 1.99,
"learning_rate": 1.0591750277329165e-06,
"loss": 1.6882,
"step": 1848
},
{
"epoch": 1.99,
"learning_rate": 1.0574365312337234e-06,
"loss": 1.6871,
"step": 1850
},
{
"epoch": 1.99,
"learning_rate": 1.0556978605479174e-06,
"loss": 1.6935,
"step": 1852
},
{
"epoch": 1.99,
"learning_rate": 1.053959020948331e-06,
"loss": 1.7205,
"step": 1854
},
{
"epoch": 2.0,
"learning_rate": 1.0522200177083075e-06,
"loss": 1.6969,
"step": 1856
},
{
"epoch": 2.0,
"learning_rate": 1.0504808561016875e-06,
"loss": 1.7253,
"step": 1858
},
{
"epoch": 2.0,
"learning_rate": 1.048741541402791e-06,
"loss": 1.7234,
"step": 1860
},
{
"epoch": 2.0,
"learning_rate": 1.047002078886403e-06,
"loss": 1.6991,
"step": 1862
},
{
"epoch": 2.0,
"learning_rate": 1.0452624738277563e-06,
"loss": 1.6794,
"step": 1864
},
{
"epoch": 2.01,
"learning_rate": 1.043522731502516e-06,
"loss": 1.6694,
"step": 1866
},
{
"epoch": 2.01,
"learning_rate": 1.0417828571867637e-06,
"loss": 1.6975,
"step": 1868
},
{
"epoch": 2.01,
"learning_rate": 1.0400428561569817e-06,
"loss": 1.7029,
"step": 1870
},
{
"epoch": 2.01,
"learning_rate": 1.0383027336900353e-06,
"loss": 1.7138,
"step": 1872
},
{
"epoch": 2.01,
"learning_rate": 1.0365624950631596e-06,
"loss": 1.6758,
"step": 1874
},
{
"epoch": 2.02,
"learning_rate": 1.0348221455539407e-06,
"loss": 1.6867,
"step": 1876
},
{
"epoch": 2.02,
"learning_rate": 1.0330816904403019e-06,
"loss": 1.6861,
"step": 1878
},
{
"epoch": 2.02,
"learning_rate": 1.0313411350004862e-06,
"loss": 1.6985,
"step": 1880
},
{
"epoch": 2.02,
"learning_rate": 1.0296004845130412e-06,
"loss": 1.7534,
"step": 1882
},
{
"epoch": 2.03,
"learning_rate": 1.0278597442568024e-06,
"loss": 1.6866,
"step": 1884
},
{
"epoch": 2.03,
"learning_rate": 1.026118919510878e-06,
"loss": 1.6866,
"step": 1886
},
{
"epoch": 2.03,
"learning_rate": 1.0243780155546322e-06,
"loss": 1.659,
"step": 1888
},
{
"epoch": 2.03,
"learning_rate": 1.022637037667669e-06,
"loss": 1.7039,
"step": 1890
},
{
"epoch": 2.03,
"learning_rate": 1.0208959911298173e-06,
"loss": 1.6146,
"step": 1892
},
{
"epoch": 2.04,
"learning_rate": 1.0191548812211142e-06,
"loss": 1.7017,
"step": 1894
},
{
"epoch": 2.04,
"learning_rate": 1.0174137132217882e-06,
"loss": 1.7409,
"step": 1896
},
{
"epoch": 2.04,
"learning_rate": 1.0156724924122442e-06,
"loss": 1.6915,
"step": 1898
},
{
"epoch": 2.04,
"learning_rate": 1.013931224073048e-06,
"loss": 1.6647,
"step": 1900
},
{
"epoch": 2.04,
"learning_rate": 1.012189913484909e-06,
"loss": 1.6729,
"step": 1902
},
{
"epoch": 2.05,
"learning_rate": 1.0104485659286647e-06,
"loss": 1.7372,
"step": 1904
},
{
"epoch": 2.05,
"learning_rate": 1.0087071866852645e-06,
"loss": 1.6982,
"step": 1906
},
{
"epoch": 2.05,
"learning_rate": 1.006965781035755e-06,
"loss": 1.7022,
"step": 1908
},
{
"epoch": 2.05,
"learning_rate": 1.0052243542612613e-06,
"loss": 1.737,
"step": 1910
},
{
"epoch": 2.06,
"learning_rate": 1.0034829116429738e-06,
"loss": 1.6449,
"step": 1912
},
{
"epoch": 2.06,
"learning_rate": 1.0017414584621299e-06,
"loss": 1.6763,
"step": 1914
},
{
"epoch": 2.06,
"learning_rate": 1e-06,
"loss": 1.6913,
"step": 1916
},
{
"epoch": 2.06,
"learning_rate": 9.9825854153787e-07,
"loss": 1.7034,
"step": 1918
},
{
"epoch": 2.06,
"learning_rate": 9.965170883570263e-07,
"loss": 1.6671,
"step": 1920
},
{
"epoch": 2.07,
"learning_rate": 9.947756457387386e-07,
"loss": 1.6979,
"step": 1922
},
{
"epoch": 2.07,
"learning_rate": 9.93034218964245e-07,
"loss": 1.6894,
"step": 1924
},
{
"epoch": 2.07,
"learning_rate": 9.912928133147352e-07,
"loss": 1.6678,
"step": 1926
},
{
"epoch": 2.07,
"learning_rate": 9.895514340713352e-07,
"loss": 1.6519,
"step": 1928
},
{
"epoch": 2.07,
"learning_rate": 9.87810086515091e-07,
"loss": 1.6842,
"step": 1930
},
{
"epoch": 2.08,
"learning_rate": 9.860687759269523e-07,
"loss": 1.6717,
"step": 1932
},
{
"epoch": 2.08,
"learning_rate": 9.84327507587756e-07,
"loss": 1.6812,
"step": 1934
},
{
"epoch": 2.08,
"learning_rate": 9.825862867782123e-07,
"loss": 1.694,
"step": 1936
},
{
"epoch": 2.08,
"learning_rate": 9.80845118778886e-07,
"loss": 1.6862,
"step": 1938
},
{
"epoch": 2.09,
"learning_rate": 9.791040088701828e-07,
"loss": 1.6877,
"step": 1940
},
{
"epoch": 2.09,
"learning_rate": 9.77362962332331e-07,
"loss": 1.6804,
"step": 1942
},
{
"epoch": 2.09,
"learning_rate": 9.756219844453675e-07,
"loss": 1.6541,
"step": 1944
},
{
"epoch": 2.09,
"learning_rate": 9.738810804891218e-07,
"loss": 1.6688,
"step": 1946
},
{
"epoch": 2.09,
"learning_rate": 9.721402557431973e-07,
"loss": 1.6979,
"step": 1948
},
{
"epoch": 2.1,
"learning_rate": 9.703995154869587e-07,
"loss": 1.69,
"step": 1950
},
{
"epoch": 2.1,
"learning_rate": 9.686588649995137e-07,
"loss": 1.7195,
"step": 1952
},
{
"epoch": 2.1,
"learning_rate": 9.669183095596982e-07,
"loss": 1.6727,
"step": 1954
},
{
"epoch": 2.1,
"learning_rate": 9.651778544460594e-07,
"loss": 1.6705,
"step": 1956
},
{
"epoch": 2.1,
"learning_rate": 9.634375049368405e-07,
"loss": 1.6648,
"step": 1958
},
{
"epoch": 2.11,
"learning_rate": 9.616972663099646e-07,
"loss": 1.6725,
"step": 1960
},
{
"epoch": 2.11,
"learning_rate": 9.599571438430186e-07,
"loss": 1.6684,
"step": 1962
},
{
"epoch": 2.11,
"learning_rate": 9.582171428132362e-07,
"loss": 1.7053,
"step": 1964
},
{
"epoch": 2.11,
"learning_rate": 9.564772684974838e-07,
"loss": 1.7238,
"step": 1966
},
{
"epoch": 2.12,
"learning_rate": 9.547375261722436e-07,
"loss": 1.7292,
"step": 1968
},
{
"epoch": 2.12,
"learning_rate": 9.529979211135968e-07,
"loss": 1.6638,
"step": 1970
},
{
"epoch": 2.12,
"learning_rate": 9.512584585972089e-07,
"loss": 1.5997,
"step": 1972
},
{
"epoch": 2.12,
"learning_rate": 9.495191438983121e-07,
"loss": 1.7083,
"step": 1974
},
{
"epoch": 2.12,
"learning_rate": 9.477799822916923e-07,
"loss": 1.7368,
"step": 1976
},
{
"epoch": 2.13,
"learning_rate": 9.460409790516689e-07,
"loss": 1.7246,
"step": 1978
},
{
"epoch": 2.13,
"learning_rate": 9.443021394520825e-07,
"loss": 1.6992,
"step": 1980
},
{
"epoch": 2.13,
"learning_rate": 9.425634687662766e-07,
"loss": 1.6767,
"step": 1982
},
{
"epoch": 2.13,
"learning_rate": 9.408249722670836e-07,
"loss": 1.6429,
"step": 1984
},
{
"epoch": 2.13,
"learning_rate": 9.390866552268058e-07,
"loss": 1.6825,
"step": 1986
},
{
"epoch": 2.14,
"learning_rate": 9.373485229172029e-07,
"loss": 1.6945,
"step": 1988
},
{
"epoch": 2.14,
"learning_rate": 9.356105806094736e-07,
"loss": 1.6499,
"step": 1990
},
{
"epoch": 2.14,
"learning_rate": 9.338728335742405e-07,
"loss": 1.7397,
"step": 1992
},
{
"epoch": 2.14,
"learning_rate": 9.321352870815347e-07,
"loss": 1.6727,
"step": 1994
},
{
"epoch": 2.15,
"learning_rate": 9.303979464007775e-07,
"loss": 1.6525,
"step": 1996
},
{
"epoch": 2.15,
"learning_rate": 9.286608168007676e-07,
"loss": 1.698,
"step": 1998
},
{
"epoch": 2.15,
"learning_rate": 9.269239035496628e-07,
"loss": 1.7011,
"step": 2000
},
{
"epoch": 2.15,
"learning_rate": 9.260555297010704e-07,
"loss": 1.6581,
"step": 2002
},
{
"epoch": 2.15,
"learning_rate": 9.251872119149656e-07,
"loss": 1.6775,
"step": 2004
},
{
"epoch": 2.16,
"learning_rate": 9.234507471635042e-07,
"loss": 1.6328,
"step": 2006
},
{
"epoch": 2.16,
"learning_rate": 9.217145145614221e-07,
"loss": 1.6563,
"step": 2008
},
{
"epoch": 2.16,
"learning_rate": 9.199785193741557e-07,
"loss": 1.6793,
"step": 2010
},
{
"epoch": 2.16,
"learning_rate": 9.182427668664233e-07,
"loss": 1.652,
"step": 2012
},
{
"epoch": 2.17,
"learning_rate": 9.165072623022063e-07,
"loss": 1.6535,
"step": 2014
},
{
"epoch": 2.17,
"learning_rate": 9.147720109447345e-07,
"loss": 1.6903,
"step": 2016
},
{
"epoch": 2.17,
"learning_rate": 9.130370180564705e-07,
"loss": 1.6495,
"step": 2018
},
{
"epoch": 2.17,
"learning_rate": 9.113022888990911e-07,
"loss": 1.6845,
"step": 2020
},
{
"epoch": 2.17,
"learning_rate": 9.095678287334753e-07,
"loss": 1.7318,
"step": 2022
},
{
"epoch": 2.18,
"learning_rate": 9.078336428196851e-07,
"loss": 1.6889,
"step": 2024
},
{
"epoch": 2.18,
"learning_rate": 9.060997364169519e-07,
"loss": 1.6974,
"step": 2026
},
{
"epoch": 2.18,
"learning_rate": 9.043661147836578e-07,
"loss": 1.6742,
"step": 2028
},
{
"epoch": 2.18,
"learning_rate": 9.026327831773229e-07,
"loss": 1.6707,
"step": 2030
},
{
"epoch": 2.18,
"learning_rate": 9.008997468545868e-07,
"loss": 1.725,
"step": 2032
},
{
"epoch": 2.19,
"learning_rate": 8.991670110711943e-07,
"loss": 1.6996,
"step": 2034
},
{
"epoch": 2.19,
"learning_rate": 8.974345810819775e-07,
"loss": 1.6642,
"step": 2036
},
{
"epoch": 2.19,
"learning_rate": 8.957024621408431e-07,
"loss": 1.6846,
"step": 2038
},
{
"epoch": 2.19,
"learning_rate": 8.939706595007522e-07,
"loss": 1.6958,
"step": 2040
},
{
"epoch": 2.2,
"learning_rate": 8.922391784137084e-07,
"loss": 1.7046,
"step": 2042
},
{
"epoch": 2.2,
"learning_rate": 8.905080241307397e-07,
"loss": 1.6949,
"step": 2044
},
{
"epoch": 2.2,
"learning_rate": 8.887772019018825e-07,
"loss": 1.7253,
"step": 2046
},
{
"epoch": 2.2,
"learning_rate": 8.870467169761671e-07,
"loss": 1.7006,
"step": 2048
},
{
"epoch": 2.2,
"learning_rate": 8.853165746015995e-07,
"loss": 1.6521,
"step": 2050
},
{
"epoch": 2.21,
"learning_rate": 8.835867800251483e-07,
"loss": 1.651,
"step": 2052
},
{
"epoch": 2.21,
"learning_rate": 8.818573384927262e-07,
"loss": 1.7189,
"step": 2054
},
{
"epoch": 2.21,
"learning_rate": 8.801282552491763e-07,
"loss": 1.6733,
"step": 2056
},
{
"epoch": 2.21,
"learning_rate": 8.78399535538254e-07,
"loss": 1.6779,
"step": 2058
},
{
"epoch": 2.21,
"learning_rate": 8.766711846026131e-07,
"loss": 1.6881,
"step": 2060
},
{
"epoch": 2.22,
"learning_rate": 8.749432076837884e-07,
"loss": 1.6704,
"step": 2062
},
{
"epoch": 2.22,
"learning_rate": 8.732156100221815e-07,
"loss": 1.6909,
"step": 2064
},
{
"epoch": 2.22,
"learning_rate": 8.714883968570417e-07,
"loss": 1.6817,
"step": 2066
},
{
"epoch": 2.22,
"learning_rate": 8.697615734264547e-07,
"loss": 1.6882,
"step": 2068
},
{
"epoch": 2.23,
"learning_rate": 8.680351449673234e-07,
"loss": 1.6907,
"step": 2070
},
{
"epoch": 2.23,
"learning_rate": 8.663091167153514e-07,
"loss": 1.6665,
"step": 2072
},
{
"epoch": 2.23,
"learning_rate": 8.64583493905031e-07,
"loss": 1.6925,
"step": 2074
},
{
"epoch": 2.23,
"learning_rate": 8.62858281769623e-07,
"loss": 1.6217,
"step": 2076
},
{
"epoch": 2.23,
"learning_rate": 8.611334855411444e-07,
"loss": 1.6762,
"step": 2078
},
{
"epoch": 2.24,
"learning_rate": 8.594091104503489e-07,
"loss": 1.6979,
"step": 2080
},
{
"epoch": 2.24,
"learning_rate": 8.576851617267149e-07,
"loss": 1.7207,
"step": 2082
},
{
"epoch": 2.24,
"learning_rate": 8.559616445984267e-07,
"loss": 1.6718,
"step": 2084
},
{
"epoch": 2.24,
"learning_rate": 8.542385642923604e-07,
"loss": 1.66,
"step": 2086
},
{
"epoch": 2.24,
"learning_rate": 8.525159260340665e-07,
"loss": 1.6588,
"step": 2088
},
{
"epoch": 2.25,
"learning_rate": 8.507937350477552e-07,
"loss": 1.663,
"step": 2090
},
{
"epoch": 2.25,
"learning_rate": 8.490719965562812e-07,
"loss": 1.6708,
"step": 2092
},
{
"epoch": 2.25,
"learning_rate": 8.473507157811254e-07,
"loss": 1.7245,
"step": 2094
},
{
"epoch": 2.25,
"learning_rate": 8.45629897942382e-07,
"loss": 1.6641,
"step": 2096
},
{
"epoch": 2.26,
"learning_rate": 8.439095482587402e-07,
"loss": 1.6388,
"step": 2098
},
{
"epoch": 2.26,
"learning_rate": 8.421896719474704e-07,
"loss": 1.643,
"step": 2100
},
{
"epoch": 2.26,
"learning_rate": 8.404702742244061e-07,
"loss": 1.6584,
"step": 2102
},
{
"epoch": 2.26,
"learning_rate": 8.387513603039306e-07,
"loss": 1.7217,
"step": 2104
},
{
"epoch": 2.26,
"learning_rate": 8.370329353989595e-07,
"loss": 1.6549,
"step": 2106
},
{
"epoch": 2.27,
"learning_rate": 8.353150047209259e-07,
"loss": 1.6791,
"step": 2108
},
{
"epoch": 2.27,
"learning_rate": 8.335975734797626e-07,
"loss": 1.664,
"step": 2110
},
{
"epoch": 2.27,
"learning_rate": 8.318806468838895e-07,
"loss": 1.7046,
"step": 2112
},
{
"epoch": 2.27,
"learning_rate": 8.301642301401949e-07,
"loss": 1.6804,
"step": 2114
},
{
"epoch": 2.27,
"learning_rate": 8.284483284540216e-07,
"loss": 1.6934,
"step": 2116
},
{
"epoch": 2.28,
"learning_rate": 8.267329470291505e-07,
"loss": 1.6658,
"step": 2118
},
{
"epoch": 2.28,
"learning_rate": 8.250180910677833e-07,
"loss": 1.6732,
"step": 2120
},
{
"epoch": 2.28,
"learning_rate": 8.233037657705302e-07,
"loss": 1.7052,
"step": 2122
},
{
"epoch": 2.28,
"learning_rate": 8.215899763363902e-07,
"loss": 1.6471,
"step": 2124
},
{
"epoch": 2.29,
"learning_rate": 8.198767279627385e-07,
"loss": 1.7186,
"step": 2126
},
{
"epoch": 2.29,
"learning_rate": 8.181640258453088e-07,
"loss": 1.7106,
"step": 2128
},
{
"epoch": 2.29,
"learning_rate": 8.164518751781788e-07,
"loss": 1.6692,
"step": 2130
},
{
"epoch": 2.29,
"learning_rate": 8.147402811537525e-07,
"loss": 1.6377,
"step": 2132
},
{
"epoch": 2.29,
"learning_rate": 8.130292489627474e-07,
"loss": 1.7158,
"step": 2134
},
{
"epoch": 2.3,
"learning_rate": 8.11318783794176e-07,
"loss": 1.6664,
"step": 2136
},
{
"epoch": 2.3,
"learning_rate": 8.096088908353315e-07,
"loss": 1.7066,
"step": 2138
},
{
"epoch": 2.3,
"learning_rate": 8.078995752717725e-07,
"loss": 1.6674,
"step": 2140
},
{
"epoch": 2.3,
"learning_rate": 8.061908422873051e-07,
"loss": 1.674,
"step": 2142
},
{
"epoch": 2.3,
"learning_rate": 8.0448269706397e-07,
"loss": 1.6951,
"step": 2144
},
{
"epoch": 2.31,
"learning_rate": 8.027751447820246e-07,
"loss": 1.6946,
"step": 2146
},
{
"epoch": 2.31,
"learning_rate": 8.010681906199287e-07,
"loss": 1.6304,
"step": 2148
},
{
"epoch": 2.31,
"learning_rate": 7.993618397543268e-07,
"loss": 1.709,
"step": 2150
},
{
"epoch": 2.31,
"learning_rate": 7.976560973600363e-07,
"loss": 1.7127,
"step": 2152
},
{
"epoch": 2.32,
"learning_rate": 7.959509686100267e-07,
"loss": 1.6559,
"step": 2154
},
{
"epoch": 2.32,
"learning_rate": 7.942464586754082e-07,
"loss": 1.6825,
"step": 2156
},
{
"epoch": 2.32,
"learning_rate": 7.925425727254134e-07,
"loss": 1.6144,
"step": 2158
},
{
"epoch": 2.32,
"learning_rate": 7.908393159273836e-07,
"loss": 1.7109,
"step": 2160
},
{
"epoch": 2.32,
"learning_rate": 7.891366934467503e-07,
"loss": 1.696,
"step": 2162
},
{
"epoch": 2.33,
"learning_rate": 7.874347104470232e-07,
"loss": 1.6669,
"step": 2164
},
{
"epoch": 2.33,
"learning_rate": 7.857333720897721e-07,
"loss": 1.7349,
"step": 2166
},
{
"epoch": 2.33,
"learning_rate": 7.84032683534611e-07,
"loss": 1.7119,
"step": 2168
},
{
"epoch": 2.33,
"learning_rate": 7.823326499391845e-07,
"loss": 1.6871,
"step": 2170
},
{
"epoch": 2.33,
"learning_rate": 7.806332764591495e-07,
"loss": 1.7483,
"step": 2172
},
{
"epoch": 2.34,
"learning_rate": 7.789345682481622e-07,
"loss": 1.699,
"step": 2174
},
{
"epoch": 2.34,
"learning_rate": 7.772365304578608e-07,
"loss": 1.7096,
"step": 2176
},
{
"epoch": 2.34,
"learning_rate": 7.755391682378505e-07,
"loss": 1.63,
"step": 2178
},
{
"epoch": 2.34,
"learning_rate": 7.738424867356867e-07,
"loss": 1.6633,
"step": 2180
},
{
"epoch": 2.35,
"learning_rate": 7.721464910968626e-07,
"loss": 1.7003,
"step": 2182
},
{
"epoch": 2.35,
"learning_rate": 7.704511864647889e-07,
"loss": 1.6877,
"step": 2184
},
{
"epoch": 2.35,
"learning_rate": 7.687565779807823e-07,
"loss": 1.636,
"step": 2186
},
{
"epoch": 2.35,
"learning_rate": 7.670626707840477e-07,
"loss": 1.6685,
"step": 2188
},
{
"epoch": 2.35,
"learning_rate": 7.653694700116636e-07,
"loss": 1.6634,
"step": 2190
},
{
"epoch": 2.36,
"learning_rate": 7.63676980798566e-07,
"loss": 1.7052,
"step": 2192
},
{
"epoch": 2.36,
"learning_rate": 7.619852082775322e-07,
"loss": 1.6881,
"step": 2194
},
{
"epoch": 2.36,
"learning_rate": 7.602941575791674e-07,
"loss": 1.6235,
"step": 2196
},
{
"epoch": 2.36,
"learning_rate": 7.586038338318864e-07,
"loss": 1.6522,
"step": 2198
},
{
"epoch": 2.36,
"learning_rate": 7.569142421619009e-07,
"loss": 1.7054,
"step": 2200
},
{
"epoch": 2.37,
"learning_rate": 7.552253876932005e-07,
"loss": 1.6686,
"step": 2202
},
{
"epoch": 2.37,
"learning_rate": 7.53537275547541e-07,
"loss": 1.6714,
"step": 2204
},
{
"epoch": 2.37,
"learning_rate": 7.518499108444255e-07,
"loss": 1.6668,
"step": 2206
},
{
"epoch": 2.37,
"learning_rate": 7.501632987010916e-07,
"loss": 1.6984,
"step": 2208
},
{
"epoch": 2.38,
"learning_rate": 7.484774442324931e-07,
"loss": 1.6506,
"step": 2210
},
{
"epoch": 2.38,
"learning_rate": 7.467923525512878e-07,
"loss": 1.6957,
"step": 2212
},
{
"epoch": 2.38,
"learning_rate": 7.451080287678194e-07,
"loss": 1.6763,
"step": 2214
},
{
"epoch": 2.38,
"learning_rate": 7.434244779901018e-07,
"loss": 1.7088,
"step": 2216
},
{
"epoch": 2.38,
"learning_rate": 7.417417053238064e-07,
"loss": 1.6602,
"step": 2218
},
{
"epoch": 2.39,
"learning_rate": 7.400597158722435e-07,
"loss": 1.6661,
"step": 2220
},
{
"epoch": 2.39,
"learning_rate": 7.383785147363493e-07,
"loss": 1.7322,
"step": 2222
},
{
"epoch": 2.39,
"learning_rate": 7.366981070146678e-07,
"loss": 1.6725,
"step": 2224
},
{
"epoch": 2.39,
"learning_rate": 7.350184978033385e-07,
"loss": 1.6806,
"step": 2226
},
{
"epoch": 2.4,
"learning_rate": 7.333396921960776e-07,
"loss": 1.6918,
"step": 2228
},
{
"epoch": 2.4,
"learning_rate": 7.316616952841661e-07,
"loss": 1.6206,
"step": 2230
},
{
"epoch": 2.4,
"learning_rate": 7.299845121564303e-07,
"loss": 1.6846,
"step": 2232
},
{
"epoch": 2.4,
"learning_rate": 7.283081478992307e-07,
"loss": 1.689,
"step": 2234
},
{
"epoch": 2.4,
"learning_rate": 7.266326075964428e-07,
"loss": 1.6578,
"step": 2236
},
{
"epoch": 2.41,
"learning_rate": 7.249578963294441e-07,
"loss": 1.7333,
"step": 2238
},
{
"epoch": 2.41,
"learning_rate": 7.232840191770983e-07,
"loss": 1.675,
"step": 2240
},
{
"epoch": 2.41,
"learning_rate": 7.216109812157382e-07,
"loss": 1.6838,
"step": 2242
},
{
"epoch": 2.41,
"learning_rate": 7.19938787519153e-07,
"loss": 1.6929,
"step": 2244
},
{
"epoch": 2.41,
"learning_rate": 7.182674431585702e-07,
"loss": 1.6498,
"step": 2246
},
{
"epoch": 2.42,
"learning_rate": 7.165969532026429e-07,
"loss": 1.6689,
"step": 2248
},
{
"epoch": 2.42,
"learning_rate": 7.149273227174318e-07,
"loss": 1.6738,
"step": 2250
},
{
"epoch": 2.42,
"learning_rate": 7.132585567663922e-07,
"loss": 1.6882,
"step": 2252
},
{
"epoch": 2.42,
"learning_rate": 7.115906604103563e-07,
"loss": 1.7063,
"step": 2254
},
{
"epoch": 2.43,
"learning_rate": 7.099236387075203e-07,
"loss": 1.6674,
"step": 2256
},
{
"epoch": 2.43,
"learning_rate": 7.082574967134274e-07,
"loss": 1.7112,
"step": 2258
},
{
"epoch": 2.43,
"learning_rate": 7.065922394809525e-07,
"loss": 1.6887,
"step": 2260
},
{
"epoch": 2.43,
"learning_rate": 7.049278720602886e-07,
"loss": 1.6402,
"step": 2262
},
{
"epoch": 2.43,
"learning_rate": 7.032643994989282e-07,
"loss": 1.659,
"step": 2264
},
{
"epoch": 2.44,
"learning_rate": 7.016018268416517e-07,
"loss": 1.6611,
"step": 2266
},
{
"epoch": 2.44,
"learning_rate": 6.999401591305092e-07,
"loss": 1.7139,
"step": 2268
},
{
"epoch": 2.44,
"learning_rate": 6.982794014048077e-07,
"loss": 1.6484,
"step": 2270
},
{
"epoch": 2.44,
"learning_rate": 6.96619558701093e-07,
"loss": 1.6803,
"step": 2272
},
{
"epoch": 2.44,
"learning_rate": 6.949606360531375e-07,
"loss": 1.6501,
"step": 2274
},
{
"epoch": 2.45,
"learning_rate": 6.933026384919215e-07,
"loss": 1.6921,
"step": 2276
},
{
"epoch": 2.45,
"learning_rate": 6.916455710456215e-07,
"loss": 1.668,
"step": 2278
},
{
"epoch": 2.45,
"learning_rate": 6.899894387395924e-07,
"loss": 1.6566,
"step": 2280
},
{
"epoch": 2.45,
"learning_rate": 6.883342465963536e-07,
"loss": 1.6592,
"step": 2282
},
{
"epoch": 2.46,
"learning_rate": 6.866799996355724e-07,
"loss": 1.6973,
"step": 2284
},
{
"epoch": 2.46,
"learning_rate": 6.850267028740506e-07,
"loss": 1.6754,
"step": 2286
},
{
"epoch": 2.46,
"learning_rate": 6.833743613257084e-07,
"loss": 1.7442,
"step": 2288
},
{
"epoch": 2.46,
"learning_rate": 6.817229800015681e-07,
"loss": 1.7135,
"step": 2290
},
{
"epoch": 2.46,
"learning_rate": 6.800725639097411e-07,
"loss": 1.6156,
"step": 2292
},
{
"epoch": 2.47,
"learning_rate": 6.784231180554106e-07,
"loss": 1.6559,
"step": 2294
},
{
"epoch": 2.47,
"learning_rate": 6.767746474408185e-07,
"loss": 1.6719,
"step": 2296
},
{
"epoch": 2.47,
"learning_rate": 6.751271570652476e-07,
"loss": 1.7022,
"step": 2298
},
{
"epoch": 2.47,
"learning_rate": 6.734806519250095e-07,
"loss": 1.6923,
"step": 2300
},
{
"epoch": 2.47,
"learning_rate": 6.71835137013427e-07,
"loss": 1.6442,
"step": 2302
},
{
"epoch": 2.48,
"learning_rate": 6.701906173208203e-07,
"loss": 1.6474,
"step": 2304
},
{
"epoch": 2.48,
"learning_rate": 6.685470978344905e-07,
"loss": 1.6584,
"step": 2306
},
{
"epoch": 2.48,
"learning_rate": 6.669045835387066e-07,
"loss": 1.6675,
"step": 2308
},
{
"epoch": 2.48,
"learning_rate": 6.652630794146884e-07,
"loss": 1.6566,
"step": 2310
},
{
"epoch": 2.49,
"learning_rate": 6.636225904405925e-07,
"loss": 1.7168,
"step": 2312
},
{
"epoch": 2.49,
"learning_rate": 6.619831215914973e-07,
"loss": 1.7003,
"step": 2314
},
{
"epoch": 2.49,
"learning_rate": 6.603446778393862e-07,
"loss": 1.6721,
"step": 2316
},
{
"epoch": 2.49,
"learning_rate": 6.58707264153135e-07,
"loss": 1.6589,
"step": 2318
},
{
"epoch": 2.49,
"learning_rate": 6.57070885498495e-07,
"loss": 1.666,
"step": 2320
},
{
"epoch": 2.5,
"learning_rate": 6.554355468380795e-07,
"loss": 1.7522,
"step": 2322
},
{
"epoch": 2.5,
"learning_rate": 6.538012531313459e-07,
"loss": 1.6406,
"step": 2324
},
{
"epoch": 2.5,
"learning_rate": 6.521680093345851e-07,
"loss": 1.6841,
"step": 2326
},
{
"epoch": 2.5,
"learning_rate": 6.505358204009017e-07,
"loss": 1.6939,
"step": 2328
},
{
"epoch": 2.5,
"learning_rate": 6.48904691280203e-07,
"loss": 1.7099,
"step": 2330
},
{
"epoch": 2.51,
"learning_rate": 6.472746269191808e-07,
"loss": 1.6797,
"step": 2332
},
{
"epoch": 2.51,
"learning_rate": 6.456456322612989e-07,
"loss": 1.6792,
"step": 2334
},
{
"epoch": 2.51,
"learning_rate": 6.440177122467768e-07,
"loss": 1.7058,
"step": 2336
},
{
"epoch": 2.51,
"learning_rate": 6.423908718125742e-07,
"loss": 1.7208,
"step": 2338
},
{
"epoch": 2.52,
"learning_rate": 6.407651158923777e-07,
"loss": 1.6497,
"step": 2340
},
{
"epoch": 2.52,
"learning_rate": 6.391404494165844e-07,
"loss": 1.6367,
"step": 2342
},
{
"epoch": 2.52,
"learning_rate": 6.375168773122881e-07,
"loss": 1.6693,
"step": 2344
},
{
"epoch": 2.52,
"learning_rate": 6.358944045032626e-07,
"loss": 1.6637,
"step": 2346
},
{
"epoch": 2.52,
"learning_rate": 6.342730359099489e-07,
"loss": 1.683,
"step": 2348
},
{
"epoch": 2.53,
"learning_rate": 6.326527764494384e-07,
"loss": 1.7098,
"step": 2350
},
{
"epoch": 2.53,
"learning_rate": 6.310336310354604e-07,
"loss": 1.6868,
"step": 2352
},
{
"epoch": 2.53,
"learning_rate": 6.294156045783634e-07,
"loss": 1.7512,
"step": 2354
},
{
"epoch": 2.53,
"learning_rate": 6.277987019851045e-07,
"loss": 1.6898,
"step": 2356
},
{
"epoch": 2.53,
"learning_rate": 6.261829281592312e-07,
"loss": 1.6715,
"step": 2358
},
{
"epoch": 2.54,
"learning_rate": 6.245682880008685e-07,
"loss": 1.7164,
"step": 2360
},
{
"epoch": 2.54,
"learning_rate": 6.229547864067033e-07,
"loss": 1.6406,
"step": 2362
},
{
"epoch": 2.54,
"learning_rate": 6.213424282699688e-07,
"loss": 1.686,
"step": 2364
},
{
"epoch": 2.54,
"learning_rate": 6.19731218480432e-07,
"loss": 1.6529,
"step": 2366
},
{
"epoch": 2.55,
"learning_rate": 6.181211619243756e-07,
"loss": 1.6868,
"step": 2368
},
{
"epoch": 2.55,
"learning_rate": 6.165122634845859e-07,
"loss": 1.6871,
"step": 2370
},
{
"epoch": 2.55,
"learning_rate": 6.149045280403369e-07,
"loss": 1.6746,
"step": 2372
},
{
"epoch": 2.55,
"learning_rate": 6.132979604673758e-07,
"loss": 1.669,
"step": 2374
},
{
"epoch": 2.55,
"learning_rate": 6.11692565637907e-07,
"loss": 1.648,
"step": 2376
},
{
"epoch": 2.56,
"learning_rate": 6.100883484205799e-07,
"loss": 1.6893,
"step": 2378
},
{
"epoch": 2.56,
"learning_rate": 6.084853136804711e-07,
"loss": 1.7075,
"step": 2380
},
{
"epoch": 2.56,
"learning_rate": 6.068834662790722e-07,
"loss": 1.6784,
"step": 2382
},
{
"epoch": 2.56,
"learning_rate": 6.052828110742736e-07,
"loss": 1.6885,
"step": 2384
},
{
"epoch": 2.56,
"learning_rate": 6.036833529203499e-07,
"loss": 1.6594,
"step": 2386
},
{
"epoch": 2.57,
"learning_rate": 6.02085096667946e-07,
"loss": 1.6347,
"step": 2388
},
{
"epoch": 2.57,
"learning_rate": 6.004880471640611e-07,
"loss": 1.6649,
"step": 2390
},
{
"epoch": 2.57,
"learning_rate": 5.988922092520353e-07,
"loss": 1.6519,
"step": 2392
},
{
"epoch": 2.57,
"learning_rate": 5.972975877715338e-07,
"loss": 1.6736,
"step": 2394
},
{
"epoch": 2.58,
"learning_rate": 5.957041875585339e-07,
"loss": 1.6814,
"step": 2396
},
{
"epoch": 2.58,
"learning_rate": 5.941120134453073e-07,
"loss": 1.6911,
"step": 2398
},
{
"epoch": 2.58,
"learning_rate": 5.92521070260409e-07,
"loss": 1.6841,
"step": 2400
},
{
"epoch": 2.58,
"learning_rate": 5.9093136282866e-07,
"loss": 1.7432,
"step": 2402
},
{
"epoch": 2.58,
"learning_rate": 5.893428959711349e-07,
"loss": 1.6946,
"step": 2404
},
{
"epoch": 2.59,
"learning_rate": 5.877556745051439e-07,
"loss": 1.6804,
"step": 2406
},
{
"epoch": 2.59,
"learning_rate": 5.861697032442226e-07,
"loss": 1.6391,
"step": 2408
},
{
"epoch": 2.59,
"learning_rate": 5.845849869981136e-07,
"loss": 1.7019,
"step": 2410
},
{
"epoch": 2.59,
"learning_rate": 5.830015305727542e-07,
"loss": 1.6807,
"step": 2412
},
{
"epoch": 2.6,
"learning_rate": 5.814193387702609e-07,
"loss": 1.6717,
"step": 2414
},
{
"epoch": 2.6,
"learning_rate": 5.798384163889147e-07,
"loss": 1.6516,
"step": 2416
},
{
"epoch": 2.6,
"learning_rate": 5.782587682231472e-07,
"loss": 1.704,
"step": 2418
},
{
"epoch": 2.6,
"learning_rate": 5.766803990635254e-07,
"loss": 1.6612,
"step": 2420
},
{
"epoch": 2.6,
"learning_rate": 5.751033136967384e-07,
"loss": 1.6555,
"step": 2422
},
{
"epoch": 2.61,
"learning_rate": 5.735275169055803e-07,
"loss": 1.7314,
"step": 2424
},
{
"epoch": 2.61,
"learning_rate": 5.719530134689389e-07,
"loss": 1.696,
"step": 2426
},
{
"epoch": 2.61,
"learning_rate": 5.703798081617789e-07,
"loss": 1.6956,
"step": 2428
},
{
"epoch": 2.61,
"learning_rate": 5.688079057551282e-07,
"loss": 1.7311,
"step": 2430
},
{
"epoch": 2.61,
"learning_rate": 5.672373110160647e-07,
"loss": 1.687,
"step": 2432
},
{
"epoch": 2.62,
"learning_rate": 5.656680287076976e-07,
"loss": 1.6902,
"step": 2434
},
{
"epoch": 2.62,
"learning_rate": 5.641000635891591e-07,
"loss": 1.6872,
"step": 2436
},
{
"epoch": 2.62,
"learning_rate": 5.625334204155852e-07,
"loss": 1.678,
"step": 2438
},
{
"epoch": 2.62,
"learning_rate": 5.609681039381029e-07,
"loss": 1.68,
"step": 2440
},
{
"epoch": 2.63,
"learning_rate": 5.594041189038157e-07,
"loss": 1.7455,
"step": 2442
},
{
"epoch": 2.63,
"learning_rate": 5.578414700557907e-07,
"loss": 1.7074,
"step": 2444
},
{
"epoch": 2.63,
"learning_rate": 5.562801621330402e-07,
"loss": 1.6827,
"step": 2446
},
{
"epoch": 2.63,
"learning_rate": 5.547201998705123e-07,
"loss": 1.691,
"step": 2448
},
{
"epoch": 2.63,
"learning_rate": 5.531615879990729e-07,
"loss": 1.6659,
"step": 2450
},
{
"epoch": 2.64,
"learning_rate": 5.516043312454927e-07,
"loss": 1.7509,
"step": 2452
},
{
"epoch": 2.64,
"learning_rate": 5.50048434332433e-07,
"loss": 1.7094,
"step": 2454
},
{
"epoch": 2.64,
"learning_rate": 5.484939019784305e-07,
"loss": 1.6719,
"step": 2456
},
{
"epoch": 2.64,
"learning_rate": 5.469407388978854e-07,
"loss": 1.6651,
"step": 2458
},
{
"epoch": 2.64,
"learning_rate": 5.453889498010433e-07,
"loss": 1.7097,
"step": 2460
},
{
"epoch": 2.65,
"learning_rate": 5.43838539393984e-07,
"loss": 1.6689,
"step": 2462
},
{
"epoch": 2.65,
"learning_rate": 5.422895123786058e-07,
"loss": 1.6613,
"step": 2464
},
{
"epoch": 2.65,
"learning_rate": 5.407418734526118e-07,
"loss": 1.6762,
"step": 2466
},
{
"epoch": 2.65,
"learning_rate": 5.391956273094951e-07,
"loss": 1.693,
"step": 2468
},
{
"epoch": 2.66,
"learning_rate": 5.376507786385263e-07,
"loss": 1.6739,
"step": 2470
},
{
"epoch": 2.66,
"learning_rate": 5.361073321247354e-07,
"loss": 1.6348,
"step": 2472
},
{
"epoch": 2.66,
"learning_rate": 5.345652924489027e-07,
"loss": 1.6836,
"step": 2474
},
{
"epoch": 2.66,
"learning_rate": 5.330246642875406e-07,
"loss": 1.7196,
"step": 2476
},
{
"epoch": 2.66,
"learning_rate": 5.31485452312881e-07,
"loss": 1.6465,
"step": 2478
},
{
"epoch": 2.67,
"learning_rate": 5.299476611928607e-07,
"loss": 1.715,
"step": 2480
},
{
"epoch": 2.67,
"learning_rate": 5.284112955911088e-07,
"loss": 1.6288,
"step": 2482
},
{
"epoch": 2.67,
"learning_rate": 5.268763601669299e-07,
"loss": 1.6751,
"step": 2484
},
{
"epoch": 2.67,
"learning_rate": 5.253428595752916e-07,
"loss": 1.6313,
"step": 2486
},
{
"epoch": 2.67,
"learning_rate": 5.238107984668105e-07,
"loss": 1.7374,
"step": 2488
},
{
"epoch": 2.68,
"learning_rate": 5.222801814877369e-07,
"loss": 1.7189,
"step": 2490
},
{
"epoch": 2.68,
"learning_rate": 5.207510132799436e-07,
"loss": 1.6231,
"step": 2492
},
{
"epoch": 2.68,
"learning_rate": 5.192232984809062e-07,
"loss": 1.7164,
"step": 2494
},
{
"epoch": 2.68,
"learning_rate": 5.17697041723696e-07,
"loss": 1.6455,
"step": 2496
},
{
"epoch": 2.69,
"learning_rate": 5.161722476369612e-07,
"loss": 1.7,
"step": 2498
},
{
"epoch": 2.69,
"learning_rate": 5.146489208449136e-07,
"loss": 1.67,
"step": 2500
},
{
"epoch": 2.69,
"learning_rate": 5.131270659673155e-07,
"loss": 1.6286,
"step": 2502
},
{
"epoch": 2.69,
"learning_rate": 5.116066876194662e-07,
"loss": 1.6824,
"step": 2504
},
{
"epoch": 2.69,
"learning_rate": 5.100877904121864e-07,
"loss": 1.7038,
"step": 2506
},
{
"epoch": 2.7,
"learning_rate": 5.085703789518049e-07,
"loss": 1.6302,
"step": 2508
},
{
"epoch": 2.7,
"learning_rate": 5.07054457840145e-07,
"loss": 1.6829,
"step": 2510
},
{
"epoch": 2.7,
"learning_rate": 5.055400316745095e-07,
"loss": 1.6355,
"step": 2512
},
{
"epoch": 2.7,
"learning_rate": 5.040271050476697e-07,
"loss": 1.6683,
"step": 2514
},
{
"epoch": 2.7,
"learning_rate": 5.02515682547846e-07,
"loss": 1.6439,
"step": 2516
},
{
"epoch": 2.71,
"learning_rate": 5.010057687587e-07,
"loss": 1.6893,
"step": 2518
},
{
"epoch": 2.71,
"learning_rate": 4.994973682593167e-07,
"loss": 1.6663,
"step": 2520
},
{
"epoch": 2.71,
"learning_rate": 4.97990485624192e-07,
"loss": 1.6837,
"step": 2522
},
{
"epoch": 2.71,
"learning_rate": 4.964851254232183e-07,
"loss": 1.6524,
"step": 2524
},
{
"epoch": 2.72,
"learning_rate": 4.949812922216713e-07,
"loss": 1.7032,
"step": 2526
},
{
"epoch": 2.72,
"learning_rate": 4.934789905801954e-07,
"loss": 1.6978,
"step": 2528
},
{
"epoch": 2.72,
"learning_rate": 4.919782250547911e-07,
"loss": 1.6881,
"step": 2530
},
{
"epoch": 2.72,
"learning_rate": 4.904790001967996e-07,
"loss": 1.671,
"step": 2532
},
{
"epoch": 2.72,
"learning_rate": 4.889813205528894e-07,
"loss": 1.7022,
"step": 2534
},
{
"epoch": 2.73,
"learning_rate": 4.874851906650448e-07,
"loss": 1.7007,
"step": 2536
},
{
"epoch": 2.73,
"learning_rate": 4.859906150705471e-07,
"loss": 1.6365,
"step": 2538
},
{
"epoch": 2.73,
"learning_rate": 4.844975983019668e-07,
"loss": 1.6813,
"step": 2540
},
{
"epoch": 2.73,
"learning_rate": 4.830061448871454e-07,
"loss": 1.6612,
"step": 2542
},
{
"epoch": 2.73,
"learning_rate": 4.815162593491838e-07,
"loss": 1.6307,
"step": 2544
},
{
"epoch": 2.74,
"learning_rate": 4.800279462064278e-07,
"loss": 1.6695,
"step": 2546
},
{
"epoch": 2.74,
"learning_rate": 4.785412099724546e-07,
"loss": 1.6348,
"step": 2548
},
{
"epoch": 2.74,
"learning_rate": 4.770560551560589e-07,
"loss": 1.6561,
"step": 2550
},
{
"epoch": 2.74,
"learning_rate": 4.7557248626124093e-07,
"loss": 1.6805,
"step": 2552
},
{
"epoch": 2.75,
"learning_rate": 4.740905077871894e-07,
"loss": 1.6929,
"step": 2554
},
{
"epoch": 2.75,
"learning_rate": 4.7261012422827074e-07,
"loss": 1.6704,
"step": 2556
},
{
"epoch": 2.75,
"learning_rate": 4.7113134007401443e-07,
"loss": 1.7108,
"step": 2558
},
{
"epoch": 2.75,
"learning_rate": 4.696541598090991e-07,
"loss": 1.6612,
"step": 2560
},
{
"epoch": 2.75,
"learning_rate": 4.681785879133402e-07,
"loss": 1.6299,
"step": 2562
},
{
"epoch": 2.76,
"learning_rate": 4.667046288616746e-07,
"loss": 1.6696,
"step": 2564
},
{
"epoch": 2.76,
"learning_rate": 4.652322871241483e-07,
"loss": 1.6444,
"step": 2566
},
{
"epoch": 2.76,
"learning_rate": 4.637615671659024e-07,
"loss": 1.6816,
"step": 2568
},
{
"epoch": 2.76,
"learning_rate": 4.6229247344715983e-07,
"loss": 1.6689,
"step": 2570
},
{
"epoch": 2.76,
"learning_rate": 4.60825010423211e-07,
"loss": 1.6677,
"step": 2572
},
{
"epoch": 2.77,
"learning_rate": 4.5935918254440274e-07,
"loss": 1.6505,
"step": 2574
},
{
"epoch": 2.77,
"learning_rate": 4.578949942561202e-07,
"loss": 1.6733,
"step": 2576
},
{
"epoch": 2.77,
"learning_rate": 4.5643244999877896e-07,
"loss": 1.68,
"step": 2578
},
{
"epoch": 2.77,
"learning_rate": 4.5497155420780696e-07,
"loss": 1.6563,
"step": 2580
},
{
"epoch": 2.78,
"learning_rate": 4.5351231131363333e-07,
"loss": 1.6426,
"step": 2582
},
{
"epoch": 2.78,
"learning_rate": 4.5205472574167567e-07,
"loss": 1.717,
"step": 2584
},
{
"epoch": 2.78,
"learning_rate": 4.505988019123228e-07,
"loss": 1.7117,
"step": 2586
},
{
"epoch": 2.78,
"learning_rate": 4.4914454424092696e-07,
"loss": 1.7123,
"step": 2588
},
{
"epoch": 2.78,
"learning_rate": 4.4769195713778554e-07,
"loss": 1.6705,
"step": 2590
},
{
"epoch": 2.79,
"learning_rate": 4.4624104500813033e-07,
"loss": 1.6447,
"step": 2592
},
{
"epoch": 2.79,
"learning_rate": 4.447918122521128e-07,
"loss": 1.681,
"step": 2594
},
{
"epoch": 2.79,
"learning_rate": 4.4334426326479336e-07,
"loss": 1.6716,
"step": 2596
},
{
"epoch": 2.79,
"learning_rate": 4.418984024361231e-07,
"loss": 1.6941,
"step": 2598
},
{
"epoch": 2.79,
"learning_rate": 4.40454234150936e-07,
"loss": 1.6666,
"step": 2600
},
{
"epoch": 2.8,
"learning_rate": 4.3901176278893194e-07,
"loss": 1.6906,
"step": 2602
},
{
"epoch": 2.8,
"learning_rate": 4.3757099272466445e-07,
"loss": 1.6618,
"step": 2604
},
{
"epoch": 2.8,
"learning_rate": 4.361319283275289e-07,
"loss": 1.6624,
"step": 2606
},
{
"epoch": 2.8,
"learning_rate": 4.3469457396174556e-07,
"loss": 1.6755,
"step": 2608
},
{
"epoch": 2.81,
"learning_rate": 4.332589339863512e-07,
"loss": 1.7124,
"step": 2610
},
{
"epoch": 2.81,
"learning_rate": 4.318250127551817e-07,
"loss": 1.6608,
"step": 2612
},
{
"epoch": 2.81,
"learning_rate": 4.303928146168614e-07,
"loss": 1.7228,
"step": 2614
},
{
"epoch": 2.81,
"learning_rate": 4.2896234391478815e-07,
"loss": 1.6907,
"step": 2616
},
{
"epoch": 2.81,
"learning_rate": 4.27533604987123e-07,
"loss": 1.6645,
"step": 2618
},
{
"epoch": 2.82,
"learning_rate": 4.2610660216677206e-07,
"loss": 1.6969,
"step": 2620
},
{
"epoch": 2.82,
"learning_rate": 4.246813397813794e-07,
"loss": 1.6414,
"step": 2622
},
{
"epoch": 2.82,
"learning_rate": 4.2325782215330897e-07,
"loss": 1.7107,
"step": 2624
},
{
"epoch": 2.82,
"learning_rate": 4.218360535996338e-07,
"loss": 1.7069,
"step": 2626
},
{
"epoch": 2.83,
"learning_rate": 4.2041603843212395e-07,
"loss": 1.6569,
"step": 2628
},
{
"epoch": 2.83,
"learning_rate": 4.1899778095722915e-07,
"loss": 1.7065,
"step": 2630
},
{
"epoch": 2.83,
"learning_rate": 4.1758128547607155e-07,
"loss": 1.6701,
"step": 2632
},
{
"epoch": 2.83,
"learning_rate": 4.16166556284428e-07,
"loss": 1.6951,
"step": 2634
},
{
"epoch": 2.83,
"learning_rate": 4.1475359767271934e-07,
"loss": 1.7141,
"step": 2636
},
{
"epoch": 2.84,
"learning_rate": 4.133424139259968e-07,
"loss": 1.6782,
"step": 2638
},
{
"epoch": 2.84,
"learning_rate": 4.119330093239287e-07,
"loss": 1.672,
"step": 2640
},
{
"epoch": 2.84,
"learning_rate": 4.1052538814078784e-07,
"loss": 1.6418,
"step": 2642
},
{
"epoch": 2.84,
"learning_rate": 4.0911955464543976e-07,
"loss": 1.6769,
"step": 2644
},
{
"epoch": 2.84,
"learning_rate": 4.077155131013258e-07,
"loss": 1.7021,
"step": 2646
},
{
"epoch": 2.85,
"learning_rate": 4.063132677664557e-07,
"loss": 1.6438,
"step": 2648
},
{
"epoch": 2.85,
"learning_rate": 4.049128228933902e-07,
"loss": 1.6945,
"step": 2650
},
{
"epoch": 2.85,
"learning_rate": 4.035141827292301e-07,
"loss": 1.6318,
"step": 2652
},
{
"epoch": 2.85,
"learning_rate": 4.0211735151560386e-07,
"loss": 1.7213,
"step": 2654
},
{
"epoch": 2.86,
"learning_rate": 4.0072233348865304e-07,
"loss": 1.7055,
"step": 2656
},
{
"epoch": 2.86,
"learning_rate": 3.993291328790208e-07,
"loss": 1.6711,
"step": 2658
},
{
"epoch": 2.86,
"learning_rate": 3.9793775391183846e-07,
"loss": 1.7406,
"step": 2660
},
{
"epoch": 2.86,
"learning_rate": 3.9654820080671314e-07,
"loss": 1.7186,
"step": 2662
},
{
"epoch": 2.86,
"learning_rate": 3.951604777777141e-07,
"loss": 1.6811,
"step": 2664
},
{
"epoch": 2.87,
"learning_rate": 3.9377458903336223e-07,
"loss": 1.679,
"step": 2666
},
{
"epoch": 2.87,
"learning_rate": 3.92390538776613e-07,
"loss": 1.6272,
"step": 2668
},
{
"epoch": 2.87,
"learning_rate": 3.9100833120484876e-07,
"loss": 1.639,
"step": 2670
},
{
"epoch": 2.87,
"learning_rate": 3.896279705098623e-07,
"loss": 1.6719,
"step": 2672
},
{
"epoch": 2.87,
"learning_rate": 3.8824946087784536e-07,
"loss": 1.6864,
"step": 2674
},
{
"epoch": 2.88,
"learning_rate": 3.8687280648937703e-07,
"loss": 1.6651,
"step": 2676
},
{
"epoch": 2.88,
"learning_rate": 3.8549801151940906e-07,
"loss": 1.7015,
"step": 2678
},
{
"epoch": 2.88,
"learning_rate": 3.841250801372544e-07,
"loss": 1.6805,
"step": 2680
},
{
"epoch": 2.88,
"learning_rate": 3.827540165065746e-07,
"loss": 1.6918,
"step": 2682
},
{
"epoch": 2.89,
"learning_rate": 3.813848247853665e-07,
"loss": 1.6806,
"step": 2684
},
{
"epoch": 2.89,
"learning_rate": 3.800175091259501e-07,
"loss": 1.6735,
"step": 2686
},
{
"epoch": 2.89,
"learning_rate": 3.786520736749571e-07,
"loss": 1.7098,
"step": 2688
},
{
"epoch": 2.89,
"learning_rate": 3.7728852257331467e-07,
"loss": 1.6358,
"step": 2690
},
{
"epoch": 2.89,
"learning_rate": 3.75926859956238e-07,
"loss": 1.6875,
"step": 2692
},
{
"epoch": 2.9,
"learning_rate": 3.7456708995321327e-07,
"loss": 1.6994,
"step": 2694
},
{
"epoch": 2.9,
"learning_rate": 3.7320921668798775e-07,
"loss": 1.6525,
"step": 2696
},
{
"epoch": 2.9,
"learning_rate": 3.7185324427855647e-07,
"loss": 1.7098,
"step": 2698
},
{
"epoch": 2.9,
"learning_rate": 3.7049917683714915e-07,
"loss": 1.6688,
"step": 2700
},
{
"epoch": 2.9,
"learning_rate": 3.691470184702197e-07,
"loss": 1.6341,
"step": 2702
},
{
"epoch": 2.91,
"learning_rate": 3.6779677327843105e-07,
"loss": 1.6446,
"step": 2704
},
{
"epoch": 2.91,
"learning_rate": 3.664484453566449e-07,
"loss": 1.6291,
"step": 2706
},
{
"epoch": 2.91,
"learning_rate": 3.6510203879390756e-07,
"loss": 1.6933,
"step": 2708
},
{
"epoch": 2.91,
"learning_rate": 3.6375755767344043e-07,
"loss": 1.6932,
"step": 2710
},
{
"epoch": 2.92,
"learning_rate": 3.624150060726227e-07,
"loss": 1.6898,
"step": 2712
},
{
"epoch": 2.92,
"learning_rate": 3.6107438806298487e-07,
"loss": 1.6837,
"step": 2714
},
{
"epoch": 2.92,
"learning_rate": 3.5973570771019155e-07,
"loss": 1.7272,
"step": 2716
},
{
"epoch": 2.92,
"learning_rate": 3.583989690740321e-07,
"loss": 1.6672,
"step": 2718
},
{
"epoch": 2.92,
"learning_rate": 3.570641762084066e-07,
"loss": 1.6944,
"step": 2720
},
{
"epoch": 2.93,
"learning_rate": 3.5573133316131445e-07,
"loss": 1.6733,
"step": 2722
},
{
"epoch": 2.93,
"learning_rate": 3.544004439748418e-07,
"loss": 1.7539,
"step": 2724
},
{
"epoch": 2.93,
"learning_rate": 3.5307151268515024e-07,
"loss": 1.6343,
"step": 2726
},
{
"epoch": 2.93,
"learning_rate": 3.517445433224623e-07,
"loss": 1.6285,
"step": 2728
},
{
"epoch": 2.93,
"learning_rate": 3.5041953991105154e-07,
"loss": 1.7435,
"step": 2730
},
{
"epoch": 2.94,
"learning_rate": 3.4909650646922894e-07,
"loss": 1.6805,
"step": 2732
},
{
"epoch": 2.94,
"learning_rate": 3.4777544700933114e-07,
"loss": 1.6832,
"step": 2734
},
{
"epoch": 2.94,
"learning_rate": 3.464563655377094e-07,
"loss": 1.6731,
"step": 2736
},
{
"epoch": 2.94,
"learning_rate": 3.45139266054715e-07,
"loss": 1.6178,
"step": 2738
},
{
"epoch": 2.95,
"learning_rate": 3.43824152554689e-07,
"loss": 1.6611,
"step": 2740
},
{
"epoch": 2.95,
"learning_rate": 3.4251102902594985e-07,
"loss": 1.6671,
"step": 2742
},
{
"epoch": 2.95,
"learning_rate": 3.411998994507808e-07,
"loss": 1.6669,
"step": 2744
},
{
"epoch": 2.95,
"learning_rate": 3.398907678054177e-07,
"loss": 1.6837,
"step": 2746
},
{
"epoch": 2.95,
"learning_rate": 3.385836380600384e-07,
"loss": 1.6484,
"step": 2748
},
{
"epoch": 2.96,
"learning_rate": 3.3727851417874875e-07,
"loss": 1.6734,
"step": 2750
},
{
"epoch": 2.96,
"learning_rate": 3.359754001195716e-07,
"loss": 1.6938,
"step": 2752
},
{
"epoch": 2.96,
"learning_rate": 3.3467429983443476e-07,
"loss": 1.7249,
"step": 2754
},
{
"epoch": 2.96,
"learning_rate": 3.3337521726915853e-07,
"loss": 1.6563,
"step": 2756
},
{
"epoch": 2.96,
"learning_rate": 3.320781563634455e-07,
"loss": 1.6845,
"step": 2758
},
{
"epoch": 2.97,
"learning_rate": 3.307831210508648e-07,
"loss": 1.6449,
"step": 2760
},
{
"epoch": 2.97,
"learning_rate": 3.2949011525884497e-07,
"loss": 1.709,
"step": 2762
},
{
"epoch": 2.97,
"learning_rate": 3.2819914290865835e-07,
"loss": 1.7084,
"step": 2764
},
{
"epoch": 2.97,
"learning_rate": 3.269102079154107e-07,
"loss": 1.6734,
"step": 2766
},
{
"epoch": 2.98,
"learning_rate": 3.25623314188029e-07,
"loss": 1.6561,
"step": 2768
},
{
"epoch": 2.98,
"learning_rate": 3.2433846562925103e-07,
"loss": 1.7016,
"step": 2770
},
{
"epoch": 2.98,
"learning_rate": 3.2305566613560964e-07,
"loss": 1.6527,
"step": 2772
},
{
"epoch": 2.98,
"learning_rate": 3.217749195974262e-07,
"loss": 1.7127,
"step": 2774
},
{
"epoch": 2.98,
"learning_rate": 3.204962298987944e-07,
"loss": 1.662,
"step": 2776
},
{
"epoch": 2.99,
"learning_rate": 3.1921960091757073e-07,
"loss": 1.6959,
"step": 2778
},
{
"epoch": 2.99,
"learning_rate": 3.17945036525363e-07,
"loss": 1.68,
"step": 2780
},
{
"epoch": 2.99,
"learning_rate": 3.166725405875157e-07,
"loss": 1.6603,
"step": 2782
},
{
"epoch": 2.99,
"learning_rate": 3.154021169631026e-07,
"loss": 1.6363,
"step": 2784
},
{
"epoch": 2.99,
"learning_rate": 3.1413376950491166e-07,
"loss": 1.6702,
"step": 2786
},
{
"epoch": 3.0,
"learning_rate": 3.128675020594347e-07,
"loss": 1.6898,
"step": 2788
},
{
"epoch": 3.0,
"learning_rate": 3.1160331846685526e-07,
"loss": 1.688,
"step": 2790
},
{
"epoch": 3.0,
"learning_rate": 3.103412225610378e-07,
"loss": 1.6445,
"step": 2792
},
{
"epoch": 3.0,
"learning_rate": 3.090812181695146e-07,
"loss": 1.6745,
"step": 2794
},
{
"epoch": 3.01,
"learning_rate": 3.078233091134764e-07,
"loss": 1.6506,
"step": 2796
},
{
"epoch": 3.01,
"learning_rate": 3.065674992077584e-07,
"loss": 1.6474,
"step": 2798
},
{
"epoch": 3.01,
"learning_rate": 3.053137922608295e-07,
"loss": 1.661,
"step": 2800
},
{
"epoch": 3.01,
"learning_rate": 3.040621920747827e-07,
"loss": 1.6831,
"step": 2802
},
{
"epoch": 3.01,
"learning_rate": 3.028127024453193e-07,
"loss": 1.6901,
"step": 2804
},
{
"epoch": 3.02,
"learning_rate": 3.0156532716174243e-07,
"loss": 1.6924,
"step": 2806
},
{
"epoch": 3.02,
"learning_rate": 3.003200700069415e-07,
"loss": 1.6815,
"step": 2808
},
{
"epoch": 3.02,
"learning_rate": 2.9907693475738303e-07,
"loss": 1.6765,
"step": 2810
},
{
"epoch": 3.02,
"learning_rate": 2.978359251830981e-07,
"loss": 1.6304,
"step": 2812
},
{
"epoch": 3.02,
"learning_rate": 2.9659704504767157e-07,
"loss": 1.6442,
"step": 2814
},
{
"epoch": 3.03,
"learning_rate": 2.9536029810822994e-07,
"loss": 1.6585,
"step": 2816
},
{
"epoch": 3.03,
"learning_rate": 2.941256881154317e-07,
"loss": 1.6403,
"step": 2818
},
{
"epoch": 3.03,
"learning_rate": 2.9289321881345254e-07,
"loss": 1.6504,
"step": 2820
},
{
"epoch": 3.03,
"learning_rate": 2.916628939399779e-07,
"loss": 1.672,
"step": 2822
},
{
"epoch": 3.04,
"learning_rate": 2.904347172261897e-07,
"loss": 1.653,
"step": 2824
},
{
"epoch": 3.04,
"learning_rate": 2.8920869239675383e-07,
"loss": 1.6278,
"step": 2826
},
{
"epoch": 3.04,
"learning_rate": 2.879848231698119e-07,
"loss": 1.6327,
"step": 2828
},
{
"epoch": 3.04,
"learning_rate": 2.867631132569671e-07,
"loss": 1.6616,
"step": 2830
},
{
"epoch": 3.04,
"learning_rate": 2.855435663632746e-07,
"loss": 1.6865,
"step": 2832
},
{
"epoch": 3.05,
"learning_rate": 2.843261861872296e-07,
"loss": 1.6742,
"step": 2834
},
{
"epoch": 3.05,
"learning_rate": 2.8311097642075657e-07,
"loss": 1.6369,
"step": 2836
},
{
"epoch": 3.05,
"learning_rate": 2.8189794074919735e-07,
"loss": 1.6254,
"step": 2838
},
{
"epoch": 3.05,
"learning_rate": 2.8068708285130184e-07,
"loss": 1.6118,
"step": 2840
},
{
"epoch": 3.06,
"learning_rate": 2.7947840639921303e-07,
"loss": 1.677,
"step": 2842
},
{
"epoch": 3.06,
"learning_rate": 2.782719150584607e-07,
"loss": 1.6502,
"step": 2844
},
{
"epoch": 3.06,
"learning_rate": 2.770676124879464e-07,
"loss": 1.6279,
"step": 2846
},
{
"epoch": 3.06,
"learning_rate": 2.758655023399342e-07,
"loss": 1.615,
"step": 2848
},
{
"epoch": 3.06,
"learning_rate": 2.7466558826003996e-07,
"loss": 1.6452,
"step": 2850
},
{
"epoch": 3.07,
"learning_rate": 2.7346787388721835e-07,
"loss": 1.6349,
"step": 2852
},
{
"epoch": 3.07,
"learning_rate": 2.72272362853754e-07,
"loss": 1.7027,
"step": 2854
},
{
"epoch": 3.07,
"learning_rate": 2.710790587852491e-07,
"loss": 1.7175,
"step": 2856
},
{
"epoch": 3.07,
"learning_rate": 2.6988796530061265e-07,
"loss": 1.6837,
"step": 2858
},
{
"epoch": 3.07,
"learning_rate": 2.686990860120497e-07,
"loss": 1.678,
"step": 2860
},
{
"epoch": 3.08,
"learning_rate": 2.6751242452505163e-07,
"loss": 1.691,
"step": 2862
},
{
"epoch": 3.08,
"learning_rate": 2.6632798443838145e-07,
"loss": 1.6405,
"step": 2864
},
{
"epoch": 3.08,
"learning_rate": 2.651457693440677e-07,
"loss": 1.6452,
"step": 2866
},
{
"epoch": 3.08,
"learning_rate": 2.6396578282739015e-07,
"loss": 1.6385,
"step": 2868
},
{
"epoch": 3.09,
"learning_rate": 2.6278802846686966e-07,
"loss": 1.6936,
"step": 2870
},
{
"epoch": 3.09,
"learning_rate": 2.616125098342591e-07,
"loss": 1.6382,
"step": 2872
},
{
"epoch": 3.09,
"learning_rate": 2.604392304945291e-07,
"loss": 1.6935,
"step": 2874
},
{
"epoch": 3.09,
"learning_rate": 2.592681940058611e-07,
"loss": 1.6619,
"step": 2876
},
{
"epoch": 3.09,
"learning_rate": 2.580994039196337e-07,
"loss": 1.65,
"step": 2878
},
{
"epoch": 3.1,
"learning_rate": 2.5693286378041293e-07,
"loss": 1.7102,
"step": 2880
},
{
"epoch": 3.1,
"learning_rate": 2.5576857712594135e-07,
"loss": 1.6367,
"step": 2882
},
{
"epoch": 3.1,
"learning_rate": 2.5460654748712864e-07,
"loss": 1.6511,
"step": 2884
},
{
"epoch": 3.1,
"learning_rate": 2.534467783880373e-07,
"loss": 1.6729,
"step": 2886
},
{
"epoch": 3.1,
"learning_rate": 2.522892733458769e-07,
"loss": 1.7258,
"step": 2888
},
{
"epoch": 3.11,
"learning_rate": 2.5113403587098913e-07,
"loss": 1.6821,
"step": 2890
},
{
"epoch": 3.11,
"learning_rate": 2.499810694668396e-07,
"loss": 1.6606,
"step": 2892
},
{
"epoch": 3.11,
"learning_rate": 2.4883037763000635e-07,
"loss": 1.6669,
"step": 2894
},
{
"epoch": 3.11,
"learning_rate": 2.476819638501689e-07,
"loss": 1.6648,
"step": 2896
},
{
"epoch": 3.12,
"learning_rate": 2.465358316100994e-07,
"loss": 1.6439,
"step": 2898
},
{
"epoch": 3.12,
"learning_rate": 2.4539198438564944e-07,
"loss": 1.6422,
"step": 2900
},
{
"epoch": 3.12,
"learning_rate": 2.4425042564574185e-07,
"loss": 1.731,
"step": 2902
},
{
"epoch": 3.12,
"learning_rate": 2.4311115885235843e-07,
"loss": 1.6503,
"step": 2904
},
{
"epoch": 3.12,
"learning_rate": 2.41974187460531e-07,
"loss": 1.6699,
"step": 2906
},
{
"epoch": 3.13,
"learning_rate": 2.408395149183294e-07,
"loss": 1.672,
"step": 2908
},
{
"epoch": 3.13,
"learning_rate": 2.397071446668528e-07,
"loss": 1.6862,
"step": 2910
},
{
"epoch": 3.13,
"learning_rate": 2.3857708014021736e-07,
"loss": 1.6478,
"step": 2912
},
{
"epoch": 3.13,
"learning_rate": 2.3744932476554714e-07,
"loss": 1.6619,
"step": 2914
},
{
"epoch": 3.13,
"learning_rate": 2.3632388196296294e-07,
"loss": 1.712,
"step": 2916
},
{
"epoch": 3.14,
"learning_rate": 2.3520075514557235e-07,
"loss": 1.6427,
"step": 2918
},
{
"epoch": 3.14,
"learning_rate": 2.3407994771946016e-07,
"loss": 1.6813,
"step": 2920
},
{
"epoch": 3.14,
"learning_rate": 2.3296146308367593e-07,
"loss": 1.6614,
"step": 2922
},
{
"epoch": 3.14,
"learning_rate": 2.3184530463022577e-07,
"loss": 1.664,
"step": 2924
},
{
"epoch": 3.15,
"learning_rate": 2.3073147574406083e-07,
"loss": 1.6342,
"step": 2926
},
{
"epoch": 3.15,
"learning_rate": 2.2961997980306745e-07,
"loss": 1.6329,
"step": 2928
},
{
"epoch": 3.15,
"learning_rate": 2.28510820178057e-07,
"loss": 1.6314,
"step": 2930
},
{
"epoch": 3.15,
"learning_rate": 2.274040002327562e-07,
"loss": 1.6135,
"step": 2932
},
{
"epoch": 3.15,
"learning_rate": 2.2629952332379444e-07,
"loss": 1.6362,
"step": 2934
},
{
"epoch": 3.16,
"learning_rate": 2.2519739280069762e-07,
"loss": 1.633,
"step": 2936
},
{
"epoch": 3.16,
"learning_rate": 2.240976120058745e-07,
"loss": 1.6842,
"step": 2938
},
{
"epoch": 3.16,
"learning_rate": 2.2300018427460809e-07,
"loss": 1.6551,
"step": 2940
},
{
"epoch": 3.16,
"learning_rate": 2.219051129350451e-07,
"loss": 1.645,
"step": 2942
},
{
"epoch": 3.16,
"learning_rate": 2.208124013081869e-07,
"loss": 1.6249,
"step": 2944
},
{
"epoch": 3.17,
"learning_rate": 2.197220527078778e-07,
"loss": 1.645,
"step": 2946
},
{
"epoch": 3.17,
"learning_rate": 2.1863407044079606e-07,
"loss": 1.6616,
"step": 2948
},
{
"epoch": 3.17,
"learning_rate": 2.175484578064436e-07,
"loss": 1.638,
"step": 2950
},
{
"epoch": 3.17,
"learning_rate": 2.164652180971358e-07,
"loss": 1.6651,
"step": 2952
},
{
"epoch": 3.18,
"learning_rate": 2.1538435459799264e-07,
"loss": 1.6273,
"step": 2954
},
{
"epoch": 3.18,
"learning_rate": 2.1430587058692606e-07,
"loss": 1.6759,
"step": 2956
},
{
"epoch": 3.18,
"learning_rate": 2.1322976933463354e-07,
"loss": 1.6511,
"step": 2958
},
{
"epoch": 3.18,
"learning_rate": 2.121560541045856e-07,
"loss": 1.6723,
"step": 2960
},
{
"epoch": 3.18,
"learning_rate": 2.110847281530167e-07,
"loss": 1.6751,
"step": 2962
},
{
"epoch": 3.19,
"learning_rate": 2.100157947289155e-07,
"loss": 1.6742,
"step": 2964
},
{
"epoch": 3.19,
"learning_rate": 2.0894925707401488e-07,
"loss": 1.6711,
"step": 2966
},
{
"epoch": 3.19,
"learning_rate": 2.0788511842278177e-07,
"loss": 1.6633,
"step": 2968
},
{
"epoch": 3.19,
"learning_rate": 2.0682338200240878e-07,
"loss": 1.6559,
"step": 2970
},
{
"epoch": 3.19,
"learning_rate": 2.0576405103280213e-07,
"loss": 1.6424,
"step": 2972
},
{
"epoch": 3.2,
"learning_rate": 2.0470712872657348e-07,
"loss": 1.6524,
"step": 2974
},
{
"epoch": 3.2,
"learning_rate": 2.0365261828903035e-07,
"loss": 1.68,
"step": 2976
},
{
"epoch": 3.2,
"learning_rate": 2.0260052291816443e-07,
"loss": 1.6301,
"step": 2978
},
{
"epoch": 3.2,
"learning_rate": 2.0155084580464498e-07,
"loss": 1.6836,
"step": 2980
},
{
"epoch": 3.21,
"learning_rate": 2.005035901318063e-07,
"loss": 1.6594,
"step": 2982
},
{
"epoch": 3.21,
"learning_rate": 1.9945875907563968e-07,
"loss": 1.672,
"step": 2984
},
{
"epoch": 3.21,
"learning_rate": 1.9841635580478322e-07,
"loss": 1.688,
"step": 2986
},
{
"epoch": 3.21,
"learning_rate": 1.9737638348051233e-07,
"loss": 1.6405,
"step": 2988
},
{
"epoch": 3.21,
"learning_rate": 1.9633884525672983e-07,
"loss": 1.6533,
"step": 2990
},
{
"epoch": 3.22,
"learning_rate": 1.9530374427995766e-07,
"loss": 1.637,
"step": 2992
},
{
"epoch": 3.22,
"learning_rate": 1.9427108368932533e-07,
"loss": 1.6396,
"step": 2994
},
{
"epoch": 3.22,
"learning_rate": 1.9324086661656168e-07,
"loss": 1.6993,
"step": 2996
},
{
"epoch": 3.22,
"learning_rate": 1.9221309618598602e-07,
"loss": 1.7117,
"step": 2998
},
{
"epoch": 3.22,
"learning_rate": 1.9118777551449595e-07,
"loss": 1.6908,
"step": 3000
},
{
"epoch": 3.23,
"learning_rate": 1.901649077115617e-07,
"loss": 1.6728,
"step": 3002
},
{
"epoch": 3.23,
"learning_rate": 1.8914449587921367e-07,
"loss": 1.662,
"step": 3004
},
{
"epoch": 3.23,
"learning_rate": 1.8812654311203412e-07,
"loss": 1.6658,
"step": 3006
},
{
"epoch": 3.23,
"learning_rate": 1.8711105249714798e-07,
"loss": 1.698,
"step": 3008
},
{
"epoch": 3.24,
"learning_rate": 1.866042314595e-07,
"loss": 1.65,
"step": 3010
},
{
"epoch": 3.24,
"learning_rate": 1.8559243984507645e-07,
"loss": 1.6631,
"step": 3012
},
{
"epoch": 3.24,
"learning_rate": 1.845831180680706e-07,
"loss": 1.6182,
"step": 3014
},
{
"epoch": 3.24,
"learning_rate": 1.8357626918943204e-07,
"loss": 1.6959,
"step": 3016
},
{
"epoch": 3.24,
"learning_rate": 1.8257189626261105e-07,
"loss": 1.6473,
"step": 3018
},
{
"epoch": 3.25,
"learning_rate": 1.8157000233354915e-07,
"loss": 1.6782,
"step": 3020
},
{
"epoch": 3.25,
"learning_rate": 1.8106998594297917e-07,
"loss": 1.6507,
"step": 3022
},
{
"epoch": 3.25,
"learning_rate": 1.8007181620524804e-07,
"loss": 1.6444,
"step": 3024
},
{
"epoch": 3.25,
"learning_rate": 1.7907613304721903e-07,
"loss": 1.6327,
"step": 3026
},
{
"epoch": 3.26,
"learning_rate": 1.780829394884794e-07,
"loss": 1.6667,
"step": 3028
},
{
"epoch": 3.26,
"learning_rate": 1.7709223854106802e-07,
"loss": 1.6786,
"step": 3030
},
{
"epoch": 3.26,
"learning_rate": 1.7610403320946353e-07,
"loss": 1.6811,
"step": 3032
},
{
"epoch": 3.26,
"learning_rate": 1.7511832649057624e-07,
"loss": 1.6612,
"step": 3034
},
{
"epoch": 3.26,
"learning_rate": 1.7413512137373897e-07,
"loss": 1.6821,
"step": 3036
},
{
"epoch": 3.27,
"learning_rate": 1.7315442084069865e-07,
"loss": 1.6305,
"step": 3038
},
{
"epoch": 3.27,
"learning_rate": 1.7217622786560525e-07,
"loss": 1.6646,
"step": 3040
},
{
"epoch": 3.27,
"learning_rate": 1.712005454150055e-07,
"loss": 1.6486,
"step": 3042
},
{
"epoch": 3.27,
"learning_rate": 1.702273764478318e-07,
"loss": 1.6482,
"step": 3044
},
{
"epoch": 3.27,
"learning_rate": 1.6925672391539382e-07,
"loss": 1.6928,
"step": 3046
},
{
"epoch": 3.28,
"learning_rate": 1.682885907613707e-07,
"loss": 1.7189,
"step": 3048
},
{
"epoch": 3.28,
"learning_rate": 1.6732297992179933e-07,
"loss": 1.6629,
"step": 3050
},
{
"epoch": 3.28,
"learning_rate": 1.6635989432506904e-07,
"loss": 1.6371,
"step": 3052
},
{
"epoch": 3.28,
"learning_rate": 1.6539933689190988e-07,
"loss": 1.7218,
"step": 3054
},
{
"epoch": 3.29,
"learning_rate": 1.6444131053538512e-07,
"loss": 1.6245,
"step": 3056
},
{
"epoch": 3.29,
"learning_rate": 1.634858181608816e-07,
"loss": 1.6936,
"step": 3058
},
{
"epoch": 3.29,
"learning_rate": 1.6253286266610278e-07,
"loss": 1.6722,
"step": 3060
},
{
"epoch": 3.29,
"learning_rate": 1.615824469410565e-07,
"loss": 1.6761,
"step": 3062
},
{
"epoch": 3.29,
"learning_rate": 1.6063457386805003e-07,
"loss": 1.6712,
"step": 3064
},
{
"epoch": 3.3,
"learning_rate": 1.596892463216789e-07,
"loss": 1.6428,
"step": 3066
},
{
"epoch": 3.3,
"learning_rate": 1.5874646716881868e-07,
"loss": 1.6976,
"step": 3068
},
{
"epoch": 3.3,
"learning_rate": 1.5780623926861736e-07,
"loss": 1.6576,
"step": 3070
},
{
"epoch": 3.3,
"learning_rate": 1.5686856547248428e-07,
"loss": 1.6432,
"step": 3072
},
{
"epoch": 3.3,
"learning_rate": 1.5593344862408454e-07,
"loss": 1.6876,
"step": 3074
},
{
"epoch": 3.31,
"learning_rate": 1.5500089155932804e-07,
"loss": 1.6723,
"step": 3076
},
{
"epoch": 3.31,
"learning_rate": 1.540708971063618e-07,
"loss": 1.6702,
"step": 3078
},
{
"epoch": 3.31,
"learning_rate": 1.5314346808556111e-07,
"loss": 1.7136,
"step": 3080
},
{
"epoch": 3.31,
"learning_rate": 1.522186073095215e-07,
"loss": 1.685,
"step": 3082
},
{
"epoch": 3.32,
"learning_rate": 1.512963175830494e-07,
"loss": 1.6599,
"step": 3084
},
{
"epoch": 3.32,
"learning_rate": 1.503766017031547e-07,
"loss": 1.639,
"step": 3086
},
{
"epoch": 3.32,
"learning_rate": 1.4945946245904095e-07,
"loss": 1.6334,
"step": 3088
},
{
"epoch": 3.32,
"learning_rate": 1.4854490263209797e-07,
"loss": 1.6169,
"step": 3090
},
{
"epoch": 3.32,
"learning_rate": 1.4763292499589298e-07,
"loss": 1.6248,
"step": 3092
},
{
"epoch": 3.33,
"learning_rate": 1.4672353231616186e-07,
"loss": 1.6857,
"step": 3094
},
{
"epoch": 3.33,
"learning_rate": 1.4581672735080198e-07,
"loss": 1.6417,
"step": 3096
},
{
"epoch": 3.33,
"learning_rate": 1.4491251284986227e-07,
"loss": 1.7102,
"step": 3098
},
{
"epoch": 3.33,
"learning_rate": 1.440108915555358e-07,
"loss": 1.6613,
"step": 3100
},
{
"epoch": 3.33,
"learning_rate": 1.4311186620215154e-07,
"loss": 1.7211,
"step": 3102
},
{
"epoch": 3.34,
"learning_rate": 1.4221543951616532e-07,
"loss": 1.6401,
"step": 3104
},
{
"epoch": 3.34,
"learning_rate": 1.413216142161523e-07,
"loss": 1.6696,
"step": 3106
},
{
"epoch": 3.34,
"learning_rate": 1.4043039301279903e-07,
"loss": 1.7063,
"step": 3108
},
{
"epoch": 3.34,
"learning_rate": 1.3954177860889327e-07,
"loss": 1.6578,
"step": 3110
},
{
"epoch": 3.35,
"learning_rate": 1.3865577369931868e-07,
"loss": 1.6273,
"step": 3112
},
{
"epoch": 3.35,
"learning_rate": 1.3777238097104426e-07,
"loss": 1.6556,
"step": 3114
},
{
"epoch": 3.35,
"learning_rate": 1.368916031031172e-07,
"loss": 1.6406,
"step": 3116
},
{
"epoch": 3.35,
"learning_rate": 1.3601344276665527e-07,
"loss": 1.6864,
"step": 3118
},
{
"epoch": 3.35,
"learning_rate": 1.3513790262483738e-07,
"loss": 1.6016,
"step": 3120
},
{
"epoch": 3.36,
"learning_rate": 1.3426498533289654e-07,
"loss": 1.6372,
"step": 3122
},
{
"epoch": 3.36,
"learning_rate": 1.3339469353811138e-07,
"loss": 1.6766,
"step": 3124
},
{
"epoch": 3.36,
"learning_rate": 1.3252702987979836e-07,
"loss": 1.6493,
"step": 3126
},
{
"epoch": 3.36,
"learning_rate": 1.3166199698930337e-07,
"loss": 1.7053,
"step": 3128
},
{
"epoch": 3.36,
"learning_rate": 1.3079959748999493e-07,
"loss": 1.6686,
"step": 3130
},
{
"epoch": 3.37,
"learning_rate": 1.2993983399725372e-07,
"loss": 1.6379,
"step": 3132
},
{
"epoch": 3.37,
"learning_rate": 1.2908270911846785e-07,
"loss": 1.6551,
"step": 3134
},
{
"epoch": 3.37,
"learning_rate": 1.282282254530226e-07,
"loss": 1.6568,
"step": 3136
},
{
"epoch": 3.37,
"learning_rate": 1.2737638559229314e-07,
"loss": 1.6266,
"step": 3138
},
{
"epoch": 3.38,
"learning_rate": 1.2652719211963725e-07,
"loss": 1.6982,
"step": 3140
},
{
"epoch": 3.38,
"learning_rate": 1.2568064761038665e-07,
"loss": 1.6939,
"step": 3142
},
{
"epoch": 3.38,
"learning_rate": 1.2483675463184018e-07,
"loss": 1.6788,
"step": 3144
},
{
"epoch": 3.38,
"learning_rate": 1.2399551574325496e-07,
"loss": 1.6979,
"step": 3146
},
{
"epoch": 3.38,
"learning_rate": 1.2315693349583923e-07,
"loss": 1.6756,
"step": 3148
},
{
"epoch": 3.39,
"learning_rate": 1.2232101043274435e-07,
"loss": 1.6593,
"step": 3150
},
{
"epoch": 3.39,
"learning_rate": 1.2148774908905778e-07,
"loss": 1.6466,
"step": 3152
},
{
"epoch": 3.39,
"learning_rate": 1.2065715199179383e-07,
"loss": 1.6645,
"step": 3154
},
{
"epoch": 3.39,
"learning_rate": 1.1982922165988807e-07,
"loss": 1.686,
"step": 3156
},
{
"epoch": 3.39,
"learning_rate": 1.1900396060418794e-07,
"loss": 1.6871,
"step": 3158
},
{
"epoch": 3.4,
"learning_rate": 1.1818137132744621e-07,
"loss": 1.6692,
"step": 3160
},
{
"epoch": 3.4,
"learning_rate": 1.173614563243126e-07,
"loss": 1.6918,
"step": 3162
},
{
"epoch": 3.4,
"learning_rate": 1.1654421808132686e-07,
"loss": 1.6722,
"step": 3164
},
{
"epoch": 3.4,
"learning_rate": 1.1572965907691124e-07,
"loss": 1.6424,
"step": 3166
},
{
"epoch": 3.41,
"learning_rate": 1.1491778178136224e-07,
"loss": 1.6632,
"step": 3168
},
{
"epoch": 3.41,
"learning_rate": 1.141085886568437e-07,
"loss": 1.6738,
"step": 3170
},
{
"epoch": 3.41,
"learning_rate": 1.1330208215737935e-07,
"loss": 1.6415,
"step": 3172
},
{
"epoch": 3.41,
"learning_rate": 1.1249826472884571e-07,
"loss": 1.7036,
"step": 3174
},
{
"epoch": 3.41,
"learning_rate": 1.1169713880896281e-07,
"loss": 1.6395,
"step": 3176
},
{
"epoch": 3.42,
"learning_rate": 1.1089870682728985e-07,
"loss": 1.6563,
"step": 3178
},
{
"epoch": 3.42,
"learning_rate": 1.1010297120521528e-07,
"loss": 1.6361,
"step": 3180
},
{
"epoch": 3.42,
"learning_rate": 1.0930993435595026e-07,
"loss": 1.6285,
"step": 3182
},
{
"epoch": 3.42,
"learning_rate": 1.0851959868452198e-07,
"loss": 1.6754,
"step": 3184
},
{
"epoch": 3.42,
"learning_rate": 1.0773196658776529e-07,
"loss": 1.6357,
"step": 3186
},
{
"epoch": 3.43,
"learning_rate": 1.0694704045431602e-07,
"loss": 1.6388,
"step": 3188
},
{
"epoch": 3.43,
"learning_rate": 1.0616482266460447e-07,
"loss": 1.6697,
"step": 3190
},
{
"epoch": 3.43,
"learning_rate": 1.0538531559084641e-07,
"loss": 1.7182,
"step": 3192
},
{
"epoch": 3.43,
"learning_rate": 1.0460852159703715e-07,
"loss": 1.6484,
"step": 3194
},
{
"epoch": 3.44,
"learning_rate": 1.038344430389445e-07,
"loss": 1.7152,
"step": 3196
},
{
"epoch": 3.44,
"learning_rate": 1.0306308226410054e-07,
"loss": 1.7203,
"step": 3198
},
{
"epoch": 3.44,
"learning_rate": 1.0229444161179612e-07,
"loss": 1.6617,
"step": 3200
},
{
"epoch": 3.44,
"learning_rate": 1.015285234130716e-07,
"loss": 1.6696,
"step": 3202
},
{
"epoch": 3.44,
"learning_rate": 1.0076532999071219e-07,
"loss": 1.6612,
"step": 3204
},
{
"epoch": 3.45,
"learning_rate": 1.000048636592391e-07,
"loss": 1.7077,
"step": 3206
},
{
"epoch": 3.45,
"learning_rate": 9.924712672490331e-08,
"loss": 1.6499,
"step": 3208
},
{
"epoch": 3.45,
"learning_rate": 9.849212148567798e-08,
"loss": 1.6717,
"step": 3210
},
{
"epoch": 3.45,
"learning_rate": 9.773985023125308e-08,
"loss": 1.6788,
"step": 3212
},
{
"epoch": 3.45,
"learning_rate": 9.69903152430257e-08,
"loss": 1.6249,
"step": 3214
},
{
"epoch": 3.46,
"learning_rate": 9.624351879409598e-08,
"loss": 1.6898,
"step": 3216
},
{
"epoch": 3.46,
"learning_rate": 9.549946314925839e-08,
"loss": 1.641,
"step": 3218
},
{
"epoch": 3.46,
"learning_rate": 9.475815056499526e-08,
"loss": 1.635,
"step": 3220
},
{
"epoch": 3.46,
"learning_rate": 9.401958328947102e-08,
"loss": 1.6742,
"step": 3222
},
{
"epoch": 3.47,
"learning_rate": 9.328376356252288e-08,
"loss": 1.6689,
"step": 3224
},
{
"epoch": 3.47,
"learning_rate": 9.255069361565715e-08,
"loss": 1.7212,
"step": 3226
},
{
"epoch": 3.47,
"learning_rate": 9.182037567204016e-08,
"loss": 1.6297,
"step": 3228
},
{
"epoch": 3.47,
"learning_rate": 9.109281194649243e-08,
"loss": 1.6644,
"step": 3230
},
{
"epoch": 3.47,
"learning_rate": 9.036800464548156e-08,
"loss": 1.7154,
"step": 3232
},
{
"epoch": 3.48,
"learning_rate": 8.964595596711667e-08,
"loss": 1.7058,
"step": 3234
},
{
"epoch": 3.48,
"learning_rate": 8.892666810113958e-08,
"loss": 1.6416,
"step": 3236
},
{
"epoch": 3.48,
"learning_rate": 8.821014322892051e-08,
"loss": 1.649,
"step": 3238
},
{
"epoch": 3.48,
"learning_rate": 8.749638352345001e-08,
"loss": 1.6482,
"step": 3240
},
{
"epoch": 3.49,
"learning_rate": 8.678539114933259e-08,
"loss": 1.6535,
"step": 3242
},
{
"epoch": 3.49,
"learning_rate": 8.607716826278089e-08,
"loss": 1.6195,
"step": 3244
},
{
"epoch": 3.49,
"learning_rate": 8.537171701160762e-08,
"loss": 1.6657,
"step": 3246
},
{
"epoch": 3.49,
"learning_rate": 8.466903953522109e-08,
"loss": 1.6363,
"step": 3248
},
{
"epoch": 3.49,
"learning_rate": 8.396913796461703e-08,
"loss": 1.6807,
"step": 3250
},
{
"epoch": 3.5,
"learning_rate": 8.327201442237274e-08,
"loss": 1.6893,
"step": 3252
},
{
"epoch": 3.5,
"learning_rate": 8.257767102264079e-08,
"loss": 1.6344,
"step": 3254
},
{
"epoch": 3.5,
"learning_rate": 8.188610987114241e-08,
"loss": 1.6344,
"step": 3256
},
{
"epoch": 3.5,
"learning_rate": 8.119733306516108e-08,
"loss": 1.7071,
"step": 3258
},
{
"epoch": 3.5,
"learning_rate": 8.051134269353687e-08,
"loss": 1.6781,
"step": 3260
},
{
"epoch": 3.51,
"learning_rate": 7.982814083665823e-08,
"loss": 1.7103,
"step": 3262
},
{
"epoch": 3.51,
"learning_rate": 7.91477295664581e-08,
"loss": 1.6994,
"step": 3264
},
{
"epoch": 3.51,
"learning_rate": 7.847011094640633e-08,
"loss": 1.6686,
"step": 3266
},
{
"epoch": 3.51,
"learning_rate": 7.779528703150262e-08,
"loss": 1.6597,
"step": 3268
},
{
"epoch": 3.52,
"learning_rate": 7.71232598682724e-08,
"loss": 1.6923,
"step": 3270
},
{
"epoch": 3.52,
"learning_rate": 7.64540314947586e-08,
"loss": 1.7059,
"step": 3272
},
{
"epoch": 3.52,
"learning_rate": 7.578760394051687e-08,
"loss": 1.6724,
"step": 3274
},
{
"epoch": 3.52,
"learning_rate": 7.512397922660852e-08,
"loss": 1.6546,
"step": 3276
},
{
"epoch": 3.52,
"learning_rate": 7.446315936559488e-08,
"loss": 1.6656,
"step": 3278
},
{
"epoch": 3.53,
"learning_rate": 7.380514636153079e-08,
"loss": 1.6757,
"step": 3280
},
{
"epoch": 3.53,
"learning_rate": 7.314994220995974e-08,
"loss": 1.6955,
"step": 3282
},
{
"epoch": 3.53,
"learning_rate": 7.249754889790538e-08,
"loss": 1.7442,
"step": 3284
},
{
"epoch": 3.53,
"learning_rate": 7.184796840386809e-08,
"loss": 1.6814,
"step": 3286
},
{
"epoch": 3.53,
"learning_rate": 7.120120269781792e-08,
"loss": 1.7133,
"step": 3288
},
{
"epoch": 3.54,
"learning_rate": 7.05572537411876e-08,
"loss": 1.6284,
"step": 3290
},
{
"epoch": 3.54,
"learning_rate": 6.99161234868686e-08,
"loss": 1.6831,
"step": 3292
},
{
"epoch": 3.54,
"learning_rate": 6.927781387920362e-08,
"loss": 1.6694,
"step": 3294
},
{
"epoch": 3.54,
"learning_rate": 6.864232685398141e-08,
"loss": 1.7051,
"step": 3296
},
{
"epoch": 3.55,
"learning_rate": 6.800966433843048e-08,
"loss": 1.7096,
"step": 3298
},
{
"epoch": 3.55,
"learning_rate": 6.737982825121391e-08,
"loss": 1.6188,
"step": 3300
},
{
"epoch": 3.55,
"learning_rate": 6.67528205024227e-08,
"loss": 1.6744,
"step": 3302
},
{
"epoch": 3.55,
"learning_rate": 6.612864299357112e-08,
"loss": 1.5937,
"step": 3304
},
{
"epoch": 3.55,
"learning_rate": 6.550729761758899e-08,
"loss": 1.6218,
"step": 3306
},
{
"epoch": 3.56,
"learning_rate": 6.488878625881866e-08,
"loss": 1.6318,
"step": 3308
},
{
"epoch": 3.56,
"learning_rate": 6.427311079300668e-08,
"loss": 1.6842,
"step": 3310
},
{
"epoch": 3.56,
"learning_rate": 6.36602730872996e-08,
"loss": 1.6758,
"step": 3312
},
{
"epoch": 3.56,
"learning_rate": 6.30502750002384e-08,
"loss": 1.6575,
"step": 3314
},
{
"epoch": 3.56,
"learning_rate": 6.244311838175143e-08,
"loss": 1.6651,
"step": 3316
},
{
"epoch": 3.57,
"learning_rate": 6.183880507315075e-08,
"loss": 1.7021,
"step": 3318
},
{
"epoch": 3.57,
"learning_rate": 6.123733690712518e-08,
"loss": 1.6429,
"step": 3320
},
{
"epoch": 3.57,
"learning_rate": 6.063871570773493e-08,
"loss": 1.7242,
"step": 3322
},
{
"epoch": 3.57,
"learning_rate": 6.004294329040638e-08,
"loss": 1.6261,
"step": 3324
},
{
"epoch": 3.58,
"learning_rate": 5.9450021461927125e-08,
"loss": 1.6618,
"step": 3326
},
{
"epoch": 3.58,
"learning_rate": 5.885995202043847e-08,
"loss": 1.6459,
"step": 3328
},
{
"epoch": 3.58,
"learning_rate": 5.827273675543265e-08,
"loss": 1.6774,
"step": 3330
},
{
"epoch": 3.58,
"learning_rate": 5.7688377447745465e-08,
"loss": 1.678,
"step": 3332
},
{
"epoch": 3.58,
"learning_rate": 5.710687586955143e-08,
"loss": 1.6966,
"step": 3334
},
{
"epoch": 3.59,
"learning_rate": 5.652823378435911e-08,
"loss": 1.6546,
"step": 3336
},
{
"epoch": 3.59,
"learning_rate": 5.595245294700424e-08,
"loss": 1.6564,
"step": 3338
},
{
"epoch": 3.59,
"learning_rate": 5.5379535103646125e-08,
"loss": 1.6682,
"step": 3340
},
{
"epoch": 3.59,
"learning_rate": 5.4809481991761056e-08,
"loss": 1.7469,
"step": 3342
},
{
"epoch": 3.59,
"learning_rate": 5.4242295340137576e-08,
"loss": 1.6954,
"step": 3344
},
{
"epoch": 3.6,
"learning_rate": 5.36779768688711e-08,
"loss": 1.6665,
"step": 3346
},
{
"epoch": 3.6,
"learning_rate": 5.311652828935942e-08,
"loss": 1.6482,
"step": 3348
},
{
"epoch": 3.6,
"learning_rate": 5.2557951304295747e-08,
"loss": 1.6741,
"step": 3350
},
{
"epoch": 3.6,
"learning_rate": 5.2002247607665586e-08,
"loss": 1.6335,
"step": 3352
},
{
"epoch": 3.61,
"learning_rate": 5.14494188847403e-08,
"loss": 1.645,
"step": 3354
},
{
"epoch": 3.61,
"learning_rate": 5.0899466812072464e-08,
"loss": 1.6584,
"step": 3356
},
{
"epoch": 3.61,
"learning_rate": 5.035239305749062e-08,
"loss": 1.6438,
"step": 3358
},
{
"epoch": 3.61,
"learning_rate": 4.9808199280094055e-08,
"loss": 1.6484,
"step": 3360
},
{
"epoch": 3.61,
"learning_rate": 4.9266887130248734e-08,
"loss": 1.6708,
"step": 3362
},
{
"epoch": 3.62,
"learning_rate": 4.872845824958105e-08,
"loss": 1.6236,
"step": 3364
},
{
"epoch": 3.62,
"learning_rate": 4.819291427097327e-08,
"loss": 1.6732,
"step": 3366
},
{
"epoch": 3.62,
"learning_rate": 4.7660256818558783e-08,
"loss": 1.7199,
"step": 3368
},
{
"epoch": 3.62,
"learning_rate": 4.713048750771731e-08,
"loss": 1.7204,
"step": 3370
},
{
"epoch": 3.62,
"learning_rate": 4.6603607945069456e-08,
"loss": 1.6991,
"step": 3372
},
{
"epoch": 3.63,
"learning_rate": 4.6079619728472515e-08,
"loss": 1.6393,
"step": 3374
},
{
"epoch": 3.63,
"learning_rate": 4.555852444701447e-08,
"loss": 1.6464,
"step": 3376
},
{
"epoch": 3.63,
"learning_rate": 4.5040323681011074e-08,
"loss": 1.6666,
"step": 3378
},
{
"epoch": 3.63,
"learning_rate": 4.452501900199901e-08,
"loss": 1.6701,
"step": 3380
},
{
"epoch": 3.64,
"learning_rate": 4.401261197273254e-08,
"loss": 1.7052,
"step": 3382
},
{
"epoch": 3.64,
"learning_rate": 4.350310414717806e-08,
"loss": 1.6852,
"step": 3384
},
{
"epoch": 3.64,
"learning_rate": 4.299649707050979e-08,
"loss": 1.6899,
"step": 3386
},
{
"epoch": 3.64,
"learning_rate": 4.249279227910485e-08,
"loss": 1.6644,
"step": 3388
},
{
"epoch": 3.64,
"learning_rate": 4.199199130053854e-08,
"loss": 1.6361,
"step": 3390
},
{
"epoch": 3.65,
"learning_rate": 4.1494095653579974e-08,
"loss": 1.6708,
"step": 3392
},
{
"epoch": 3.65,
"learning_rate": 4.099910684818697e-08,
"loss": 1.6374,
"step": 3394
},
{
"epoch": 3.65,
"learning_rate": 4.050702638550274e-08,
"loss": 1.6507,
"step": 3396
},
{
"epoch": 3.65,
"learning_rate": 4.0017855757849105e-08,
"loss": 1.6768,
"step": 3398
},
{
"epoch": 3.65,
"learning_rate": 3.953159644872439e-08,
"loss": 1.6593,
"step": 3400
},
{
"epoch": 3.66,
"learning_rate": 3.9048249932797425e-08,
"loss": 1.6431,
"step": 3402
},
{
"epoch": 3.66,
"learning_rate": 3.856781767590334e-08,
"loss": 1.7,
"step": 3404
},
{
"epoch": 3.66,
"learning_rate": 3.809030113503919e-08,
"loss": 1.6935,
"step": 3406
},
{
"epoch": 3.66,
"learning_rate": 3.761570175836015e-08,
"loss": 1.647,
"step": 3408
},
{
"epoch": 3.67,
"learning_rate": 3.7144020985173994e-08,
"loss": 1.6749,
"step": 3410
},
{
"epoch": 3.67,
"learning_rate": 3.667526024593759e-08,
"loss": 1.6753,
"step": 3412
},
{
"epoch": 3.67,
"learning_rate": 3.6209420962252104e-08,
"loss": 1.6501,
"step": 3414
},
{
"epoch": 3.67,
"learning_rate": 3.574650454685901e-08,
"loss": 1.6958,
"step": 3416
},
{
"epoch": 3.67,
"learning_rate": 3.528651240363567e-08,
"loss": 1.6502,
"step": 3418
},
{
"epoch": 3.68,
"learning_rate": 3.482944592759085e-08,
"loss": 1.6681,
"step": 3420
},
{
"epoch": 3.68,
"learning_rate": 3.437530650486098e-08,
"loss": 1.6767,
"step": 3422
},
{
"epoch": 3.68,
"learning_rate": 3.3924095512705477e-08,
"loss": 1.6495,
"step": 3424
},
{
"epoch": 3.68,
"learning_rate": 3.347581431950286e-08,
"loss": 1.6932,
"step": 3426
},
{
"epoch": 3.69,
"learning_rate": 3.303046428474643e-08,
"loss": 1.6783,
"step": 3428
},
{
"epoch": 3.69,
"learning_rate": 3.258804675904037e-08,
"loss": 1.6615,
"step": 3430
},
{
"epoch": 3.69,
"learning_rate": 3.2148563084095306e-08,
"loss": 1.7301,
"step": 3432
},
{
"epoch": 3.69,
"learning_rate": 3.1712014592724656e-08,
"loss": 1.7104,
"step": 3434
},
{
"epoch": 3.69,
"learning_rate": 3.127840260884018e-08,
"loss": 1.6831,
"step": 3436
},
{
"epoch": 3.7,
"learning_rate": 3.08477284474481e-08,
"loss": 1.6193,
"step": 3438
},
{
"epoch": 3.7,
"learning_rate": 3.041999341464563e-08,
"loss": 1.645,
"step": 3440
},
{
"epoch": 3.7,
"learning_rate": 2.9995198807615695e-08,
"loss": 1.6467,
"step": 3442
},
{
"epoch": 3.7,
"learning_rate": 2.9573345914624794e-08,
"loss": 1.6273,
"step": 3444
},
{
"epoch": 3.7,
"learning_rate": 2.9154436015017435e-08,
"loss": 1.6477,
"step": 3446
},
{
"epoch": 3.71,
"learning_rate": 2.8738470379213398e-08,
"loss": 1.662,
"step": 3448
},
{
"epoch": 3.71,
"learning_rate": 2.8325450268703145e-08,
"loss": 1.6946,
"step": 3450
},
{
"epoch": 3.71,
"learning_rate": 2.7915376936044622e-08,
"loss": 1.7115,
"step": 3452
},
{
"epoch": 3.71,
"learning_rate": 2.75082516248587e-08,
"loss": 1.6926,
"step": 3454
},
{
"epoch": 3.72,
"learning_rate": 2.7104075569826413e-08,
"loss": 1.7017,
"step": 3456
},
{
"epoch": 3.72,
"learning_rate": 2.6702849996684263e-08,
"loss": 1.6817,
"step": 3458
},
{
"epoch": 3.72,
"learning_rate": 2.6304576122221034e-08,
"loss": 1.6707,
"step": 3460
},
{
"epoch": 3.72,
"learning_rate": 2.5909255154273667e-08,
"loss": 1.6643,
"step": 3462
},
{
"epoch": 3.72,
"learning_rate": 2.551688829172416e-08,
"loss": 1.6317,
"step": 3464
},
{
"epoch": 3.73,
"learning_rate": 2.5127476724495778e-08,
"loss": 1.6635,
"step": 3466
},
{
"epoch": 3.73,
"learning_rate": 2.4741021633549076e-08,
"loss": 1.6439,
"step": 3468
},
{
"epoch": 3.73,
"learning_rate": 2.4357524190878665e-08,
"loss": 1.6555,
"step": 3470
},
{
"epoch": 3.73,
"learning_rate": 2.3976985559509333e-08,
"loss": 1.6431,
"step": 3472
},
{
"epoch": 3.73,
"learning_rate": 2.3599406893493157e-08,
"loss": 1.654,
"step": 3474
},
{
"epoch": 3.74,
"learning_rate": 2.322478933790506e-08,
"loss": 1.6237,
"step": 3476
},
{
"epoch": 3.74,
"learning_rate": 2.2853134028840594e-08,
"loss": 1.6238,
"step": 3478
},
{
"epoch": 3.74,
"learning_rate": 2.2484442093410826e-08,
"loss": 1.6644,
"step": 3480
},
{
"epoch": 3.74,
"learning_rate": 2.211871464974091e-08,
"loss": 1.6945,
"step": 3482
},
{
"epoch": 3.75,
"learning_rate": 2.1755952806964627e-08,
"loss": 1.647,
"step": 3484
},
{
"epoch": 3.75,
"learning_rate": 2.1396157665222737e-08,
"loss": 1.6786,
"step": 3486
},
{
"epoch": 3.75,
"learning_rate": 2.1039330315658964e-08,
"loss": 1.6438,
"step": 3488
},
{
"epoch": 3.75,
"learning_rate": 2.0685471840415913e-08,
"loss": 1.6849,
"step": 3490
},
{
"epoch": 3.75,
"learning_rate": 2.0334583312633378e-08,
"loss": 1.6909,
"step": 3492
},
{
"epoch": 3.76,
"learning_rate": 1.9986665796443926e-08,
"loss": 1.6418,
"step": 3494
},
{
"epoch": 3.76,
"learning_rate": 1.9641720346969982e-08,
"loss": 1.6534,
"step": 3496
},
{
"epoch": 3.76,
"learning_rate": 1.9299748010320527e-08,
"loss": 1.6602,
"step": 3498
},
{
"epoch": 3.76,
"learning_rate": 1.8960749823588527e-08,
"loss": 1.6439,
"step": 3500
},
{
"epoch": 3.76,
"learning_rate": 1.8624726814846504e-08,
"loss": 1.7139,
"step": 3502
},
{
"epoch": 3.77,
"learning_rate": 1.8291680003145073e-08,
"loss": 1.6126,
"step": 3504
},
{
"epoch": 3.77,
"learning_rate": 1.796161039850841e-08,
"loss": 1.6808,
"step": 3506
},
{
"epoch": 3.77,
"learning_rate": 1.7634519001931914e-08,
"loss": 1.7076,
"step": 3508
},
{
"epoch": 3.77,
"learning_rate": 1.7310406805379207e-08,
"loss": 1.6675,
"step": 3510
},
{
"epoch": 3.78,
"learning_rate": 1.6989274791778697e-08,
"loss": 1.6708,
"step": 3512
},
{
"epoch": 3.78,
"learning_rate": 1.6671123935021125e-08,
"loss": 1.6062,
"step": 3514
},
{
"epoch": 3.78,
"learning_rate": 1.635595519995614e-08,
"loss": 1.6837,
"step": 3516
},
{
"epoch": 3.78,
"learning_rate": 1.6043769542389617e-08,
"loss": 1.6469,
"step": 3518
},
{
"epoch": 3.78,
"learning_rate": 1.5734567909080565e-08,
"loss": 1.6466,
"step": 3520
},
{
"epoch": 3.79,
"learning_rate": 1.542835123773889e-08,
"loss": 1.7045,
"step": 3522
},
{
"epoch": 3.79,
"learning_rate": 1.5125120457021302e-08,
"loss": 1.6338,
"step": 3524
},
{
"epoch": 3.79,
"learning_rate": 1.482487648653008e-08,
"loss": 1.6408,
"step": 3526
},
{
"epoch": 3.79,
"learning_rate": 1.4527620236808868e-08,
"loss": 1.6534,
"step": 3528
},
{
"epoch": 3.79,
"learning_rate": 1.4233352609340665e-08,
"loss": 1.6566,
"step": 3530
},
{
"epoch": 3.8,
"learning_rate": 1.3942074496545165e-08,
"loss": 1.6992,
"step": 3532
},
{
"epoch": 3.8,
"learning_rate": 1.3653786781775422e-08,
"loss": 1.6856,
"step": 3534
},
{
"epoch": 3.8,
"learning_rate": 1.3368490339315974e-08,
"loss": 1.6908,
"step": 3536
},
{
"epoch": 3.8,
"learning_rate": 1.308618603437961e-08,
"loss": 1.6859,
"step": 3538
},
{
"epoch": 3.81,
"learning_rate": 1.2806874723104822e-08,
"loss": 1.7119,
"step": 3540
},
{
"epoch": 3.81,
"learning_rate": 1.2530557252553364e-08,
"loss": 1.6799,
"step": 3542
},
{
"epoch": 3.81,
"learning_rate": 1.2257234460707699e-08,
"loss": 1.6392,
"step": 3544
},
{
"epoch": 3.81,
"learning_rate": 1.198690717646833e-08,
"loss": 1.6594,
"step": 3546
},
{
"epoch": 3.81,
"learning_rate": 1.1719576219651584e-08,
"loss": 1.6087,
"step": 3548
},
{
"epoch": 3.82,
"learning_rate": 1.1455242400986276e-08,
"loss": 1.7065,
"step": 3550
},
{
"epoch": 3.82,
"learning_rate": 1.1193906522112607e-08,
"loss": 1.6663,
"step": 3552
},
{
"epoch": 3.82,
"learning_rate": 1.0935569375578602e-08,
"loss": 1.6866,
"step": 3554
},
{
"epoch": 3.82,
"learning_rate": 1.0680231744837897e-08,
"loss": 1.6843,
"step": 3556
},
{
"epoch": 3.82,
"learning_rate": 1.0427894404248072e-08,
"loss": 1.6836,
"step": 3558
},
{
"epoch": 3.83,
"learning_rate": 1.0178558119067315e-08,
"loss": 1.6905,
"step": 3560
},
{
"epoch": 3.83,
"learning_rate": 9.932223645452763e-09,
"loss": 1.6477,
"step": 3562
},
{
"epoch": 3.83,
"learning_rate": 9.68889173045806e-09,
"loss": 1.6528,
"step": 3564
},
{
"epoch": 3.83,
"learning_rate": 9.448563112031127e-09,
"loss": 1.6646,
"step": 3566
},
{
"epoch": 3.84,
"learning_rate": 9.2112385190114e-09,
"loss": 1.6845,
"step": 3568
},
{
"epoch": 3.84,
"learning_rate": 8.97691867112882e-09,
"loss": 1.7005,
"step": 3570
},
{
"epoch": 3.84,
"learning_rate": 8.745604279000175e-09,
"loss": 1.7016,
"step": 3572
},
{
"epoch": 3.84,
"learning_rate": 8.517296044127986e-09,
"loss": 1.6726,
"step": 3574
},
{
"epoch": 3.84,
"learning_rate": 8.291994658898182e-09,
"loss": 1.6214,
"step": 3576
},
{
"epoch": 3.85,
"learning_rate": 8.06970080657765e-09,
"loss": 1.6927,
"step": 3578
},
{
"epoch": 3.85,
"learning_rate": 7.850415161312462e-09,
"loss": 1.642,
"step": 3580
},
{
"epoch": 3.85,
"learning_rate": 7.634138388125877e-09,
"loss": 1.6652,
"step": 3582
},
{
"epoch": 3.85,
"learning_rate": 7.420871142916119e-09,
"loss": 1.6784,
"step": 3584
},
{
"epoch": 3.85,
"learning_rate": 7.210614072454269e-09,
"loss": 1.6558,
"step": 3586
},
{
"epoch": 3.86,
"learning_rate": 7.003367814382933e-09,
"loss": 1.6801,
"step": 3588
},
{
"epoch": 3.86,
"learning_rate": 6.799132997213464e-09,
"loss": 1.7099,
"step": 3590
},
{
"epoch": 3.86,
"learning_rate": 6.5979102403249664e-09,
"loss": 1.6444,
"step": 3592
},
{
"epoch": 3.86,
"learning_rate": 6.3997001539614074e-09,
"loss": 1.6845,
"step": 3594
},
{
"epoch": 3.87,
"learning_rate": 6.204503339230504e-09,
"loss": 1.6861,
"step": 3596
},
{
"epoch": 3.87,
"learning_rate": 6.012320388101955e-09,
"loss": 1.652,
"step": 3598
},
{
"epoch": 3.87,
"learning_rate": 5.823151883404876e-09,
"loss": 1.7028,
"step": 3600
},
{
"epoch": 3.87,
"learning_rate": 5.6369983988269195e-09,
"loss": 1.6658,
"step": 3602
},
{
"epoch": 3.87,
"learning_rate": 5.453860498911944e-09,
"loss": 1.6543,
"step": 3604
},
{
"epoch": 3.88,
"learning_rate": 5.273738739058675e-09,
"loss": 1.6666,
"step": 3606
},
{
"epoch": 3.88,
"learning_rate": 5.096633665518601e-09,
"loss": 1.6808,
"step": 3608
},
{
"epoch": 3.88,
"learning_rate": 4.922545815394863e-09,
"loss": 1.6442,
"step": 3610
},
{
"epoch": 3.88,
"learning_rate": 4.75147571664003e-09,
"loss": 1.6584,
"step": 3612
},
{
"epoch": 3.88,
"learning_rate": 4.583423888055105e-09,
"loss": 1.6805,
"step": 3614
},
{
"epoch": 3.89,
"learning_rate": 4.4183908392873005e-09,
"loss": 1.6294,
"step": 3616
},
{
"epoch": 3.89,
"learning_rate": 4.256377070829264e-09,
"loss": 1.688,
"step": 3618
},
{
"epoch": 3.89,
"learning_rate": 4.097383074016636e-09,
"loss": 1.7435,
"step": 3620
},
{
"epoch": 3.89,
"learning_rate": 3.9414093310274895e-09,
"loss": 1.6952,
"step": 3622
},
{
"epoch": 3.9,
"learning_rate": 3.7884563148802286e-09,
"loss": 1.6994,
"step": 3624
},
{
"epoch": 3.9,
"learning_rate": 3.6385244894323596e-09,
"loss": 1.636,
"step": 3626
},
{
"epoch": 3.9,
"learning_rate": 3.4916143093790538e-09,
"loss": 1.6797,
"step": 3628
},
{
"epoch": 3.9,
"learning_rate": 3.347726220251923e-09,
"loss": 1.6591,
"step": 3630
},
{
"epoch": 3.9,
"learning_rate": 3.2068606584174652e-09,
"loss": 1.6905,
"step": 3632
},
{
"epoch": 3.91,
"learning_rate": 3.0690180510758444e-09,
"loss": 1.6511,
"step": 3634
},
{
"epoch": 3.91,
"learning_rate": 2.934198816259559e-09,
"loss": 1.6138,
"step": 3636
},
{
"epoch": 3.91,
"learning_rate": 2.8024033628321066e-09,
"loss": 1.6458,
"step": 3638
},
{
"epoch": 3.91,
"learning_rate": 2.673632090487099e-09,
"loss": 1.6346,
"step": 3640
},
{
"epoch": 3.92,
"learning_rate": 2.5478853897464848e-09,
"loss": 1.6801,
"step": 3642
},
{
"epoch": 3.92,
"learning_rate": 2.42516364195966e-09,
"loss": 1.6812,
"step": 3644
},
{
"epoch": 3.92,
"learning_rate": 2.3054672193024704e-09,
"loss": 1.6319,
"step": 3646
},
{
"epoch": 3.92,
"learning_rate": 2.18879648477599e-09,
"loss": 1.6834,
"step": 3648
},
{
"epoch": 3.92,
"learning_rate": 2.0751517922048546e-09,
"loss": 1.7166,
"step": 3650
},
{
"epoch": 3.93,
"learning_rate": 1.9645334862373743e-09,
"loss": 1.7328,
"step": 3652
},
{
"epoch": 3.93,
"learning_rate": 1.8569419023433119e-09,
"loss": 1.6523,
"step": 3654
},
{
"epoch": 3.93,
"learning_rate": 1.7523773668135512e-09,
"loss": 1.6912,
"step": 3656
},
{
"epoch": 3.93,
"learning_rate": 1.6508401967588736e-09,
"loss": 1.7194,
"step": 3658
},
{
"epoch": 3.93,
"learning_rate": 1.5523307001088503e-09,
"loss": 1.6586,
"step": 3660
},
{
"epoch": 3.94,
"learning_rate": 1.4568491756115075e-09,
"loss": 1.6649,
"step": 3662
},
{
"epoch": 3.94,
"learning_rate": 1.3643959128314398e-09,
"loss": 1.693,
"step": 3664
},
{
"epoch": 3.94,
"learning_rate": 1.2749711921500318e-09,
"loss": 1.6872,
"step": 3666
},
{
"epoch": 3.94,
"learning_rate": 1.188575284763793e-09,
"loss": 1.68,
"step": 3668
},
{
"epoch": 3.95,
"learning_rate": 1.1052084526838035e-09,
"loss": 1.6039,
"step": 3670
},
{
"epoch": 3.95,
"learning_rate": 1.0248709487349349e-09,
"loss": 1.6443,
"step": 3672
},
{
"epoch": 3.95,
"learning_rate": 9.475630165552973e-10,
"loss": 1.7098,
"step": 3674
},
{
"epoch": 3.95,
"learning_rate": 8.732848905947942e-10,
"loss": 1.6615,
"step": 3676
},
{
"epoch": 3.95,
"learning_rate": 8.020367961155683e-10,
"loss": 1.6977,
"step": 3678
},
{
"epoch": 3.96,
"learning_rate": 7.338189491900015e-10,
"loss": 1.6328,
"step": 3680
},
{
"epoch": 3.96,
"learning_rate": 6.686315567010492e-10,
"loss": 1.6473,
"step": 3682
},
{
"epoch": 3.96,
"learning_rate": 6.064748163413513e-10,
"loss": 1.6573,
"step": 3684
},
{
"epoch": 3.96,
"learning_rate": 5.473489166122335e-10,
"loss": 1.6143,
"step": 3686
},
{
"epoch": 3.96,
"learning_rate": 4.912540368237072e-10,
"loss": 1.6866,
"step": 3688
},
{
"epoch": 3.97,
"learning_rate": 4.3819034709358105e-10,
"loss": 1.6848,
"step": 3690
},
{
"epoch": 3.97,
"learning_rate": 3.88158008346906e-10,
"loss": 1.6417,
"step": 3692
},
{
"epoch": 3.97,
"learning_rate": 3.4115717231597564e-10,
"loss": 1.6711,
"step": 3694
},
{
"epoch": 3.97,
"learning_rate": 2.971879815391043e-10,
"loss": 1.6467,
"step": 3696
},
{
"epoch": 3.98,
"learning_rate": 2.5625056936084966e-10,
"loss": 1.649,
"step": 3698
},
{
"epoch": 3.98,
"learning_rate": 2.1834505993134633e-10,
"loss": 1.6476,
"step": 3700
},
{
"epoch": 3.98,
"learning_rate": 1.834715682056398e-10,
"loss": 1.637,
"step": 3702
},
{
"epoch": 3.98,
"learning_rate": 1.516301999441305e-10,
"loss": 1.7084,
"step": 3704
},
{
"epoch": 3.98,
"learning_rate": 1.228210517113526e-10,
"loss": 1.7026,
"step": 3706
},
{
"epoch": 3.99,
"learning_rate": 9.704421087619596e-11,
"loss": 1.6253,
"step": 3708
},
{
"epoch": 3.99,
"learning_rate": 7.42997556115732e-11,
"loss": 1.6609,
"step": 3710
},
{
"epoch": 3.99,
"learning_rate": 5.458775489430856e-11,
"loss": 1.6664,
"step": 3712
},
{
"epoch": 3.99,
"learning_rate": 3.7908268504471816e-11,
"loss": 1.6594,
"step": 3714
},
{
"epoch": 3.99,
"learning_rate": 2.426134702548932e-11,
"loss": 1.6718,
"step": 3716
},
{
"epoch": 4.0,
"learning_rate": 1.3647031844365997e-11,
"loss": 1.6577,
"step": 3718
},
{
"epoch": 4.0,
"learning_rate": 6.06535515068618e-12,
"loss": 1.6686,
"step": 3720
},
{
"epoch": 4.0,
"step": 3720,
"total_flos": 2.1177725398863053e+17,
"train_loss": 1.7057791815650079,
"train_runtime": 39113.1965,
"train_samples_per_second": 6.088,
"train_steps_per_second": 0.095
}
],
"logging_steps": 2,
"max_steps": 3720,
"num_input_tokens_seen": 0,
"num_train_epochs": 4,
"save_steps": 4000,
"total_flos": 2.1177725398863053e+17,
"train_batch_size": 4,
"trial_name": null,
"trial_params": null
}