diff --git "a/trainer_state.json" "b/trainer_state.json" new file mode 100644--- /dev/null +++ "b/trainer_state.json" @@ -0,0 +1,6471 @@ +{ + "best_metric": null, + "best_model_checkpoint": null, + "epoch": 1.9944341372912802, + "eval_steps": 800, + "global_step": 4300, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.0, + "learning_rate": 0, + "loss": 2.4801, + "step": 4 + }, + { + "epoch": 0.0, + "learning_rate": 0, + "loss": 2.4284, + "step": 8 + }, + { + "epoch": 0.01, + "learning_rate": 0, + "loss": 2.2651, + "step": 12 + }, + { + "epoch": 0.01, + "learning_rate": 0, + "loss": 2.411, + "step": 16 + }, + { + "epoch": 0.01, + "learning_rate": 0, + "loss": 2.8299, + "step": 20 + }, + { + "epoch": 0.01, + "learning_rate": 0.0, + "loss": 2.2188, + "step": 24 + }, + { + "epoch": 0.01, + "learning_rate": 1.3082402064781276e-06, + "loss": 1.345, + "step": 28 + }, + { + "epoch": 0.01, + "learning_rate": 1.9623603097171917e-06, + "loss": 0.5695, + "step": 32 + }, + { + "epoch": 0.02, + "learning_rate": 2.262883767531511e-06, + "loss": 0.8812, + "step": 36 + }, + { + "epoch": 0.02, + "learning_rate": 2.5555756797431724e-06, + "loss": 0.8725, + "step": 40 + }, + { + "epoch": 0.02, + "learning_rate": 2.7786547836457785e-06, + "loss": 0.4851, + "step": 44 + }, + { + "epoch": 0.02, + "learning_rate": 2.9589528137043157e-06, + "loss": 0.8354, + "step": 48 + }, + { + "epoch": 0.02, + "learning_rate": 3.110267503805303e-06, + "loss": 0.4873, + "step": 52 + }, + { + "epoch": 0.03, + "learning_rate": 3.2406394020168525e-06, + "loss": 0.6584, + "step": 56 + }, + { + "epoch": 0.03, + "learning_rate": 3.3551671365864186e-06, + "loss": 0.529, + "step": 60 + }, + { + "epoch": 0.03, + "learning_rate": 3.4572878450621517e-06, + "loss": 0.5812, + "step": 64 + }, + { + "epoch": 0.03, + "learning_rate": 3.5494288615482305e-06, + "loss": 0.5163, + "step": 68 + }, + { + "epoch": 0.03, + "learning_rate": 3.6333682331099297e-06, + "loss": 0.6595, + "step": 72 + }, + { + "epoch": 0.04, + "learning_rate": 3.710447450306277e-06, + "loss": 0.4775, + "step": 76 + }, + { + "epoch": 0.04, + "learning_rate": 3.7817036126729157e-06, + "loss": 0.682, + "step": 80 + }, + { + "epoch": 0.04, + "learning_rate": 3.84795507876713e-06, + "loss": 0.7048, + "step": 84 + }, + { + "epoch": 0.04, + "learning_rate": 3.909858960648549e-06, + "loss": 0.9478, + "step": 88 + }, + { + "epoch": 0.04, + "learning_rate": 3.9679508875196075e-06, + "loss": 0.5932, + "step": 92 + }, + { + "epoch": 0.04, + "learning_rate": 4.022673220704539e-06, + "loss": 0.5733, + "step": 96 + }, + { + "epoch": 0.05, + "learning_rate": 4.074395524884577e-06, + "loss": 0.5817, + "step": 100 + }, + { + "epoch": 0.05, + "learning_rate": 4.123429713794031e-06, + "loss": 0.6372, + "step": 104 + }, + { + "epoch": 0.05, + "learning_rate": 4.170041450985754e-06, + "loss": 0.5108, + "step": 108 + }, + { + "epoch": 0.05, + "learning_rate": 4.214458864668026e-06, + "loss": 0.6262, + "step": 112 + }, + { + "epoch": 0.05, + "learning_rate": 4.256879301905398e-06, + "loss": 0.8594, + "step": 116 + }, + { + "epoch": 0.06, + "learning_rate": 4.297474628787183e-06, + "loss": 0.7241, + "step": 120 + }, + { + "epoch": 0.06, + "learning_rate": 4.336395436735046e-06, + "loss": 0.6471, + "step": 124 + }, + { + "epoch": 0.06, + "learning_rate": 4.373774415149143e-06, + "loss": 0.6144, + "step": 128 + }, + { + "epoch": 0.06, + "learning_rate": 4.409729081127459e-06, + "loss": 0.672, + "step": 132 + }, + { + "epoch": 0.06, + "learning_rate": 4.444364007946065e-06, + "loss": 0.6802, + "step": 136 + }, + { + "epoch": 0.06, + "learning_rate": 4.4777726588457195e-06, + "loss": 0.514, + "step": 140 + }, + { + "epoch": 0.07, + "learning_rate": 4.510038907149524e-06, + "loss": 0.715, + "step": 144 + }, + { + "epoch": 0.07, + "learning_rate": 4.541238304971202e-06, + "loss": 0.5189, + "step": 148 + }, + { + "epoch": 0.07, + "learning_rate": 4.5714391488166745e-06, + "loss": 0.7188, + "step": 152 + }, + { + "epoch": 0.07, + "learning_rate": 4.600703379889684e-06, + "loss": 0.5829, + "step": 156 + }, + { + "epoch": 0.07, + "learning_rate": 4.629087348946707e-06, + "loss": 0.5551, + "step": 160 + }, + { + "epoch": 0.08, + "learning_rate": 4.656642469442713e-06, + "loss": 0.7016, + "step": 164 + }, + { + "epoch": 0.08, + "learning_rate": 4.683415777991895e-06, + "loss": 0.5357, + "step": 168 + }, + { + "epoch": 0.08, + "learning_rate": 4.709450417491796e-06, + "loss": 0.6232, + "step": 172 + }, + { + "epoch": 0.08, + "learning_rate": 4.734786055373451e-06, + "loss": 0.7218, + "step": 176 + }, + { + "epoch": 0.08, + "learning_rate": 4.759459247158257e-06, + "loss": 0.51, + "step": 180 + }, + { + "epoch": 0.09, + "learning_rate": 4.783503753685794e-06, + "loss": 0.8871, + "step": 184 + }, + { + "epoch": 0.09, + "learning_rate": 4.806950818921448e-06, + "loss": 0.8177, + "step": 188 + }, + { + "epoch": 0.09, + "learning_rate": 4.8298294140798465e-06, + "loss": 0.5602, + "step": 192 + }, + { + "epoch": 0.09, + "learning_rate": 4.852166452849314e-06, + "loss": 0.7395, + "step": 196 + }, + { + "epoch": 0.09, + "learning_rate": 4.8739869817278244e-06, + "loss": 0.7008, + "step": 200 + }, + { + "epoch": 0.09, + "learning_rate": 4.89531434884623e-06, + "loss": 0.7096, + "step": 204 + }, + { + "epoch": 0.1, + "learning_rate": 4.916170354132174e-06, + "loss": 0.7617, + "step": 208 + }, + { + "epoch": 0.1, + "learning_rate": 4.936575383236021e-06, + "loss": 0.637, + "step": 212 + }, + { + "epoch": 0.1, + "learning_rate": 4.956548527281403e-06, + "loss": 0.7149, + "step": 216 + }, + { + "epoch": 0.1, + "learning_rate": 4.976107690203556e-06, + "loss": 0.585, + "step": 220 + }, + { + "epoch": 0.1, + "learning_rate": 4.995269685187989e-06, + "loss": 0.4153, + "step": 224 + }, + { + "epoch": 0.11, + "learning_rate": 4.998404594767071e-06, + "loss": 0.8215, + "step": 228 + }, + { + "epoch": 0.11, + "learning_rate": 4.995213784301213e-06, + "loss": 0.6051, + "step": 232 + }, + { + "epoch": 0.11, + "learning_rate": 4.992022973835355e-06, + "loss": 0.6785, + "step": 236 + }, + { + "epoch": 0.11, + "learning_rate": 4.988832163369496e-06, + "loss": 0.6774, + "step": 240 + }, + { + "epoch": 0.11, + "learning_rate": 4.985641352903638e-06, + "loss": 0.5039, + "step": 244 + }, + { + "epoch": 0.12, + "learning_rate": 4.982450542437779e-06, + "loss": 0.5724, + "step": 248 + }, + { + "epoch": 0.12, + "learning_rate": 4.979259731971921e-06, + "loss": 0.8469, + "step": 252 + }, + { + "epoch": 0.12, + "learning_rate": 4.976068921506063e-06, + "loss": 0.9735, + "step": 256 + }, + { + "epoch": 0.12, + "learning_rate": 4.972878111040205e-06, + "loss": 0.8594, + "step": 260 + }, + { + "epoch": 0.12, + "learning_rate": 4.969687300574346e-06, + "loss": 0.6523, + "step": 264 + }, + { + "epoch": 0.12, + "learning_rate": 4.9664964901084875e-06, + "loss": 0.6641, + "step": 268 + }, + { + "epoch": 0.13, + "learning_rate": 4.96330567964263e-06, + "loss": 0.6586, + "step": 272 + }, + { + "epoch": 0.13, + "learning_rate": 4.960114869176771e-06, + "loss": 0.6019, + "step": 276 + }, + { + "epoch": 0.13, + "learning_rate": 4.956924058710913e-06, + "loss": 0.7117, + "step": 280 + }, + { + "epoch": 0.13, + "learning_rate": 4.953733248245054e-06, + "loss": 0.4632, + "step": 284 + }, + { + "epoch": 0.13, + "learning_rate": 4.950542437779196e-06, + "loss": 0.733, + "step": 288 + }, + { + "epoch": 0.14, + "learning_rate": 4.947351627313338e-06, + "loss": 0.7205, + "step": 292 + }, + { + "epoch": 0.14, + "learning_rate": 4.9441608168474795e-06, + "loss": 0.6121, + "step": 296 + }, + { + "epoch": 0.14, + "learning_rate": 4.940970006381621e-06, + "loss": 0.7859, + "step": 300 + }, + { + "epoch": 0.14, + "learning_rate": 4.9377791959157625e-06, + "loss": 0.8041, + "step": 304 + }, + { + "epoch": 0.14, + "learning_rate": 4.934588385449905e-06, + "loss": 0.7834, + "step": 308 + }, + { + "epoch": 0.14, + "learning_rate": 4.931397574984046e-06, + "loss": 0.6704, + "step": 312 + }, + { + "epoch": 0.15, + "learning_rate": 4.928206764518188e-06, + "loss": 0.8402, + "step": 316 + }, + { + "epoch": 0.15, + "learning_rate": 4.925015954052329e-06, + "loss": 0.7851, + "step": 320 + }, + { + "epoch": 0.15, + "learning_rate": 4.9218251435864715e-06, + "loss": 0.501, + "step": 324 + }, + { + "epoch": 0.15, + "learning_rate": 4.918634333120613e-06, + "loss": 0.6039, + "step": 328 + }, + { + "epoch": 0.15, + "learning_rate": 4.9154435226547544e-06, + "loss": 0.64, + "step": 332 + }, + { + "epoch": 0.16, + "learning_rate": 4.912252712188896e-06, + "loss": 0.5726, + "step": 336 + }, + { + "epoch": 0.16, + "learning_rate": 4.909061901723038e-06, + "loss": 0.6605, + "step": 340 + }, + { + "epoch": 0.16, + "learning_rate": 4.90587109125718e-06, + "loss": 0.8105, + "step": 344 + }, + { + "epoch": 0.16, + "learning_rate": 4.902680280791321e-06, + "loss": 0.8422, + "step": 348 + }, + { + "epoch": 0.16, + "learning_rate": 4.8994894703254635e-06, + "loss": 0.5242, + "step": 352 + }, + { + "epoch": 0.17, + "learning_rate": 4.896298659859605e-06, + "loss": 0.6062, + "step": 356 + }, + { + "epoch": 0.17, + "learning_rate": 4.8931078493937464e-06, + "loss": 0.7289, + "step": 360 + }, + { + "epoch": 0.17, + "learning_rate": 4.889917038927888e-06, + "loss": 0.6916, + "step": 364 + }, + { + "epoch": 0.17, + "learning_rate": 4.88672622846203e-06, + "loss": 0.8526, + "step": 368 + }, + { + "epoch": 0.17, + "learning_rate": 4.883535417996172e-06, + "loss": 1.0668, + "step": 372 + }, + { + "epoch": 0.17, + "learning_rate": 4.880344607530313e-06, + "loss": 0.6912, + "step": 376 + }, + { + "epoch": 0.18, + "learning_rate": 4.877153797064455e-06, + "loss": 0.7383, + "step": 380 + }, + { + "epoch": 0.18, + "learning_rate": 4.873962986598597e-06, + "loss": 0.77, + "step": 384 + }, + { + "epoch": 0.18, + "learning_rate": 4.870772176132738e-06, + "loss": 0.8328, + "step": 388 + }, + { + "epoch": 0.18, + "learning_rate": 4.86758136566688e-06, + "loss": 0.7135, + "step": 392 + }, + { + "epoch": 0.18, + "learning_rate": 4.864390555201021e-06, + "loss": 0.7976, + "step": 396 + }, + { + "epoch": 0.19, + "learning_rate": 4.861199744735164e-06, + "loss": 0.5799, + "step": 400 + }, + { + "epoch": 0.19, + "learning_rate": 4.858008934269305e-06, + "loss": 0.5246, + "step": 404 + }, + { + "epoch": 0.19, + "learning_rate": 4.854818123803447e-06, + "loss": 0.5895, + "step": 408 + }, + { + "epoch": 0.19, + "learning_rate": 4.851627313337588e-06, + "loss": 0.7751, + "step": 412 + }, + { + "epoch": 0.19, + "learning_rate": 4.84843650287173e-06, + "loss": 0.7469, + "step": 416 + }, + { + "epoch": 0.19, + "learning_rate": 4.845245692405872e-06, + "loss": 0.5013, + "step": 420 + }, + { + "epoch": 0.2, + "learning_rate": 4.842054881940013e-06, + "loss": 0.5398, + "step": 424 + }, + { + "epoch": 0.2, + "learning_rate": 4.838864071474155e-06, + "loss": 0.4547, + "step": 428 + }, + { + "epoch": 0.2, + "learning_rate": 4.835673261008296e-06, + "loss": 0.8732, + "step": 432 + }, + { + "epoch": 0.2, + "learning_rate": 4.832482450542439e-06, + "loss": 0.7671, + "step": 436 + }, + { + "epoch": 0.2, + "learning_rate": 4.82929164007658e-06, + "loss": 0.6574, + "step": 440 + }, + { + "epoch": 0.21, + "learning_rate": 4.8261008296107215e-06, + "loss": 0.7173, + "step": 444 + }, + { + "epoch": 0.21, + "learning_rate": 4.822910019144863e-06, + "loss": 0.4371, + "step": 448 + }, + { + "epoch": 0.21, + "learning_rate": 4.819719208679005e-06, + "loss": 0.6992, + "step": 452 + }, + { + "epoch": 0.21, + "learning_rate": 4.816528398213147e-06, + "loss": 0.6827, + "step": 456 + }, + { + "epoch": 0.21, + "learning_rate": 4.813337587747288e-06, + "loss": 0.4919, + "step": 460 + }, + { + "epoch": 0.22, + "learning_rate": 4.81014677728143e-06, + "loss": 0.9571, + "step": 464 + }, + { + "epoch": 0.22, + "learning_rate": 4.806955966815571e-06, + "loss": 0.5202, + "step": 468 + }, + { + "epoch": 0.22, + "learning_rate": 4.8037651563497135e-06, + "loss": 0.7919, + "step": 472 + }, + { + "epoch": 0.22, + "learning_rate": 4.800574345883855e-06, + "loss": 0.5517, + "step": 476 + }, + { + "epoch": 0.22, + "learning_rate": 4.7973835354179965e-06, + "loss": 0.3889, + "step": 480 + }, + { + "epoch": 0.22, + "learning_rate": 4.794192724952138e-06, + "loss": 0.5933, + "step": 484 + }, + { + "epoch": 0.23, + "learning_rate": 4.79100191448628e-06, + "loss": 0.9298, + "step": 488 + }, + { + "epoch": 0.23, + "learning_rate": 4.787811104020422e-06, + "loss": 0.4758, + "step": 492 + }, + { + "epoch": 0.23, + "learning_rate": 4.784620293554563e-06, + "loss": 0.5162, + "step": 496 + }, + { + "epoch": 0.23, + "learning_rate": 4.781429483088705e-06, + "loss": 0.6675, + "step": 500 + }, + { + "epoch": 0.23, + "learning_rate": 4.778238672622846e-06, + "loss": 0.8493, + "step": 504 + }, + { + "epoch": 0.24, + "learning_rate": 4.7750478621569885e-06, + "loss": 0.6583, + "step": 508 + }, + { + "epoch": 0.24, + "learning_rate": 4.77185705169113e-06, + "loss": 0.4897, + "step": 512 + }, + { + "epoch": 0.24, + "learning_rate": 4.768666241225271e-06, + "loss": 0.6633, + "step": 516 + }, + { + "epoch": 0.24, + "learning_rate": 4.765475430759413e-06, + "loss": 0.782, + "step": 520 + }, + { + "epoch": 0.24, + "learning_rate": 4.762284620293555e-06, + "loss": 0.815, + "step": 524 + }, + { + "epoch": 0.24, + "learning_rate": 4.759093809827697e-06, + "loss": 0.4498, + "step": 528 + }, + { + "epoch": 0.25, + "learning_rate": 4.755902999361838e-06, + "loss": 0.6006, + "step": 532 + }, + { + "epoch": 0.25, + "learning_rate": 4.75271218889598e-06, + "loss": 0.9473, + "step": 536 + }, + { + "epoch": 0.25, + "learning_rate": 4.749521378430121e-06, + "loss": 0.4036, + "step": 540 + }, + { + "epoch": 0.25, + "learning_rate": 4.746330567964263e-06, + "loss": 0.555, + "step": 544 + }, + { + "epoch": 0.25, + "learning_rate": 4.743139757498405e-06, + "loss": 0.7843, + "step": 548 + }, + { + "epoch": 0.26, + "learning_rate": 4.739948947032546e-06, + "loss": 0.8376, + "step": 552 + }, + { + "epoch": 0.26, + "learning_rate": 4.736758136566688e-06, + "loss": 0.5423, + "step": 556 + }, + { + "epoch": 0.26, + "learning_rate": 4.73356732610083e-06, + "loss": 0.5533, + "step": 560 + }, + { + "epoch": 0.26, + "learning_rate": 4.7303765156349716e-06, + "loss": 0.5212, + "step": 564 + }, + { + "epoch": 0.26, + "learning_rate": 4.727185705169113e-06, + "loss": 0.8054, + "step": 568 + }, + { + "epoch": 0.27, + "learning_rate": 4.7239948947032545e-06, + "loss": 0.438, + "step": 572 + }, + { + "epoch": 0.27, + "learning_rate": 4.720804084237397e-06, + "loss": 0.6025, + "step": 576 + }, + { + "epoch": 0.27, + "learning_rate": 4.717613273771538e-06, + "loss": 0.8118, + "step": 580 + }, + { + "epoch": 0.27, + "learning_rate": 4.71442246330568e-06, + "loss": 0.6911, + "step": 584 + }, + { + "epoch": 0.27, + "learning_rate": 4.711231652839821e-06, + "loss": 0.7022, + "step": 588 + }, + { + "epoch": 0.27, + "learning_rate": 4.7080408423739636e-06, + "loss": 0.5918, + "step": 592 + }, + { + "epoch": 0.28, + "learning_rate": 4.704850031908105e-06, + "loss": 0.6012, + "step": 596 + }, + { + "epoch": 0.28, + "learning_rate": 4.7016592214422465e-06, + "loss": 0.8031, + "step": 600 + }, + { + "epoch": 0.28, + "learning_rate": 4.698468410976389e-06, + "loss": 0.7864, + "step": 604 + }, + { + "epoch": 0.28, + "learning_rate": 4.69527760051053e-06, + "loss": 0.6361, + "step": 608 + }, + { + "epoch": 0.28, + "learning_rate": 4.692086790044672e-06, + "loss": 0.6619, + "step": 612 + }, + { + "epoch": 0.29, + "learning_rate": 4.688895979578813e-06, + "loss": 0.5132, + "step": 616 + }, + { + "epoch": 0.29, + "learning_rate": 4.6857051691129555e-06, + "loss": 0.6111, + "step": 620 + }, + { + "epoch": 0.29, + "learning_rate": 4.682514358647097e-06, + "loss": 0.7884, + "step": 624 + }, + { + "epoch": 0.29, + "learning_rate": 4.6793235481812385e-06, + "loss": 0.4355, + "step": 628 + }, + { + "epoch": 0.29, + "learning_rate": 4.67613273771538e-06, + "loss": 0.7325, + "step": 632 + }, + { + "epoch": 0.29, + "learning_rate": 4.672941927249522e-06, + "loss": 0.5633, + "step": 636 + }, + { + "epoch": 0.3, + "learning_rate": 4.669751116783664e-06, + "loss": 0.6415, + "step": 640 + }, + { + "epoch": 0.3, + "learning_rate": 4.666560306317805e-06, + "loss": 0.6508, + "step": 644 + }, + { + "epoch": 0.3, + "learning_rate": 4.663369495851947e-06, + "loss": 0.5909, + "step": 648 + }, + { + "epoch": 0.3, + "learning_rate": 4.660178685386089e-06, + "loss": 0.5651, + "step": 652 + }, + { + "epoch": 0.3, + "learning_rate": 4.6569878749202305e-06, + "loss": 0.6729, + "step": 656 + }, + { + "epoch": 0.31, + "learning_rate": 4.653797064454372e-06, + "loss": 0.842, + "step": 660 + }, + { + "epoch": 0.31, + "learning_rate": 4.650606253988513e-06, + "loss": 0.5844, + "step": 664 + }, + { + "epoch": 0.31, + "learning_rate": 4.647415443522656e-06, + "loss": 0.7394, + "step": 668 + }, + { + "epoch": 0.31, + "learning_rate": 4.644224633056797e-06, + "loss": 0.6725, + "step": 672 + }, + { + "epoch": 0.31, + "learning_rate": 4.641033822590939e-06, + "loss": 0.6416, + "step": 676 + }, + { + "epoch": 0.32, + "learning_rate": 4.63784301212508e-06, + "loss": 0.7926, + "step": 680 + }, + { + "epoch": 0.32, + "learning_rate": 4.634652201659222e-06, + "loss": 0.5941, + "step": 684 + }, + { + "epoch": 0.32, + "learning_rate": 4.631461391193364e-06, + "loss": 0.9582, + "step": 688 + }, + { + "epoch": 0.32, + "learning_rate": 4.628270580727505e-06, + "loss": 0.4289, + "step": 692 + }, + { + "epoch": 0.32, + "learning_rate": 4.625079770261647e-06, + "loss": 0.6518, + "step": 696 + }, + { + "epoch": 0.32, + "learning_rate": 4.621888959795788e-06, + "loss": 0.8722, + "step": 700 + }, + { + "epoch": 0.33, + "learning_rate": 4.618698149329931e-06, + "loss": 0.5419, + "step": 704 + }, + { + "epoch": 0.33, + "learning_rate": 4.615507338864072e-06, + "loss": 0.6891, + "step": 708 + }, + { + "epoch": 0.33, + "learning_rate": 4.612316528398214e-06, + "loss": 0.5157, + "step": 712 + }, + { + "epoch": 0.33, + "learning_rate": 4.609125717932355e-06, + "loss": 0.7015, + "step": 716 + }, + { + "epoch": 0.33, + "learning_rate": 4.6059349074664965e-06, + "loss": 0.546, + "step": 720 + }, + { + "epoch": 0.34, + "learning_rate": 4.602744097000639e-06, + "loss": 0.6735, + "step": 724 + }, + { + "epoch": 0.34, + "learning_rate": 4.59955328653478e-06, + "loss": 0.5564, + "step": 728 + }, + { + "epoch": 0.34, + "learning_rate": 4.596362476068922e-06, + "loss": 0.5182, + "step": 732 + }, + { + "epoch": 0.34, + "learning_rate": 4.593171665603063e-06, + "loss": 0.4053, + "step": 736 + }, + { + "epoch": 0.34, + "learning_rate": 4.5899808551372056e-06, + "loss": 0.4039, + "step": 740 + }, + { + "epoch": 0.35, + "learning_rate": 4.586790044671347e-06, + "loss": 0.6502, + "step": 744 + }, + { + "epoch": 0.35, + "learning_rate": 4.5835992342054885e-06, + "loss": 0.8062, + "step": 748 + }, + { + "epoch": 0.35, + "learning_rate": 4.58040842373963e-06, + "loss": 0.4143, + "step": 752 + }, + { + "epoch": 0.35, + "learning_rate": 4.5772176132737715e-06, + "loss": 0.5539, + "step": 756 + }, + { + "epoch": 0.35, + "learning_rate": 4.574026802807914e-06, + "loss": 0.5926, + "step": 760 + }, + { + "epoch": 0.35, + "learning_rate": 4.570835992342055e-06, + "loss": 0.751, + "step": 764 + }, + { + "epoch": 0.36, + "learning_rate": 4.567645181876197e-06, + "loss": 0.5886, + "step": 768 + }, + { + "epoch": 0.36, + "learning_rate": 4.564454371410338e-06, + "loss": 0.677, + "step": 772 + }, + { + "epoch": 0.36, + "learning_rate": 4.5612635609444805e-06, + "loss": 0.7097, + "step": 776 + }, + { + "epoch": 0.36, + "learning_rate": 4.558072750478622e-06, + "loss": 0.56, + "step": 780 + }, + { + "epoch": 0.36, + "learning_rate": 4.5548819400127634e-06, + "loss": 0.4481, + "step": 784 + }, + { + "epoch": 0.37, + "learning_rate": 4.551691129546905e-06, + "loss": 0.4959, + "step": 788 + }, + { + "epoch": 0.37, + "learning_rate": 4.548500319081046e-06, + "loss": 0.8399, + "step": 792 + }, + { + "epoch": 0.37, + "learning_rate": 4.545309508615189e-06, + "loss": 0.6904, + "step": 796 + }, + { + "epoch": 0.37, + "learning_rate": 4.54211869814933e-06, + "loss": 0.8689, + "step": 800 + }, + { + "epoch": 0.37, + "learning_rate": 4.538927887683472e-06, + "loss": 0.6232, + "step": 804 + }, + { + "epoch": 0.37, + "learning_rate": 4.535737077217613e-06, + "loss": 0.6428, + "step": 808 + }, + { + "epoch": 0.38, + "learning_rate": 4.5325462667517554e-06, + "loss": 0.7462, + "step": 812 + }, + { + "epoch": 0.38, + "learning_rate": 4.529355456285897e-06, + "loss": 0.529, + "step": 816 + }, + { + "epoch": 0.38, + "learning_rate": 4.526164645820038e-06, + "loss": 0.4875, + "step": 820 + }, + { + "epoch": 0.38, + "learning_rate": 4.52297383535418e-06, + "loss": 0.6747, + "step": 824 + }, + { + "epoch": 0.38, + "learning_rate": 4.519783024888322e-06, + "loss": 0.7061, + "step": 828 + }, + { + "epoch": 0.39, + "learning_rate": 4.516592214422464e-06, + "loss": 0.7865, + "step": 832 + }, + { + "epoch": 0.39, + "learning_rate": 4.513401403956605e-06, + "loss": 0.5122, + "step": 836 + }, + { + "epoch": 0.39, + "learning_rate": 4.5102105934907466e-06, + "loss": 0.4014, + "step": 840 + }, + { + "epoch": 0.39, + "learning_rate": 4.507019783024889e-06, + "loss": 0.7509, + "step": 844 + }, + { + "epoch": 0.39, + "learning_rate": 4.50382897255903e-06, + "loss": 0.8073, + "step": 848 + }, + { + "epoch": 0.4, + "learning_rate": 4.500638162093172e-06, + "loss": 0.3459, + "step": 852 + }, + { + "epoch": 0.4, + "learning_rate": 4.497447351627314e-06, + "loss": 0.6814, + "step": 856 + }, + { + "epoch": 0.4, + "learning_rate": 4.494256541161456e-06, + "loss": 1.1027, + "step": 860 + }, + { + "epoch": 0.4, + "learning_rate": 4.491065730695597e-06, + "loss": 0.5254, + "step": 864 + }, + { + "epoch": 0.4, + "learning_rate": 4.4878749202297385e-06, + "loss": 0.7436, + "step": 868 + }, + { + "epoch": 0.4, + "learning_rate": 4.484684109763881e-06, + "loss": 0.4877, + "step": 872 + }, + { + "epoch": 0.41, + "learning_rate": 4.481493299298022e-06, + "loss": 0.657, + "step": 876 + }, + { + "epoch": 0.41, + "learning_rate": 4.478302488832164e-06, + "loss": 0.7193, + "step": 880 + }, + { + "epoch": 0.41, + "learning_rate": 4.475111678366305e-06, + "loss": 0.5461, + "step": 884 + }, + { + "epoch": 0.41, + "learning_rate": 4.471920867900448e-06, + "loss": 0.5707, + "step": 888 + }, + { + "epoch": 0.41, + "learning_rate": 4.468730057434589e-06, + "loss": 0.9755, + "step": 892 + }, + { + "epoch": 0.42, + "learning_rate": 4.4655392469687305e-06, + "loss": 0.551, + "step": 896 + }, + { + "epoch": 0.42, + "learning_rate": 4.462348436502872e-06, + "loss": 0.499, + "step": 900 + }, + { + "epoch": 0.42, + "learning_rate": 4.459157626037014e-06, + "loss": 0.4268, + "step": 904 + }, + { + "epoch": 0.42, + "learning_rate": 4.455966815571156e-06, + "loss": 0.6658, + "step": 908 + }, + { + "epoch": 0.42, + "learning_rate": 4.452776005105297e-06, + "loss": 0.5642, + "step": 912 + }, + { + "epoch": 0.42, + "learning_rate": 4.449585194639439e-06, + "loss": 0.6943, + "step": 916 + }, + { + "epoch": 0.43, + "learning_rate": 4.446394384173581e-06, + "loss": 0.5404, + "step": 920 + }, + { + "epoch": 0.43, + "learning_rate": 4.4432035737077225e-06, + "loss": 0.7934, + "step": 924 + }, + { + "epoch": 0.43, + "learning_rate": 4.440012763241864e-06, + "loss": 0.7138, + "step": 928 + }, + { + "epoch": 0.43, + "learning_rate": 4.4368219527760055e-06, + "loss": 0.5249, + "step": 932 + }, + { + "epoch": 0.43, + "learning_rate": 4.433631142310147e-06, + "loss": 0.9614, + "step": 936 + }, + { + "epoch": 0.44, + "learning_rate": 4.430440331844289e-06, + "loss": 0.5915, + "step": 940 + }, + { + "epoch": 0.44, + "learning_rate": 4.427249521378431e-06, + "loss": 0.6766, + "step": 944 + }, + { + "epoch": 0.44, + "learning_rate": 4.424058710912572e-06, + "loss": 0.6641, + "step": 948 + }, + { + "epoch": 0.44, + "learning_rate": 4.420867900446714e-06, + "loss": 0.4849, + "step": 952 + }, + { + "epoch": 0.44, + "learning_rate": 4.417677089980856e-06, + "loss": 0.7182, + "step": 956 + }, + { + "epoch": 0.45, + "learning_rate": 4.4144862795149974e-06, + "loss": 0.6782, + "step": 960 + }, + { + "epoch": 0.45, + "learning_rate": 4.411295469049139e-06, + "loss": 0.4837, + "step": 964 + }, + { + "epoch": 0.45, + "learning_rate": 4.40810465858328e-06, + "loss": 0.7323, + "step": 968 + }, + { + "epoch": 0.45, + "learning_rate": 4.404913848117422e-06, + "loss": 0.5807, + "step": 972 + }, + { + "epoch": 0.45, + "learning_rate": 4.401723037651564e-06, + "loss": 0.373, + "step": 976 + }, + { + "epoch": 0.45, + "learning_rate": 4.398532227185706e-06, + "loss": 0.5072, + "step": 980 + }, + { + "epoch": 0.46, + "learning_rate": 4.395341416719847e-06, + "loss": 0.5952, + "step": 984 + }, + { + "epoch": 0.46, + "learning_rate": 4.392150606253989e-06, + "loss": 0.549, + "step": 988 + }, + { + "epoch": 0.46, + "learning_rate": 4.388959795788131e-06, + "loss": 0.5918, + "step": 992 + }, + { + "epoch": 0.46, + "learning_rate": 4.385768985322272e-06, + "loss": 0.4411, + "step": 996 + }, + { + "epoch": 0.46, + "learning_rate": 4.382578174856414e-06, + "loss": 0.7001, + "step": 1000 + }, + { + "epoch": 0.47, + "learning_rate": 4.379387364390555e-06, + "loss": 0.744, + "step": 1004 + }, + { + "epoch": 0.47, + "learning_rate": 4.376196553924697e-06, + "loss": 0.4091, + "step": 1008 + }, + { + "epoch": 0.47, + "learning_rate": 4.373005743458839e-06, + "loss": 0.7464, + "step": 1012 + }, + { + "epoch": 0.47, + "learning_rate": 4.3698149329929806e-06, + "loss": 0.6164, + "step": 1016 + }, + { + "epoch": 0.47, + "learning_rate": 4.366624122527122e-06, + "loss": 0.6213, + "step": 1020 + }, + { + "epoch": 0.47, + "learning_rate": 4.3634333120612635e-06, + "loss": 0.6991, + "step": 1024 + }, + { + "epoch": 0.48, + "learning_rate": 4.360242501595406e-06, + "loss": 0.5268, + "step": 1028 + }, + { + "epoch": 0.48, + "learning_rate": 4.357051691129547e-06, + "loss": 0.7768, + "step": 1032 + }, + { + "epoch": 0.48, + "learning_rate": 4.353860880663689e-06, + "loss": 0.9204, + "step": 1036 + }, + { + "epoch": 0.48, + "learning_rate": 4.35067007019783e-06, + "loss": 0.5844, + "step": 1040 + }, + { + "epoch": 0.48, + "learning_rate": 4.347479259731972e-06, + "loss": 0.5198, + "step": 1044 + }, + { + "epoch": 0.49, + "learning_rate": 4.344288449266114e-06, + "loss": 0.3069, + "step": 1048 + }, + { + "epoch": 0.49, + "learning_rate": 4.3410976388002555e-06, + "loss": 0.5465, + "step": 1052 + }, + { + "epoch": 0.49, + "learning_rate": 4.337906828334397e-06, + "loss": 0.4729, + "step": 1056 + }, + { + "epoch": 0.49, + "learning_rate": 4.3347160178685384e-06, + "loss": 0.6514, + "step": 1060 + }, + { + "epoch": 0.49, + "learning_rate": 4.331525207402681e-06, + "loss": 0.8142, + "step": 1064 + }, + { + "epoch": 0.5, + "learning_rate": 4.328334396936822e-06, + "loss": 0.6477, + "step": 1068 + }, + { + "epoch": 0.5, + "learning_rate": 4.325143586470964e-06, + "loss": 0.4601, + "step": 1072 + }, + { + "epoch": 0.5, + "learning_rate": 4.321952776005105e-06, + "loss": 0.6687, + "step": 1076 + }, + { + "epoch": 0.5, + "learning_rate": 4.3187619655392475e-06, + "loss": 0.4565, + "step": 1080 + }, + { + "epoch": 0.5, + "learning_rate": 4.315571155073389e-06, + "loss": 0.646, + "step": 1084 + }, + { + "epoch": 0.5, + "learning_rate": 4.31238034460753e-06, + "loss": 0.6145, + "step": 1088 + }, + { + "epoch": 0.51, + "learning_rate": 4.309189534141672e-06, + "loss": 0.3854, + "step": 1092 + }, + { + "epoch": 0.51, + "learning_rate": 4.305998723675814e-06, + "loss": 0.6016, + "step": 1096 + }, + { + "epoch": 0.51, + "learning_rate": 4.302807913209956e-06, + "loss": 0.5223, + "step": 1100 + }, + { + "epoch": 0.51, + "learning_rate": 4.299617102744097e-06, + "loss": 0.6356, + "step": 1104 + }, + { + "epoch": 0.51, + "learning_rate": 4.2964262922782395e-06, + "loss": 0.4599, + "step": 1108 + }, + { + "epoch": 0.52, + "learning_rate": 4.293235481812381e-06, + "loss": 0.6452, + "step": 1112 + }, + { + "epoch": 0.52, + "learning_rate": 4.290044671346522e-06, + "loss": 0.386, + "step": 1116 + }, + { + "epoch": 0.52, + "learning_rate": 4.286853860880664e-06, + "loss": 0.6384, + "step": 1120 + }, + { + "epoch": 0.52, + "learning_rate": 4.283663050414806e-06, + "loss": 0.7654, + "step": 1124 + }, + { + "epoch": 0.52, + "learning_rate": 4.280472239948948e-06, + "loss": 0.6019, + "step": 1128 + }, + { + "epoch": 0.53, + "learning_rate": 4.277281429483089e-06, + "loss": 0.6078, + "step": 1132 + }, + { + "epoch": 0.53, + "learning_rate": 4.274090619017231e-06, + "loss": 0.5181, + "step": 1136 + }, + { + "epoch": 0.53, + "learning_rate": 4.270899808551373e-06, + "loss": 0.6731, + "step": 1140 + }, + { + "epoch": 0.53, + "learning_rate": 4.267708998085514e-06, + "loss": 0.4956, + "step": 1144 + }, + { + "epoch": 0.53, + "learning_rate": 4.264518187619656e-06, + "loss": 0.6115, + "step": 1148 + }, + { + "epoch": 0.53, + "learning_rate": 4.261327377153797e-06, + "loss": 0.7712, + "step": 1152 + }, + { + "epoch": 0.54, + "learning_rate": 4.25813656668794e-06, + "loss": 0.5086, + "step": 1156 + }, + { + "epoch": 0.54, + "learning_rate": 4.254945756222081e-06, + "loss": 0.7241, + "step": 1160 + }, + { + "epoch": 0.54, + "learning_rate": 4.251754945756223e-06, + "loss": 0.5275, + "step": 1164 + }, + { + "epoch": 0.54, + "learning_rate": 4.248564135290364e-06, + "loss": 0.7552, + "step": 1168 + }, + { + "epoch": 0.54, + "learning_rate": 4.245373324824506e-06, + "loss": 0.4292, + "step": 1172 + }, + { + "epoch": 0.55, + "learning_rate": 4.242182514358648e-06, + "loss": 0.7575, + "step": 1176 + }, + { + "epoch": 0.55, + "learning_rate": 4.238991703892789e-06, + "loss": 0.5653, + "step": 1180 + }, + { + "epoch": 0.55, + "learning_rate": 4.235800893426931e-06, + "loss": 0.6882, + "step": 1184 + }, + { + "epoch": 0.55, + "learning_rate": 4.232610082961072e-06, + "loss": 0.6488, + "step": 1188 + }, + { + "epoch": 0.55, + "learning_rate": 4.2294192724952146e-06, + "loss": 0.5522, + "step": 1192 + }, + { + "epoch": 0.55, + "learning_rate": 4.226228462029356e-06, + "loss": 0.578, + "step": 1196 + }, + { + "epoch": 0.56, + "learning_rate": 4.2230376515634975e-06, + "loss": 0.7412, + "step": 1200 + }, + { + "epoch": 0.56, + "learning_rate": 4.219846841097639e-06, + "loss": 0.5138, + "step": 1204 + }, + { + "epoch": 0.56, + "learning_rate": 4.216656030631781e-06, + "loss": 0.6943, + "step": 1208 + }, + { + "epoch": 0.56, + "learning_rate": 4.213465220165923e-06, + "loss": 0.4599, + "step": 1212 + }, + { + "epoch": 0.56, + "learning_rate": 4.210274409700064e-06, + "loss": 0.8815, + "step": 1216 + }, + { + "epoch": 0.57, + "learning_rate": 4.207083599234206e-06, + "loss": 0.6245, + "step": 1220 + }, + { + "epoch": 0.57, + "learning_rate": 4.203892788768347e-06, + "loss": 0.5513, + "step": 1224 + }, + { + "epoch": 0.57, + "learning_rate": 4.2007019783024895e-06, + "loss": 0.4635, + "step": 1228 + }, + { + "epoch": 0.57, + "learning_rate": 4.197511167836631e-06, + "loss": 0.5711, + "step": 1232 + }, + { + "epoch": 0.57, + "learning_rate": 4.1943203573707724e-06, + "loss": 0.5078, + "step": 1236 + }, + { + "epoch": 0.58, + "learning_rate": 4.191129546904914e-06, + "loss": 0.4304, + "step": 1240 + }, + { + "epoch": 0.58, + "learning_rate": 4.187938736439056e-06, + "loss": 0.715, + "step": 1244 + }, + { + "epoch": 0.58, + "learning_rate": 4.184747925973198e-06, + "loss": 0.6305, + "step": 1248 + }, + { + "epoch": 0.58, + "learning_rate": 4.181557115507339e-06, + "loss": 0.6243, + "step": 1252 + }, + { + "epoch": 0.58, + "learning_rate": 4.178366305041481e-06, + "loss": 0.6439, + "step": 1256 + }, + { + "epoch": 0.58, + "learning_rate": 4.175175494575622e-06, + "loss": 0.4782, + "step": 1260 + }, + { + "epoch": 0.59, + "learning_rate": 4.171984684109764e-06, + "loss": 0.4523, + "step": 1264 + }, + { + "epoch": 0.59, + "learning_rate": 4.168793873643906e-06, + "loss": 0.4884, + "step": 1268 + }, + { + "epoch": 0.59, + "learning_rate": 4.165603063178047e-06, + "loss": 0.3461, + "step": 1272 + }, + { + "epoch": 0.59, + "learning_rate": 4.162412252712189e-06, + "loss": 0.2459, + "step": 1276 + }, + { + "epoch": 0.59, + "learning_rate": 4.159221442246331e-06, + "loss": 0.8138, + "step": 1280 + }, + { + "epoch": 0.6, + "learning_rate": 4.156030631780473e-06, + "loss": 0.6026, + "step": 1284 + }, + { + "epoch": 0.6, + "learning_rate": 4.152839821314614e-06, + "loss": 0.5463, + "step": 1288 + }, + { + "epoch": 0.6, + "learning_rate": 4.1496490108487556e-06, + "loss": 0.4317, + "step": 1292 + }, + { + "epoch": 0.6, + "learning_rate": 4.146458200382897e-06, + "loss": 0.6244, + "step": 1296 + }, + { + "epoch": 0.6, + "learning_rate": 4.143267389917039e-06, + "loss": 0.554, + "step": 1300 + }, + { + "epoch": 0.6, + "learning_rate": 4.140076579451181e-06, + "loss": 0.6441, + "step": 1304 + }, + { + "epoch": 0.61, + "learning_rate": 4.136885768985322e-06, + "loss": 0.6233, + "step": 1308 + }, + { + "epoch": 0.61, + "learning_rate": 4.133694958519464e-06, + "loss": 0.5561, + "step": 1312 + }, + { + "epoch": 0.61, + "learning_rate": 4.130504148053606e-06, + "loss": 0.7524, + "step": 1316 + }, + { + "epoch": 0.61, + "learning_rate": 4.1273133375877475e-06, + "loss": 0.4338, + "step": 1320 + }, + { + "epoch": 0.61, + "learning_rate": 4.124122527121889e-06, + "loss": 0.4495, + "step": 1324 + }, + { + "epoch": 0.62, + "learning_rate": 4.1209317166560305e-06, + "loss": 0.5139, + "step": 1328 + }, + { + "epoch": 0.62, + "learning_rate": 4.117740906190173e-06, + "loss": 0.6545, + "step": 1332 + }, + { + "epoch": 0.62, + "learning_rate": 4.114550095724314e-06, + "loss": 0.5588, + "step": 1336 + }, + { + "epoch": 0.62, + "learning_rate": 4.111359285258456e-06, + "loss": 0.609, + "step": 1340 + }, + { + "epoch": 0.62, + "learning_rate": 4.108168474792597e-06, + "loss": 0.553, + "step": 1344 + }, + { + "epoch": 0.63, + "learning_rate": 4.1049776643267395e-06, + "loss": 0.5844, + "step": 1348 + }, + { + "epoch": 0.63, + "learning_rate": 4.101786853860881e-06, + "loss": 0.5779, + "step": 1352 + }, + { + "epoch": 0.63, + "learning_rate": 4.0985960433950225e-06, + "loss": 0.4207, + "step": 1356 + }, + { + "epoch": 0.63, + "learning_rate": 4.095405232929165e-06, + "loss": 0.4617, + "step": 1360 + }, + { + "epoch": 0.63, + "learning_rate": 4.092214422463306e-06, + "loss": 0.6092, + "step": 1364 + }, + { + "epoch": 0.63, + "learning_rate": 4.089023611997448e-06, + "loss": 0.4607, + "step": 1368 + }, + { + "epoch": 0.64, + "learning_rate": 4.085832801531589e-06, + "loss": 0.4239, + "step": 1372 + }, + { + "epoch": 0.64, + "learning_rate": 4.0826419910657315e-06, + "loss": 0.5438, + "step": 1376 + }, + { + "epoch": 0.64, + "learning_rate": 4.079451180599873e-06, + "loss": 0.5006, + "step": 1380 + }, + { + "epoch": 0.64, + "learning_rate": 4.0762603701340144e-06, + "loss": 0.6889, + "step": 1384 + }, + { + "epoch": 0.64, + "learning_rate": 4.073069559668156e-06, + "loss": 0.5742, + "step": 1388 + }, + { + "epoch": 0.65, + "learning_rate": 4.069878749202298e-06, + "loss": 0.8366, + "step": 1392 + }, + { + "epoch": 0.65, + "learning_rate": 4.06668793873644e-06, + "loss": 0.5182, + "step": 1396 + }, + { + "epoch": 0.65, + "learning_rate": 4.063497128270581e-06, + "loss": 0.4807, + "step": 1400 + }, + { + "epoch": 0.65, + "learning_rate": 4.060306317804723e-06, + "loss": 0.3995, + "step": 1404 + }, + { + "epoch": 0.65, + "learning_rate": 4.057115507338865e-06, + "loss": 0.5958, + "step": 1408 + }, + { + "epoch": 0.65, + "learning_rate": 4.0539246968730064e-06, + "loss": 0.4855, + "step": 1412 + }, + { + "epoch": 0.66, + "learning_rate": 4.050733886407148e-06, + "loss": 0.5908, + "step": 1416 + }, + { + "epoch": 0.66, + "learning_rate": 4.047543075941289e-06, + "loss": 0.7867, + "step": 1420 + }, + { + "epoch": 0.66, + "learning_rate": 4.044352265475432e-06, + "loss": 0.7617, + "step": 1424 + }, + { + "epoch": 0.66, + "learning_rate": 4.041161455009573e-06, + "loss": 0.4752, + "step": 1428 + }, + { + "epoch": 0.66, + "learning_rate": 4.037970644543715e-06, + "loss": 0.4732, + "step": 1432 + }, + { + "epoch": 0.67, + "learning_rate": 4.034779834077856e-06, + "loss": 0.635, + "step": 1436 + }, + { + "epoch": 0.67, + "learning_rate": 4.0315890236119976e-06, + "loss": 0.4924, + "step": 1440 + }, + { + "epoch": 0.67, + "learning_rate": 4.02839821314614e-06, + "loss": 0.4416, + "step": 1444 + }, + { + "epoch": 0.67, + "learning_rate": 4.025207402680281e-06, + "loss": 0.4448, + "step": 1448 + }, + { + "epoch": 0.67, + "learning_rate": 4.022016592214423e-06, + "loss": 0.7631, + "step": 1452 + }, + { + "epoch": 0.68, + "learning_rate": 4.018825781748564e-06, + "loss": 0.5035, + "step": 1456 + }, + { + "epoch": 0.68, + "learning_rate": 4.015634971282707e-06, + "loss": 0.3779, + "step": 1460 + }, + { + "epoch": 0.68, + "learning_rate": 4.012444160816848e-06, + "loss": 0.4924, + "step": 1464 + }, + { + "epoch": 0.68, + "learning_rate": 4.0092533503509896e-06, + "loss": 0.3932, + "step": 1468 + }, + { + "epoch": 0.68, + "learning_rate": 4.006062539885131e-06, + "loss": 0.6974, + "step": 1472 + }, + { + "epoch": 0.68, + "learning_rate": 4.0028717294192725e-06, + "loss": 0.7347, + "step": 1476 + }, + { + "epoch": 0.69, + "learning_rate": 3.999680918953415e-06, + "loss": 0.5564, + "step": 1480 + }, + { + "epoch": 0.69, + "learning_rate": 3.996490108487556e-06, + "loss": 0.4424, + "step": 1484 + }, + { + "epoch": 0.69, + "learning_rate": 3.993299298021698e-06, + "loss": 0.5323, + "step": 1488 + }, + { + "epoch": 0.69, + "learning_rate": 3.990108487555839e-06, + "loss": 0.6138, + "step": 1492 + }, + { + "epoch": 0.69, + "learning_rate": 3.9869176770899815e-06, + "loss": 0.5156, + "step": 1496 + }, + { + "epoch": 0.7, + "learning_rate": 3.983726866624123e-06, + "loss": 0.282, + "step": 1500 + }, + { + "epoch": 0.7, + "learning_rate": 3.9805360561582645e-06, + "loss": 0.5392, + "step": 1504 + }, + { + "epoch": 0.7, + "learning_rate": 3.977345245692406e-06, + "loss": 0.5721, + "step": 1508 + }, + { + "epoch": 0.7, + "learning_rate": 3.974154435226547e-06, + "loss": 0.6967, + "step": 1512 + }, + { + "epoch": 0.7, + "learning_rate": 3.97096362476069e-06, + "loss": 0.5348, + "step": 1516 + }, + { + "epoch": 0.71, + "learning_rate": 3.967772814294831e-06, + "loss": 0.6884, + "step": 1520 + }, + { + "epoch": 0.71, + "learning_rate": 3.964582003828973e-06, + "loss": 0.5065, + "step": 1524 + }, + { + "epoch": 0.71, + "learning_rate": 3.961391193363114e-06, + "loss": 0.4505, + "step": 1528 + }, + { + "epoch": 0.71, + "learning_rate": 3.9582003828972565e-06, + "loss": 0.6881, + "step": 1532 + }, + { + "epoch": 0.71, + "learning_rate": 3.955009572431398e-06, + "loss": 0.5952, + "step": 1536 + }, + { + "epoch": 0.71, + "learning_rate": 3.951818761965539e-06, + "loss": 0.5656, + "step": 1540 + }, + { + "epoch": 0.72, + "learning_rate": 3.948627951499681e-06, + "loss": 0.6437, + "step": 1544 + }, + { + "epoch": 0.72, + "learning_rate": 3.945437141033822e-06, + "loss": 0.5179, + "step": 1548 + }, + { + "epoch": 0.72, + "learning_rate": 3.942246330567965e-06, + "loss": 0.5278, + "step": 1552 + }, + { + "epoch": 0.72, + "learning_rate": 3.939055520102106e-06, + "loss": 0.6951, + "step": 1556 + }, + { + "epoch": 0.72, + "learning_rate": 3.935864709636248e-06, + "loss": 0.5468, + "step": 1560 + }, + { + "epoch": 0.73, + "learning_rate": 3.932673899170389e-06, + "loss": 0.5132, + "step": 1564 + }, + { + "epoch": 0.73, + "learning_rate": 3.929483088704531e-06, + "loss": 0.6297, + "step": 1568 + }, + { + "epoch": 0.73, + "learning_rate": 3.926292278238673e-06, + "loss": 0.5472, + "step": 1572 + }, + { + "epoch": 0.73, + "learning_rate": 3.923101467772814e-06, + "loss": 0.6623, + "step": 1576 + }, + { + "epoch": 0.73, + "learning_rate": 3.919910657306956e-06, + "loss": 0.6216, + "step": 1580 + }, + { + "epoch": 0.73, + "learning_rate": 3.916719846841098e-06, + "loss": 0.5332, + "step": 1584 + }, + { + "epoch": 0.74, + "learning_rate": 3.91352903637524e-06, + "loss": 0.4792, + "step": 1588 + }, + { + "epoch": 0.74, + "learning_rate": 3.910338225909381e-06, + "loss": 0.4573, + "step": 1592 + }, + { + "epoch": 0.74, + "learning_rate": 3.9071474154435225e-06, + "loss": 0.5135, + "step": 1596 + }, + { + "epoch": 0.74, + "learning_rate": 3.903956604977665e-06, + "loss": 0.7619, + "step": 1600 + }, + { + "epoch": 0.74, + "learning_rate": 3.900765794511806e-06, + "loss": 0.6681, + "step": 1604 + }, + { + "epoch": 0.75, + "learning_rate": 3.897574984045948e-06, + "loss": 0.7789, + "step": 1608 + }, + { + "epoch": 0.75, + "learning_rate": 3.89438417358009e-06, + "loss": 0.6078, + "step": 1612 + }, + { + "epoch": 0.75, + "learning_rate": 3.8911933631142316e-06, + "loss": 0.4812, + "step": 1616 + }, + { + "epoch": 0.75, + "learning_rate": 3.888002552648373e-06, + "loss": 0.5893, + "step": 1620 + }, + { + "epoch": 0.75, + "learning_rate": 3.8848117421825145e-06, + "loss": 0.4775, + "step": 1624 + }, + { + "epoch": 0.76, + "learning_rate": 3.881620931716657e-06, + "loss": 0.5012, + "step": 1628 + }, + { + "epoch": 0.76, + "learning_rate": 3.878430121250798e-06, + "loss": 0.4752, + "step": 1632 + }, + { + "epoch": 0.76, + "learning_rate": 3.87523931078494e-06, + "loss": 0.4365, + "step": 1636 + }, + { + "epoch": 0.76, + "learning_rate": 3.872048500319081e-06, + "loss": 0.6722, + "step": 1640 + }, + { + "epoch": 0.76, + "learning_rate": 3.8688576898532236e-06, + "loss": 0.6083, + "step": 1644 + }, + { + "epoch": 0.76, + "learning_rate": 3.865666879387365e-06, + "loss": 0.4533, + "step": 1648 + }, + { + "epoch": 0.77, + "learning_rate": 3.8624760689215065e-06, + "loss": 0.5879, + "step": 1652 + }, + { + "epoch": 0.77, + "learning_rate": 3.859285258455648e-06, + "loss": 0.6564, + "step": 1656 + }, + { + "epoch": 0.77, + "learning_rate": 3.85609444798979e-06, + "loss": 0.5475, + "step": 1660 + }, + { + "epoch": 0.77, + "learning_rate": 3.852903637523932e-06, + "loss": 0.5018, + "step": 1664 + }, + { + "epoch": 0.77, + "learning_rate": 3.849712827058073e-06, + "loss": 0.4544, + "step": 1668 + }, + { + "epoch": 0.78, + "learning_rate": 3.846522016592215e-06, + "loss": 0.6603, + "step": 1672 + }, + { + "epoch": 0.78, + "learning_rate": 3.843331206126357e-06, + "loss": 0.6887, + "step": 1676 + }, + { + "epoch": 0.78, + "learning_rate": 3.8401403956604985e-06, + "loss": 0.7819, + "step": 1680 + }, + { + "epoch": 0.78, + "learning_rate": 3.83694958519464e-06, + "loss": 0.5052, + "step": 1684 + }, + { + "epoch": 0.78, + "learning_rate": 3.833758774728781e-06, + "loss": 0.6689, + "step": 1688 + }, + { + "epoch": 0.78, + "learning_rate": 3.830567964262923e-06, + "loss": 0.5564, + "step": 1692 + }, + { + "epoch": 0.79, + "learning_rate": 3.827377153797065e-06, + "loss": 0.3658, + "step": 1696 + }, + { + "epoch": 0.79, + "learning_rate": 3.824186343331207e-06, + "loss": 0.6376, + "step": 1700 + }, + { + "epoch": 0.79, + "learning_rate": 3.820995532865348e-06, + "loss": 0.5681, + "step": 1704 + }, + { + "epoch": 0.79, + "learning_rate": 3.81780472239949e-06, + "loss": 0.5974, + "step": 1708 + }, + { + "epoch": 0.79, + "learning_rate": 3.814613911933632e-06, + "loss": 0.5623, + "step": 1712 + }, + { + "epoch": 0.8, + "learning_rate": 3.8114231014677734e-06, + "loss": 0.6437, + "step": 1716 + }, + { + "epoch": 0.8, + "learning_rate": 3.808232291001915e-06, + "loss": 0.6442, + "step": 1720 + }, + { + "epoch": 0.8, + "learning_rate": 3.8050414805360563e-06, + "loss": 0.4729, + "step": 1724 + }, + { + "epoch": 0.8, + "learning_rate": 3.801850670070198e-06, + "loss": 0.3677, + "step": 1728 + }, + { + "epoch": 0.8, + "learning_rate": 3.79865985960434e-06, + "loss": 0.4295, + "step": 1732 + }, + { + "epoch": 0.81, + "learning_rate": 3.7954690491384816e-06, + "loss": 0.6049, + "step": 1736 + }, + { + "epoch": 0.81, + "learning_rate": 3.792278238672623e-06, + "loss": 0.6363, + "step": 1740 + }, + { + "epoch": 0.81, + "learning_rate": 3.7890874282067645e-06, + "loss": 0.5939, + "step": 1744 + }, + { + "epoch": 0.81, + "learning_rate": 3.785896617740907e-06, + "loss": 0.5011, + "step": 1748 + }, + { + "epoch": 0.81, + "learning_rate": 3.7827058072750483e-06, + "loss": 0.5177, + "step": 1752 + }, + { + "epoch": 0.81, + "learning_rate": 3.77951499680919e-06, + "loss": 0.7722, + "step": 1756 + }, + { + "epoch": 0.82, + "learning_rate": 3.7763241863433313e-06, + "loss": 0.5204, + "step": 1760 + }, + { + "epoch": 0.82, + "learning_rate": 3.773133375877473e-06, + "loss": 0.455, + "step": 1764 + }, + { + "epoch": 0.82, + "learning_rate": 3.769942565411615e-06, + "loss": 0.5397, + "step": 1768 + }, + { + "epoch": 0.82, + "learning_rate": 3.7667517549457565e-06, + "loss": 0.5528, + "step": 1772 + }, + { + "epoch": 0.82, + "learning_rate": 3.763560944479898e-06, + "loss": 0.5286, + "step": 1776 + }, + { + "epoch": 0.83, + "learning_rate": 3.76037013401404e-06, + "loss": 0.5475, + "step": 1780 + }, + { + "epoch": 0.83, + "learning_rate": 3.7571793235481818e-06, + "loss": 0.3887, + "step": 1784 + }, + { + "epoch": 0.83, + "learning_rate": 3.7539885130823233e-06, + "loss": 0.6288, + "step": 1788 + }, + { + "epoch": 0.83, + "learning_rate": 3.7507977026164647e-06, + "loss": 0.5563, + "step": 1792 + }, + { + "epoch": 0.83, + "learning_rate": 3.7476068921506066e-06, + "loss": 0.6103, + "step": 1796 + }, + { + "epoch": 0.83, + "learning_rate": 3.744416081684748e-06, + "loss": 0.4141, + "step": 1800 + }, + { + "epoch": 0.84, + "learning_rate": 3.74122527121889e-06, + "loss": 0.4075, + "step": 1804 + }, + { + "epoch": 0.84, + "learning_rate": 3.738034460753032e-06, + "loss": 0.3594, + "step": 1808 + }, + { + "epoch": 0.84, + "learning_rate": 3.7348436502871733e-06, + "loss": 0.5157, + "step": 1812 + }, + { + "epoch": 0.84, + "learning_rate": 3.731652839821315e-06, + "loss": 0.4918, + "step": 1816 + }, + { + "epoch": 0.84, + "learning_rate": 3.7284620293554563e-06, + "loss": 0.4456, + "step": 1820 + }, + { + "epoch": 0.85, + "learning_rate": 3.7252712188895986e-06, + "loss": 0.7768, + "step": 1824 + }, + { + "epoch": 0.85, + "learning_rate": 3.72208040842374e-06, + "loss": 0.7511, + "step": 1828 + }, + { + "epoch": 0.85, + "learning_rate": 3.7188895979578815e-06, + "loss": 0.4604, + "step": 1832 + }, + { + "epoch": 0.85, + "learning_rate": 3.715698787492023e-06, + "loss": 0.6048, + "step": 1836 + }, + { + "epoch": 0.85, + "learning_rate": 3.7125079770261653e-06, + "loss": 0.6261, + "step": 1840 + }, + { + "epoch": 0.86, + "learning_rate": 3.709317166560307e-06, + "loss": 0.7588, + "step": 1844 + }, + { + "epoch": 0.86, + "learning_rate": 3.7061263560944483e-06, + "loss": 0.6608, + "step": 1848 + }, + { + "epoch": 0.86, + "learning_rate": 3.7029355456285897e-06, + "loss": 0.5453, + "step": 1852 + }, + { + "epoch": 0.86, + "learning_rate": 3.6997447351627312e-06, + "loss": 0.4361, + "step": 1856 + }, + { + "epoch": 0.86, + "learning_rate": 3.6965539246968735e-06, + "loss": 0.5557, + "step": 1860 + }, + { + "epoch": 0.86, + "learning_rate": 3.693363114231015e-06, + "loss": 0.6371, + "step": 1864 + }, + { + "epoch": 0.87, + "learning_rate": 3.6901723037651565e-06, + "loss": 0.4953, + "step": 1868 + }, + { + "epoch": 0.87, + "learning_rate": 3.686981493299298e-06, + "loss": 0.4157, + "step": 1872 + }, + { + "epoch": 0.87, + "learning_rate": 3.6837906828334403e-06, + "loss": 0.5469, + "step": 1876 + }, + { + "epoch": 0.87, + "learning_rate": 3.6805998723675817e-06, + "loss": 0.4933, + "step": 1880 + }, + { + "epoch": 0.87, + "learning_rate": 3.677409061901723e-06, + "loss": 0.4994, + "step": 1884 + }, + { + "epoch": 0.88, + "learning_rate": 3.6742182514358647e-06, + "loss": 0.3726, + "step": 1888 + }, + { + "epoch": 0.88, + "learning_rate": 3.6710274409700066e-06, + "loss": 0.5413, + "step": 1892 + }, + { + "epoch": 0.88, + "learning_rate": 3.6678366305041485e-06, + "loss": 0.574, + "step": 1896 + }, + { + "epoch": 0.88, + "learning_rate": 3.66464582003829e-06, + "loss": 0.2569, + "step": 1900 + }, + { + "epoch": 0.88, + "learning_rate": 3.6614550095724314e-06, + "loss": 0.5012, + "step": 1904 + }, + { + "epoch": 0.88, + "learning_rate": 3.6582641991065733e-06, + "loss": 0.586, + "step": 1908 + }, + { + "epoch": 0.89, + "learning_rate": 3.655073388640715e-06, + "loss": 0.4588, + "step": 1912 + }, + { + "epoch": 0.89, + "learning_rate": 3.6518825781748567e-06, + "loss": 0.3745, + "step": 1916 + }, + { + "epoch": 0.89, + "learning_rate": 3.6486917677089985e-06, + "loss": 0.5444, + "step": 1920 + }, + { + "epoch": 0.89, + "learning_rate": 3.64550095724314e-06, + "loss": 0.5545, + "step": 1924 + }, + { + "epoch": 0.89, + "learning_rate": 3.6423101467772815e-06, + "loss": 0.6965, + "step": 1928 + }, + { + "epoch": 0.9, + "learning_rate": 3.6391193363114234e-06, + "loss": 0.4442, + "step": 1932 + }, + { + "epoch": 0.9, + "learning_rate": 3.6359285258455653e-06, + "loss": 0.4866, + "step": 1936 + }, + { + "epoch": 0.9, + "learning_rate": 3.6327377153797067e-06, + "loss": 0.5114, + "step": 1940 + }, + { + "epoch": 0.9, + "learning_rate": 3.6295469049138482e-06, + "loss": 0.5922, + "step": 1944 + }, + { + "epoch": 0.9, + "learning_rate": 3.62635609444799e-06, + "loss": 0.4787, + "step": 1948 + }, + { + "epoch": 0.91, + "learning_rate": 3.623165283982132e-06, + "loss": 0.6709, + "step": 1952 + }, + { + "epoch": 0.91, + "learning_rate": 3.6199744735162735e-06, + "loss": 0.5078, + "step": 1956 + }, + { + "epoch": 0.91, + "learning_rate": 3.616783663050415e-06, + "loss": 0.5999, + "step": 1960 + }, + { + "epoch": 0.91, + "learning_rate": 3.6135928525845564e-06, + "loss": 0.5051, + "step": 1964 + }, + { + "epoch": 0.91, + "learning_rate": 3.6111997447351634e-06, + "loss": 0.4373, + "step": 1968 + }, + { + "epoch": 0.91, + "learning_rate": 3.608008934269305e-06, + "loss": 0.7497, + "step": 1972 + }, + { + "epoch": 0.92, + "learning_rate": 3.6048181238034463e-06, + "loss": 0.458, + "step": 1976 + }, + { + "epoch": 0.92, + "learning_rate": 3.6016273133375878e-06, + "loss": 0.3981, + "step": 1980 + }, + { + "epoch": 0.92, + "learning_rate": 3.59843650287173e-06, + "loss": 0.4995, + "step": 1984 + }, + { + "epoch": 0.92, + "learning_rate": 3.5952456924058716e-06, + "loss": 0.493, + "step": 1988 + }, + { + "epoch": 0.92, + "learning_rate": 3.592054881940013e-06, + "loss": 0.462, + "step": 1992 + }, + { + "epoch": 0.93, + "learning_rate": 3.5888640714741545e-06, + "loss": 0.5239, + "step": 1996 + }, + { + "epoch": 0.93, + "learning_rate": 3.585673261008296e-06, + "loss": 0.4376, + "step": 2000 + }, + { + "epoch": 0.93, + "learning_rate": 3.5824824505424383e-06, + "loss": 0.6067, + "step": 2004 + }, + { + "epoch": 0.93, + "learning_rate": 3.5792916400765798e-06, + "loss": 0.4091, + "step": 2008 + }, + { + "epoch": 0.93, + "learning_rate": 3.5761008296107212e-06, + "loss": 0.5261, + "step": 2012 + }, + { + "epoch": 0.94, + "learning_rate": 3.5729100191448627e-06, + "loss": 0.5408, + "step": 2016 + }, + { + "epoch": 0.94, + "learning_rate": 3.569719208679005e-06, + "loss": 0.5867, + "step": 2020 + }, + { + "epoch": 0.94, + "learning_rate": 3.5665283982131465e-06, + "loss": 0.636, + "step": 2024 + }, + { + "epoch": 0.94, + "learning_rate": 3.563337587747288e-06, + "loss": 0.4329, + "step": 2028 + }, + { + "epoch": 0.94, + "learning_rate": 3.56014677728143e-06, + "loss": 0.7026, + "step": 2032 + }, + { + "epoch": 0.94, + "learning_rate": 3.5569559668155713e-06, + "loss": 0.5245, + "step": 2036 + }, + { + "epoch": 0.95, + "learning_rate": 3.553765156349713e-06, + "loss": 0.4929, + "step": 2040 + }, + { + "epoch": 0.95, + "learning_rate": 3.5505743458838547e-06, + "loss": 0.4876, + "step": 2044 + }, + { + "epoch": 0.95, + "learning_rate": 3.5473835354179966e-06, + "loss": 0.45, + "step": 2048 + }, + { + "epoch": 0.95, + "learning_rate": 3.544192724952138e-06, + "loss": 0.5068, + "step": 2052 + }, + { + "epoch": 0.95, + "learning_rate": 3.54100191448628e-06, + "loss": 0.5647, + "step": 2056 + }, + { + "epoch": 0.96, + "learning_rate": 3.5378111040204214e-06, + "loss": 0.5048, + "step": 2060 + }, + { + "epoch": 0.96, + "learning_rate": 3.5346202935545633e-06, + "loss": 0.457, + "step": 2064 + }, + { + "epoch": 0.96, + "learning_rate": 3.5314294830887048e-06, + "loss": 0.4089, + "step": 2068 + }, + { + "epoch": 0.96, + "learning_rate": 3.5282386726228462e-06, + "loss": 0.3521, + "step": 2072 + }, + { + "epoch": 0.96, + "learning_rate": 3.5250478621569886e-06, + "loss": 0.3477, + "step": 2076 + }, + { + "epoch": 0.96, + "learning_rate": 3.52185705169113e-06, + "loss": 0.6625, + "step": 2080 + }, + { + "epoch": 0.97, + "learning_rate": 3.5186662412252715e-06, + "loss": 0.3829, + "step": 2084 + }, + { + "epoch": 0.97, + "learning_rate": 3.515475430759413e-06, + "loss": 0.4733, + "step": 2088 + }, + { + "epoch": 0.97, + "learning_rate": 3.5122846202935553e-06, + "loss": 0.4024, + "step": 2092 + }, + { + "epoch": 0.97, + "learning_rate": 3.5090938098276968e-06, + "loss": 0.5733, + "step": 2096 + }, + { + "epoch": 0.97, + "learning_rate": 3.5059029993618382e-06, + "loss": 0.5788, + "step": 2100 + }, + { + "epoch": 0.98, + "learning_rate": 3.5027121888959797e-06, + "loss": 0.4806, + "step": 2104 + }, + { + "epoch": 0.98, + "learning_rate": 3.499521378430121e-06, + "loss": 0.5091, + "step": 2108 + }, + { + "epoch": 0.98, + "learning_rate": 3.4963305679642635e-06, + "loss": 0.6465, + "step": 2112 + }, + { + "epoch": 0.98, + "learning_rate": 3.493139757498405e-06, + "loss": 0.4821, + "step": 2116 + }, + { + "epoch": 0.98, + "learning_rate": 3.4899489470325464e-06, + "loss": 0.3563, + "step": 2120 + }, + { + "epoch": 0.99, + "learning_rate": 3.486758136566688e-06, + "loss": 0.7174, + "step": 2124 + }, + { + "epoch": 0.99, + "learning_rate": 3.4835673261008302e-06, + "loss": 0.3833, + "step": 2128 + }, + { + "epoch": 0.99, + "learning_rate": 3.4803765156349717e-06, + "loss": 0.6688, + "step": 2132 + }, + { + "epoch": 0.99, + "learning_rate": 3.477185705169113e-06, + "loss": 0.5733, + "step": 2136 + }, + { + "epoch": 0.99, + "learning_rate": 3.4739948947032546e-06, + "loss": 0.5743, + "step": 2140 + }, + { + "epoch": 0.99, + "learning_rate": 3.4708040842373965e-06, + "loss": 0.5219, + "step": 2144 + }, + { + "epoch": 1.0, + "learning_rate": 3.4676132737715384e-06, + "loss": 0.5964, + "step": 2148 + }, + { + "epoch": 1.0, + "learning_rate": 3.46442246330568e-06, + "loss": 0.5981, + "step": 2152 + }, + { + "epoch": 1.0, + "learning_rate": 3.4612316528398214e-06, + "loss": 0.4068, + "step": 2156 + }, + { + "epoch": 1.0, + "learning_rate": 3.4580408423739632e-06, + "loss": 0.3966, + "step": 2160 + }, + { + "epoch": 1.0, + "learning_rate": 3.454850031908105e-06, + "loss": 0.2291, + "step": 2164 + }, + { + "epoch": 1.01, + "learning_rate": 3.4516592214422466e-06, + "loss": 0.4695, + "step": 2168 + }, + { + "epoch": 1.01, + "learning_rate": 3.4484684109763885e-06, + "loss": 0.5594, + "step": 2172 + }, + { + "epoch": 1.01, + "learning_rate": 3.44527760051053e-06, + "loss": 0.5603, + "step": 2176 + }, + { + "epoch": 1.01, + "learning_rate": 3.4420867900446714e-06, + "loss": 0.4934, + "step": 2180 + }, + { + "epoch": 1.01, + "learning_rate": 3.4388959795788133e-06, + "loss": 0.6316, + "step": 2184 + }, + { + "epoch": 1.01, + "learning_rate": 3.4357051691129552e-06, + "loss": 0.3424, + "step": 2188 + }, + { + "epoch": 1.02, + "learning_rate": 3.4325143586470967e-06, + "loss": 0.566, + "step": 2192 + }, + { + "epoch": 1.02, + "learning_rate": 3.429323548181238e-06, + "loss": 0.3565, + "step": 2196 + }, + { + "epoch": 1.02, + "learning_rate": 3.42613273771538e-06, + "loss": 0.5191, + "step": 2200 + }, + { + "epoch": 1.02, + "learning_rate": 3.422941927249522e-06, + "loss": 0.3848, + "step": 2204 + }, + { + "epoch": 1.02, + "learning_rate": 3.4197511167836634e-06, + "loss": 0.6962, + "step": 2208 + }, + { + "epoch": 1.03, + "learning_rate": 3.416560306317805e-06, + "loss": 0.3646, + "step": 2212 + }, + { + "epoch": 1.03, + "learning_rate": 3.4133694958519464e-06, + "loss": 0.3756, + "step": 2216 + }, + { + "epoch": 1.03, + "learning_rate": 3.4101786853860887e-06, + "loss": 0.2853, + "step": 2220 + }, + { + "epoch": 1.03, + "learning_rate": 3.40698787492023e-06, + "loss": 0.2925, + "step": 2224 + }, + { + "epoch": 1.03, + "learning_rate": 3.4037970644543716e-06, + "loss": 0.3838, + "step": 2228 + }, + { + "epoch": 1.04, + "learning_rate": 3.400606253988513e-06, + "loss": 0.4479, + "step": 2232 + }, + { + "epoch": 1.04, + "learning_rate": 3.3974154435226554e-06, + "loss": 0.5207, + "step": 2236 + }, + { + "epoch": 1.04, + "learning_rate": 3.394224633056797e-06, + "loss": 0.3813, + "step": 2240 + }, + { + "epoch": 1.04, + "learning_rate": 3.3910338225909384e-06, + "loss": 0.4028, + "step": 2244 + }, + { + "epoch": 1.04, + "learning_rate": 3.38784301212508e-06, + "loss": 0.3406, + "step": 2248 + }, + { + "epoch": 1.04, + "learning_rate": 3.3846522016592213e-06, + "loss": 0.4495, + "step": 2252 + }, + { + "epoch": 1.05, + "learning_rate": 3.3814613911933636e-06, + "loss": 0.5411, + "step": 2256 + }, + { + "epoch": 1.05, + "learning_rate": 3.378270580727505e-06, + "loss": 0.3533, + "step": 2260 + }, + { + "epoch": 1.05, + "learning_rate": 3.3750797702616465e-06, + "loss": 0.5577, + "step": 2264 + }, + { + "epoch": 1.05, + "learning_rate": 3.371888959795788e-06, + "loss": 0.4198, + "step": 2268 + }, + { + "epoch": 1.05, + "learning_rate": 3.3686981493299303e-06, + "loss": 0.2956, + "step": 2272 + }, + { + "epoch": 1.06, + "learning_rate": 3.365507338864072e-06, + "loss": 0.5714, + "step": 2276 + }, + { + "epoch": 1.06, + "learning_rate": 3.3623165283982133e-06, + "loss": 0.3393, + "step": 2280 + }, + { + "epoch": 1.06, + "learning_rate": 3.359125717932355e-06, + "loss": 0.3448, + "step": 2284 + }, + { + "epoch": 1.06, + "learning_rate": 3.3559349074664966e-06, + "loss": 0.4956, + "step": 2288 + }, + { + "epoch": 1.06, + "learning_rate": 3.3527440970006385e-06, + "loss": 0.4609, + "step": 2292 + }, + { + "epoch": 1.06, + "learning_rate": 3.34955328653478e-06, + "loss": 0.4499, + "step": 2296 + }, + { + "epoch": 1.07, + "learning_rate": 3.346362476068922e-06, + "loss": 0.3638, + "step": 2300 + }, + { + "epoch": 1.07, + "learning_rate": 3.3431716656030634e-06, + "loss": 0.6062, + "step": 2304 + }, + { + "epoch": 1.07, + "learning_rate": 3.3399808551372053e-06, + "loss": 0.319, + "step": 2308 + }, + { + "epoch": 1.07, + "learning_rate": 3.3367900446713467e-06, + "loss": 0.3106, + "step": 2312 + }, + { + "epoch": 1.07, + "learning_rate": 3.3335992342054886e-06, + "loss": 0.6715, + "step": 2316 + }, + { + "epoch": 1.08, + "learning_rate": 3.33040842373963e-06, + "loss": 0.4007, + "step": 2320 + }, + { + "epoch": 1.08, + "learning_rate": 3.3272176132737716e-06, + "loss": 0.5854, + "step": 2324 + }, + { + "epoch": 1.08, + "learning_rate": 3.324026802807914e-06, + "loss": 0.4384, + "step": 2328 + }, + { + "epoch": 1.08, + "learning_rate": 3.3208359923420554e-06, + "loss": 0.5186, + "step": 2332 + }, + { + "epoch": 1.08, + "learning_rate": 3.317645181876197e-06, + "loss": 0.2793, + "step": 2336 + }, + { + "epoch": 1.09, + "learning_rate": 3.3144543714103383e-06, + "loss": 0.3945, + "step": 2340 + }, + { + "epoch": 1.09, + "learning_rate": 3.3112635609444806e-06, + "loss": 0.4488, + "step": 2344 + }, + { + "epoch": 1.09, + "learning_rate": 3.308072750478622e-06, + "loss": 0.2692, + "step": 2348 + }, + { + "epoch": 1.09, + "learning_rate": 3.3048819400127635e-06, + "loss": 0.4689, + "step": 2352 + }, + { + "epoch": 1.09, + "learning_rate": 3.301691129546905e-06, + "loss": 0.2162, + "step": 2356 + }, + { + "epoch": 1.09, + "learning_rate": 3.2985003190810465e-06, + "loss": 0.3339, + "step": 2360 + }, + { + "epoch": 1.1, + "learning_rate": 3.295309508615189e-06, + "loss": 0.5855, + "step": 2364 + }, + { + "epoch": 1.1, + "learning_rate": 3.2921186981493303e-06, + "loss": 0.4823, + "step": 2368 + }, + { + "epoch": 1.1, + "learning_rate": 3.2889278876834717e-06, + "loss": 0.3587, + "step": 2372 + }, + { + "epoch": 1.1, + "learning_rate": 3.2857370772176132e-06, + "loss": 0.3903, + "step": 2376 + }, + { + "epoch": 1.1, + "learning_rate": 3.2825462667517555e-06, + "loss": 0.5669, + "step": 2380 + }, + { + "epoch": 1.11, + "learning_rate": 3.279355456285897e-06, + "loss": 0.413, + "step": 2384 + }, + { + "epoch": 1.11, + "learning_rate": 3.2761646458200385e-06, + "loss": 0.3735, + "step": 2388 + }, + { + "epoch": 1.11, + "learning_rate": 3.27297383535418e-06, + "loss": 0.5467, + "step": 2392 + }, + { + "epoch": 1.11, + "learning_rate": 3.269783024888322e-06, + "loss": 0.3738, + "step": 2396 + }, + { + "epoch": 1.11, + "learning_rate": 3.2665922144224637e-06, + "loss": 0.4619, + "step": 2400 + }, + { + "epoch": 1.12, + "learning_rate": 3.263401403956605e-06, + "loss": 0.3739, + "step": 2404 + }, + { + "epoch": 1.12, + "learning_rate": 3.2602105934907467e-06, + "loss": 0.4065, + "step": 2408 + }, + { + "epoch": 1.12, + "learning_rate": 3.2570197830248886e-06, + "loss": 0.3406, + "step": 2412 + }, + { + "epoch": 1.12, + "learning_rate": 3.2538289725590305e-06, + "loss": 0.4554, + "step": 2416 + }, + { + "epoch": 1.12, + "learning_rate": 3.250638162093172e-06, + "loss": 0.799, + "step": 2420 + }, + { + "epoch": 1.12, + "learning_rate": 3.247447351627314e-06, + "loss": 0.4552, + "step": 2424 + }, + { + "epoch": 1.13, + "learning_rate": 3.2442565411614553e-06, + "loss": 0.3708, + "step": 2428 + }, + { + "epoch": 1.13, + "learning_rate": 3.2410657306955968e-06, + "loss": 0.3309, + "step": 2432 + }, + { + "epoch": 1.13, + "learning_rate": 3.2378749202297387e-06, + "loss": 0.4065, + "step": 2436 + }, + { + "epoch": 1.13, + "learning_rate": 3.2346841097638806e-06, + "loss": 0.627, + "step": 2440 + }, + { + "epoch": 1.13, + "learning_rate": 3.231493299298022e-06, + "loss": 0.3551, + "step": 2444 + }, + { + "epoch": 1.14, + "learning_rate": 3.2283024888321635e-06, + "loss": 0.2465, + "step": 2448 + }, + { + "epoch": 1.14, + "learning_rate": 3.2251116783663054e-06, + "loss": 0.5104, + "step": 2452 + }, + { + "epoch": 1.14, + "learning_rate": 3.2219208679004473e-06, + "loss": 0.5923, + "step": 2456 + }, + { + "epoch": 1.14, + "learning_rate": 3.2187300574345887e-06, + "loss": 0.3455, + "step": 2460 + }, + { + "epoch": 1.14, + "learning_rate": 3.2155392469687302e-06, + "loss": 0.3767, + "step": 2464 + }, + { + "epoch": 1.14, + "learning_rate": 3.2123484365028717e-06, + "loss": 0.4728, + "step": 2468 + }, + { + "epoch": 1.15, + "learning_rate": 3.209157626037014e-06, + "loss": 0.4427, + "step": 2472 + }, + { + "epoch": 1.15, + "learning_rate": 3.2059668155711555e-06, + "loss": 0.3805, + "step": 2476 + }, + { + "epoch": 1.15, + "learning_rate": 3.202776005105297e-06, + "loss": 0.2417, + "step": 2480 + }, + { + "epoch": 1.15, + "learning_rate": 3.1995851946394384e-06, + "loss": 0.4459, + "step": 2484 + }, + { + "epoch": 1.15, + "learning_rate": 3.1963943841735807e-06, + "loss": 0.5951, + "step": 2488 + }, + { + "epoch": 1.16, + "learning_rate": 3.193203573707722e-06, + "loss": 0.4512, + "step": 2492 + }, + { + "epoch": 1.16, + "learning_rate": 3.1900127632418637e-06, + "loss": 0.4038, + "step": 2496 + }, + { + "epoch": 1.16, + "learning_rate": 3.186821952776005e-06, + "loss": 0.5716, + "step": 2500 + }, + { + "epoch": 1.16, + "learning_rate": 3.1836311423101466e-06, + "loss": 0.3122, + "step": 2504 + }, + { + "epoch": 1.16, + "learning_rate": 3.180440331844289e-06, + "loss": 0.6523, + "step": 2508 + }, + { + "epoch": 1.17, + "learning_rate": 3.1772495213784304e-06, + "loss": 0.4129, + "step": 2512 + }, + { + "epoch": 1.17, + "learning_rate": 3.174058710912572e-06, + "loss": 0.7674, + "step": 2516 + }, + { + "epoch": 1.17, + "learning_rate": 3.1708679004467138e-06, + "loss": 0.5227, + "step": 2520 + }, + { + "epoch": 1.17, + "learning_rate": 3.1676770899808557e-06, + "loss": 0.457, + "step": 2524 + }, + { + "epoch": 1.17, + "learning_rate": 3.164486279514997e-06, + "loss": 0.3279, + "step": 2528 + }, + { + "epoch": 1.17, + "learning_rate": 3.1612954690491386e-06, + "loss": 0.4809, + "step": 2532 + }, + { + "epoch": 1.18, + "learning_rate": 3.1581046585832805e-06, + "loss": 0.3513, + "step": 2536 + }, + { + "epoch": 1.18, + "learning_rate": 3.154913848117422e-06, + "loss": 0.5097, + "step": 2540 + }, + { + "epoch": 1.18, + "learning_rate": 3.151723037651564e-06, + "loss": 0.4727, + "step": 2544 + }, + { + "epoch": 1.18, + "learning_rate": 3.1485322271857053e-06, + "loss": 0.3848, + "step": 2548 + }, + { + "epoch": 1.18, + "learning_rate": 3.1453414167198472e-06, + "loss": 0.49, + "step": 2552 + }, + { + "epoch": 1.19, + "learning_rate": 3.1421506062539887e-06, + "loss": 0.4166, + "step": 2556 + }, + { + "epoch": 1.19, + "learning_rate": 3.1389597957881306e-06, + "loss": 0.4397, + "step": 2560 + }, + { + "epoch": 1.19, + "learning_rate": 3.135768985322272e-06, + "loss": 0.4295, + "step": 2564 + }, + { + "epoch": 1.19, + "learning_rate": 3.132578174856414e-06, + "loss": 0.3807, + "step": 2568 + }, + { + "epoch": 1.19, + "learning_rate": 3.1293873643905554e-06, + "loss": 0.5155, + "step": 2572 + }, + { + "epoch": 1.19, + "learning_rate": 3.126196553924697e-06, + "loss": 0.4183, + "step": 2576 + }, + { + "epoch": 1.2, + "learning_rate": 3.123005743458839e-06, + "loss": 0.4173, + "step": 2580 + }, + { + "epoch": 1.2, + "learning_rate": 3.1198149329929807e-06, + "loss": 0.5842, + "step": 2584 + }, + { + "epoch": 1.2, + "learning_rate": 3.116624122527122e-06, + "loss": 0.6673, + "step": 2588 + }, + { + "epoch": 1.2, + "learning_rate": 3.1134333120612636e-06, + "loss": 0.4492, + "step": 2592 + }, + { + "epoch": 1.2, + "learning_rate": 3.110242501595406e-06, + "loss": 0.4747, + "step": 2596 + }, + { + "epoch": 1.21, + "learning_rate": 3.1070516911295474e-06, + "loss": 0.5746, + "step": 2600 + }, + { + "epoch": 1.21, + "learning_rate": 3.103860880663689e-06, + "loss": 0.5708, + "step": 2604 + }, + { + "epoch": 1.21, + "learning_rate": 3.1006700701978303e-06, + "loss": 0.601, + "step": 2608 + }, + { + "epoch": 1.21, + "learning_rate": 3.097479259731972e-06, + "loss": 0.7205, + "step": 2612 + }, + { + "epoch": 1.21, + "learning_rate": 3.094288449266114e-06, + "loss": 0.4023, + "step": 2616 + }, + { + "epoch": 1.22, + "learning_rate": 3.0910976388002556e-06, + "loss": 0.5012, + "step": 2620 + }, + { + "epoch": 1.22, + "learning_rate": 3.087906828334397e-06, + "loss": 0.4779, + "step": 2624 + }, + { + "epoch": 1.22, + "learning_rate": 3.0847160178685385e-06, + "loss": 0.5062, + "step": 2628 + }, + { + "epoch": 1.22, + "learning_rate": 3.081525207402681e-06, + "loss": 0.2888, + "step": 2632 + }, + { + "epoch": 1.22, + "learning_rate": 3.0783343969368223e-06, + "loss": 0.4121, + "step": 2636 + }, + { + "epoch": 1.22, + "learning_rate": 3.075143586470964e-06, + "loss": 0.4721, + "step": 2640 + }, + { + "epoch": 1.23, + "learning_rate": 3.0719527760051053e-06, + "loss": 0.3422, + "step": 2644 + }, + { + "epoch": 1.23, + "learning_rate": 3.068761965539247e-06, + "loss": 0.5018, + "step": 2648 + }, + { + "epoch": 1.23, + "learning_rate": 3.065571155073389e-06, + "loss": 0.5165, + "step": 2652 + }, + { + "epoch": 1.23, + "learning_rate": 3.0623803446075305e-06, + "loss": 0.3574, + "step": 2656 + }, + { + "epoch": 1.23, + "learning_rate": 3.059189534141672e-06, + "loss": 0.579, + "step": 2660 + }, + { + "epoch": 1.24, + "learning_rate": 3.055998723675814e-06, + "loss": 0.4961, + "step": 2664 + }, + { + "epoch": 1.24, + "learning_rate": 3.0528079132099558e-06, + "loss": 0.3949, + "step": 2668 + }, + { + "epoch": 1.24, + "learning_rate": 3.0496171027440973e-06, + "loss": 0.3999, + "step": 2672 + }, + { + "epoch": 1.24, + "learning_rate": 3.046426292278239e-06, + "loss": 0.524, + "step": 2676 + }, + { + "epoch": 1.24, + "learning_rate": 3.0432354818123806e-06, + "loss": 0.3688, + "step": 2680 + }, + { + "epoch": 1.24, + "learning_rate": 3.040044671346522e-06, + "loss": 0.403, + "step": 2684 + }, + { + "epoch": 1.25, + "learning_rate": 3.036853860880664e-06, + "loss": 0.3388, + "step": 2688 + }, + { + "epoch": 1.25, + "learning_rate": 3.033663050414806e-06, + "loss": 0.4706, + "step": 2692 + }, + { + "epoch": 1.25, + "learning_rate": 3.0304722399489473e-06, + "loss": 0.6817, + "step": 2696 + }, + { + "epoch": 1.25, + "learning_rate": 3.027281429483089e-06, + "loss": 0.3896, + "step": 2700 + }, + { + "epoch": 1.25, + "learning_rate": 3.0240906190172307e-06, + "loss": 0.358, + "step": 2704 + }, + { + "epoch": 1.26, + "learning_rate": 3.0208998085513726e-06, + "loss": 0.3115, + "step": 2708 + }, + { + "epoch": 1.26, + "learning_rate": 3.017708998085514e-06, + "loss": 0.5322, + "step": 2712 + }, + { + "epoch": 1.26, + "learning_rate": 3.0145181876196555e-06, + "loss": 0.4613, + "step": 2716 + }, + { + "epoch": 1.26, + "learning_rate": 3.011327377153797e-06, + "loss": 0.4374, + "step": 2720 + }, + { + "epoch": 1.26, + "learning_rate": 3.0081365666879393e-06, + "loss": 0.4775, + "step": 2724 + }, + { + "epoch": 1.27, + "learning_rate": 3.004945756222081e-06, + "loss": 0.349, + "step": 2728 + }, + { + "epoch": 1.27, + "learning_rate": 3.0017549457562223e-06, + "loss": 0.5114, + "step": 2732 + }, + { + "epoch": 1.27, + "learning_rate": 2.9985641352903637e-06, + "loss": 0.3901, + "step": 2736 + }, + { + "epoch": 1.27, + "learning_rate": 2.995373324824506e-06, + "loss": 0.4756, + "step": 2740 + }, + { + "epoch": 1.27, + "learning_rate": 2.9921825143586475e-06, + "loss": 0.4669, + "step": 2744 + }, + { + "epoch": 1.27, + "learning_rate": 2.988991703892789e-06, + "loss": 0.5554, + "step": 2748 + }, + { + "epoch": 1.28, + "learning_rate": 2.9858008934269305e-06, + "loss": 0.3345, + "step": 2752 + }, + { + "epoch": 1.28, + "learning_rate": 2.982610082961072e-06, + "loss": 0.3653, + "step": 2756 + }, + { + "epoch": 1.28, + "learning_rate": 2.9794192724952143e-06, + "loss": 0.4543, + "step": 2760 + }, + { + "epoch": 1.28, + "learning_rate": 2.9762284620293557e-06, + "loss": 0.382, + "step": 2764 + }, + { + "epoch": 1.28, + "learning_rate": 2.973037651563497e-06, + "loss": 0.2821, + "step": 2768 + }, + { + "epoch": 1.29, + "learning_rate": 2.969846841097639e-06, + "loss": 0.4392, + "step": 2772 + }, + { + "epoch": 1.29, + "learning_rate": 2.966656030631781e-06, + "loss": 0.3785, + "step": 2776 + }, + { + "epoch": 1.29, + "learning_rate": 2.9634652201659224e-06, + "loss": 0.4799, + "step": 2780 + }, + { + "epoch": 1.29, + "learning_rate": 2.960274409700064e-06, + "loss": 0.4004, + "step": 2784 + }, + { + "epoch": 1.29, + "learning_rate": 2.957083599234206e-06, + "loss": 0.4598, + "step": 2788 + }, + { + "epoch": 1.29, + "learning_rate": 2.9538927887683473e-06, + "loss": 0.6889, + "step": 2792 + }, + { + "epoch": 1.3, + "learning_rate": 2.950701978302489e-06, + "loss": 0.3401, + "step": 2796 + }, + { + "epoch": 1.3, + "learning_rate": 2.9475111678366306e-06, + "loss": 0.5162, + "step": 2800 + }, + { + "epoch": 1.3, + "learning_rate": 2.9443203573707725e-06, + "loss": 0.3811, + "step": 2804 + }, + { + "epoch": 1.3, + "learning_rate": 2.941129546904914e-06, + "loss": 0.3048, + "step": 2808 + }, + { + "epoch": 1.3, + "learning_rate": 2.937938736439056e-06, + "loss": 0.5528, + "step": 2812 + }, + { + "epoch": 1.31, + "learning_rate": 2.9347479259731974e-06, + "loss": 0.3721, + "step": 2816 + }, + { + "epoch": 1.31, + "learning_rate": 2.9315571155073393e-06, + "loss": 0.4877, + "step": 2820 + }, + { + "epoch": 1.31, + "learning_rate": 2.9283663050414807e-06, + "loss": 0.3101, + "step": 2824 + }, + { + "epoch": 1.31, + "learning_rate": 2.925175494575622e-06, + "loss": 0.3458, + "step": 2828 + }, + { + "epoch": 1.31, + "learning_rate": 2.9219846841097645e-06, + "loss": 0.3741, + "step": 2832 + }, + { + "epoch": 1.32, + "learning_rate": 2.918793873643906e-06, + "loss": 0.7428, + "step": 2836 + }, + { + "epoch": 1.32, + "learning_rate": 2.9156030631780475e-06, + "loss": 0.3487, + "step": 2840 + }, + { + "epoch": 1.32, + "learning_rate": 2.912412252712189e-06, + "loss": 0.3184, + "step": 2844 + }, + { + "epoch": 1.32, + "learning_rate": 2.9092214422463313e-06, + "loss": 0.3778, + "step": 2848 + }, + { + "epoch": 1.32, + "learning_rate": 2.9060306317804727e-06, + "loss": 0.4085, + "step": 2852 + }, + { + "epoch": 1.32, + "learning_rate": 2.902839821314614e-06, + "loss": 0.4859, + "step": 2856 + }, + { + "epoch": 1.33, + "learning_rate": 2.8996490108487557e-06, + "loss": 0.5783, + "step": 2860 + }, + { + "epoch": 1.33, + "learning_rate": 2.896458200382897e-06, + "loss": 0.2396, + "step": 2864 + }, + { + "epoch": 1.33, + "learning_rate": 2.8932673899170395e-06, + "loss": 0.6563, + "step": 2868 + }, + { + "epoch": 1.33, + "learning_rate": 2.890076579451181e-06, + "loss": 0.3048, + "step": 2872 + }, + { + "epoch": 1.33, + "learning_rate": 2.8868857689853224e-06, + "loss": 0.3935, + "step": 2876 + }, + { + "epoch": 1.34, + "learning_rate": 2.883694958519464e-06, + "loss": 0.5063, + "step": 2880 + }, + { + "epoch": 1.34, + "learning_rate": 2.880504148053606e-06, + "loss": 0.5056, + "step": 2884 + }, + { + "epoch": 1.34, + "learning_rate": 2.8773133375877476e-06, + "loss": 0.2203, + "step": 2888 + }, + { + "epoch": 1.34, + "learning_rate": 2.874122527121889e-06, + "loss": 0.3044, + "step": 2892 + }, + { + "epoch": 1.34, + "learning_rate": 2.8709317166560306e-06, + "loss": 0.4444, + "step": 2896 + }, + { + "epoch": 1.35, + "learning_rate": 2.8677409061901725e-06, + "loss": 0.3645, + "step": 2900 + }, + { + "epoch": 1.35, + "learning_rate": 2.8645500957243144e-06, + "loss": 0.4594, + "step": 2904 + }, + { + "epoch": 1.35, + "learning_rate": 2.861359285258456e-06, + "loss": 0.4897, + "step": 2908 + }, + { + "epoch": 1.35, + "learning_rate": 2.8581684747925973e-06, + "loss": 0.4772, + "step": 2912 + }, + { + "epoch": 1.35, + "learning_rate": 2.854977664326739e-06, + "loss": 0.388, + "step": 2916 + }, + { + "epoch": 1.35, + "learning_rate": 2.851786853860881e-06, + "loss": 0.3869, + "step": 2920 + }, + { + "epoch": 1.36, + "learning_rate": 2.8485960433950226e-06, + "loss": 0.4853, + "step": 2924 + }, + { + "epoch": 1.36, + "learning_rate": 2.8454052329291645e-06, + "loss": 0.4467, + "step": 2928 + }, + { + "epoch": 1.36, + "learning_rate": 2.842214422463306e-06, + "loss": 0.2356, + "step": 2932 + }, + { + "epoch": 1.36, + "learning_rate": 2.8390236119974474e-06, + "loss": 0.4614, + "step": 2936 + }, + { + "epoch": 1.36, + "learning_rate": 2.8358328015315893e-06, + "loss": 0.3212, + "step": 2940 + }, + { + "epoch": 1.37, + "learning_rate": 2.832641991065731e-06, + "loss": 0.5037, + "step": 2944 + }, + { + "epoch": 1.37, + "learning_rate": 2.8294511805998727e-06, + "loss": 0.4957, + "step": 2948 + }, + { + "epoch": 1.37, + "learning_rate": 2.826260370134014e-06, + "loss": 0.418, + "step": 2952 + }, + { + "epoch": 1.37, + "learning_rate": 2.823069559668156e-06, + "loss": 0.2996, + "step": 2956 + }, + { + "epoch": 1.37, + "learning_rate": 2.819878749202298e-06, + "loss": 0.5421, + "step": 2960 + }, + { + "epoch": 1.37, + "learning_rate": 2.8166879387364394e-06, + "loss": 0.5049, + "step": 2964 + }, + { + "epoch": 1.38, + "learning_rate": 2.813497128270581e-06, + "loss": 0.3929, + "step": 2968 + }, + { + "epoch": 1.38, + "learning_rate": 2.8103063178047223e-06, + "loss": 0.4045, + "step": 2972 + }, + { + "epoch": 1.38, + "learning_rate": 2.8071155073388646e-06, + "loss": 0.3494, + "step": 2976 + }, + { + "epoch": 1.38, + "learning_rate": 2.803924696873006e-06, + "loss": 0.3782, + "step": 2980 + }, + { + "epoch": 1.38, + "learning_rate": 2.8007338864071476e-06, + "loss": 0.2768, + "step": 2984 + }, + { + "epoch": 1.39, + "learning_rate": 2.797543075941289e-06, + "loss": 0.531, + "step": 2988 + }, + { + "epoch": 1.39, + "learning_rate": 2.7943522654754314e-06, + "loss": 0.4958, + "step": 2992 + }, + { + "epoch": 1.39, + "learning_rate": 2.791161455009573e-06, + "loss": 0.6183, + "step": 2996 + }, + { + "epoch": 1.39, + "learning_rate": 2.7879706445437143e-06, + "loss": 0.3521, + "step": 3000 + }, + { + "epoch": 1.39, + "learning_rate": 2.7847798340778558e-06, + "loss": 0.4406, + "step": 3004 + }, + { + "epoch": 1.4, + "learning_rate": 2.7815890236119973e-06, + "loss": 0.4131, + "step": 3008 + }, + { + "epoch": 1.4, + "learning_rate": 2.7783982131461396e-06, + "loss": 0.5107, + "step": 3012 + }, + { + "epoch": 1.4, + "learning_rate": 2.775207402680281e-06, + "loss": 0.2735, + "step": 3016 + }, + { + "epoch": 1.4, + "learning_rate": 2.7720165922144225e-06, + "loss": 0.3788, + "step": 3020 + }, + { + "epoch": 1.4, + "learning_rate": 2.7696234843650286e-06, + "loss": 0.5599, + "step": 3024 + }, + { + "epoch": 1.4, + "learning_rate": 2.7664326738991705e-06, + "loss": 0.2355, + "step": 3028 + }, + { + "epoch": 1.41, + "learning_rate": 2.7632418634333124e-06, + "loss": 0.5358, + "step": 3032 + }, + { + "epoch": 1.41, + "learning_rate": 2.760051052967454e-06, + "loss": 0.3283, + "step": 3036 + }, + { + "epoch": 1.41, + "learning_rate": 2.7568602425015958e-06, + "loss": 0.4093, + "step": 3040 + }, + { + "epoch": 1.41, + "learning_rate": 2.7536694320357372e-06, + "loss": 0.287, + "step": 3044 + }, + { + "epoch": 1.41, + "learning_rate": 2.750478621569879e-06, + "loss": 0.5271, + "step": 3048 + }, + { + "epoch": 1.42, + "learning_rate": 2.7472878111040206e-06, + "loss": 0.3372, + "step": 3052 + }, + { + "epoch": 1.42, + "learning_rate": 2.7440970006381625e-06, + "loss": 0.5649, + "step": 3056 + }, + { + "epoch": 1.42, + "learning_rate": 2.740906190172304e-06, + "loss": 0.5017, + "step": 3060 + }, + { + "epoch": 1.42, + "learning_rate": 2.7377153797064454e-06, + "loss": 0.6057, + "step": 3064 + }, + { + "epoch": 1.42, + "learning_rate": 2.7345245692405873e-06, + "loss": 0.4184, + "step": 3068 + }, + { + "epoch": 1.42, + "learning_rate": 2.7313337587747292e-06, + "loss": 0.2892, + "step": 3072 + }, + { + "epoch": 1.43, + "learning_rate": 2.7281429483088707e-06, + "loss": 0.5914, + "step": 3076 + }, + { + "epoch": 1.43, + "learning_rate": 2.724952137843012e-06, + "loss": 0.472, + "step": 3080 + }, + { + "epoch": 1.43, + "learning_rate": 2.721761327377154e-06, + "loss": 0.3773, + "step": 3084 + }, + { + "epoch": 1.43, + "learning_rate": 2.718570516911296e-06, + "loss": 0.2942, + "step": 3088 + }, + { + "epoch": 1.43, + "learning_rate": 2.7153797064454374e-06, + "loss": 0.3445, + "step": 3092 + }, + { + "epoch": 1.44, + "learning_rate": 2.712188895979579e-06, + "loss": 0.2773, + "step": 3096 + }, + { + "epoch": 1.44, + "learning_rate": 2.7089980855137204e-06, + "loss": 0.4007, + "step": 3100 + }, + { + "epoch": 1.44, + "learning_rate": 2.7058072750478627e-06, + "loss": 0.3083, + "step": 3104 + }, + { + "epoch": 1.44, + "learning_rate": 2.702616464582004e-06, + "loss": 0.4782, + "step": 3108 + }, + { + "epoch": 1.44, + "learning_rate": 2.6994256541161456e-06, + "loss": 0.5419, + "step": 3112 + }, + { + "epoch": 1.45, + "learning_rate": 2.696234843650287e-06, + "loss": 0.5713, + "step": 3116 + }, + { + "epoch": 1.45, + "learning_rate": 2.6930440331844294e-06, + "loss": 0.3722, + "step": 3120 + }, + { + "epoch": 1.45, + "learning_rate": 2.689853222718571e-06, + "loss": 0.4663, + "step": 3124 + }, + { + "epoch": 1.45, + "learning_rate": 2.6866624122527123e-06, + "loss": 0.3208, + "step": 3128 + }, + { + "epoch": 1.45, + "learning_rate": 2.683471601786854e-06, + "loss": 0.351, + "step": 3132 + }, + { + "epoch": 1.45, + "learning_rate": 2.6802807913209957e-06, + "loss": 0.513, + "step": 3136 + }, + { + "epoch": 1.46, + "learning_rate": 2.6770899808551376e-06, + "loss": 0.4409, + "step": 3140 + }, + { + "epoch": 1.46, + "learning_rate": 2.673899170389279e-06, + "loss": 0.3335, + "step": 3144 + }, + { + "epoch": 1.46, + "learning_rate": 2.6707083599234205e-06, + "loss": 0.3706, + "step": 3148 + }, + { + "epoch": 1.46, + "learning_rate": 2.6675175494575624e-06, + "loss": 0.4404, + "step": 3152 + }, + { + "epoch": 1.46, + "learning_rate": 2.6643267389917043e-06, + "loss": 0.4186, + "step": 3156 + }, + { + "epoch": 1.47, + "learning_rate": 2.661135928525846e-06, + "loss": 0.3666, + "step": 3160 + }, + { + "epoch": 1.47, + "learning_rate": 2.6579451180599873e-06, + "loss": 0.3849, + "step": 3164 + }, + { + "epoch": 1.47, + "learning_rate": 2.654754307594129e-06, + "loss": 0.4564, + "step": 3168 + }, + { + "epoch": 1.47, + "learning_rate": 2.6515634971282706e-06, + "loss": 0.3534, + "step": 3172 + }, + { + "epoch": 1.47, + "learning_rate": 2.6483726866624125e-06, + "loss": 0.3735, + "step": 3176 + }, + { + "epoch": 1.47, + "learning_rate": 2.645181876196554e-06, + "loss": 0.4449, + "step": 3180 + }, + { + "epoch": 1.48, + "learning_rate": 2.641991065730696e-06, + "loss": 0.5032, + "step": 3184 + }, + { + "epoch": 1.48, + "learning_rate": 2.6388002552648374e-06, + "loss": 0.3677, + "step": 3188 + }, + { + "epoch": 1.48, + "learning_rate": 2.6356094447989793e-06, + "loss": 0.5004, + "step": 3192 + }, + { + "epoch": 1.48, + "learning_rate": 2.632418634333121e-06, + "loss": 0.1972, + "step": 3196 + }, + { + "epoch": 1.48, + "learning_rate": 2.6292278238672626e-06, + "loss": 0.4606, + "step": 3200 + }, + { + "epoch": 1.49, + "learning_rate": 2.626037013401404e-06, + "loss": 0.3533, + "step": 3204 + }, + { + "epoch": 1.49, + "learning_rate": 2.6228462029355456e-06, + "loss": 0.3607, + "step": 3208 + }, + { + "epoch": 1.49, + "learning_rate": 2.619655392469688e-06, + "loss": 0.5767, + "step": 3212 + }, + { + "epoch": 1.49, + "learning_rate": 2.6164645820038293e-06, + "loss": 0.5316, + "step": 3216 + }, + { + "epoch": 1.49, + "learning_rate": 2.613273771537971e-06, + "loss": 0.2474, + "step": 3220 + }, + { + "epoch": 1.5, + "learning_rate": 2.6100829610721123e-06, + "loss": 0.3168, + "step": 3224 + }, + { + "epoch": 1.5, + "learning_rate": 2.6068921506062546e-06, + "loss": 0.4029, + "step": 3228 + }, + { + "epoch": 1.5, + "learning_rate": 2.603701340140396e-06, + "loss": 0.2693, + "step": 3232 + }, + { + "epoch": 1.5, + "learning_rate": 2.6005105296745375e-06, + "loss": 0.3756, + "step": 3236 + }, + { + "epoch": 1.5, + "learning_rate": 2.597319719208679e-06, + "loss": 0.3712, + "step": 3240 + }, + { + "epoch": 1.5, + "learning_rate": 2.5941289087428205e-06, + "loss": 0.366, + "step": 3244 + }, + { + "epoch": 1.51, + "learning_rate": 2.590938098276963e-06, + "loss": 0.3813, + "step": 3248 + }, + { + "epoch": 1.51, + "learning_rate": 2.5877472878111043e-06, + "loss": 0.4442, + "step": 3252 + }, + { + "epoch": 1.51, + "learning_rate": 2.5845564773452457e-06, + "loss": 0.4061, + "step": 3256 + }, + { + "epoch": 1.51, + "learning_rate": 2.581365666879387e-06, + "loss": 0.3679, + "step": 3260 + }, + { + "epoch": 1.51, + "learning_rate": 2.5781748564135295e-06, + "loss": 0.2641, + "step": 3264 + }, + { + "epoch": 1.52, + "learning_rate": 2.574984045947671e-06, + "loss": 0.5656, + "step": 3268 + }, + { + "epoch": 1.52, + "learning_rate": 2.5717932354818125e-06, + "loss": 0.3672, + "step": 3272 + }, + { + "epoch": 1.52, + "learning_rate": 2.568602425015954e-06, + "loss": 0.3395, + "step": 3276 + }, + { + "epoch": 1.52, + "learning_rate": 2.565411614550096e-06, + "loss": 0.5946, + "step": 3280 + }, + { + "epoch": 1.52, + "learning_rate": 2.5622208040842377e-06, + "loss": 0.3526, + "step": 3284 + }, + { + "epoch": 1.53, + "learning_rate": 2.559029993618379e-06, + "loss": 0.3365, + "step": 3288 + }, + { + "epoch": 1.53, + "learning_rate": 2.555839183152521e-06, + "loss": 0.4003, + "step": 3292 + }, + { + "epoch": 1.53, + "learning_rate": 2.5526483726866626e-06, + "loss": 0.3994, + "step": 3296 + }, + { + "epoch": 1.53, + "learning_rate": 2.5494575622208045e-06, + "loss": 0.3623, + "step": 3300 + }, + { + "epoch": 1.53, + "learning_rate": 2.546266751754946e-06, + "loss": 0.5994, + "step": 3304 + }, + { + "epoch": 1.53, + "learning_rate": 2.543075941289088e-06, + "loss": 0.3717, + "step": 3308 + }, + { + "epoch": 1.54, + "learning_rate": 2.5398851308232293e-06, + "loss": 0.2424, + "step": 3312 + }, + { + "epoch": 1.54, + "learning_rate": 2.5366943203573708e-06, + "loss": 0.5083, + "step": 3316 + }, + { + "epoch": 1.54, + "learning_rate": 2.5335035098915127e-06, + "loss": 0.2865, + "step": 3320 + }, + { + "epoch": 1.54, + "learning_rate": 2.5303126994256545e-06, + "loss": 0.2184, + "step": 3324 + }, + { + "epoch": 1.54, + "learning_rate": 2.527121888959796e-06, + "loss": 0.5697, + "step": 3328 + }, + { + "epoch": 1.55, + "learning_rate": 2.5239310784939375e-06, + "loss": 0.3524, + "step": 3332 + }, + { + "epoch": 1.55, + "learning_rate": 2.5207402680280794e-06, + "loss": 0.3922, + "step": 3336 + }, + { + "epoch": 1.55, + "learning_rate": 2.5175494575622213e-06, + "loss": 0.3364, + "step": 3340 + }, + { + "epoch": 1.55, + "learning_rate": 2.5143586470963627e-06, + "loss": 0.3983, + "step": 3344 + }, + { + "epoch": 1.55, + "learning_rate": 2.5111678366305042e-06, + "loss": 0.3812, + "step": 3348 + }, + { + "epoch": 1.55, + "learning_rate": 2.5079770261646457e-06, + "loss": 0.3001, + "step": 3352 + }, + { + "epoch": 1.56, + "learning_rate": 2.504786215698788e-06, + "loss": 0.3159, + "step": 3356 + }, + { + "epoch": 1.56, + "learning_rate": 2.5015954052329295e-06, + "loss": 0.316, + "step": 3360 + }, + { + "epoch": 1.56, + "learning_rate": 2.498404594767071e-06, + "loss": 0.2347, + "step": 3364 + }, + { + "epoch": 1.56, + "learning_rate": 2.495213784301213e-06, + "loss": 0.4728, + "step": 3368 + }, + { + "epoch": 1.56, + "learning_rate": 2.4920229738353543e-06, + "loss": 0.4451, + "step": 3372 + }, + { + "epoch": 1.57, + "learning_rate": 2.488832163369496e-06, + "loss": 0.6876, + "step": 3376 + }, + { + "epoch": 1.57, + "learning_rate": 2.4856413529036377e-06, + "loss": 0.4799, + "step": 3380 + }, + { + "epoch": 1.57, + "learning_rate": 2.482450542437779e-06, + "loss": 0.3912, + "step": 3384 + }, + { + "epoch": 1.57, + "learning_rate": 2.479259731971921e-06, + "loss": 0.2295, + "step": 3388 + }, + { + "epoch": 1.57, + "learning_rate": 2.4760689215060625e-06, + "loss": 0.2529, + "step": 3392 + }, + { + "epoch": 1.58, + "learning_rate": 2.4728781110402044e-06, + "loss": 0.454, + "step": 3396 + }, + { + "epoch": 1.58, + "learning_rate": 2.469687300574346e-06, + "loss": 0.3894, + "step": 3400 + }, + { + "epoch": 1.58, + "learning_rate": 2.4664964901084878e-06, + "loss": 0.2908, + "step": 3404 + }, + { + "epoch": 1.58, + "learning_rate": 2.4633056796426292e-06, + "loss": 0.499, + "step": 3408 + }, + { + "epoch": 1.58, + "learning_rate": 2.460114869176771e-06, + "loss": 0.3336, + "step": 3412 + }, + { + "epoch": 1.58, + "learning_rate": 2.4569240587109126e-06, + "loss": 0.3016, + "step": 3416 + }, + { + "epoch": 1.59, + "learning_rate": 2.4537332482450545e-06, + "loss": 0.4519, + "step": 3420 + }, + { + "epoch": 1.59, + "learning_rate": 2.450542437779196e-06, + "loss": 0.2589, + "step": 3424 + }, + { + "epoch": 1.59, + "learning_rate": 2.447351627313338e-06, + "loss": 0.404, + "step": 3428 + }, + { + "epoch": 1.59, + "learning_rate": 2.4441608168474793e-06, + "loss": 0.335, + "step": 3432 + }, + { + "epoch": 1.59, + "learning_rate": 2.4409700063816212e-06, + "loss": 0.4312, + "step": 3436 + }, + { + "epoch": 1.6, + "learning_rate": 2.437779195915763e-06, + "loss": 0.2877, + "step": 3440 + }, + { + "epoch": 1.6, + "learning_rate": 2.4345883854499046e-06, + "loss": 0.3591, + "step": 3444 + }, + { + "epoch": 1.6, + "learning_rate": 2.4313975749840465e-06, + "loss": 0.3149, + "step": 3448 + }, + { + "epoch": 1.6, + "learning_rate": 2.428206764518188e-06, + "loss": 0.3785, + "step": 3452 + }, + { + "epoch": 1.6, + "learning_rate": 2.4250159540523294e-06, + "loss": 0.3654, + "step": 3456 + }, + { + "epoch": 1.6, + "learning_rate": 2.4218251435864713e-06, + "loss": 0.2894, + "step": 3460 + }, + { + "epoch": 1.61, + "learning_rate": 2.4186343331206128e-06, + "loss": 0.5198, + "step": 3464 + }, + { + "epoch": 1.61, + "learning_rate": 2.4154435226547547e-06, + "loss": 0.4666, + "step": 3468 + }, + { + "epoch": 1.61, + "learning_rate": 2.412252712188896e-06, + "loss": 0.3899, + "step": 3472 + }, + { + "epoch": 1.61, + "learning_rate": 2.409061901723038e-06, + "loss": 0.4248, + "step": 3476 + }, + { + "epoch": 1.61, + "learning_rate": 2.4058710912571795e-06, + "loss": 0.3144, + "step": 3480 + }, + { + "epoch": 1.62, + "learning_rate": 2.4026802807913214e-06, + "loss": 0.3294, + "step": 3484 + }, + { + "epoch": 1.62, + "learning_rate": 2.399489470325463e-06, + "loss": 0.3395, + "step": 3488 + }, + { + "epoch": 1.62, + "learning_rate": 2.3962986598596043e-06, + "loss": 0.4384, + "step": 3492 + }, + { + "epoch": 1.62, + "learning_rate": 2.3931078493937462e-06, + "loss": 0.3029, + "step": 3496 + }, + { + "epoch": 1.62, + "learning_rate": 2.3899170389278877e-06, + "loss": 0.3868, + "step": 3500 + }, + { + "epoch": 1.63, + "learning_rate": 2.3867262284620296e-06, + "loss": 0.233, + "step": 3504 + }, + { + "epoch": 1.63, + "learning_rate": 2.383535417996171e-06, + "loss": 0.4025, + "step": 3508 + }, + { + "epoch": 1.63, + "learning_rate": 2.380344607530313e-06, + "loss": 0.2714, + "step": 3512 + }, + { + "epoch": 1.63, + "learning_rate": 2.3771537970644544e-06, + "loss": 0.4694, + "step": 3516 + }, + { + "epoch": 1.63, + "learning_rate": 2.3739629865985963e-06, + "loss": 0.3092, + "step": 3520 + }, + { + "epoch": 1.63, + "learning_rate": 2.370772176132738e-06, + "loss": 0.3375, + "step": 3524 + }, + { + "epoch": 1.64, + "learning_rate": 2.3675813656668793e-06, + "loss": 0.2356, + "step": 3528 + }, + { + "epoch": 1.64, + "learning_rate": 2.364390555201021e-06, + "loss": 0.4403, + "step": 3532 + }, + { + "epoch": 1.64, + "learning_rate": 2.3611997447351626e-06, + "loss": 0.4015, + "step": 3536 + }, + { + "epoch": 1.64, + "learning_rate": 2.3580089342693045e-06, + "loss": 0.5201, + "step": 3540 + }, + { + "epoch": 1.64, + "learning_rate": 2.3548181238034464e-06, + "loss": 0.4203, + "step": 3544 + }, + { + "epoch": 1.65, + "learning_rate": 2.351627313337588e-06, + "loss": 0.4869, + "step": 3548 + }, + { + "epoch": 1.65, + "learning_rate": 2.3484365028717298e-06, + "loss": 0.3923, + "step": 3552 + }, + { + "epoch": 1.65, + "learning_rate": 2.3452456924058712e-06, + "loss": 0.6743, + "step": 3556 + }, + { + "epoch": 1.65, + "learning_rate": 2.342054881940013e-06, + "loss": 0.2588, + "step": 3560 + }, + { + "epoch": 1.65, + "learning_rate": 2.3388640714741546e-06, + "loss": 0.323, + "step": 3564 + }, + { + "epoch": 1.65, + "learning_rate": 2.3356732610082965e-06, + "loss": 0.2859, + "step": 3568 + }, + { + "epoch": 1.66, + "learning_rate": 2.332482450542438e-06, + "loss": 0.2747, + "step": 3572 + }, + { + "epoch": 1.66, + "learning_rate": 2.32929164007658e-06, + "loss": 0.2221, + "step": 3576 + }, + { + "epoch": 1.66, + "learning_rate": 2.3261008296107213e-06, + "loss": 0.3744, + "step": 3580 + }, + { + "epoch": 1.66, + "learning_rate": 2.3229100191448632e-06, + "loss": 0.3965, + "step": 3584 + }, + { + "epoch": 1.66, + "learning_rate": 2.3197192086790047e-06, + "loss": 0.4889, + "step": 3588 + }, + { + "epoch": 1.67, + "learning_rate": 2.3165283982131466e-06, + "loss": 0.4218, + "step": 3592 + }, + { + "epoch": 1.67, + "learning_rate": 2.313337587747288e-06, + "loss": 0.3016, + "step": 3596 + }, + { + "epoch": 1.67, + "learning_rate": 2.3101467772814295e-06, + "loss": 0.3408, + "step": 3600 + }, + { + "epoch": 1.67, + "learning_rate": 2.3069559668155714e-06, + "loss": 0.387, + "step": 3604 + }, + { + "epoch": 1.67, + "learning_rate": 2.303765156349713e-06, + "loss": 0.3845, + "step": 3608 + }, + { + "epoch": 1.68, + "learning_rate": 2.300574345883855e-06, + "loss": 0.2885, + "step": 3612 + }, + { + "epoch": 1.68, + "learning_rate": 2.2973835354179963e-06, + "loss": 0.1871, + "step": 3616 + }, + { + "epoch": 1.68, + "learning_rate": 2.294192724952138e-06, + "loss": 0.3516, + "step": 3620 + }, + { + "epoch": 1.68, + "learning_rate": 2.2910019144862796e-06, + "loss": 0.4165, + "step": 3624 + }, + { + "epoch": 1.68, + "learning_rate": 2.2878111040204215e-06, + "loss": 0.2891, + "step": 3628 + }, + { + "epoch": 1.68, + "learning_rate": 2.284620293554563e-06, + "loss": 0.3616, + "step": 3632 + }, + { + "epoch": 1.69, + "learning_rate": 2.2814294830887045e-06, + "loss": 0.4057, + "step": 3636 + }, + { + "epoch": 1.69, + "learning_rate": 2.2782386726228464e-06, + "loss": 0.5166, + "step": 3640 + }, + { + "epoch": 1.69, + "learning_rate": 2.275047862156988e-06, + "loss": 0.3279, + "step": 3644 + }, + { + "epoch": 1.69, + "learning_rate": 2.2718570516911297e-06, + "loss": 0.3537, + "step": 3648 + }, + { + "epoch": 1.69, + "learning_rate": 2.268666241225271e-06, + "loss": 0.3187, + "step": 3652 + }, + { + "epoch": 1.7, + "learning_rate": 2.265475430759413e-06, + "loss": 0.4043, + "step": 3656 + }, + { + "epoch": 1.7, + "learning_rate": 2.2622846202935546e-06, + "loss": 0.2799, + "step": 3660 + }, + { + "epoch": 1.7, + "learning_rate": 2.2590938098276964e-06, + "loss": 0.3363, + "step": 3664 + }, + { + "epoch": 1.7, + "learning_rate": 2.255902999361838e-06, + "loss": 0.6477, + "step": 3668 + }, + { + "epoch": 1.7, + "learning_rate": 2.25271218889598e-06, + "loss": 0.4967, + "step": 3672 + }, + { + "epoch": 1.71, + "learning_rate": 2.2495213784301213e-06, + "loss": 0.4474, + "step": 3676 + }, + { + "epoch": 1.71, + "learning_rate": 2.246330567964263e-06, + "loss": 0.2501, + "step": 3680 + }, + { + "epoch": 1.71, + "learning_rate": 2.2431397574984046e-06, + "loss": 0.3448, + "step": 3684 + }, + { + "epoch": 1.71, + "learning_rate": 2.2399489470325465e-06, + "loss": 0.3084, + "step": 3688 + }, + { + "epoch": 1.71, + "learning_rate": 2.2367581365666884e-06, + "loss": 0.3165, + "step": 3692 + }, + { + "epoch": 1.71, + "learning_rate": 2.23356732610083e-06, + "loss": 0.405, + "step": 3696 + }, + { + "epoch": 1.72, + "learning_rate": 2.230376515634972e-06, + "loss": 0.3648, + "step": 3700 + }, + { + "epoch": 1.72, + "learning_rate": 2.2271857051691133e-06, + "loss": 0.2938, + "step": 3704 + }, + { + "epoch": 1.72, + "learning_rate": 2.2239948947032547e-06, + "loss": 0.336, + "step": 3708 + }, + { + "epoch": 1.72, + "learning_rate": 2.2208040842373966e-06, + "loss": 0.4741, + "step": 3712 + }, + { + "epoch": 1.72, + "learning_rate": 2.217613273771538e-06, + "loss": 0.4006, + "step": 3716 + }, + { + "epoch": 1.73, + "learning_rate": 2.21442246330568e-06, + "loss": 0.3443, + "step": 3720 + }, + { + "epoch": 1.73, + "learning_rate": 2.2112316528398215e-06, + "loss": 0.2771, + "step": 3724 + }, + { + "epoch": 1.73, + "learning_rate": 2.2080408423739634e-06, + "loss": 0.2515, + "step": 3728 + }, + { + "epoch": 1.73, + "learning_rate": 2.204850031908105e-06, + "loss": 0.3897, + "step": 3732 + }, + { + "epoch": 1.73, + "learning_rate": 2.2016592214422467e-06, + "loss": 0.182, + "step": 3736 + }, + { + "epoch": 1.73, + "learning_rate": 2.198468410976388e-06, + "loss": 0.3575, + "step": 3740 + }, + { + "epoch": 1.74, + "learning_rate": 2.1952776005105297e-06, + "loss": 0.3662, + "step": 3744 + }, + { + "epoch": 1.74, + "learning_rate": 2.1920867900446716e-06, + "loss": 0.4394, + "step": 3748 + }, + { + "epoch": 1.74, + "learning_rate": 2.188895979578813e-06, + "loss": 0.3541, + "step": 3752 + }, + { + "epoch": 1.74, + "learning_rate": 2.185705169112955e-06, + "loss": 0.3837, + "step": 3756 + }, + { + "epoch": 1.74, + "learning_rate": 2.1825143586470964e-06, + "loss": 0.2765, + "step": 3760 + }, + { + "epoch": 1.75, + "learning_rate": 2.1793235481812383e-06, + "loss": 0.3349, + "step": 3764 + }, + { + "epoch": 1.75, + "learning_rate": 2.1761327377153797e-06, + "loss": 0.3141, + "step": 3768 + }, + { + "epoch": 1.75, + "learning_rate": 2.1729419272495216e-06, + "loss": 0.3836, + "step": 3772 + }, + { + "epoch": 1.75, + "learning_rate": 2.169751116783663e-06, + "loss": 0.417, + "step": 3776 + }, + { + "epoch": 1.75, + "learning_rate": 2.1665603063178046e-06, + "loss": 0.339, + "step": 3780 + }, + { + "epoch": 1.76, + "learning_rate": 2.1633694958519465e-06, + "loss": 0.4287, + "step": 3784 + }, + { + "epoch": 1.76, + "learning_rate": 2.1601786853860884e-06, + "loss": 0.3423, + "step": 3788 + }, + { + "epoch": 1.76, + "learning_rate": 2.15698787492023e-06, + "loss": 0.3367, + "step": 3792 + }, + { + "epoch": 1.76, + "learning_rate": 2.1537970644543717e-06, + "loss": 0.2519, + "step": 3796 + }, + { + "epoch": 1.76, + "learning_rate": 2.150606253988513e-06, + "loss": 0.3884, + "step": 3800 + }, + { + "epoch": 1.76, + "learning_rate": 2.147415443522655e-06, + "loss": 0.2767, + "step": 3804 + }, + { + "epoch": 1.77, + "learning_rate": 2.1442246330567966e-06, + "loss": 0.3162, + "step": 3808 + }, + { + "epoch": 1.77, + "learning_rate": 2.1410338225909385e-06, + "loss": 0.3722, + "step": 3812 + }, + { + "epoch": 1.77, + "learning_rate": 2.13784301212508e-06, + "loss": 0.462, + "step": 3816 + }, + { + "epoch": 1.77, + "learning_rate": 2.134652201659222e-06, + "loss": 0.4508, + "step": 3820 + }, + { + "epoch": 1.77, + "learning_rate": 2.1314613911933633e-06, + "loss": 0.309, + "step": 3824 + }, + { + "epoch": 1.78, + "learning_rate": 2.128270580727505e-06, + "loss": 0.4566, + "step": 3828 + }, + { + "epoch": 1.78, + "learning_rate": 2.1250797702616467e-06, + "loss": 0.3216, + "step": 3832 + }, + { + "epoch": 1.78, + "learning_rate": 2.1218889597957886e-06, + "loss": 0.4669, + "step": 3836 + }, + { + "epoch": 1.78, + "learning_rate": 2.11869814932993e-06, + "loss": 0.4764, + "step": 3840 + }, + { + "epoch": 1.78, + "learning_rate": 2.115507338864072e-06, + "loss": 0.3011, + "step": 3844 + }, + { + "epoch": 1.78, + "learning_rate": 2.1123165283982134e-06, + "loss": 0.3308, + "step": 3848 + }, + { + "epoch": 1.79, + "learning_rate": 2.109125717932355e-06, + "loss": 0.4038, + "step": 3852 + }, + { + "epoch": 1.79, + "learning_rate": 2.1059349074664967e-06, + "loss": 0.2768, + "step": 3856 + }, + { + "epoch": 1.79, + "learning_rate": 2.1027440970006382e-06, + "loss": 0.374, + "step": 3860 + }, + { + "epoch": 1.79, + "learning_rate": 2.09955328653478e-06, + "loss": 0.3393, + "step": 3864 + }, + { + "epoch": 1.79, + "learning_rate": 2.0963624760689216e-06, + "loss": 0.3846, + "step": 3868 + }, + { + "epoch": 1.8, + "learning_rate": 2.0931716656030635e-06, + "loss": 0.308, + "step": 3872 + }, + { + "epoch": 1.8, + "learning_rate": 2.089980855137205e-06, + "loss": 0.4816, + "step": 3876 + }, + { + "epoch": 1.8, + "learning_rate": 2.086790044671347e-06, + "loss": 0.2121, + "step": 3880 + }, + { + "epoch": 1.8, + "learning_rate": 2.0835992342054883e-06, + "loss": 0.3698, + "step": 3884 + }, + { + "epoch": 1.8, + "learning_rate": 2.0804084237396298e-06, + "loss": 0.3615, + "step": 3888 + }, + { + "epoch": 1.81, + "learning_rate": 2.0772176132737717e-06, + "loss": 0.2294, + "step": 3892 + }, + { + "epoch": 1.81, + "learning_rate": 2.074026802807913e-06, + "loss": 0.2515, + "step": 3896 + }, + { + "epoch": 1.81, + "learning_rate": 2.070835992342055e-06, + "loss": 0.3559, + "step": 3900 + }, + { + "epoch": 1.81, + "learning_rate": 2.0676451818761965e-06, + "loss": 0.4243, + "step": 3904 + }, + { + "epoch": 1.81, + "learning_rate": 2.0644543714103384e-06, + "loss": 0.3622, + "step": 3908 + }, + { + "epoch": 1.81, + "learning_rate": 2.06126356094448e-06, + "loss": 0.5588, + "step": 3912 + }, + { + "epoch": 1.82, + "learning_rate": 2.0580727504786218e-06, + "loss": 0.2169, + "step": 3916 + }, + { + "epoch": 1.82, + "learning_rate": 2.0548819400127632e-06, + "loss": 0.4732, + "step": 3920 + }, + { + "epoch": 1.82, + "learning_rate": 2.051691129546905e-06, + "loss": 0.2331, + "step": 3924 + }, + { + "epoch": 1.82, + "learning_rate": 2.0485003190810466e-06, + "loss": 0.3388, + "step": 3928 + }, + { + "epoch": 1.82, + "learning_rate": 2.0453095086151885e-06, + "loss": 0.4545, + "step": 3932 + }, + { + "epoch": 1.83, + "learning_rate": 2.04211869814933e-06, + "loss": 0.3886, + "step": 3936 + }, + { + "epoch": 1.83, + "learning_rate": 2.038927887683472e-06, + "loss": 0.2233, + "step": 3940 + }, + { + "epoch": 1.83, + "learning_rate": 2.0357370772176138e-06, + "loss": 0.3658, + "step": 3944 + }, + { + "epoch": 1.83, + "learning_rate": 2.0325462667517552e-06, + "loss": 0.3229, + "step": 3948 + }, + { + "epoch": 1.83, + "learning_rate": 2.029355456285897e-06, + "loss": 0.1759, + "step": 3952 + }, + { + "epoch": 1.83, + "learning_rate": 2.0261646458200386e-06, + "loss": 0.3737, + "step": 3956 + }, + { + "epoch": 1.84, + "learning_rate": 2.02297383535418e-06, + "loss": 0.3362, + "step": 3960 + }, + { + "epoch": 1.84, + "learning_rate": 2.019783024888322e-06, + "loss": 0.2873, + "step": 3964 + }, + { + "epoch": 1.84, + "learning_rate": 2.0165922144224634e-06, + "loss": 0.3454, + "step": 3968 + }, + { + "epoch": 1.84, + "learning_rate": 2.0134014039566053e-06, + "loss": 0.3428, + "step": 3972 + }, + { + "epoch": 1.84, + "learning_rate": 2.0102105934907468e-06, + "loss": 0.4089, + "step": 3976 + }, + { + "epoch": 1.85, + "learning_rate": 2.0070197830248887e-06, + "loss": 0.3472, + "step": 3980 + }, + { + "epoch": 1.85, + "learning_rate": 2.00382897255903e-06, + "loss": 0.2868, + "step": 3984 + }, + { + "epoch": 1.85, + "learning_rate": 2.000638162093172e-06, + "loss": 0.3088, + "step": 3988 + }, + { + "epoch": 1.85, + "learning_rate": 1.9974473516273135e-06, + "loss": 0.2471, + "step": 3992 + }, + { + "epoch": 1.85, + "learning_rate": 1.994256541161455e-06, + "loss": 0.2816, + "step": 3996 + }, + { + "epoch": 1.86, + "learning_rate": 1.991065730695597e-06, + "loss": 0.3135, + "step": 4000 + }, + { + "epoch": 1.86, + "learning_rate": 1.9878749202297383e-06, + "loss": 0.379, + "step": 4004 + }, + { + "epoch": 1.86, + "learning_rate": 1.9846841097638802e-06, + "loss": 0.5225, + "step": 4008 + }, + { + "epoch": 1.86, + "learning_rate": 1.9814932992980217e-06, + "loss": 0.3229, + "step": 4012 + }, + { + "epoch": 1.86, + "learning_rate": 1.9783024888321636e-06, + "loss": 0.3573, + "step": 4016 + }, + { + "epoch": 1.86, + "learning_rate": 1.975111678366305e-06, + "loss": 0.2219, + "step": 4020 + }, + { + "epoch": 1.87, + "learning_rate": 1.971920867900447e-06, + "loss": 0.2133, + "step": 4024 + }, + { + "epoch": 1.87, + "learning_rate": 1.9687300574345884e-06, + "loss": 0.4303, + "step": 4028 + }, + { + "epoch": 1.87, + "learning_rate": 1.96553924696873e-06, + "loss": 0.4735, + "step": 4032 + }, + { + "epoch": 1.87, + "learning_rate": 1.9631461391193364e-06, + "loss": 0.3223, + "step": 4036 + }, + { + "epoch": 1.87, + "learning_rate": 1.959955328653478e-06, + "loss": 0.3124, + "step": 4040 + }, + { + "epoch": 1.88, + "learning_rate": 1.95676451818762e-06, + "loss": 0.4547, + "step": 4044 + }, + { + "epoch": 1.88, + "learning_rate": 1.9535737077217613e-06, + "loss": 0.3089, + "step": 4048 + }, + { + "epoch": 1.88, + "learning_rate": 1.950382897255903e-06, + "loss": 0.344, + "step": 4052 + }, + { + "epoch": 1.88, + "learning_rate": 1.947192086790045e-06, + "loss": 0.1488, + "step": 4056 + }, + { + "epoch": 1.88, + "learning_rate": 1.9440012763241865e-06, + "loss": 0.4715, + "step": 4060 + }, + { + "epoch": 1.88, + "learning_rate": 1.9408104658583284e-06, + "loss": 0.2866, + "step": 4064 + }, + { + "epoch": 1.89, + "learning_rate": 1.93761965539247e-06, + "loss": 0.3207, + "step": 4068 + }, + { + "epoch": 1.89, + "learning_rate": 1.9344288449266118e-06, + "loss": 0.3532, + "step": 4072 + }, + { + "epoch": 1.89, + "learning_rate": 1.9312380344607532e-06, + "loss": 0.3416, + "step": 4076 + }, + { + "epoch": 1.89, + "learning_rate": 1.928047223994895e-06, + "loss": 0.6239, + "step": 4080 + }, + { + "epoch": 1.89, + "learning_rate": 1.9248564135290366e-06, + "loss": 0.1806, + "step": 4084 + }, + { + "epoch": 1.9, + "learning_rate": 1.9216656030631785e-06, + "loss": 0.3065, + "step": 4088 + }, + { + "epoch": 1.9, + "learning_rate": 1.91847479259732e-06, + "loss": 0.2393, + "step": 4092 + }, + { + "epoch": 1.9, + "learning_rate": 1.9152839821314614e-06, + "loss": 0.4581, + "step": 4096 + }, + { + "epoch": 1.9, + "learning_rate": 1.9120931716656033e-06, + "loss": 0.2407, + "step": 4100 + }, + { + "epoch": 1.9, + "learning_rate": 1.908902361199745e-06, + "loss": 0.3328, + "step": 4104 + }, + { + "epoch": 1.91, + "learning_rate": 1.9057115507338867e-06, + "loss": 0.2898, + "step": 4108 + }, + { + "epoch": 1.91, + "learning_rate": 1.9025207402680282e-06, + "loss": 0.5888, + "step": 4112 + }, + { + "epoch": 1.91, + "learning_rate": 1.89932992980217e-06, + "loss": 0.3909, + "step": 4116 + }, + { + "epoch": 1.91, + "learning_rate": 1.8961391193363115e-06, + "loss": 0.2613, + "step": 4120 + }, + { + "epoch": 1.91, + "learning_rate": 1.8929483088704534e-06, + "loss": 0.2594, + "step": 4124 + }, + { + "epoch": 1.91, + "learning_rate": 1.889757498404595e-06, + "loss": 0.3601, + "step": 4128 + }, + { + "epoch": 1.92, + "learning_rate": 1.8865666879387366e-06, + "loss": 0.1791, + "step": 4132 + }, + { + "epoch": 1.92, + "learning_rate": 1.8833758774728783e-06, + "loss": 0.3714, + "step": 4136 + }, + { + "epoch": 1.92, + "learning_rate": 1.88018506700702e-06, + "loss": 0.3601, + "step": 4140 + }, + { + "epoch": 1.92, + "learning_rate": 1.8769942565411616e-06, + "loss": 0.4697, + "step": 4144 + }, + { + "epoch": 1.92, + "learning_rate": 1.8738034460753033e-06, + "loss": 0.4277, + "step": 4148 + }, + { + "epoch": 1.93, + "learning_rate": 1.870612635609445e-06, + "loss": 0.4183, + "step": 4152 + }, + { + "epoch": 1.93, + "learning_rate": 1.8674218251435867e-06, + "loss": 0.2764, + "step": 4156 + }, + { + "epoch": 1.93, + "learning_rate": 1.8642310146777281e-06, + "loss": 0.3209, + "step": 4160 + }, + { + "epoch": 1.93, + "learning_rate": 1.86104020421187e-06, + "loss": 0.328, + "step": 4164 + }, + { + "epoch": 1.93, + "learning_rate": 1.8578493937460115e-06, + "loss": 0.3673, + "step": 4168 + }, + { + "epoch": 1.94, + "learning_rate": 1.8546585832801534e-06, + "loss": 0.2856, + "step": 4172 + }, + { + "epoch": 1.94, + "learning_rate": 1.8514677728142949e-06, + "loss": 0.4248, + "step": 4176 + }, + { + "epoch": 1.94, + "learning_rate": 1.8482769623484368e-06, + "loss": 0.419, + "step": 4180 + }, + { + "epoch": 1.94, + "learning_rate": 1.8450861518825782e-06, + "loss": 0.3315, + "step": 4184 + }, + { + "epoch": 1.94, + "learning_rate": 1.8418953414167201e-06, + "loss": 0.3508, + "step": 4188 + }, + { + "epoch": 1.94, + "learning_rate": 1.8387045309508616e-06, + "loss": 0.2016, + "step": 4192 + }, + { + "epoch": 1.95, + "learning_rate": 1.8355137204850033e-06, + "loss": 0.2352, + "step": 4196 + }, + { + "epoch": 1.95, + "learning_rate": 1.832322910019145e-06, + "loss": 0.4638, + "step": 4200 + }, + { + "epoch": 1.95, + "learning_rate": 1.8291320995532866e-06, + "loss": 0.4352, + "step": 4204 + }, + { + "epoch": 1.95, + "learning_rate": 1.8259412890874283e-06, + "loss": 0.4832, + "step": 4208 + }, + { + "epoch": 1.95, + "learning_rate": 1.82275047862157e-06, + "loss": 0.295, + "step": 4212 + }, + { + "epoch": 1.96, + "learning_rate": 1.8195596681557117e-06, + "loss": 0.3176, + "step": 4216 + }, + { + "epoch": 1.96, + "learning_rate": 1.8163688576898534e-06, + "loss": 0.0922, + "step": 4220 + }, + { + "epoch": 1.96, + "learning_rate": 1.813178047223995e-06, + "loss": 0.2375, + "step": 4224 + }, + { + "epoch": 1.96, + "learning_rate": 1.8099872367581367e-06, + "loss": 0.3374, + "step": 4228 + }, + { + "epoch": 1.96, + "learning_rate": 1.8067964262922782e-06, + "loss": 0.2551, + "step": 4232 + }, + { + "epoch": 1.96, + "learning_rate": 1.80360561582642e-06, + "loss": 0.3228, + "step": 4236 + }, + { + "epoch": 1.97, + "learning_rate": 1.8004148053605616e-06, + "loss": 0.3102, + "step": 4240 + }, + { + "epoch": 1.97, + "learning_rate": 1.7972239948947035e-06, + "loss": 0.2471, + "step": 4244 + }, + { + "epoch": 1.97, + "learning_rate": 1.794033184428845e-06, + "loss": 0.285, + "step": 4248 + }, + { + "epoch": 1.97, + "learning_rate": 1.7908423739629868e-06, + "loss": 0.3468, + "step": 4252 + }, + { + "epoch": 1.97, + "learning_rate": 1.7876515634971283e-06, + "loss": 0.2877, + "step": 4256 + }, + { + "epoch": 1.98, + "learning_rate": 1.7844607530312702e-06, + "loss": 0.4362, + "step": 4260 + }, + { + "epoch": 1.98, + "learning_rate": 1.7812699425654117e-06, + "loss": 0.1789, + "step": 4264 + }, + { + "epoch": 1.98, + "learning_rate": 1.7780791320995533e-06, + "loss": 0.3056, + "step": 4268 + }, + { + "epoch": 1.98, + "learning_rate": 1.774888321633695e-06, + "loss": 0.478, + "step": 4272 + }, + { + "epoch": 1.98, + "learning_rate": 1.7716975111678367e-06, + "loss": 0.3405, + "step": 4276 + }, + { + "epoch": 1.99, + "learning_rate": 1.7685067007019786e-06, + "loss": 0.2038, + "step": 4280 + }, + { + "epoch": 1.99, + "learning_rate": 1.76531589023612e-06, + "loss": 0.2301, + "step": 4284 + }, + { + "epoch": 1.99, + "learning_rate": 1.762125079770262e-06, + "loss": 0.3283, + "step": 4288 + }, + { + "epoch": 1.99, + "learning_rate": 1.7589342693044034e-06, + "loss": 0.1711, + "step": 4292 + }, + { + "epoch": 1.99, + "learning_rate": 1.7557434588385453e-06, + "loss": 0.241, + "step": 4296 + }, + { + "epoch": 1.99, + "learning_rate": 1.7525526483726868e-06, + "loss": 0.2408, + "step": 4300 + } + ], + "logging_steps": 4, + "max_steps": 6468, + "num_input_tokens_seen": 0, + "num_train_epochs": 3, + "save_steps": 100, + "total_flos": 43550404509696.0, + "train_batch_size": 2, + "trial_name": null, + "trial_params": null +}