diff --git "a/trainer_state.json" "b/trainer_state.json" --- "a/trainer_state.json" +++ "b/trainer_state.json" @@ -1,8 +1,8 @@ { "best_metric": null, "best_model_checkpoint": null, - "epoch": 10.0, - "global_step": 2018430, + "epoch": 17.0, + "global_step": 3431331, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, @@ -24332,11 +24332,17044 @@ "eval_samples_per_second": 1745.728, "eval_steps_per_second": 72.743, "step": 2018430 + }, + { + "epoch": 10.0, + "learning_rate": 4.500164236560099e-05, + "loss": 2.4274, + "step": 2018500 + }, + { + "epoch": 10.0, + "learning_rate": 4.500040377917491e-05, + "loss": 2.4295, + "step": 2019000 + }, + { + "epoch": 10.01, + "learning_rate": 4.4999165192748824e-05, + "loss": 2.3972, + "step": 2019500 + }, + { + "epoch": 10.01, + "learning_rate": 4.499792660632274e-05, + "loss": 2.4053, + "step": 2020000 + }, + { + "epoch": 10.01, + "learning_rate": 4.499668801989666e-05, + "loss": 2.4178, + "step": 2020500 + }, + { + "epoch": 10.01, + "learning_rate": 4.499544943347057e-05, + "loss": 2.4151, + "step": 2021000 + }, + { + "epoch": 10.02, + "learning_rate": 4.4994210847044485e-05, + "loss": 2.4157, + "step": 2021500 + }, + { + "epoch": 10.02, + "learning_rate": 4.49929722606184e-05, + "loss": 2.4145, + "step": 2022000 + }, + { + "epoch": 10.02, + "learning_rate": 4.499173367419232e-05, + "loss": 2.4341, + "step": 2022500 + }, + { + "epoch": 10.02, + "learning_rate": 4.499050004211194e-05, + "loss": 2.4384, + "step": 2023000 + }, + { + "epoch": 10.03, + "learning_rate": 4.4989261455685856e-05, + "loss": 2.4209, + "step": 2023500 + }, + { + "epoch": 10.03, + "learning_rate": 4.498802286925977e-05, + "loss": 2.4392, + "step": 2024000 + }, + { + "epoch": 10.03, + "learning_rate": 4.498678428283369e-05, + "loss": 2.4234, + "step": 2024500 + }, + { + "epoch": 10.03, + "learning_rate": 4.498554569640761e-05, + "loss": 2.412, + "step": 2025000 + }, + { + "epoch": 10.04, + "learning_rate": 4.4984307109981524e-05, + "loss": 2.4119, + "step": 2025500 + }, + { + "epoch": 10.04, + "learning_rate": 4.498306852355544e-05, + "loss": 2.4244, + "step": 2026000 + }, + { + "epoch": 10.04, + "learning_rate": 4.498182993712936e-05, + "loss": 2.4498, + "step": 2026500 + }, + { + "epoch": 10.04, + "learning_rate": 4.4980591350703275e-05, + "loss": 2.4358, + "step": 2027000 + }, + { + "epoch": 10.04, + "learning_rate": 4.497935276427719e-05, + "loss": 2.4245, + "step": 2027500 + }, + { + "epoch": 10.05, + "learning_rate": 4.497811913219681e-05, + "loss": 2.4284, + "step": 2028000 + }, + { + "epoch": 10.05, + "learning_rate": 4.497688054577073e-05, + "loss": 2.4131, + "step": 2028500 + }, + { + "epoch": 10.05, + "learning_rate": 4.497564443651749e-05, + "loss": 2.4187, + "step": 2029000 + }, + { + "epoch": 10.05, + "learning_rate": 4.497440585009141e-05, + "loss": 2.4189, + "step": 2029500 + }, + { + "epoch": 10.06, + "learning_rate": 4.4973167263665325e-05, + "loss": 2.4364, + "step": 2030000 + }, + { + "epoch": 10.06, + "learning_rate": 4.497192867723924e-05, + "loss": 2.4455, + "step": 2030500 + }, + { + "epoch": 10.06, + "learning_rate": 4.497069009081316e-05, + "loss": 2.4064, + "step": 2031000 + }, + { + "epoch": 10.06, + "learning_rate": 4.4969451504387076e-05, + "loss": 2.4481, + "step": 2031500 + }, + { + "epoch": 10.07, + "learning_rate": 4.4968212917960986e-05, + "loss": 2.4151, + "step": 2032000 + }, + { + "epoch": 10.07, + "learning_rate": 4.49669743315349e-05, + "loss": 2.4254, + "step": 2032500 + }, + { + "epoch": 10.07, + "learning_rate": 4.496573574510882e-05, + "loss": 2.428, + "step": 2033000 + }, + { + "epoch": 10.07, + "learning_rate": 4.496449715868274e-05, + "loss": 2.4263, + "step": 2033500 + }, + { + "epoch": 10.08, + "learning_rate": 4.4963258572256654e-05, + "loss": 2.4114, + "step": 2034000 + }, + { + "epoch": 10.08, + "learning_rate": 4.496201998583057e-05, + "loss": 2.4332, + "step": 2034500 + }, + { + "epoch": 10.08, + "learning_rate": 4.496078139940449e-05, + "loss": 2.4325, + "step": 2035000 + }, + { + "epoch": 10.08, + "learning_rate": 4.4959545290151257e-05, + "loss": 2.4009, + "step": 2035500 + }, + { + "epoch": 10.09, + "learning_rate": 4.4958306703725173e-05, + "loss": 2.435, + "step": 2036000 + }, + { + "epoch": 10.09, + "learning_rate": 4.495706811729909e-05, + "loss": 2.4103, + "step": 2036500 + }, + { + "epoch": 10.09, + "learning_rate": 4.495582953087301e-05, + "loss": 2.4359, + "step": 2037000 + }, + { + "epoch": 10.09, + "learning_rate": 4.4954590944446924e-05, + "loss": 2.4256, + "step": 2037500 + }, + { + "epoch": 10.1, + "learning_rate": 4.495335235802084e-05, + "loss": 2.4242, + "step": 2038000 + }, + { + "epoch": 10.1, + "learning_rate": 4.495211377159476e-05, + "loss": 2.4267, + "step": 2038500 + }, + { + "epoch": 10.1, + "learning_rate": 4.4950875185168675e-05, + "loss": 2.4368, + "step": 2039000 + }, + { + "epoch": 10.1, + "learning_rate": 4.4949641553088296e-05, + "loss": 2.4211, + "step": 2039500 + }, + { + "epoch": 10.11, + "learning_rate": 4.494840296666221e-05, + "loss": 2.4296, + "step": 2040000 + }, + { + "epoch": 10.11, + "learning_rate": 4.494716438023613e-05, + "loss": 2.4401, + "step": 2040500 + }, + { + "epoch": 10.11, + "learning_rate": 4.4945925793810047e-05, + "loss": 2.4441, + "step": 2041000 + }, + { + "epoch": 10.11, + "learning_rate": 4.494468720738396e-05, + "loss": 2.4434, + "step": 2041500 + }, + { + "epoch": 10.12, + "learning_rate": 4.4943448620957874e-05, + "loss": 2.4475, + "step": 2042000 + }, + { + "epoch": 10.12, + "learning_rate": 4.494221003453179e-05, + "loss": 2.4398, + "step": 2042500 + }, + { + "epoch": 10.12, + "learning_rate": 4.494097144810571e-05, + "loss": 2.4278, + "step": 2043000 + }, + { + "epoch": 10.12, + "learning_rate": 4.4939732861679624e-05, + "loss": 2.4079, + "step": 2043500 + }, + { + "epoch": 10.13, + "learning_rate": 4.493849675242639e-05, + "loss": 2.4734, + "step": 2044000 + }, + { + "epoch": 10.13, + "learning_rate": 4.49372581660003e-05, + "loss": 2.4516, + "step": 2044500 + }, + { + "epoch": 10.13, + "learning_rate": 4.493601957957422e-05, + "loss": 2.4456, + "step": 2045000 + }, + { + "epoch": 10.13, + "learning_rate": 4.493478099314814e-05, + "loss": 2.4319, + "step": 2045500 + }, + { + "epoch": 10.14, + "learning_rate": 4.4933542406722054e-05, + "loss": 2.4348, + "step": 2046000 + }, + { + "epoch": 10.14, + "learning_rate": 4.493230382029597e-05, + "loss": 2.4237, + "step": 2046500 + }, + { + "epoch": 10.14, + "learning_rate": 4.493106771104275e-05, + "loss": 2.4202, + "step": 2047000 + }, + { + "epoch": 10.14, + "learning_rate": 4.492982912461666e-05, + "loss": 2.421, + "step": 2047500 + }, + { + "epoch": 10.15, + "learning_rate": 4.492859301536343e-05, + "loss": 2.4291, + "step": 2048000 + }, + { + "epoch": 10.15, + "learning_rate": 4.4927356906110194e-05, + "loss": 2.422, + "step": 2048500 + }, + { + "epoch": 10.15, + "learning_rate": 4.492611831968411e-05, + "loss": 2.4306, + "step": 2049000 + }, + { + "epoch": 10.15, + "learning_rate": 4.492487973325803e-05, + "loss": 2.4154, + "step": 2049500 + }, + { + "epoch": 10.16, + "learning_rate": 4.4923641146831945e-05, + "loss": 2.4207, + "step": 2050000 + }, + { + "epoch": 10.16, + "learning_rate": 4.492240256040586e-05, + "loss": 2.4277, + "step": 2050500 + }, + { + "epoch": 10.16, + "learning_rate": 4.492116397397978e-05, + "loss": 2.4082, + "step": 2051000 + }, + { + "epoch": 10.16, + "learning_rate": 4.4919925387553696e-05, + "loss": 2.4284, + "step": 2051500 + }, + { + "epoch": 10.17, + "learning_rate": 4.491868680112761e-05, + "loss": 2.4324, + "step": 2052000 + }, + { + "epoch": 10.17, + "learning_rate": 4.491744821470153e-05, + "loss": 2.418, + "step": 2052500 + }, + { + "epoch": 10.17, + "learning_rate": 4.491620962827545e-05, + "loss": 2.4193, + "step": 2053000 + }, + { + "epoch": 10.17, + "learning_rate": 4.4914971041849364e-05, + "loss": 2.4341, + "step": 2053500 + }, + { + "epoch": 10.18, + "learning_rate": 4.4913732455423274e-05, + "loss": 2.4189, + "step": 2054000 + }, + { + "epoch": 10.18, + "learning_rate": 4.491249634617005e-05, + "loss": 2.4323, + "step": 2054500 + }, + { + "epoch": 10.18, + "learning_rate": 4.4911257759743966e-05, + "loss": 2.4587, + "step": 2055000 + }, + { + "epoch": 10.18, + "learning_rate": 4.491002165049073e-05, + "loss": 2.4278, + "step": 2055500 + }, + { + "epoch": 10.19, + "learning_rate": 4.4908783064064645e-05, + "loss": 2.4202, + "step": 2056000 + }, + { + "epoch": 10.19, + "learning_rate": 4.490754447763856e-05, + "loss": 2.4481, + "step": 2056500 + }, + { + "epoch": 10.19, + "learning_rate": 4.490630589121248e-05, + "loss": 2.4114, + "step": 2057000 + }, + { + "epoch": 10.19, + "learning_rate": 4.490506978195925e-05, + "loss": 2.4111, + "step": 2057500 + }, + { + "epoch": 10.2, + "learning_rate": 4.4903831195533165e-05, + "loss": 2.4391, + "step": 2058000 + }, + { + "epoch": 10.2, + "learning_rate": 4.490259260910708e-05, + "loss": 2.4327, + "step": 2058500 + }, + { + "epoch": 10.2, + "learning_rate": 4.4901354022681e-05, + "loss": 2.4279, + "step": 2059000 + }, + { + "epoch": 10.2, + "learning_rate": 4.4900115436254916e-05, + "loss": 2.4355, + "step": 2059500 + }, + { + "epoch": 10.21, + "learning_rate": 4.4898881804174536e-05, + "loss": 2.4252, + "step": 2060000 + }, + { + "epoch": 10.21, + "learning_rate": 4.489764321774845e-05, + "loss": 2.4196, + "step": 2060500 + }, + { + "epoch": 10.21, + "learning_rate": 4.4896404631322363e-05, + "loss": 2.4315, + "step": 2061000 + }, + { + "epoch": 10.21, + "learning_rate": 4.489516604489628e-05, + "loss": 2.4199, + "step": 2061500 + }, + { + "epoch": 10.22, + "learning_rate": 4.48939274584702e-05, + "loss": 2.4235, + "step": 2062000 + }, + { + "epoch": 10.22, + "learning_rate": 4.4892688872044114e-05, + "loss": 2.3984, + "step": 2062500 + }, + { + "epoch": 10.22, + "learning_rate": 4.489145028561803e-05, + "loss": 2.4339, + "step": 2063000 + }, + { + "epoch": 10.22, + "learning_rate": 4.489021169919195e-05, + "loss": 2.3881, + "step": 2063500 + }, + { + "epoch": 10.23, + "learning_rate": 4.4888973112765865e-05, + "loss": 2.4048, + "step": 2064000 + }, + { + "epoch": 10.23, + "learning_rate": 4.488773452633978e-05, + "loss": 2.426, + "step": 2064500 + }, + { + "epoch": 10.23, + "learning_rate": 4.48864959399137e-05, + "loss": 2.4169, + "step": 2065000 + }, + { + "epoch": 10.23, + "learning_rate": 4.4885257353487616e-05, + "loss": 2.4291, + "step": 2065500 + }, + { + "epoch": 10.24, + "learning_rate": 4.488402124423438e-05, + "loss": 2.422, + "step": 2066000 + }, + { + "epoch": 10.24, + "learning_rate": 4.4882782657808295e-05, + "loss": 2.4173, + "step": 2066500 + }, + { + "epoch": 10.24, + "learning_rate": 4.488154407138221e-05, + "loss": 2.415, + "step": 2067000 + }, + { + "epoch": 10.24, + "learning_rate": 4.488030548495613e-05, + "loss": 2.4058, + "step": 2067500 + }, + { + "epoch": 10.25, + "learning_rate": 4.4879066898530046e-05, + "loss": 2.4282, + "step": 2068000 + }, + { + "epoch": 10.25, + "learning_rate": 4.4877830789276814e-05, + "loss": 2.4527, + "step": 2068500 + }, + { + "epoch": 10.25, + "learning_rate": 4.487659220285073e-05, + "loss": 2.4305, + "step": 2069000 + }, + { + "epoch": 10.25, + "learning_rate": 4.487535361642465e-05, + "loss": 2.4422, + "step": 2069500 + }, + { + "epoch": 10.26, + "learning_rate": 4.4874115029998565e-05, + "loss": 2.4138, + "step": 2070000 + }, + { + "epoch": 10.26, + "learning_rate": 4.4872878920745334e-05, + "loss": 2.4326, + "step": 2070500 + }, + { + "epoch": 10.26, + "learning_rate": 4.487164033431925e-05, + "loss": 2.4561, + "step": 2071000 + }, + { + "epoch": 10.26, + "learning_rate": 4.487040174789317e-05, + "loss": 2.4219, + "step": 2071500 + }, + { + "epoch": 10.27, + "learning_rate": 4.4869163161467085e-05, + "loss": 2.4363, + "step": 2072000 + }, + { + "epoch": 10.27, + "learning_rate": 4.4867924575040995e-05, + "loss": 2.4182, + "step": 2072500 + }, + { + "epoch": 10.27, + "learning_rate": 4.486668598861491e-05, + "loss": 2.4498, + "step": 2073000 + }, + { + "epoch": 10.27, + "learning_rate": 4.486544740218883e-05, + "loss": 2.4262, + "step": 2073500 + }, + { + "epoch": 10.28, + "learning_rate": 4.4864208815762746e-05, + "loss": 2.4582, + "step": 2074000 + }, + { + "epoch": 10.28, + "learning_rate": 4.4862972706509515e-05, + "loss": 2.4353, + "step": 2074500 + }, + { + "epoch": 10.28, + "learning_rate": 4.486173412008343e-05, + "loss": 2.4298, + "step": 2075000 + }, + { + "epoch": 10.28, + "learning_rate": 4.486049553365735e-05, + "loss": 2.4233, + "step": 2075500 + }, + { + "epoch": 10.29, + "learning_rate": 4.4859259424404124e-05, + "loss": 2.4328, + "step": 2076000 + }, + { + "epoch": 10.29, + "learning_rate": 4.4858023315150886e-05, + "loss": 2.4591, + "step": 2076500 + }, + { + "epoch": 10.29, + "learning_rate": 4.48567847287248e-05, + "loss": 2.4568, + "step": 2077000 + }, + { + "epoch": 10.29, + "learning_rate": 4.485554614229872e-05, + "loss": 2.4302, + "step": 2077500 + }, + { + "epoch": 10.3, + "learning_rate": 4.485430755587264e-05, + "loss": 2.4473, + "step": 2078000 + }, + { + "epoch": 10.3, + "learning_rate": 4.4853071446619406e-05, + "loss": 2.4297, + "step": 2078500 + }, + { + "epoch": 10.3, + "learning_rate": 4.4851835337366174e-05, + "loss": 2.4359, + "step": 2079000 + }, + { + "epoch": 10.3, + "learning_rate": 4.485059675094009e-05, + "loss": 2.4481, + "step": 2079500 + }, + { + "epoch": 10.31, + "learning_rate": 4.4849358164514e-05, + "loss": 2.4354, + "step": 2080000 + }, + { + "epoch": 10.31, + "learning_rate": 4.484811957808792e-05, + "loss": 2.435, + "step": 2080500 + }, + { + "epoch": 10.31, + "learning_rate": 4.484688346883469e-05, + "loss": 2.4235, + "step": 2081000 + }, + { + "epoch": 10.31, + "learning_rate": 4.4845644882408604e-05, + "loss": 2.419, + "step": 2081500 + }, + { + "epoch": 10.31, + "learning_rate": 4.484440629598252e-05, + "loss": 2.4439, + "step": 2082000 + }, + { + "epoch": 10.32, + "learning_rate": 4.484316770955644e-05, + "loss": 2.423, + "step": 2082500 + }, + { + "epoch": 10.32, + "learning_rate": 4.4841929123130355e-05, + "loss": 2.4478, + "step": 2083000 + }, + { + "epoch": 10.32, + "learning_rate": 4.484069053670427e-05, + "loss": 2.4356, + "step": 2083500 + }, + { + "epoch": 10.32, + "learning_rate": 4.483945195027819e-05, + "loss": 2.4367, + "step": 2084000 + }, + { + "epoch": 10.33, + "learning_rate": 4.4838213363852106e-05, + "loss": 2.4409, + "step": 2084500 + }, + { + "epoch": 10.33, + "learning_rate": 4.483697477742602e-05, + "loss": 2.4238, + "step": 2085000 + }, + { + "epoch": 10.33, + "learning_rate": 4.483573619099994e-05, + "loss": 2.4366, + "step": 2085500 + }, + { + "epoch": 10.33, + "learning_rate": 4.4834497604573856e-05, + "loss": 2.4146, + "step": 2086000 + }, + { + "epoch": 10.34, + "learning_rate": 4.4833259018147773e-05, + "loss": 2.4158, + "step": 2086500 + }, + { + "epoch": 10.34, + "learning_rate": 4.483202043172169e-05, + "loss": 2.4511, + "step": 2087000 + }, + { + "epoch": 10.34, + "learning_rate": 4.483078184529561e-05, + "loss": 2.42, + "step": 2087500 + }, + { + "epoch": 10.34, + "learning_rate": 4.4829543258869524e-05, + "loss": 2.4163, + "step": 2088000 + }, + { + "epoch": 10.35, + "learning_rate": 4.482830467244344e-05, + "loss": 2.4279, + "step": 2088500 + }, + { + "epoch": 10.35, + "learning_rate": 4.482706608601735e-05, + "loss": 2.451, + "step": 2089000 + }, + { + "epoch": 10.35, + "learning_rate": 4.482582749959127e-05, + "loss": 2.4472, + "step": 2089500 + }, + { + "epoch": 10.35, + "learning_rate": 4.4824588913165185e-05, + "loss": 2.4138, + "step": 2090000 + }, + { + "epoch": 10.36, + "learning_rate": 4.48233503267391e-05, + "loss": 2.4263, + "step": 2090500 + }, + { + "epoch": 10.36, + "learning_rate": 4.482211174031302e-05, + "loss": 2.4066, + "step": 2091000 + }, + { + "epoch": 10.36, + "learning_rate": 4.482087563105979e-05, + "loss": 2.4424, + "step": 2091500 + }, + { + "epoch": 10.36, + "learning_rate": 4.48196370446337e-05, + "loss": 2.4485, + "step": 2092000 + }, + { + "epoch": 10.37, + "learning_rate": 4.4818398458207615e-05, + "loss": 2.4437, + "step": 2092500 + }, + { + "epoch": 10.37, + "learning_rate": 4.481716234895439e-05, + "loss": 2.4505, + "step": 2093000 + }, + { + "epoch": 10.37, + "learning_rate": 4.481592376252831e-05, + "loss": 2.4309, + "step": 2093500 + }, + { + "epoch": 10.37, + "learning_rate": 4.4814685176102224e-05, + "loss": 2.4408, + "step": 2094000 + }, + { + "epoch": 10.38, + "learning_rate": 4.481344658967614e-05, + "loss": 2.4208, + "step": 2094500 + }, + { + "epoch": 10.38, + "learning_rate": 4.481220800325005e-05, + "loss": 2.3977, + "step": 2095000 + }, + { + "epoch": 10.38, + "learning_rate": 4.481097189399682e-05, + "loss": 2.4398, + "step": 2095500 + }, + { + "epoch": 10.38, + "learning_rate": 4.480973330757074e-05, + "loss": 2.4252, + "step": 2096000 + }, + { + "epoch": 10.39, + "learning_rate": 4.4808494721144654e-05, + "loss": 2.4061, + "step": 2096500 + }, + { + "epoch": 10.39, + "learning_rate": 4.480725613471857e-05, + "loss": 2.4416, + "step": 2097000 + }, + { + "epoch": 10.39, + "learning_rate": 4.480601754829249e-05, + "loss": 2.4346, + "step": 2097500 + }, + { + "epoch": 10.39, + "learning_rate": 4.4804778961866405e-05, + "loss": 2.4364, + "step": 2098000 + }, + { + "epoch": 10.4, + "learning_rate": 4.4803540375440315e-05, + "loss": 2.4197, + "step": 2098500 + }, + { + "epoch": 10.4, + "learning_rate": 4.480230178901423e-05, + "loss": 2.423, + "step": 2099000 + }, + { + "epoch": 10.4, + "learning_rate": 4.480106320258815e-05, + "loss": 2.4381, + "step": 2099500 + }, + { + "epoch": 10.4, + "learning_rate": 4.4799824616162066e-05, + "loss": 2.4665, + "step": 2100000 + }, + { + "epoch": 10.41, + "learning_rate": 4.479858602973598e-05, + "loss": 2.4273, + "step": 2100500 + }, + { + "epoch": 10.41, + "learning_rate": 4.479734992048276e-05, + "loss": 2.4372, + "step": 2101000 + }, + { + "epoch": 10.41, + "learning_rate": 4.479611133405667e-05, + "loss": 2.429, + "step": 2101500 + }, + { + "epoch": 10.41, + "learning_rate": 4.4794872747630585e-05, + "loss": 2.4399, + "step": 2102000 + }, + { + "epoch": 10.42, + "learning_rate": 4.47936341612045e-05, + "loss": 2.4392, + "step": 2102500 + }, + { + "epoch": 10.42, + "learning_rate": 4.479239805195127e-05, + "loss": 2.4241, + "step": 2103000 + }, + { + "epoch": 10.42, + "learning_rate": 4.479115946552519e-05, + "loss": 2.4679, + "step": 2103500 + }, + { + "epoch": 10.42, + "learning_rate": 4.4789920879099105e-05, + "loss": 2.4386, + "step": 2104000 + }, + { + "epoch": 10.43, + "learning_rate": 4.4788682292673015e-05, + "loss": 2.4394, + "step": 2104500 + }, + { + "epoch": 10.43, + "learning_rate": 4.478744370624693e-05, + "loss": 2.4168, + "step": 2105000 + }, + { + "epoch": 10.43, + "learning_rate": 4.478620511982085e-05, + "loss": 2.4562, + "step": 2105500 + }, + { + "epoch": 10.43, + "learning_rate": 4.4784969010567625e-05, + "loss": 2.4321, + "step": 2106000 + }, + { + "epoch": 10.44, + "learning_rate": 4.478373042414154e-05, + "loss": 2.4339, + "step": 2106500 + }, + { + "epoch": 10.44, + "learning_rate": 4.478249183771546e-05, + "loss": 2.4364, + "step": 2107000 + }, + { + "epoch": 10.44, + "learning_rate": 4.478125325128937e-05, + "loss": 2.3967, + "step": 2107500 + }, + { + "epoch": 10.44, + "learning_rate": 4.4780014664863286e-05, + "loss": 2.4516, + "step": 2108000 + }, + { + "epoch": 10.45, + "learning_rate": 4.47787760784372e-05, + "loss": 2.4217, + "step": 2108500 + }, + { + "epoch": 10.45, + "learning_rate": 4.477753749201112e-05, + "loss": 2.418, + "step": 2109000 + }, + { + "epoch": 10.45, + "learning_rate": 4.4776298905585036e-05, + "loss": 2.4375, + "step": 2109500 + }, + { + "epoch": 10.45, + "learning_rate": 4.477506031915895e-05, + "loss": 2.4179, + "step": 2110000 + }, + { + "epoch": 10.46, + "learning_rate": 4.477382173273287e-05, + "loss": 2.4415, + "step": 2110500 + }, + { + "epoch": 10.46, + "learning_rate": 4.477258562347963e-05, + "loss": 2.4335, + "step": 2111000 + }, + { + "epoch": 10.46, + "learning_rate": 4.477134703705355e-05, + "loss": 2.4394, + "step": 2111500 + }, + { + "epoch": 10.46, + "learning_rate": 4.4770108450627466e-05, + "loss": 2.441, + "step": 2112000 + }, + { + "epoch": 10.47, + "learning_rate": 4.476886986420138e-05, + "loss": 2.4271, + "step": 2112500 + }, + { + "epoch": 10.47, + "learning_rate": 4.47676312777753e-05, + "loss": 2.4607, + "step": 2113000 + }, + { + "epoch": 10.47, + "learning_rate": 4.476639269134922e-05, + "loss": 2.4462, + "step": 2113500 + }, + { + "epoch": 10.47, + "learning_rate": 4.4765154104923134e-05, + "loss": 2.4493, + "step": 2114000 + }, + { + "epoch": 10.48, + "learning_rate": 4.47639179956699e-05, + "loss": 2.4344, + "step": 2114500 + }, + { + "epoch": 10.48, + "learning_rate": 4.476267940924382e-05, + "loss": 2.4534, + "step": 2115000 + }, + { + "epoch": 10.48, + "learning_rate": 4.4761440822817736e-05, + "loss": 2.4273, + "step": 2115500 + }, + { + "epoch": 10.48, + "learning_rate": 4.476020223639165e-05, + "loss": 2.4293, + "step": 2116000 + }, + { + "epoch": 10.49, + "learning_rate": 4.475896612713842e-05, + "loss": 2.4547, + "step": 2116500 + }, + { + "epoch": 10.49, + "learning_rate": 4.475773001788519e-05, + "loss": 2.4524, + "step": 2117000 + }, + { + "epoch": 10.49, + "learning_rate": 4.475649143145911e-05, + "loss": 2.4532, + "step": 2117500 + }, + { + "epoch": 10.49, + "learning_rate": 4.4755252845033025e-05, + "loss": 2.4419, + "step": 2118000 + }, + { + "epoch": 10.5, + "learning_rate": 4.475401425860694e-05, + "loss": 2.4467, + "step": 2118500 + }, + { + "epoch": 10.5, + "learning_rate": 4.475277567218086e-05, + "loss": 2.4355, + "step": 2119000 + }, + { + "epoch": 10.5, + "learning_rate": 4.475153956292763e-05, + "loss": 2.4465, + "step": 2119500 + }, + { + "epoch": 10.5, + "learning_rate": 4.4750300976501544e-05, + "loss": 2.4484, + "step": 2120000 + }, + { + "epoch": 10.51, + "learning_rate": 4.474906239007546e-05, + "loss": 2.4718, + "step": 2120500 + }, + { + "epoch": 10.51, + "learning_rate": 4.474782380364937e-05, + "loss": 2.4429, + "step": 2121000 + }, + { + "epoch": 10.51, + "learning_rate": 4.474658521722329e-05, + "loss": 2.461, + "step": 2121500 + }, + { + "epoch": 10.51, + "learning_rate": 4.4745346630797205e-05, + "loss": 2.4328, + "step": 2122000 + }, + { + "epoch": 10.52, + "learning_rate": 4.474410804437112e-05, + "loss": 2.457, + "step": 2122500 + }, + { + "epoch": 10.52, + "learning_rate": 4.474286945794504e-05, + "loss": 2.4409, + "step": 2123000 + }, + { + "epoch": 10.52, + "learning_rate": 4.474163087151895e-05, + "loss": 2.4528, + "step": 2123500 + }, + { + "epoch": 10.52, + "learning_rate": 4.4740392285092866e-05, + "loss": 2.4398, + "step": 2124000 + }, + { + "epoch": 10.53, + "learning_rate": 4.473915617583964e-05, + "loss": 2.419, + "step": 2124500 + }, + { + "epoch": 10.53, + "learning_rate": 4.473791758941356e-05, + "loss": 2.4351, + "step": 2125000 + }, + { + "epoch": 10.53, + "learning_rate": 4.4736679002987476e-05, + "loss": 2.4138, + "step": 2125500 + }, + { + "epoch": 10.53, + "learning_rate": 4.473544041656139e-05, + "loss": 2.4318, + "step": 2126000 + }, + { + "epoch": 10.54, + "learning_rate": 4.47342018301353e-05, + "loss": 2.4301, + "step": 2126500 + }, + { + "epoch": 10.54, + "learning_rate": 4.473296324370922e-05, + "loss": 2.4201, + "step": 2127000 + }, + { + "epoch": 10.54, + "learning_rate": 4.473172465728314e-05, + "loss": 2.439, + "step": 2127500 + }, + { + "epoch": 10.54, + "learning_rate": 4.4730488548029905e-05, + "loss": 2.4401, + "step": 2128000 + }, + { + "epoch": 10.55, + "learning_rate": 4.472924996160382e-05, + "loss": 2.4265, + "step": 2128500 + }, + { + "epoch": 10.55, + "learning_rate": 4.472801137517774e-05, + "loss": 2.4333, + "step": 2129000 + }, + { + "epoch": 10.55, + "learning_rate": 4.4726772788751656e-05, + "loss": 2.4448, + "step": 2129500 + }, + { + "epoch": 10.55, + "learning_rate": 4.4725534202325566e-05, + "loss": 2.4431, + "step": 2130000 + }, + { + "epoch": 10.56, + "learning_rate": 4.472429561589948e-05, + "loss": 2.4097, + "step": 2130500 + }, + { + "epoch": 10.56, + "learning_rate": 4.47230570294734e-05, + "loss": 2.4432, + "step": 2131000 + }, + { + "epoch": 10.56, + "learning_rate": 4.472181844304732e-05, + "loss": 2.4459, + "step": 2131500 + }, + { + "epoch": 10.56, + "learning_rate": 4.472058233379409e-05, + "loss": 2.4461, + "step": 2132000 + }, + { + "epoch": 10.57, + "learning_rate": 4.4719343747368e-05, + "loss": 2.4396, + "step": 2132500 + }, + { + "epoch": 10.57, + "learning_rate": 4.471810763811478e-05, + "loss": 2.4402, + "step": 2133000 + }, + { + "epoch": 10.57, + "learning_rate": 4.4716869051688695e-05, + "loss": 2.4444, + "step": 2133500 + }, + { + "epoch": 10.57, + "learning_rate": 4.471563046526261e-05, + "loss": 2.4261, + "step": 2134000 + }, + { + "epoch": 10.58, + "learning_rate": 4.471439187883652e-05, + "loss": 2.4395, + "step": 2134500 + }, + { + "epoch": 10.58, + "learning_rate": 4.471315329241044e-05, + "loss": 2.4222, + "step": 2135000 + }, + { + "epoch": 10.58, + "learning_rate": 4.4711914705984356e-05, + "loss": 2.4348, + "step": 2135500 + }, + { + "epoch": 10.58, + "learning_rate": 4.4710678596731125e-05, + "loss": 2.4261, + "step": 2136000 + }, + { + "epoch": 10.58, + "learning_rate": 4.470944001030504e-05, + "loss": 2.4462, + "step": 2136500 + }, + { + "epoch": 10.59, + "learning_rate": 4.470820142387896e-05, + "loss": 2.4554, + "step": 2137000 + }, + { + "epoch": 10.59, + "learning_rate": 4.4706962837452876e-05, + "loss": 2.4547, + "step": 2137500 + }, + { + "epoch": 10.59, + "learning_rate": 4.470572425102679e-05, + "loss": 2.4258, + "step": 2138000 + }, + { + "epoch": 10.59, + "learning_rate": 4.470448566460071e-05, + "loss": 2.4368, + "step": 2138500 + }, + { + "epoch": 10.6, + "learning_rate": 4.470324707817462e-05, + "loss": 2.4417, + "step": 2139000 + }, + { + "epoch": 10.6, + "learning_rate": 4.470200849174854e-05, + "loss": 2.4279, + "step": 2139500 + }, + { + "epoch": 10.6, + "learning_rate": 4.470077238249531e-05, + "loss": 2.4251, + "step": 2140000 + }, + { + "epoch": 10.6, + "learning_rate": 4.469953379606923e-05, + "loss": 2.4378, + "step": 2140500 + }, + { + "epoch": 10.61, + "learning_rate": 4.469829520964314e-05, + "loss": 2.4518, + "step": 2141000 + }, + { + "epoch": 10.61, + "learning_rate": 4.4697056623217057e-05, + "loss": 2.4638, + "step": 2141500 + }, + { + "epoch": 10.61, + "learning_rate": 4.4695818036790973e-05, + "loss": 2.4405, + "step": 2142000 + }, + { + "epoch": 10.61, + "learning_rate": 4.469457945036489e-05, + "loss": 2.444, + "step": 2142500 + }, + { + "epoch": 10.62, + "learning_rate": 4.469334334111166e-05, + "loss": 2.4427, + "step": 2143000 + }, + { + "epoch": 10.62, + "learning_rate": 4.4692104754685576e-05, + "loss": 2.4335, + "step": 2143500 + }, + { + "epoch": 10.62, + "learning_rate": 4.469086616825949e-05, + "loss": 2.4229, + "step": 2144000 + }, + { + "epoch": 10.62, + "learning_rate": 4.468962758183341e-05, + "loss": 2.4291, + "step": 2144500 + }, + { + "epoch": 10.63, + "learning_rate": 4.468838899540732e-05, + "loss": 2.4256, + "step": 2145000 + }, + { + "epoch": 10.63, + "learning_rate": 4.4687152886154096e-05, + "loss": 2.4089, + "step": 2145500 + }, + { + "epoch": 10.63, + "learning_rate": 4.468591429972801e-05, + "loss": 2.4351, + "step": 2146000 + }, + { + "epoch": 10.63, + "learning_rate": 4.468467571330193e-05, + "loss": 2.4628, + "step": 2146500 + }, + { + "epoch": 10.64, + "learning_rate": 4.468343960404869e-05, + "loss": 2.4254, + "step": 2147000 + }, + { + "epoch": 10.64, + "learning_rate": 4.468220349479546e-05, + "loss": 2.4318, + "step": 2147500 + }, + { + "epoch": 10.64, + "learning_rate": 4.468096490836938e-05, + "loss": 2.4339, + "step": 2148000 + }, + { + "epoch": 10.64, + "learning_rate": 4.4679726321943294e-05, + "loss": 2.4517, + "step": 2148500 + }, + { + "epoch": 10.65, + "learning_rate": 4.467848773551721e-05, + "loss": 2.4626, + "step": 2149000 + }, + { + "epoch": 10.65, + "learning_rate": 4.467725162626398e-05, + "loss": 2.4349, + "step": 2149500 + }, + { + "epoch": 10.65, + "learning_rate": 4.467601551701075e-05, + "loss": 2.4428, + "step": 2150000 + }, + { + "epoch": 10.65, + "learning_rate": 4.4674776930584666e-05, + "loss": 2.4328, + "step": 2150500 + }, + { + "epoch": 10.66, + "learning_rate": 4.467353834415858e-05, + "loss": 2.4354, + "step": 2151000 + }, + { + "epoch": 10.66, + "learning_rate": 4.46722997577325e-05, + "loss": 2.4413, + "step": 2151500 + }, + { + "epoch": 10.66, + "learning_rate": 4.467106117130641e-05, + "loss": 2.4063, + "step": 2152000 + }, + { + "epoch": 10.66, + "learning_rate": 4.466982258488033e-05, + "loss": 2.4428, + "step": 2152500 + }, + { + "epoch": 10.67, + "learning_rate": 4.4668583998454244e-05, + "loss": 2.4312, + "step": 2153000 + }, + { + "epoch": 10.67, + "learning_rate": 4.466734541202816e-05, + "loss": 2.4444, + "step": 2153500 + }, + { + "epoch": 10.67, + "learning_rate": 4.466610682560208e-05, + "loss": 2.4416, + "step": 2154000 + }, + { + "epoch": 10.67, + "learning_rate": 4.4664868239175994e-05, + "loss": 2.4138, + "step": 2154500 + }, + { + "epoch": 10.68, + "learning_rate": 4.466362965274991e-05, + "loss": 2.4313, + "step": 2155000 + }, + { + "epoch": 10.68, + "learning_rate": 4.466239106632383e-05, + "loss": 2.419, + "step": 2155500 + }, + { + "epoch": 10.68, + "learning_rate": 4.4661152479897745e-05, + "loss": 2.4566, + "step": 2156000 + }, + { + "epoch": 10.68, + "learning_rate": 4.4659916370644514e-05, + "loss": 2.4375, + "step": 2156500 + }, + { + "epoch": 10.69, + "learning_rate": 4.465867778421843e-05, + "loss": 2.4304, + "step": 2157000 + }, + { + "epoch": 10.69, + "learning_rate": 4.465743919779235e-05, + "loss": 2.4378, + "step": 2157500 + }, + { + "epoch": 10.69, + "learning_rate": 4.465620061136626e-05, + "loss": 2.4383, + "step": 2158000 + }, + { + "epoch": 10.69, + "learning_rate": 4.4654962024940175e-05, + "loss": 2.4265, + "step": 2158500 + }, + { + "epoch": 10.7, + "learning_rate": 4.4653725915686944e-05, + "loss": 2.4481, + "step": 2159000 + }, + { + "epoch": 10.7, + "learning_rate": 4.465248732926086e-05, + "loss": 2.4098, + "step": 2159500 + }, + { + "epoch": 10.7, + "learning_rate": 4.4651251220007636e-05, + "loss": 2.4331, + "step": 2160000 + }, + { + "epoch": 10.7, + "learning_rate": 4.465001263358155e-05, + "loss": 2.419, + "step": 2160500 + }, + { + "epoch": 10.71, + "learning_rate": 4.464877404715547e-05, + "loss": 2.43, + "step": 2161000 + }, + { + "epoch": 10.71, + "learning_rate": 4.464753546072938e-05, + "loss": 2.4379, + "step": 2161500 + }, + { + "epoch": 10.71, + "learning_rate": 4.46462968743033e-05, + "loss": 2.4677, + "step": 2162000 + }, + { + "epoch": 10.71, + "learning_rate": 4.4645058287877214e-05, + "loss": 2.431, + "step": 2162500 + }, + { + "epoch": 10.72, + "learning_rate": 4.464381970145113e-05, + "loss": 2.4455, + "step": 2163000 + }, + { + "epoch": 10.72, + "learning_rate": 4.464258111502505e-05, + "loss": 2.4502, + "step": 2163500 + }, + { + "epoch": 10.72, + "learning_rate": 4.4641342528598965e-05, + "loss": 2.4302, + "step": 2164000 + }, + { + "epoch": 10.72, + "learning_rate": 4.464010641934573e-05, + "loss": 2.4198, + "step": 2164500 + }, + { + "epoch": 10.73, + "learning_rate": 4.4638867832919644e-05, + "loss": 2.4332, + "step": 2165000 + }, + { + "epoch": 10.73, + "learning_rate": 4.463762924649356e-05, + "loss": 2.4356, + "step": 2165500 + }, + { + "epoch": 10.73, + "learning_rate": 4.463639066006748e-05, + "loss": 2.422, + "step": 2166000 + }, + { + "epoch": 10.73, + "learning_rate": 4.4635152073641395e-05, + "loss": 2.4193, + "step": 2166500 + }, + { + "epoch": 10.74, + "learning_rate": 4.463391348721531e-05, + "loss": 2.4501, + "step": 2167000 + }, + { + "epoch": 10.74, + "learning_rate": 4.463267490078923e-05, + "loss": 2.4621, + "step": 2167500 + }, + { + "epoch": 10.74, + "learning_rate": 4.4631436314363145e-05, + "loss": 2.4338, + "step": 2168000 + }, + { + "epoch": 10.74, + "learning_rate": 4.4630200205109914e-05, + "loss": 2.4167, + "step": 2168500 + }, + { + "epoch": 10.75, + "learning_rate": 4.462896161868383e-05, + "loss": 2.4113, + "step": 2169000 + }, + { + "epoch": 10.75, + "learning_rate": 4.462772303225775e-05, + "loss": 2.4667, + "step": 2169500 + }, + { + "epoch": 10.75, + "learning_rate": 4.462648692300452e-05, + "loss": 2.4525, + "step": 2170000 + }, + { + "epoch": 10.75, + "learning_rate": 4.4625248336578434e-05, + "loss": 2.4503, + "step": 2170500 + }, + { + "epoch": 10.76, + "learning_rate": 4.4624009750152344e-05, + "loss": 2.4539, + "step": 2171000 + }, + { + "epoch": 10.76, + "learning_rate": 4.462277116372626e-05, + "loss": 2.4368, + "step": 2171500 + }, + { + "epoch": 10.76, + "learning_rate": 4.462153257730018e-05, + "loss": 2.4427, + "step": 2172000 + }, + { + "epoch": 10.76, + "learning_rate": 4.4620293990874095e-05, + "loss": 2.4173, + "step": 2172500 + }, + { + "epoch": 10.77, + "learning_rate": 4.461905788162087e-05, + "loss": 2.4316, + "step": 2173000 + }, + { + "epoch": 10.77, + "learning_rate": 4.461781929519479e-05, + "loss": 2.4271, + "step": 2173500 + }, + { + "epoch": 10.77, + "learning_rate": 4.461658318594155e-05, + "loss": 2.4018, + "step": 2174000 + }, + { + "epoch": 10.77, + "learning_rate": 4.4615344599515466e-05, + "loss": 2.427, + "step": 2174500 + }, + { + "epoch": 10.78, + "learning_rate": 4.461410601308938e-05, + "loss": 2.4359, + "step": 2175000 + }, + { + "epoch": 10.78, + "learning_rate": 4.46128674266633e-05, + "loss": 2.4292, + "step": 2175500 + }, + { + "epoch": 10.78, + "learning_rate": 4.461163131741007e-05, + "loss": 2.4656, + "step": 2176000 + }, + { + "epoch": 10.78, + "learning_rate": 4.4610392730983986e-05, + "loss": 2.4482, + "step": 2176500 + }, + { + "epoch": 10.79, + "learning_rate": 4.46091541445579e-05, + "loss": 2.4331, + "step": 2177000 + }, + { + "epoch": 10.79, + "learning_rate": 4.460791555813182e-05, + "loss": 2.4638, + "step": 2177500 + }, + { + "epoch": 10.79, + "learning_rate": 4.4606676971705737e-05, + "loss": 2.4594, + "step": 2178000 + }, + { + "epoch": 10.79, + "learning_rate": 4.4605438385279654e-05, + "loss": 2.4464, + "step": 2178500 + }, + { + "epoch": 10.8, + "learning_rate": 4.460419979885357e-05, + "loss": 2.4558, + "step": 2179000 + }, + { + "epoch": 10.8, + "learning_rate": 4.460296121242749e-05, + "loss": 2.4394, + "step": 2179500 + }, + { + "epoch": 10.8, + "learning_rate": 4.46017226260014e-05, + "loss": 2.4307, + "step": 2180000 + }, + { + "epoch": 10.8, + "learning_rate": 4.4600484039575314e-05, + "loss": 2.4527, + "step": 2180500 + }, + { + "epoch": 10.81, + "learning_rate": 4.459924545314923e-05, + "loss": 2.4379, + "step": 2181000 + }, + { + "epoch": 10.81, + "learning_rate": 4.459800686672315e-05, + "loss": 2.4372, + "step": 2181500 + }, + { + "epoch": 10.81, + "learning_rate": 4.4596768280297065e-05, + "loss": 2.4373, + "step": 2182000 + }, + { + "epoch": 10.81, + "learning_rate": 4.4595532171043834e-05, + "loss": 2.4486, + "step": 2182500 + }, + { + "epoch": 10.82, + "learning_rate": 4.459429358461775e-05, + "loss": 2.4385, + "step": 2183000 + }, + { + "epoch": 10.82, + "learning_rate": 4.459305747536452e-05, + "loss": 2.4328, + "step": 2183500 + }, + { + "epoch": 10.82, + "learning_rate": 4.459181888893844e-05, + "loss": 2.4374, + "step": 2184000 + }, + { + "epoch": 10.82, + "learning_rate": 4.4590580302512354e-05, + "loss": 2.4451, + "step": 2184500 + }, + { + "epoch": 10.83, + "learning_rate": 4.458934171608627e-05, + "loss": 2.4589, + "step": 2185000 + }, + { + "epoch": 10.83, + "learning_rate": 4.458810312966019e-05, + "loss": 2.451, + "step": 2185500 + }, + { + "epoch": 10.83, + "learning_rate": 4.458686702040695e-05, + "loss": 2.418, + "step": 2186000 + }, + { + "epoch": 10.83, + "learning_rate": 4.4585628433980866e-05, + "loss": 2.4478, + "step": 2186500 + }, + { + "epoch": 10.84, + "learning_rate": 4.4584389847554783e-05, + "loss": 2.4271, + "step": 2187000 + }, + { + "epoch": 10.84, + "learning_rate": 4.45831512611287e-05, + "loss": 2.4425, + "step": 2187500 + }, + { + "epoch": 10.84, + "learning_rate": 4.458191267470262e-05, + "loss": 2.452, + "step": 2188000 + }, + { + "epoch": 10.84, + "learning_rate": 4.4580674088276534e-05, + "loss": 2.4415, + "step": 2188500 + }, + { + "epoch": 10.85, + "learning_rate": 4.457943550185045e-05, + "loss": 2.4458, + "step": 2189000 + }, + { + "epoch": 10.85, + "learning_rate": 4.457819691542436e-05, + "loss": 2.4406, + "step": 2189500 + }, + { + "epoch": 10.85, + "learning_rate": 4.457696080617114e-05, + "loss": 2.4222, + "step": 2190000 + }, + { + "epoch": 10.85, + "learning_rate": 4.4575722219745054e-05, + "loss": 2.458, + "step": 2190500 + }, + { + "epoch": 10.85, + "learning_rate": 4.457448363331897e-05, + "loss": 2.4399, + "step": 2191000 + }, + { + "epoch": 10.86, + "learning_rate": 4.457324504689289e-05, + "loss": 2.4346, + "step": 2191500 + }, + { + "epoch": 10.86, + "learning_rate": 4.4572008937639656e-05, + "loss": 2.4664, + "step": 2192000 + }, + { + "epoch": 10.86, + "learning_rate": 4.4570770351213567e-05, + "loss": 2.4396, + "step": 2192500 + }, + { + "epoch": 10.86, + "learning_rate": 4.4569531764787484e-05, + "loss": 2.4306, + "step": 2193000 + }, + { + "epoch": 10.87, + "learning_rate": 4.45682931783614e-05, + "loss": 2.4317, + "step": 2193500 + }, + { + "epoch": 10.87, + "learning_rate": 4.456705459193532e-05, + "loss": 2.4156, + "step": 2194000 + }, + { + "epoch": 10.87, + "learning_rate": 4.4565816005509234e-05, + "loss": 2.444, + "step": 2194500 + }, + { + "epoch": 10.87, + "learning_rate": 4.456457741908315e-05, + "loss": 2.4486, + "step": 2195000 + }, + { + "epoch": 10.88, + "learning_rate": 4.456333883265707e-05, + "loss": 2.4602, + "step": 2195500 + }, + { + "epoch": 10.88, + "learning_rate": 4.456210024623098e-05, + "loss": 2.4304, + "step": 2196000 + }, + { + "epoch": 10.88, + "learning_rate": 4.4560861659804895e-05, + "loss": 2.4429, + "step": 2196500 + }, + { + "epoch": 10.88, + "learning_rate": 4.455962307337881e-05, + "loss": 2.4199, + "step": 2197000 + }, + { + "epoch": 10.89, + "learning_rate": 4.455838448695273e-05, + "loss": 2.4348, + "step": 2197500 + }, + { + "epoch": 10.89, + "learning_rate": 4.4557145900526646e-05, + "loss": 2.4346, + "step": 2198000 + }, + { + "epoch": 10.89, + "learning_rate": 4.455590979127342e-05, + "loss": 2.4507, + "step": 2198500 + }, + { + "epoch": 10.89, + "learning_rate": 4.455467120484733e-05, + "loss": 2.443, + "step": 2199000 + }, + { + "epoch": 10.9, + "learning_rate": 4.455343261842125e-05, + "loss": 2.4219, + "step": 2199500 + }, + { + "epoch": 10.9, + "learning_rate": 4.455219650916802e-05, + "loss": 2.4231, + "step": 2200000 + }, + { + "epoch": 10.9, + "learning_rate": 4.4550957922741934e-05, + "loss": 2.4443, + "step": 2200500 + }, + { + "epoch": 10.9, + "learning_rate": 4.454971933631585e-05, + "loss": 2.4357, + "step": 2201000 + }, + { + "epoch": 10.91, + "learning_rate": 4.454848570423547e-05, + "loss": 2.4494, + "step": 2201500 + }, + { + "epoch": 10.91, + "learning_rate": 4.454724711780939e-05, + "loss": 2.4197, + "step": 2202000 + }, + { + "epoch": 10.91, + "learning_rate": 4.4546008531383306e-05, + "loss": 2.4512, + "step": 2202500 + }, + { + "epoch": 10.91, + "learning_rate": 4.454476994495722e-05, + "loss": 2.445, + "step": 2203000 + }, + { + "epoch": 10.92, + "learning_rate": 4.454353135853114e-05, + "loss": 2.449, + "step": 2203500 + }, + { + "epoch": 10.92, + "learning_rate": 4.45422952492779e-05, + "loss": 2.4383, + "step": 2204000 + }, + { + "epoch": 10.92, + "learning_rate": 4.454105666285182e-05, + "loss": 2.4595, + "step": 2204500 + }, + { + "epoch": 10.92, + "learning_rate": 4.4539818076425736e-05, + "loss": 2.4192, + "step": 2205000 + }, + { + "epoch": 10.93, + "learning_rate": 4.453857948999965e-05, + "loss": 2.4381, + "step": 2205500 + }, + { + "epoch": 10.93, + "learning_rate": 4.453734090357357e-05, + "loss": 2.429, + "step": 2206000 + }, + { + "epoch": 10.93, + "learning_rate": 4.453610479432034e-05, + "loss": 2.4534, + "step": 2206500 + }, + { + "epoch": 10.93, + "learning_rate": 4.4534866207894255e-05, + "loss": 2.4423, + "step": 2207000 + }, + { + "epoch": 10.94, + "learning_rate": 4.453362762146817e-05, + "loss": 2.4272, + "step": 2207500 + }, + { + "epoch": 10.94, + "learning_rate": 4.453239151221494e-05, + "loss": 2.4552, + "step": 2208000 + }, + { + "epoch": 10.94, + "learning_rate": 4.453115292578886e-05, + "loss": 2.4172, + "step": 2208500 + }, + { + "epoch": 10.94, + "learning_rate": 4.4529914339362775e-05, + "loss": 2.4602, + "step": 2209000 + }, + { + "epoch": 10.95, + "learning_rate": 4.4528675752936685e-05, + "loss": 2.4529, + "step": 2209500 + }, + { + "epoch": 10.95, + "learning_rate": 4.45274371665106e-05, + "loss": 2.4541, + "step": 2210000 + }, + { + "epoch": 10.95, + "learning_rate": 4.452619858008452e-05, + "loss": 2.4447, + "step": 2210500 + }, + { + "epoch": 10.95, + "learning_rate": 4.4524959993658436e-05, + "loss": 2.4507, + "step": 2211000 + }, + { + "epoch": 10.96, + "learning_rate": 4.452372140723235e-05, + "loss": 2.4288, + "step": 2211500 + }, + { + "epoch": 10.96, + "learning_rate": 4.452248282080627e-05, + "loss": 2.4157, + "step": 2212000 + }, + { + "epoch": 10.96, + "learning_rate": 4.4521244234380187e-05, + "loss": 2.4446, + "step": 2212500 + }, + { + "epoch": 10.96, + "learning_rate": 4.4520005647954103e-05, + "loss": 2.4222, + "step": 2213000 + }, + { + "epoch": 10.97, + "learning_rate": 4.451876953870087e-05, + "loss": 2.4498, + "step": 2213500 + }, + { + "epoch": 10.97, + "learning_rate": 4.451753095227479e-05, + "loss": 2.4253, + "step": 2214000 + }, + { + "epoch": 10.97, + "learning_rate": 4.4516292365848706e-05, + "loss": 2.4469, + "step": 2214500 + }, + { + "epoch": 10.97, + "learning_rate": 4.451505377942262e-05, + "loss": 2.4336, + "step": 2215000 + }, + { + "epoch": 10.98, + "learning_rate": 4.451381519299654e-05, + "loss": 2.4368, + "step": 2215500 + }, + { + "epoch": 10.98, + "learning_rate": 4.451257908374331e-05, + "loss": 2.4251, + "step": 2216000 + }, + { + "epoch": 10.98, + "learning_rate": 4.451134297449008e-05, + "loss": 2.4311, + "step": 2216500 + }, + { + "epoch": 10.98, + "learning_rate": 4.4510104388063995e-05, + "loss": 2.4489, + "step": 2217000 + }, + { + "epoch": 10.99, + "learning_rate": 4.450886580163791e-05, + "loss": 2.4204, + "step": 2217500 + }, + { + "epoch": 10.99, + "learning_rate": 4.450762721521183e-05, + "loss": 2.4315, + "step": 2218000 + }, + { + "epoch": 10.99, + "learning_rate": 4.450638862878574e-05, + "loss": 2.4432, + "step": 2218500 + }, + { + "epoch": 10.99, + "learning_rate": 4.4505150042359655e-05, + "loss": 2.4581, + "step": 2219000 + }, + { + "epoch": 11.0, + "learning_rate": 4.450391145593357e-05, + "loss": 2.4339, + "step": 2219500 + }, + { + "epoch": 11.0, + "learning_rate": 4.450267286950749e-05, + "loss": 2.4147, + "step": 2220000 + }, + { + "epoch": 11.0, + "eval_accuracy": 0.6435940740781358, + "eval_accuracy_mlm": 0.596974884280312, + "eval_accuracy_nsp": 0.863742797861617, + "eval_loss": 2.406216859817505, + "eval_runtime": 146.12, + "eval_samples_per_second": 1744.86, + "eval_steps_per_second": 72.707, + "step": 2220273 + }, + { + "epoch": 11.0, + "learning_rate": 4.4501434283081406e-05, + "loss": 2.4115, + "step": 2220500 + }, + { + "epoch": 11.0, + "learning_rate": 4.450019569665532e-05, + "loss": 2.3987, + "step": 2221000 + }, + { + "epoch": 11.01, + "learning_rate": 4.449895958740209e-05, + "loss": 2.392, + "step": 2221500 + }, + { + "epoch": 11.01, + "learning_rate": 4.449772100097601e-05, + "loss": 2.3835, + "step": 2222000 + }, + { + "epoch": 11.01, + "learning_rate": 4.4496482414549926e-05, + "loss": 2.4125, + "step": 2222500 + }, + { + "epoch": 11.01, + "learning_rate": 4.4495243828123836e-05, + "loss": 2.4218, + "step": 2223000 + }, + { + "epoch": 11.02, + "learning_rate": 4.449400524169775e-05, + "loss": 2.4222, + "step": 2223500 + }, + { + "epoch": 11.02, + "learning_rate": 4.449276665527167e-05, + "loss": 2.3963, + "step": 2224000 + }, + { + "epoch": 11.02, + "learning_rate": 4.449152806884559e-05, + "loss": 2.4003, + "step": 2224500 + }, + { + "epoch": 11.02, + "learning_rate": 4.4490289482419504e-05, + "loss": 2.4131, + "step": 2225000 + }, + { + "epoch": 11.03, + "learning_rate": 4.448905089599342e-05, + "loss": 2.426, + "step": 2225500 + }, + { + "epoch": 11.03, + "learning_rate": 4.448781230956734e-05, + "loss": 2.4027, + "step": 2226000 + }, + { + "epoch": 11.03, + "learning_rate": 4.4486573723141255e-05, + "loss": 2.4331, + "step": 2226500 + }, + { + "epoch": 11.03, + "learning_rate": 4.448533761388802e-05, + "loss": 2.4009, + "step": 2227000 + }, + { + "epoch": 11.04, + "learning_rate": 4.448409902746194e-05, + "loss": 2.4009, + "step": 2227500 + }, + { + "epoch": 11.04, + "learning_rate": 4.448286044103586e-05, + "loss": 2.4034, + "step": 2228000 + }, + { + "epoch": 11.04, + "learning_rate": 4.4481621854609774e-05, + "loss": 2.4055, + "step": 2228500 + }, + { + "epoch": 11.04, + "learning_rate": 4.448038326818369e-05, + "loss": 2.3944, + "step": 2229000 + }, + { + "epoch": 11.05, + "learning_rate": 4.447914468175761e-05, + "loss": 2.4153, + "step": 2229500 + }, + { + "epoch": 11.05, + "learning_rate": 4.4477906095331525e-05, + "loss": 2.4075, + "step": 2230000 + }, + { + "epoch": 11.05, + "learning_rate": 4.447666750890544e-05, + "loss": 2.4052, + "step": 2230500 + }, + { + "epoch": 11.05, + "learning_rate": 4.447542892247936e-05, + "loss": 2.4021, + "step": 2231000 + }, + { + "epoch": 11.06, + "learning_rate": 4.447419281322612e-05, + "loss": 2.3982, + "step": 2231500 + }, + { + "epoch": 11.06, + "learning_rate": 4.447295422680004e-05, + "loss": 2.4161, + "step": 2232000 + }, + { + "epoch": 11.06, + "learning_rate": 4.4471715640373955e-05, + "loss": 2.4319, + "step": 2232500 + }, + { + "epoch": 11.06, + "learning_rate": 4.447047705394787e-05, + "loss": 2.4021, + "step": 2233000 + }, + { + "epoch": 11.07, + "learning_rate": 4.446923846752179e-05, + "loss": 2.4029, + "step": 2233500 + }, + { + "epoch": 11.07, + "learning_rate": 4.4467999881095705e-05, + "loss": 2.4136, + "step": 2234000 + }, + { + "epoch": 11.07, + "learning_rate": 4.446676129466962e-05, + "loss": 2.4027, + "step": 2234500 + }, + { + "epoch": 11.07, + "learning_rate": 4.446552518541639e-05, + "loss": 2.4079, + "step": 2235000 + }, + { + "epoch": 11.08, + "learning_rate": 4.446428659899031e-05, + "loss": 2.4259, + "step": 2235500 + }, + { + "epoch": 11.08, + "learning_rate": 4.4463048012564225e-05, + "loss": 2.4226, + "step": 2236000 + }, + { + "epoch": 11.08, + "learning_rate": 4.446180942613814e-05, + "loss": 2.4198, + "step": 2236500 + }, + { + "epoch": 11.08, + "learning_rate": 4.446057083971206e-05, + "loss": 2.3826, + "step": 2237000 + }, + { + "epoch": 11.09, + "learning_rate": 4.4459332253285976e-05, + "loss": 2.4163, + "step": 2237500 + }, + { + "epoch": 11.09, + "learning_rate": 4.445809366685989e-05, + "loss": 2.403, + "step": 2238000 + }, + { + "epoch": 11.09, + "learning_rate": 4.445685508043381e-05, + "loss": 2.4073, + "step": 2238500 + }, + { + "epoch": 11.09, + "learning_rate": 4.445561897118057e-05, + "loss": 2.4129, + "step": 2239000 + }, + { + "epoch": 11.1, + "learning_rate": 4.445438038475449e-05, + "loss": 2.4009, + "step": 2239500 + }, + { + "epoch": 11.1, + "learning_rate": 4.4453141798328406e-05, + "loss": 2.4227, + "step": 2240000 + }, + { + "epoch": 11.1, + "learning_rate": 4.445190321190232e-05, + "loss": 2.4006, + "step": 2240500 + }, + { + "epoch": 11.1, + "learning_rate": 4.445066710264909e-05, + "loss": 2.3994, + "step": 2241000 + }, + { + "epoch": 11.11, + "learning_rate": 4.444942851622301e-05, + "loss": 2.4206, + "step": 2241500 + }, + { + "epoch": 11.11, + "learning_rate": 4.4448189929796925e-05, + "loss": 2.4191, + "step": 2242000 + }, + { + "epoch": 11.11, + "learning_rate": 4.4446953820543694e-05, + "loss": 2.3991, + "step": 2242500 + }, + { + "epoch": 11.11, + "learning_rate": 4.444571523411761e-05, + "loss": 2.4203, + "step": 2243000 + }, + { + "epoch": 11.12, + "learning_rate": 4.444447664769152e-05, + "loss": 2.4075, + "step": 2243500 + }, + { + "epoch": 11.12, + "learning_rate": 4.444323806126544e-05, + "loss": 2.4173, + "step": 2244000 + }, + { + "epoch": 11.12, + "learning_rate": 4.4441999474839355e-05, + "loss": 2.4242, + "step": 2244500 + }, + { + "epoch": 11.12, + "learning_rate": 4.444076088841327e-05, + "loss": 2.4012, + "step": 2245000 + }, + { + "epoch": 11.12, + "learning_rate": 4.443952230198719e-05, + "loss": 2.4417, + "step": 2245500 + }, + { + "epoch": 11.13, + "learning_rate": 4.4438283715561106e-05, + "loss": 2.3909, + "step": 2246000 + }, + { + "epoch": 11.13, + "learning_rate": 4.443704512913502e-05, + "loss": 2.4142, + "step": 2246500 + }, + { + "epoch": 11.13, + "learning_rate": 4.443580901988179e-05, + "loss": 2.4145, + "step": 2247000 + }, + { + "epoch": 11.13, + "learning_rate": 4.443457043345571e-05, + "loss": 2.4039, + "step": 2247500 + }, + { + "epoch": 11.14, + "learning_rate": 4.4433331847029625e-05, + "loss": 2.4166, + "step": 2248000 + }, + { + "epoch": 11.14, + "learning_rate": 4.443209326060354e-05, + "loss": 2.4373, + "step": 2248500 + }, + { + "epoch": 11.14, + "learning_rate": 4.443085467417746e-05, + "loss": 2.3937, + "step": 2249000 + }, + { + "epoch": 11.14, + "learning_rate": 4.4429616087751376e-05, + "loss": 2.4121, + "step": 2249500 + }, + { + "epoch": 11.15, + "learning_rate": 4.442837997849814e-05, + "loss": 2.4216, + "step": 2250000 + }, + { + "epoch": 11.15, + "learning_rate": 4.4427141392072055e-05, + "loss": 2.4062, + "step": 2250500 + }, + { + "epoch": 11.15, + "learning_rate": 4.442590280564597e-05, + "loss": 2.3873, + "step": 2251000 + }, + { + "epoch": 11.15, + "learning_rate": 4.442466421921989e-05, + "loss": 2.42, + "step": 2251500 + }, + { + "epoch": 11.16, + "learning_rate": 4.4423425632793806e-05, + "loss": 2.3876, + "step": 2252000 + }, + { + "epoch": 11.16, + "learning_rate": 4.442218704636772e-05, + "loss": 2.4366, + "step": 2252500 + }, + { + "epoch": 11.16, + "learning_rate": 4.442095093711449e-05, + "loss": 2.4273, + "step": 2253000 + }, + { + "epoch": 11.16, + "learning_rate": 4.441971235068841e-05, + "loss": 2.4109, + "step": 2253500 + }, + { + "epoch": 11.17, + "learning_rate": 4.4418473764262325e-05, + "loss": 2.4171, + "step": 2254000 + }, + { + "epoch": 11.17, + "learning_rate": 4.441723517783624e-05, + "loss": 2.4281, + "step": 2254500 + }, + { + "epoch": 11.17, + "learning_rate": 4.441599659141016e-05, + "loss": 2.4044, + "step": 2255000 + }, + { + "epoch": 11.17, + "learning_rate": 4.441476048215693e-05, + "loss": 2.3969, + "step": 2255500 + }, + { + "epoch": 11.18, + "learning_rate": 4.4413521895730845e-05, + "loss": 2.4335, + "step": 2256000 + }, + { + "epoch": 11.18, + "learning_rate": 4.441228330930476e-05, + "loss": 2.4196, + "step": 2256500 + }, + { + "epoch": 11.18, + "learning_rate": 4.441104472287867e-05, + "loss": 2.4018, + "step": 2257000 + }, + { + "epoch": 11.18, + "learning_rate": 4.440980613645259e-05, + "loss": 2.4158, + "step": 2257500 + }, + { + "epoch": 11.19, + "learning_rate": 4.4408567550026506e-05, + "loss": 2.4362, + "step": 2258000 + }, + { + "epoch": 11.19, + "learning_rate": 4.4407331440773275e-05, + "loss": 2.4383, + "step": 2258500 + }, + { + "epoch": 11.19, + "learning_rate": 4.440609285434719e-05, + "loss": 2.4255, + "step": 2259000 + }, + { + "epoch": 11.19, + "learning_rate": 4.440485426792111e-05, + "loss": 2.4434, + "step": 2259500 + }, + { + "epoch": 11.2, + "learning_rate": 4.4403615681495026e-05, + "loss": 2.4004, + "step": 2260000 + }, + { + "epoch": 11.2, + "learning_rate": 4.440237709506894e-05, + "loss": 2.4139, + "step": 2260500 + }, + { + "epoch": 11.2, + "learning_rate": 4.440113850864286e-05, + "loss": 2.4226, + "step": 2261000 + }, + { + "epoch": 11.2, + "learning_rate": 4.439990239938963e-05, + "loss": 2.4301, + "step": 2261500 + }, + { + "epoch": 11.21, + "learning_rate": 4.4398663812963545e-05, + "loss": 2.4222, + "step": 2262000 + }, + { + "epoch": 11.21, + "learning_rate": 4.439742522653746e-05, + "loss": 2.4225, + "step": 2262500 + }, + { + "epoch": 11.21, + "learning_rate": 4.439618664011138e-05, + "loss": 2.4212, + "step": 2263000 + }, + { + "epoch": 11.21, + "learning_rate": 4.439494805368529e-05, + "loss": 2.4104, + "step": 2263500 + }, + { + "epoch": 11.22, + "learning_rate": 4.439371194443206e-05, + "loss": 2.4221, + "step": 2264000 + }, + { + "epoch": 11.22, + "learning_rate": 4.4392473358005975e-05, + "loss": 2.4218, + "step": 2264500 + }, + { + "epoch": 11.22, + "learning_rate": 4.439123477157989e-05, + "loss": 2.4348, + "step": 2265000 + }, + { + "epoch": 11.22, + "learning_rate": 4.438999618515381e-05, + "loss": 2.4062, + "step": 2265500 + }, + { + "epoch": 11.23, + "learning_rate": 4.4388757598727726e-05, + "loss": 2.4061, + "step": 2266000 + }, + { + "epoch": 11.23, + "learning_rate": 4.438751901230164e-05, + "loss": 2.4101, + "step": 2266500 + }, + { + "epoch": 11.23, + "learning_rate": 4.438628290304841e-05, + "loss": 2.4078, + "step": 2267000 + }, + { + "epoch": 11.23, + "learning_rate": 4.438504431662233e-05, + "loss": 2.4107, + "step": 2267500 + }, + { + "epoch": 11.24, + "learning_rate": 4.4383805730196245e-05, + "loss": 2.412, + "step": 2268000 + }, + { + "epoch": 11.24, + "learning_rate": 4.438256714377016e-05, + "loss": 2.4134, + "step": 2268500 + }, + { + "epoch": 11.24, + "learning_rate": 4.4381331034516924e-05, + "loss": 2.4223, + "step": 2269000 + }, + { + "epoch": 11.24, + "learning_rate": 4.438009244809084e-05, + "loss": 2.4328, + "step": 2269500 + }, + { + "epoch": 11.25, + "learning_rate": 4.437885386166476e-05, + "loss": 2.4224, + "step": 2270000 + }, + { + "epoch": 11.25, + "learning_rate": 4.4377615275238675e-05, + "loss": 2.4191, + "step": 2270500 + }, + { + "epoch": 11.25, + "learning_rate": 4.437637916598545e-05, + "loss": 2.4188, + "step": 2271000 + }, + { + "epoch": 11.25, + "learning_rate": 4.437514057955936e-05, + "loss": 2.4173, + "step": 2271500 + }, + { + "epoch": 11.26, + "learning_rate": 4.437390199313328e-05, + "loss": 2.3998, + "step": 2272000 + }, + { + "epoch": 11.26, + "learning_rate": 4.4372663406707195e-05, + "loss": 2.4385, + "step": 2272500 + }, + { + "epoch": 11.26, + "learning_rate": 4.437142482028111e-05, + "loss": 2.3898, + "step": 2273000 + }, + { + "epoch": 11.26, + "learning_rate": 4.437018623385503e-05, + "loss": 2.4457, + "step": 2273500 + }, + { + "epoch": 11.27, + "learning_rate": 4.4368947647428945e-05, + "loss": 2.4537, + "step": 2274000 + }, + { + "epoch": 11.27, + "learning_rate": 4.436770906100286e-05, + "loss": 2.4273, + "step": 2274500 + }, + { + "epoch": 11.27, + "learning_rate": 4.436647047457678e-05, + "loss": 2.4318, + "step": 2275000 + }, + { + "epoch": 11.27, + "learning_rate": 4.4365231888150696e-05, + "loss": 2.4194, + "step": 2275500 + }, + { + "epoch": 11.28, + "learning_rate": 4.436399330172461e-05, + "loss": 2.4104, + "step": 2276000 + }, + { + "epoch": 11.28, + "learning_rate": 4.4362757192471375e-05, + "loss": 2.4375, + "step": 2276500 + }, + { + "epoch": 11.28, + "learning_rate": 4.436152108321815e-05, + "loss": 2.4367, + "step": 2277000 + }, + { + "epoch": 11.28, + "learning_rate": 4.436028249679207e-05, + "loss": 2.4193, + "step": 2277500 + }, + { + "epoch": 11.29, + "learning_rate": 4.435904638753883e-05, + "loss": 2.4272, + "step": 2278000 + }, + { + "epoch": 11.29, + "learning_rate": 4.4357807801112747e-05, + "loss": 2.441, + "step": 2278500 + }, + { + "epoch": 11.29, + "learning_rate": 4.4356569214686664e-05, + "loss": 2.4035, + "step": 2279000 + }, + { + "epoch": 11.29, + "learning_rate": 4.435533062826058e-05, + "loss": 2.4012, + "step": 2279500 + }, + { + "epoch": 11.3, + "learning_rate": 4.43540920418345e-05, + "loss": 2.4267, + "step": 2280000 + }, + { + "epoch": 11.3, + "learning_rate": 4.4352853455408414e-05, + "loss": 2.4075, + "step": 2280500 + }, + { + "epoch": 11.3, + "learning_rate": 4.4351614868982324e-05, + "loss": 2.4112, + "step": 2281000 + }, + { + "epoch": 11.3, + "learning_rate": 4.435037628255624e-05, + "loss": 2.4298, + "step": 2281500 + }, + { + "epoch": 11.31, + "learning_rate": 4.434913769613016e-05, + "loss": 2.4208, + "step": 2282000 + }, + { + "epoch": 11.31, + "learning_rate": 4.4347899109704075e-05, + "loss": 2.4449, + "step": 2282500 + }, + { + "epoch": 11.31, + "learning_rate": 4.434666052327799e-05, + "loss": 2.4218, + "step": 2283000 + }, + { + "epoch": 11.31, + "learning_rate": 4.434542193685191e-05, + "loss": 2.4302, + "step": 2283500 + }, + { + "epoch": 11.32, + "learning_rate": 4.4344183350425826e-05, + "loss": 2.4257, + "step": 2284000 + }, + { + "epoch": 11.32, + "learning_rate": 4.4342947241172595e-05, + "loss": 2.4383, + "step": 2284500 + }, + { + "epoch": 11.32, + "learning_rate": 4.434170865474651e-05, + "loss": 2.3936, + "step": 2285000 + }, + { + "epoch": 11.32, + "learning_rate": 4.434047254549328e-05, + "loss": 2.4184, + "step": 2285500 + }, + { + "epoch": 11.33, + "learning_rate": 4.43392339590672e-05, + "loss": 2.413, + "step": 2286000 + }, + { + "epoch": 11.33, + "learning_rate": 4.4337995372641114e-05, + "loss": 2.4455, + "step": 2286500 + }, + { + "epoch": 11.33, + "learning_rate": 4.4336756786215025e-05, + "loss": 2.4197, + "step": 2287000 + }, + { + "epoch": 11.33, + "learning_rate": 4.433551819978894e-05, + "loss": 2.4045, + "step": 2287500 + }, + { + "epoch": 11.34, + "learning_rate": 4.433427961336286e-05, + "loss": 2.4081, + "step": 2288000 + }, + { + "epoch": 11.34, + "learning_rate": 4.4333041026936775e-05, + "loss": 2.4123, + "step": 2288500 + }, + { + "epoch": 11.34, + "learning_rate": 4.433180244051069e-05, + "loss": 2.4212, + "step": 2289000 + }, + { + "epoch": 11.34, + "learning_rate": 4.433056633125747e-05, + "loss": 2.4364, + "step": 2289500 + }, + { + "epoch": 11.35, + "learning_rate": 4.4329327744831385e-05, + "loss": 2.4244, + "step": 2290000 + }, + { + "epoch": 11.35, + "learning_rate": 4.4328091635578154e-05, + "loss": 2.439, + "step": 2290500 + }, + { + "epoch": 11.35, + "learning_rate": 4.432685304915207e-05, + "loss": 2.4081, + "step": 2291000 + }, + { + "epoch": 11.35, + "learning_rate": 4.432561446272598e-05, + "loss": 2.4084, + "step": 2291500 + }, + { + "epoch": 11.36, + "learning_rate": 4.43243758762999e-05, + "loss": 2.4155, + "step": 2292000 + }, + { + "epoch": 11.36, + "learning_rate": 4.4323137289873815e-05, + "loss": 2.4206, + "step": 2292500 + }, + { + "epoch": 11.36, + "learning_rate": 4.432189870344773e-05, + "loss": 2.4201, + "step": 2293000 + }, + { + "epoch": 11.36, + "learning_rate": 4.43206625941945e-05, + "loss": 2.4092, + "step": 2293500 + }, + { + "epoch": 11.37, + "learning_rate": 4.431942400776842e-05, + "loss": 2.4223, + "step": 2294000 + }, + { + "epoch": 11.37, + "learning_rate": 4.4318185421342334e-05, + "loss": 2.4183, + "step": 2294500 + }, + { + "epoch": 11.37, + "learning_rate": 4.431694683491625e-05, + "loss": 2.424, + "step": 2295000 + }, + { + "epoch": 11.37, + "learning_rate": 4.431570824849017e-05, + "loss": 2.4262, + "step": 2295500 + }, + { + "epoch": 11.38, + "learning_rate": 4.4314469662064085e-05, + "loss": 2.4002, + "step": 2296000 + }, + { + "epoch": 11.38, + "learning_rate": 4.4313231075637995e-05, + "loss": 2.4274, + "step": 2296500 + }, + { + "epoch": 11.38, + "learning_rate": 4.431199496638477e-05, + "loss": 2.4295, + "step": 2297000 + }, + { + "epoch": 11.38, + "learning_rate": 4.431075637995869e-05, + "loss": 2.4161, + "step": 2297500 + }, + { + "epoch": 11.39, + "learning_rate": 4.4309517793532605e-05, + "loss": 2.4199, + "step": 2298000 + }, + { + "epoch": 11.39, + "learning_rate": 4.4308279207106515e-05, + "loss": 2.4241, + "step": 2298500 + }, + { + "epoch": 11.39, + "learning_rate": 4.430704062068043e-05, + "loss": 2.4267, + "step": 2299000 + }, + { + "epoch": 11.39, + "learning_rate": 4.430580203425435e-05, + "loss": 2.4256, + "step": 2299500 + }, + { + "epoch": 11.39, + "learning_rate": 4.4304563447828265e-05, + "loss": 2.4038, + "step": 2300000 + }, + { + "epoch": 11.4, + "learning_rate": 4.4303324861402176e-05, + "loss": 2.419, + "step": 2300500 + }, + { + "epoch": 11.4, + "learning_rate": 4.430208627497609e-05, + "loss": 2.4068, + "step": 2301000 + }, + { + "epoch": 11.4, + "learning_rate": 4.430085016572287e-05, + "loss": 2.4023, + "step": 2301500 + }, + { + "epoch": 11.4, + "learning_rate": 4.4299611579296785e-05, + "loss": 2.4277, + "step": 2302000 + }, + { + "epoch": 11.41, + "learning_rate": 4.42983729928707e-05, + "loss": 2.4221, + "step": 2302500 + }, + { + "epoch": 11.41, + "learning_rate": 4.429713440644461e-05, + "loss": 2.429, + "step": 2303000 + }, + { + "epoch": 11.41, + "learning_rate": 4.429589582001853e-05, + "loss": 2.429, + "step": 2303500 + }, + { + "epoch": 11.41, + "learning_rate": 4.4294657233592446e-05, + "loss": 2.4134, + "step": 2304000 + }, + { + "epoch": 11.42, + "learning_rate": 4.429341864716636e-05, + "loss": 2.4422, + "step": 2304500 + }, + { + "epoch": 11.42, + "learning_rate": 4.429218253791313e-05, + "loss": 2.438, + "step": 2305000 + }, + { + "epoch": 11.42, + "learning_rate": 4.429094395148705e-05, + "loss": 2.4249, + "step": 2305500 + }, + { + "epoch": 11.42, + "learning_rate": 4.428970784223382e-05, + "loss": 2.4145, + "step": 2306000 + }, + { + "epoch": 11.43, + "learning_rate": 4.4288469255807734e-05, + "loss": 2.4202, + "step": 2306500 + }, + { + "epoch": 11.43, + "learning_rate": 4.428723066938165e-05, + "loss": 2.4222, + "step": 2307000 + }, + { + "epoch": 11.43, + "learning_rate": 4.428599208295557e-05, + "loss": 2.4162, + "step": 2307500 + }, + { + "epoch": 11.43, + "learning_rate": 4.428475597370234e-05, + "loss": 2.4277, + "step": 2308000 + }, + { + "epoch": 11.44, + "learning_rate": 4.4283517387276254e-05, + "loss": 2.4503, + "step": 2308500 + }, + { + "epoch": 11.44, + "learning_rate": 4.4282281278023016e-05, + "loss": 2.4174, + "step": 2309000 + }, + { + "epoch": 11.44, + "learning_rate": 4.428104269159693e-05, + "loss": 2.4125, + "step": 2309500 + }, + { + "epoch": 11.44, + "learning_rate": 4.427980410517085e-05, + "loss": 2.4267, + "step": 2310000 + }, + { + "epoch": 11.45, + "learning_rate": 4.427856551874477e-05, + "loss": 2.4294, + "step": 2310500 + }, + { + "epoch": 11.45, + "learning_rate": 4.4277326932318684e-05, + "loss": 2.4163, + "step": 2311000 + }, + { + "epoch": 11.45, + "learning_rate": 4.42760883458926e-05, + "loss": 2.406, + "step": 2311500 + }, + { + "epoch": 11.45, + "learning_rate": 4.427484975946652e-05, + "loss": 2.4035, + "step": 2312000 + }, + { + "epoch": 11.46, + "learning_rate": 4.4273611173040435e-05, + "loss": 2.4194, + "step": 2312500 + }, + { + "epoch": 11.46, + "learning_rate": 4.42723750637872e-05, + "loss": 2.4203, + "step": 2313000 + }, + { + "epoch": 11.46, + "learning_rate": 4.427113647736112e-05, + "loss": 2.4323, + "step": 2313500 + }, + { + "epoch": 11.46, + "learning_rate": 4.426989789093504e-05, + "loss": 2.4078, + "step": 2314000 + }, + { + "epoch": 11.47, + "learning_rate": 4.4268659304508954e-05, + "loss": 2.4321, + "step": 2314500 + }, + { + "epoch": 11.47, + "learning_rate": 4.426742071808287e-05, + "loss": 2.4087, + "step": 2315000 + }, + { + "epoch": 11.47, + "learning_rate": 4.426618213165679e-05, + "loss": 2.4277, + "step": 2315500 + }, + { + "epoch": 11.47, + "learning_rate": 4.4264943545230705e-05, + "loss": 2.41, + "step": 2316000 + }, + { + "epoch": 11.48, + "learning_rate": 4.426370495880462e-05, + "loss": 2.4286, + "step": 2316500 + }, + { + "epoch": 11.48, + "learning_rate": 4.426246637237854e-05, + "loss": 2.4222, + "step": 2317000 + }, + { + "epoch": 11.48, + "learning_rate": 4.4261227785952456e-05, + "loss": 2.4014, + "step": 2317500 + }, + { + "epoch": 11.48, + "learning_rate": 4.425998919952637e-05, + "loss": 2.4204, + "step": 2318000 + }, + { + "epoch": 11.49, + "learning_rate": 4.425875061310028e-05, + "loss": 2.3936, + "step": 2318500 + }, + { + "epoch": 11.49, + "learning_rate": 4.42575120266742e-05, + "loss": 2.4184, + "step": 2319000 + }, + { + "epoch": 11.49, + "learning_rate": 4.425627591742097e-05, + "loss": 2.4393, + "step": 2319500 + }, + { + "epoch": 11.49, + "learning_rate": 4.4255037330994885e-05, + "loss": 2.4238, + "step": 2320000 + }, + { + "epoch": 11.5, + "learning_rate": 4.4253801221741654e-05, + "loss": 2.4157, + "step": 2320500 + }, + { + "epoch": 11.5, + "learning_rate": 4.425256263531557e-05, + "loss": 2.4123, + "step": 2321000 + }, + { + "epoch": 11.5, + "learning_rate": 4.425132404888949e-05, + "loss": 2.4241, + "step": 2321500 + }, + { + "epoch": 11.5, + "learning_rate": 4.425008793963625e-05, + "loss": 2.4004, + "step": 2322000 + }, + { + "epoch": 11.51, + "learning_rate": 4.424884935321017e-05, + "loss": 2.4401, + "step": 2322500 + }, + { + "epoch": 11.51, + "learning_rate": 4.4247610766784084e-05, + "loss": 2.4317, + "step": 2323000 + }, + { + "epoch": 11.51, + "learning_rate": 4.4246372180358e-05, + "loss": 2.4115, + "step": 2323500 + }, + { + "epoch": 11.51, + "learning_rate": 4.424513359393192e-05, + "loss": 2.4012, + "step": 2324000 + }, + { + "epoch": 11.52, + "learning_rate": 4.424389748467869e-05, + "loss": 2.4115, + "step": 2324500 + }, + { + "epoch": 11.52, + "learning_rate": 4.4242658898252604e-05, + "loss": 2.392, + "step": 2325000 + }, + { + "epoch": 11.52, + "learning_rate": 4.424142031182652e-05, + "loss": 2.4307, + "step": 2325500 + }, + { + "epoch": 11.52, + "learning_rate": 4.424018172540044e-05, + "loss": 2.4094, + "step": 2326000 + }, + { + "epoch": 11.53, + "learning_rate": 4.4238943138974354e-05, + "loss": 2.4216, + "step": 2326500 + }, + { + "epoch": 11.53, + "learning_rate": 4.423770455254827e-05, + "loss": 2.4106, + "step": 2327000 + }, + { + "epoch": 11.53, + "learning_rate": 4.423646596612219e-05, + "loss": 2.4049, + "step": 2327500 + }, + { + "epoch": 11.53, + "learning_rate": 4.4235227379696105e-05, + "loss": 2.4113, + "step": 2328000 + }, + { + "epoch": 11.54, + "learning_rate": 4.423398879327002e-05, + "loss": 2.4132, + "step": 2328500 + }, + { + "epoch": 11.54, + "learning_rate": 4.423275020684394e-05, + "loss": 2.4204, + "step": 2329000 + }, + { + "epoch": 11.54, + "learning_rate": 4.42315140975907e-05, + "loss": 2.4299, + "step": 2329500 + }, + { + "epoch": 11.54, + "learning_rate": 4.423027551116462e-05, + "loss": 2.419, + "step": 2330000 + }, + { + "epoch": 11.55, + "learning_rate": 4.4229036924738535e-05, + "loss": 2.4446, + "step": 2330500 + }, + { + "epoch": 11.55, + "learning_rate": 4.422779833831245e-05, + "loss": 2.4427, + "step": 2331000 + }, + { + "epoch": 11.55, + "learning_rate": 4.422655975188637e-05, + "loss": 2.4376, + "step": 2331500 + }, + { + "epoch": 11.55, + "learning_rate": 4.4225321165460286e-05, + "loss": 2.4157, + "step": 2332000 + }, + { + "epoch": 11.56, + "learning_rate": 4.42240825790342e-05, + "loss": 2.4434, + "step": 2332500 + }, + { + "epoch": 11.56, + "learning_rate": 4.422284399260812e-05, + "loss": 2.4163, + "step": 2333000 + }, + { + "epoch": 11.56, + "learning_rate": 4.4221605406182036e-05, + "loss": 2.4218, + "step": 2333500 + }, + { + "epoch": 11.56, + "learning_rate": 4.4220369296928805e-05, + "loss": 2.4099, + "step": 2334000 + }, + { + "epoch": 11.57, + "learning_rate": 4.421913071050272e-05, + "loss": 2.4475, + "step": 2334500 + }, + { + "epoch": 11.57, + "learning_rate": 4.421789460124949e-05, + "loss": 2.4169, + "step": 2335000 + }, + { + "epoch": 11.57, + "learning_rate": 4.42166560148234e-05, + "loss": 2.4357, + "step": 2335500 + }, + { + "epoch": 11.57, + "learning_rate": 4.421541742839732e-05, + "loss": 2.4186, + "step": 2336000 + }, + { + "epoch": 11.58, + "learning_rate": 4.4214178841971235e-05, + "loss": 2.438, + "step": 2336500 + }, + { + "epoch": 11.58, + "learning_rate": 4.421294025554515e-05, + "loss": 2.4189, + "step": 2337000 + }, + { + "epoch": 11.58, + "learning_rate": 4.421170414629192e-05, + "loss": 2.4401, + "step": 2337500 + }, + { + "epoch": 11.58, + "learning_rate": 4.421046555986584e-05, + "loss": 2.4355, + "step": 2338000 + }, + { + "epoch": 11.59, + "learning_rate": 4.4209226973439755e-05, + "loss": 2.4278, + "step": 2338500 + }, + { + "epoch": 11.59, + "learning_rate": 4.420798838701367e-05, + "loss": 2.4345, + "step": 2339000 + }, + { + "epoch": 11.59, + "learning_rate": 4.420674980058759e-05, + "loss": 2.4363, + "step": 2339500 + }, + { + "epoch": 11.59, + "learning_rate": 4.4205511214161505e-05, + "loss": 2.4035, + "step": 2340000 + }, + { + "epoch": 11.6, + "learning_rate": 4.4204275104908274e-05, + "loss": 2.4417, + "step": 2340500 + }, + { + "epoch": 11.6, + "learning_rate": 4.420303651848219e-05, + "loss": 2.4219, + "step": 2341000 + }, + { + "epoch": 11.6, + "learning_rate": 4.420179793205611e-05, + "loss": 2.4465, + "step": 2341500 + }, + { + "epoch": 11.6, + "learning_rate": 4.420056182280287e-05, + "loss": 2.4282, + "step": 2342000 + }, + { + "epoch": 11.61, + "learning_rate": 4.419932323637679e-05, + "loss": 2.4328, + "step": 2342500 + }, + { + "epoch": 11.61, + "learning_rate": 4.4198084649950704e-05, + "loss": 2.4098, + "step": 2343000 + }, + { + "epoch": 11.61, + "learning_rate": 4.419684606352462e-05, + "loss": 2.4214, + "step": 2343500 + }, + { + "epoch": 11.61, + "learning_rate": 4.419560747709854e-05, + "loss": 2.442, + "step": 2344000 + }, + { + "epoch": 11.62, + "learning_rate": 4.4194368890672455e-05, + "loss": 2.4259, + "step": 2344500 + }, + { + "epoch": 11.62, + "learning_rate": 4.419313030424637e-05, + "loss": 2.4322, + "step": 2345000 + }, + { + "epoch": 11.62, + "learning_rate": 4.419189171782029e-05, + "loss": 2.4194, + "step": 2345500 + }, + { + "epoch": 11.62, + "learning_rate": 4.4190653131394206e-05, + "loss": 2.4452, + "step": 2346000 + }, + { + "epoch": 11.63, + "learning_rate": 4.418941454496812e-05, + "loss": 2.4208, + "step": 2346500 + }, + { + "epoch": 11.63, + "learning_rate": 4.418817595854204e-05, + "loss": 2.4285, + "step": 2347000 + }, + { + "epoch": 11.63, + "learning_rate": 4.418693984928881e-05, + "loss": 2.4065, + "step": 2347500 + }, + { + "epoch": 11.63, + "learning_rate": 4.418570374003557e-05, + "loss": 2.42, + "step": 2348000 + }, + { + "epoch": 11.64, + "learning_rate": 4.418446515360949e-05, + "loss": 2.4344, + "step": 2348500 + }, + { + "epoch": 11.64, + "learning_rate": 4.4183226567183404e-05, + "loss": 2.4254, + "step": 2349000 + }, + { + "epoch": 11.64, + "learning_rate": 4.418198798075732e-05, + "loss": 2.4336, + "step": 2349500 + }, + { + "epoch": 11.64, + "learning_rate": 4.418074939433124e-05, + "loss": 2.4124, + "step": 2350000 + }, + { + "epoch": 11.65, + "learning_rate": 4.4179510807905155e-05, + "loss": 2.4268, + "step": 2350500 + }, + { + "epoch": 11.65, + "learning_rate": 4.417827222147907e-05, + "loss": 2.4369, + "step": 2351000 + }, + { + "epoch": 11.65, + "learning_rate": 4.417703363505299e-05, + "loss": 2.4431, + "step": 2351500 + }, + { + "epoch": 11.65, + "learning_rate": 4.4175795048626906e-05, + "loss": 2.4248, + "step": 2352000 + }, + { + "epoch": 11.66, + "learning_rate": 4.417455646220082e-05, + "loss": 2.3927, + "step": 2352500 + }, + { + "epoch": 11.66, + "learning_rate": 4.417331787577474e-05, + "loss": 2.4374, + "step": 2353000 + }, + { + "epoch": 11.66, + "learning_rate": 4.4172079289348656e-05, + "loss": 2.4195, + "step": 2353500 + }, + { + "epoch": 11.66, + "learning_rate": 4.417084070292257e-05, + "loss": 2.4295, + "step": 2354000 + }, + { + "epoch": 11.67, + "learning_rate": 4.416960459366934e-05, + "loss": 2.4114, + "step": 2354500 + }, + { + "epoch": 11.67, + "learning_rate": 4.416836600724326e-05, + "loss": 2.4245, + "step": 2355000 + }, + { + "epoch": 11.67, + "learning_rate": 4.416712742081717e-05, + "loss": 2.4333, + "step": 2355500 + }, + { + "epoch": 11.67, + "learning_rate": 4.416589131156394e-05, + "loss": 2.4191, + "step": 2356000 + }, + { + "epoch": 11.67, + "learning_rate": 4.4164652725137855e-05, + "loss": 2.3961, + "step": 2356500 + }, + { + "epoch": 11.68, + "learning_rate": 4.4163416615884624e-05, + "loss": 2.4087, + "step": 2357000 + }, + { + "epoch": 11.68, + "learning_rate": 4.416217802945854e-05, + "loss": 2.4227, + "step": 2357500 + }, + { + "epoch": 11.68, + "learning_rate": 4.416093944303246e-05, + "loss": 2.4177, + "step": 2358000 + }, + { + "epoch": 11.68, + "learning_rate": 4.4159700856606375e-05, + "loss": 2.4067, + "step": 2358500 + }, + { + "epoch": 11.69, + "learning_rate": 4.415846227018029e-05, + "loss": 2.4215, + "step": 2359000 + }, + { + "epoch": 11.69, + "learning_rate": 4.4157226160927054e-05, + "loss": 2.4338, + "step": 2359500 + }, + { + "epoch": 11.69, + "learning_rate": 4.415598757450097e-05, + "loss": 2.4406, + "step": 2360000 + }, + { + "epoch": 11.69, + "learning_rate": 4.415474898807489e-05, + "loss": 2.4358, + "step": 2360500 + }, + { + "epoch": 11.7, + "learning_rate": 4.4153510401648804e-05, + "loss": 2.4452, + "step": 2361000 + }, + { + "epoch": 11.7, + "learning_rate": 4.415227181522272e-05, + "loss": 2.4173, + "step": 2361500 + }, + { + "epoch": 11.7, + "learning_rate": 4.415103322879664e-05, + "loss": 2.4111, + "step": 2362000 + }, + { + "epoch": 11.7, + "learning_rate": 4.4149794642370555e-05, + "loss": 2.4227, + "step": 2362500 + }, + { + "epoch": 11.71, + "learning_rate": 4.414855605594447e-05, + "loss": 2.4011, + "step": 2363000 + }, + { + "epoch": 11.71, + "learning_rate": 4.414731994669124e-05, + "loss": 2.4455, + "step": 2363500 + }, + { + "epoch": 11.71, + "learning_rate": 4.414608136026516e-05, + "loss": 2.4459, + "step": 2364000 + }, + { + "epoch": 11.71, + "learning_rate": 4.4144842773839075e-05, + "loss": 2.4196, + "step": 2364500 + }, + { + "epoch": 11.72, + "learning_rate": 4.414360418741299e-05, + "loss": 2.4346, + "step": 2365000 + }, + { + "epoch": 11.72, + "learning_rate": 4.414236560098691e-05, + "loss": 2.4307, + "step": 2365500 + }, + { + "epoch": 11.72, + "learning_rate": 4.414113196890653e-05, + "loss": 2.4386, + "step": 2366000 + }, + { + "epoch": 11.72, + "learning_rate": 4.4139893382480446e-05, + "loss": 2.4333, + "step": 2366500 + }, + { + "epoch": 11.73, + "learning_rate": 4.413865479605436e-05, + "loss": 2.4199, + "step": 2367000 + }, + { + "epoch": 11.73, + "learning_rate": 4.413741620962828e-05, + "loss": 2.4048, + "step": 2367500 + }, + { + "epoch": 11.73, + "learning_rate": 4.41361776232022e-05, + "loss": 2.4399, + "step": 2368000 + }, + { + "epoch": 11.73, + "learning_rate": 4.4134939036776114e-05, + "loss": 2.4399, + "step": 2368500 + }, + { + "epoch": 11.74, + "learning_rate": 4.4133700450350024e-05, + "loss": 2.4265, + "step": 2369000 + }, + { + "epoch": 11.74, + "learning_rate": 4.413246186392394e-05, + "loss": 2.4374, + "step": 2369500 + }, + { + "epoch": 11.74, + "learning_rate": 4.413122327749786e-05, + "loss": 2.4099, + "step": 2370000 + }, + { + "epoch": 11.74, + "learning_rate": 4.4129984691071775e-05, + "loss": 2.3965, + "step": 2370500 + }, + { + "epoch": 11.75, + "learning_rate": 4.4128748581818544e-05, + "loss": 2.4322, + "step": 2371000 + }, + { + "epoch": 11.75, + "learning_rate": 4.412750999539246e-05, + "loss": 2.4613, + "step": 2371500 + }, + { + "epoch": 11.75, + "learning_rate": 4.412627140896638e-05, + "loss": 2.4158, + "step": 2372000 + }, + { + "epoch": 11.75, + "learning_rate": 4.412503282254029e-05, + "loss": 2.4332, + "step": 2372500 + }, + { + "epoch": 11.76, + "learning_rate": 4.4123794236114205e-05, + "loss": 2.4335, + "step": 2373000 + }, + { + "epoch": 11.76, + "learning_rate": 4.412255812686098e-05, + "loss": 2.4264, + "step": 2373500 + }, + { + "epoch": 11.76, + "learning_rate": 4.41213195404349e-05, + "loss": 2.3871, + "step": 2374000 + }, + { + "epoch": 11.76, + "learning_rate": 4.4120080954008814e-05, + "loss": 2.4349, + "step": 2374500 + }, + { + "epoch": 11.77, + "learning_rate": 4.411884236758273e-05, + "loss": 2.4322, + "step": 2375000 + }, + { + "epoch": 11.77, + "learning_rate": 4.411760378115664e-05, + "loss": 2.4205, + "step": 2375500 + }, + { + "epoch": 11.77, + "learning_rate": 4.411637014907626e-05, + "loss": 2.427, + "step": 2376000 + }, + { + "epoch": 11.77, + "learning_rate": 4.411513403982303e-05, + "loss": 2.4348, + "step": 2376500 + }, + { + "epoch": 11.78, + "learning_rate": 4.411389545339695e-05, + "loss": 2.4465, + "step": 2377000 + }, + { + "epoch": 11.78, + "learning_rate": 4.4112656866970864e-05, + "loss": 2.4235, + "step": 2377500 + }, + { + "epoch": 11.78, + "learning_rate": 4.411141828054478e-05, + "loss": 2.4328, + "step": 2378000 + }, + { + "epoch": 11.78, + "learning_rate": 4.41101796941187e-05, + "loss": 2.3982, + "step": 2378500 + }, + { + "epoch": 11.79, + "learning_rate": 4.4108941107692615e-05, + "loss": 2.4139, + "step": 2379000 + }, + { + "epoch": 11.79, + "learning_rate": 4.410770252126653e-05, + "loss": 2.4416, + "step": 2379500 + }, + { + "epoch": 11.79, + "learning_rate": 4.410646393484045e-05, + "loss": 2.4149, + "step": 2380000 + }, + { + "epoch": 11.79, + "learning_rate": 4.4105225348414366e-05, + "loss": 2.4093, + "step": 2380500 + }, + { + "epoch": 11.8, + "learning_rate": 4.410398676198828e-05, + "loss": 2.4322, + "step": 2381000 + }, + { + "epoch": 11.8, + "learning_rate": 4.41027481755622e-05, + "loss": 2.4391, + "step": 2381500 + }, + { + "epoch": 11.8, + "learning_rate": 4.410150958913612e-05, + "loss": 2.4264, + "step": 2382000 + }, + { + "epoch": 11.8, + "learning_rate": 4.4100271002710034e-05, + "loss": 2.4302, + "step": 2382500 + }, + { + "epoch": 11.81, + "learning_rate": 4.409903241628395e-05, + "loss": 2.4337, + "step": 2383000 + }, + { + "epoch": 11.81, + "learning_rate": 4.409779630703071e-05, + "loss": 2.404, + "step": 2383500 + }, + { + "epoch": 11.81, + "learning_rate": 4.409655772060463e-05, + "loss": 2.4127, + "step": 2384000 + }, + { + "epoch": 11.81, + "learning_rate": 4.4095319134178547e-05, + "loss": 2.4261, + "step": 2384500 + }, + { + "epoch": 11.82, + "learning_rate": 4.4094080547752463e-05, + "loss": 2.4263, + "step": 2385000 + }, + { + "epoch": 11.82, + "learning_rate": 4.409284196132638e-05, + "loss": 2.4344, + "step": 2385500 + }, + { + "epoch": 11.82, + "learning_rate": 4.4091608329246e-05, + "loss": 2.4243, + "step": 2386000 + }, + { + "epoch": 11.82, + "learning_rate": 4.409036974281992e-05, + "loss": 2.4161, + "step": 2386500 + }, + { + "epoch": 11.83, + "learning_rate": 4.408913115639383e-05, + "loss": 2.4354, + "step": 2387000 + }, + { + "epoch": 11.83, + "learning_rate": 4.4087895047140604e-05, + "loss": 2.398, + "step": 2387500 + }, + { + "epoch": 11.83, + "learning_rate": 4.408665646071452e-05, + "loss": 2.4091, + "step": 2388000 + }, + { + "epoch": 11.83, + "learning_rate": 4.408541787428843e-05, + "loss": 2.4046, + "step": 2388500 + }, + { + "epoch": 11.84, + "learning_rate": 4.408417928786235e-05, + "loss": 2.4116, + "step": 2389000 + }, + { + "epoch": 11.84, + "learning_rate": 4.4082940701436265e-05, + "loss": 2.4345, + "step": 2389500 + }, + { + "epoch": 11.84, + "learning_rate": 4.408170211501018e-05, + "loss": 2.4339, + "step": 2390000 + }, + { + "epoch": 11.84, + "learning_rate": 4.40804635285841e-05, + "loss": 2.4014, + "step": 2390500 + }, + { + "epoch": 11.85, + "learning_rate": 4.4079224942158015e-05, + "loss": 2.4364, + "step": 2391000 + }, + { + "epoch": 11.85, + "learning_rate": 4.407798635573193e-05, + "loss": 2.4408, + "step": 2391500 + }, + { + "epoch": 11.85, + "learning_rate": 4.407674776930585e-05, + "loss": 2.4218, + "step": 2392000 + }, + { + "epoch": 11.85, + "learning_rate": 4.4075509182879766e-05, + "loss": 2.4203, + "step": 2392500 + }, + { + "epoch": 11.86, + "learning_rate": 4.407427059645368e-05, + "loss": 2.4449, + "step": 2393000 + }, + { + "epoch": 11.86, + "learning_rate": 4.40730320100276e-05, + "loss": 2.4359, + "step": 2393500 + }, + { + "epoch": 11.86, + "learning_rate": 4.407179342360152e-05, + "loss": 2.4611, + "step": 2394000 + }, + { + "epoch": 11.86, + "learning_rate": 4.4070554837175434e-05, + "loss": 2.4342, + "step": 2394500 + }, + { + "epoch": 11.87, + "learning_rate": 4.406931625074935e-05, + "loss": 2.4223, + "step": 2395000 + }, + { + "epoch": 11.87, + "learning_rate": 4.406807766432327e-05, + "loss": 2.4426, + "step": 2395500 + }, + { + "epoch": 11.87, + "learning_rate": 4.4066839077897185e-05, + "loss": 2.4305, + "step": 2396000 + }, + { + "epoch": 11.87, + "learning_rate": 4.406560296864395e-05, + "loss": 2.4321, + "step": 2396500 + }, + { + "epoch": 11.88, + "learning_rate": 4.4064366859390716e-05, + "loss": 2.4089, + "step": 2397000 + }, + { + "epoch": 11.88, + "learning_rate": 4.4063133227310336e-05, + "loss": 2.414, + "step": 2397500 + }, + { + "epoch": 11.88, + "learning_rate": 4.406189464088425e-05, + "loss": 2.4312, + "step": 2398000 + }, + { + "epoch": 11.88, + "learning_rate": 4.406065605445817e-05, + "loss": 2.405, + "step": 2398500 + }, + { + "epoch": 11.89, + "learning_rate": 4.405941746803209e-05, + "loss": 2.4159, + "step": 2399000 + }, + { + "epoch": 11.89, + "learning_rate": 4.4058178881606004e-05, + "loss": 2.417, + "step": 2399500 + }, + { + "epoch": 11.89, + "learning_rate": 4.405694277235277e-05, + "loss": 2.3933, + "step": 2400000 + }, + { + "epoch": 11.89, + "learning_rate": 4.405570418592669e-05, + "loss": 2.4296, + "step": 2400500 + }, + { + "epoch": 11.9, + "learning_rate": 4.405446559950061e-05, + "loss": 2.4389, + "step": 2401000 + }, + { + "epoch": 11.9, + "learning_rate": 4.4053227013074524e-05, + "loss": 2.4468, + "step": 2401500 + }, + { + "epoch": 11.9, + "learning_rate": 4.405198842664844e-05, + "loss": 2.4335, + "step": 2402000 + }, + { + "epoch": 11.9, + "learning_rate": 4.405074984022236e-05, + "loss": 2.4179, + "step": 2402500 + }, + { + "epoch": 11.91, + "learning_rate": 4.4049511253796274e-05, + "loss": 2.4189, + "step": 2403000 + }, + { + "epoch": 11.91, + "learning_rate": 4.404827266737019e-05, + "loss": 2.4311, + "step": 2403500 + }, + { + "epoch": 11.91, + "learning_rate": 4.40470340809441e-05, + "loss": 2.4236, + "step": 2404000 + }, + { + "epoch": 11.91, + "learning_rate": 4.404579549451802e-05, + "loss": 2.4249, + "step": 2404500 + }, + { + "epoch": 11.92, + "learning_rate": 4.4044556908091935e-05, + "loss": 2.4524, + "step": 2405000 + }, + { + "epoch": 11.92, + "learning_rate": 4.404331832166585e-05, + "loss": 2.4291, + "step": 2405500 + }, + { + "epoch": 11.92, + "learning_rate": 4.404207973523977e-05, + "loss": 2.4173, + "step": 2406000 + }, + { + "epoch": 11.92, + "learning_rate": 4.4040841148813686e-05, + "loss": 2.448, + "step": 2406500 + }, + { + "epoch": 11.93, + "learning_rate": 4.4039602562387596e-05, + "loss": 2.4346, + "step": 2407000 + }, + { + "epoch": 11.93, + "learning_rate": 4.403836397596151e-05, + "loss": 2.4152, + "step": 2407500 + }, + { + "epoch": 11.93, + "learning_rate": 4.403712538953543e-05, + "loss": 2.4002, + "step": 2408000 + }, + { + "epoch": 11.93, + "learning_rate": 4.40358892802822e-05, + "loss": 2.419, + "step": 2408500 + }, + { + "epoch": 11.94, + "learning_rate": 4.4034653171028975e-05, + "loss": 2.4215, + "step": 2409000 + }, + { + "epoch": 11.94, + "learning_rate": 4.403341458460289e-05, + "loss": 2.4047, + "step": 2409500 + }, + { + "epoch": 11.94, + "learning_rate": 4.4032178475349653e-05, + "loss": 2.4267, + "step": 2410000 + }, + { + "epoch": 11.94, + "learning_rate": 4.403093988892357e-05, + "loss": 2.4454, + "step": 2410500 + }, + { + "epoch": 11.94, + "learning_rate": 4.402970130249749e-05, + "loss": 2.3954, + "step": 2411000 + }, + { + "epoch": 11.95, + "learning_rate": 4.4028462716071404e-05, + "loss": 2.4412, + "step": 2411500 + }, + { + "epoch": 11.95, + "learning_rate": 4.402722412964532e-05, + "loss": 2.4209, + "step": 2412000 + }, + { + "epoch": 11.95, + "learning_rate": 4.402598554321924e-05, + "loss": 2.4415, + "step": 2412500 + }, + { + "epoch": 11.95, + "learning_rate": 4.4024746956793155e-05, + "loss": 2.408, + "step": 2413000 + }, + { + "epoch": 11.96, + "learning_rate": 4.4023508370367065e-05, + "loss": 2.4469, + "step": 2413500 + }, + { + "epoch": 11.96, + "learning_rate": 4.402226978394098e-05, + "loss": 2.4386, + "step": 2414000 + }, + { + "epoch": 11.96, + "learning_rate": 4.402103367468776e-05, + "loss": 2.4394, + "step": 2414500 + }, + { + "epoch": 11.96, + "learning_rate": 4.401979756543452e-05, + "loss": 2.426, + "step": 2415000 + }, + { + "epoch": 11.97, + "learning_rate": 4.401855897900844e-05, + "loss": 2.4471, + "step": 2415500 + }, + { + "epoch": 11.97, + "learning_rate": 4.4017320392582354e-05, + "loss": 2.4293, + "step": 2416000 + }, + { + "epoch": 11.97, + "learning_rate": 4.401608180615627e-05, + "loss": 2.4297, + "step": 2416500 + }, + { + "epoch": 11.97, + "learning_rate": 4.401484321973019e-05, + "loss": 2.4321, + "step": 2417000 + }, + { + "epoch": 11.98, + "learning_rate": 4.4013604633304104e-05, + "loss": 2.4261, + "step": 2417500 + }, + { + "epoch": 11.98, + "learning_rate": 4.401236604687802e-05, + "loss": 2.4338, + "step": 2418000 + }, + { + "epoch": 11.98, + "learning_rate": 4.401112746045194e-05, + "loss": 2.4304, + "step": 2418500 + }, + { + "epoch": 11.98, + "learning_rate": 4.400989135119871e-05, + "loss": 2.3984, + "step": 2419000 + }, + { + "epoch": 11.99, + "learning_rate": 4.4008652764772624e-05, + "loss": 2.437, + "step": 2419500 + }, + { + "epoch": 11.99, + "learning_rate": 4.400741665551939e-05, + "loss": 2.4412, + "step": 2420000 + }, + { + "epoch": 11.99, + "learning_rate": 4.400617806909331e-05, + "loss": 2.4605, + "step": 2420500 + }, + { + "epoch": 11.99, + "learning_rate": 4.400493948266723e-05, + "loss": 2.4154, + "step": 2421000 + }, + { + "epoch": 12.0, + "learning_rate": 4.400370089624114e-05, + "loss": 2.4555, + "step": 2421500 + }, + { + "epoch": 12.0, + "learning_rate": 4.4002462309815054e-05, + "loss": 2.4047, + "step": 2422000 + }, + { + "epoch": 12.0, + "eval_accuracy": 0.6447791117813911, + "eval_accuracy_mlm": 0.5983567225929853, + "eval_accuracy_nsp": 0.8637231868653391, + "eval_loss": 2.398054361343384, + "eval_runtime": 146.0821, + "eval_samples_per_second": 1745.314, + "eval_steps_per_second": 72.726, + "step": 2422116 + }, + { + "epoch": 12.0, + "learning_rate": 4.400122372338897e-05, + "loss": 2.3898, + "step": 2422500 + }, + { + "epoch": 12.0, + "learning_rate": 4.399998513696289e-05, + "loss": 2.4349, + "step": 2423000 + }, + { + "epoch": 12.01, + "learning_rate": 4.3998746550536804e-05, + "loss": 2.3875, + "step": 2423500 + }, + { + "epoch": 12.01, + "learning_rate": 4.399750796411072e-05, + "loss": 2.3812, + "step": 2424000 + }, + { + "epoch": 12.01, + "learning_rate": 4.399626937768464e-05, + "loss": 2.3915, + "step": 2424500 + }, + { + "epoch": 12.01, + "learning_rate": 4.3995030791258555e-05, + "loss": 2.4071, + "step": 2425000 + }, + { + "epoch": 12.02, + "learning_rate": 4.399379220483247e-05, + "loss": 2.3903, + "step": 2425500 + }, + { + "epoch": 12.02, + "learning_rate": 4.399255609557924e-05, + "loss": 2.3863, + "step": 2426000 + }, + { + "epoch": 12.02, + "learning_rate": 4.399131998632601e-05, + "loss": 2.3889, + "step": 2426500 + }, + { + "epoch": 12.02, + "learning_rate": 4.399008139989993e-05, + "loss": 2.3892, + "step": 2427000 + }, + { + "epoch": 12.03, + "learning_rate": 4.3988842813473844e-05, + "loss": 2.4049, + "step": 2427500 + }, + { + "epoch": 12.03, + "learning_rate": 4.398760422704776e-05, + "loss": 2.3953, + "step": 2428000 + }, + { + "epoch": 12.03, + "learning_rate": 4.398636564062167e-05, + "loss": 2.3994, + "step": 2428500 + }, + { + "epoch": 12.03, + "learning_rate": 4.398512705419559e-05, + "loss": 2.3911, + "step": 2429000 + }, + { + "epoch": 12.04, + "learning_rate": 4.3983888467769505e-05, + "loss": 2.4036, + "step": 2429500 + }, + { + "epoch": 12.04, + "learning_rate": 4.3982652358516273e-05, + "loss": 2.3935, + "step": 2430000 + }, + { + "epoch": 12.04, + "learning_rate": 4.398141624926304e-05, + "loss": 2.4119, + "step": 2430500 + }, + { + "epoch": 12.04, + "learning_rate": 4.398018014000981e-05, + "loss": 2.4128, + "step": 2431000 + }, + { + "epoch": 12.05, + "learning_rate": 4.397894155358373e-05, + "loss": 2.3834, + "step": 2431500 + }, + { + "epoch": 12.05, + "learning_rate": 4.3977702967157645e-05, + "loss": 2.402, + "step": 2432000 + }, + { + "epoch": 12.05, + "learning_rate": 4.397646438073156e-05, + "loss": 2.3786, + "step": 2432500 + }, + { + "epoch": 12.05, + "learning_rate": 4.397522579430547e-05, + "loss": 2.3985, + "step": 2433000 + }, + { + "epoch": 12.06, + "learning_rate": 4.397398720787939e-05, + "loss": 2.3975, + "step": 2433500 + }, + { + "epoch": 12.06, + "learning_rate": 4.3972748621453306e-05, + "loss": 2.395, + "step": 2434000 + }, + { + "epoch": 12.06, + "learning_rate": 4.397151251220008e-05, + "loss": 2.3914, + "step": 2434500 + }, + { + "epoch": 12.06, + "learning_rate": 4.397027640294685e-05, + "loss": 2.4154, + "step": 2435000 + }, + { + "epoch": 12.07, + "learning_rate": 4.396903781652077e-05, + "loss": 2.4228, + "step": 2435500 + }, + { + "epoch": 12.07, + "learning_rate": 4.396779923009468e-05, + "loss": 2.4234, + "step": 2436000 + }, + { + "epoch": 12.07, + "learning_rate": 4.3966560643668594e-05, + "loss": 2.4103, + "step": 2436500 + }, + { + "epoch": 12.07, + "learning_rate": 4.396532205724251e-05, + "loss": 2.4163, + "step": 2437000 + }, + { + "epoch": 12.08, + "learning_rate": 4.396408347081643e-05, + "loss": 2.408, + "step": 2437500 + }, + { + "epoch": 12.08, + "learning_rate": 4.3962844884390345e-05, + "loss": 2.3927, + "step": 2438000 + }, + { + "epoch": 12.08, + "learning_rate": 4.396160629796426e-05, + "loss": 2.3937, + "step": 2438500 + }, + { + "epoch": 12.08, + "learning_rate": 4.396036771153817e-05, + "loss": 2.3874, + "step": 2439000 + }, + { + "epoch": 12.09, + "learning_rate": 4.395912912511209e-05, + "loss": 2.4065, + "step": 2439500 + }, + { + "epoch": 12.09, + "learning_rate": 4.3957890538686006e-05, + "loss": 2.3979, + "step": 2440000 + }, + { + "epoch": 12.09, + "learning_rate": 4.395665442943278e-05, + "loss": 2.3901, + "step": 2440500 + }, + { + "epoch": 12.09, + "learning_rate": 4.39554158430067e-05, + "loss": 2.4009, + "step": 2441000 + }, + { + "epoch": 12.1, + "learning_rate": 4.3954177256580615e-05, + "loss": 2.396, + "step": 2441500 + }, + { + "epoch": 12.1, + "learning_rate": 4.3952941147327384e-05, + "loss": 2.4246, + "step": 2442000 + }, + { + "epoch": 12.1, + "learning_rate": 4.39517025609013e-05, + "loss": 2.4031, + "step": 2442500 + }, + { + "epoch": 12.1, + "learning_rate": 4.395046397447521e-05, + "loss": 2.4032, + "step": 2443000 + }, + { + "epoch": 12.11, + "learning_rate": 4.394922538804913e-05, + "loss": 2.3833, + "step": 2443500 + }, + { + "epoch": 12.11, + "learning_rate": 4.3947986801623045e-05, + "loss": 2.3917, + "step": 2444000 + }, + { + "epoch": 12.11, + "learning_rate": 4.394674821519696e-05, + "loss": 2.4239, + "step": 2444500 + }, + { + "epoch": 12.11, + "learning_rate": 4.394550962877088e-05, + "loss": 2.4095, + "step": 2445000 + }, + { + "epoch": 12.12, + "learning_rate": 4.394427104234479e-05, + "loss": 2.4093, + "step": 2445500 + }, + { + "epoch": 12.12, + "learning_rate": 4.3943032455918706e-05, + "loss": 2.4138, + "step": 2446000 + }, + { + "epoch": 12.12, + "learning_rate": 4.394179386949262e-05, + "loss": 2.3883, + "step": 2446500 + }, + { + "epoch": 12.12, + "learning_rate": 4.394055528306654e-05, + "loss": 2.3971, + "step": 2447000 + }, + { + "epoch": 12.13, + "learning_rate": 4.393931669664046e-05, + "loss": 2.4231, + "step": 2447500 + }, + { + "epoch": 12.13, + "learning_rate": 4.3938078110214374e-05, + "loss": 2.4131, + "step": 2448000 + }, + { + "epoch": 12.13, + "learning_rate": 4.393683952378829e-05, + "loss": 2.4189, + "step": 2448500 + }, + { + "epoch": 12.13, + "learning_rate": 4.393560093736221e-05, + "loss": 2.3864, + "step": 2449000 + }, + { + "epoch": 12.14, + "learning_rate": 4.3934362350936125e-05, + "loss": 2.4134, + "step": 2449500 + }, + { + "epoch": 12.14, + "learning_rate": 4.393312376451004e-05, + "loss": 2.3885, + "step": 2450000 + }, + { + "epoch": 12.14, + "learning_rate": 4.393188517808396e-05, + "loss": 2.4138, + "step": 2450500 + }, + { + "epoch": 12.14, + "learning_rate": 4.393064906883073e-05, + "loss": 2.42, + "step": 2451000 + }, + { + "epoch": 12.15, + "learning_rate": 4.3929410482404644e-05, + "loss": 2.4087, + "step": 2451500 + }, + { + "epoch": 12.15, + "learning_rate": 4.392817189597856e-05, + "loss": 2.4067, + "step": 2452000 + }, + { + "epoch": 12.15, + "learning_rate": 4.392693330955248e-05, + "loss": 2.4229, + "step": 2452500 + }, + { + "epoch": 12.15, + "learning_rate": 4.3925694723126395e-05, + "loss": 2.3739, + "step": 2453000 + }, + { + "epoch": 12.16, + "learning_rate": 4.392445613670031e-05, + "loss": 2.3939, + "step": 2453500 + }, + { + "epoch": 12.16, + "learning_rate": 4.3923220027447074e-05, + "loss": 2.4108, + "step": 2454000 + }, + { + "epoch": 12.16, + "learning_rate": 4.392198391819384e-05, + "loss": 2.3995, + "step": 2454500 + }, + { + "epoch": 12.16, + "learning_rate": 4.392074533176776e-05, + "loss": 2.396, + "step": 2455000 + }, + { + "epoch": 12.17, + "learning_rate": 4.3919506745341677e-05, + "loss": 2.4091, + "step": 2455500 + }, + { + "epoch": 12.17, + "learning_rate": 4.3918268158915594e-05, + "loss": 2.4122, + "step": 2456000 + }, + { + "epoch": 12.17, + "learning_rate": 4.391702957248951e-05, + "loss": 2.3943, + "step": 2456500 + }, + { + "epoch": 12.17, + "learning_rate": 4.391579098606343e-05, + "loss": 2.4066, + "step": 2457000 + }, + { + "epoch": 12.18, + "learning_rate": 4.3914552399637344e-05, + "loss": 2.4244, + "step": 2457500 + }, + { + "epoch": 12.18, + "learning_rate": 4.391331381321126e-05, + "loss": 2.386, + "step": 2458000 + }, + { + "epoch": 12.18, + "learning_rate": 4.391207522678518e-05, + "loss": 2.4081, + "step": 2458500 + }, + { + "epoch": 12.18, + "learning_rate": 4.3910836640359095e-05, + "loss": 2.376, + "step": 2459000 + }, + { + "epoch": 12.19, + "learning_rate": 4.390959805393301e-05, + "loss": 2.4178, + "step": 2459500 + }, + { + "epoch": 12.19, + "learning_rate": 4.3908361944679774e-05, + "loss": 2.4064, + "step": 2460000 + }, + { + "epoch": 12.19, + "learning_rate": 4.390712335825369e-05, + "loss": 2.4019, + "step": 2460500 + }, + { + "epoch": 12.19, + "learning_rate": 4.390588477182761e-05, + "loss": 2.3993, + "step": 2461000 + }, + { + "epoch": 12.2, + "learning_rate": 4.3904646185401525e-05, + "loss": 2.3876, + "step": 2461500 + }, + { + "epoch": 12.2, + "learning_rate": 4.390340759897544e-05, + "loss": 2.388, + "step": 2462000 + }, + { + "epoch": 12.2, + "learning_rate": 4.390216901254936e-05, + "loss": 2.4285, + "step": 2462500 + }, + { + "epoch": 12.2, + "learning_rate": 4.390093290329613e-05, + "loss": 2.4094, + "step": 2463000 + }, + { + "epoch": 12.21, + "learning_rate": 4.3899696794042896e-05, + "loss": 2.3865, + "step": 2463500 + }, + { + "epoch": 12.21, + "learning_rate": 4.389845820761681e-05, + "loss": 2.4204, + "step": 2464000 + }, + { + "epoch": 12.21, + "learning_rate": 4.389721962119073e-05, + "loss": 2.4236, + "step": 2464500 + }, + { + "epoch": 12.21, + "learning_rate": 4.389598103476465e-05, + "loss": 2.3961, + "step": 2465000 + }, + { + "epoch": 12.21, + "learning_rate": 4.3894744925511416e-05, + "loss": 2.397, + "step": 2465500 + }, + { + "epoch": 12.22, + "learning_rate": 4.389350633908533e-05, + "loss": 2.3895, + "step": 2466000 + }, + { + "epoch": 12.22, + "learning_rate": 4.389226775265925e-05, + "loss": 2.4102, + "step": 2466500 + }, + { + "epoch": 12.22, + "learning_rate": 4.389102916623316e-05, + "loss": 2.405, + "step": 2467000 + }, + { + "epoch": 12.22, + "learning_rate": 4.388979057980708e-05, + "loss": 2.4291, + "step": 2467500 + }, + { + "epoch": 12.23, + "learning_rate": 4.3888551993380994e-05, + "loss": 2.4213, + "step": 2468000 + }, + { + "epoch": 12.23, + "learning_rate": 4.388731340695491e-05, + "loss": 2.4009, + "step": 2468500 + }, + { + "epoch": 12.23, + "learning_rate": 4.388607482052883e-05, + "loss": 2.4047, + "step": 2469000 + }, + { + "epoch": 12.23, + "learning_rate": 4.3884836234102745e-05, + "loss": 2.385, + "step": 2469500 + }, + { + "epoch": 12.24, + "learning_rate": 4.388360012484951e-05, + "loss": 2.4111, + "step": 2470000 + }, + { + "epoch": 12.24, + "learning_rate": 4.388236153842343e-05, + "loss": 2.4104, + "step": 2470500 + }, + { + "epoch": 12.24, + "learning_rate": 4.388112295199735e-05, + "loss": 2.4294, + "step": 2471000 + }, + { + "epoch": 12.24, + "learning_rate": 4.3879884365571264e-05, + "loss": 2.4056, + "step": 2471500 + }, + { + "epoch": 12.25, + "learning_rate": 4.387864577914518e-05, + "loss": 2.396, + "step": 2472000 + }, + { + "epoch": 12.25, + "learning_rate": 4.387740719271909e-05, + "loss": 2.3991, + "step": 2472500 + }, + { + "epoch": 12.25, + "learning_rate": 4.387617108346587e-05, + "loss": 2.3729, + "step": 2473000 + }, + { + "epoch": 12.25, + "learning_rate": 4.387493249703978e-05, + "loss": 2.4009, + "step": 2473500 + }, + { + "epoch": 12.26, + "learning_rate": 4.3873693910613694e-05, + "loss": 2.4272, + "step": 2474000 + }, + { + "epoch": 12.26, + "learning_rate": 4.387245532418761e-05, + "loss": 2.4015, + "step": 2474500 + }, + { + "epoch": 12.26, + "learning_rate": 4.387121673776153e-05, + "loss": 2.4204, + "step": 2475000 + }, + { + "epoch": 12.26, + "learning_rate": 4.3869978151335445e-05, + "loss": 2.4145, + "step": 2475500 + }, + { + "epoch": 12.27, + "learning_rate": 4.386873956490936e-05, + "loss": 2.4175, + "step": 2476000 + }, + { + "epoch": 12.27, + "learning_rate": 4.386750097848328e-05, + "loss": 2.3936, + "step": 2476500 + }, + { + "epoch": 12.27, + "learning_rate": 4.3866262392057195e-05, + "loss": 2.382, + "step": 2477000 + }, + { + "epoch": 12.27, + "learning_rate": 4.3865026282803964e-05, + "loss": 2.4075, + "step": 2477500 + }, + { + "epoch": 12.28, + "learning_rate": 4.386378769637788e-05, + "loss": 2.3898, + "step": 2478000 + }, + { + "epoch": 12.28, + "learning_rate": 4.38625491099518e-05, + "loss": 2.4162, + "step": 2478500 + }, + { + "epoch": 12.28, + "learning_rate": 4.386131052352571e-05, + "loss": 2.4163, + "step": 2479000 + }, + { + "epoch": 12.28, + "learning_rate": 4.3860071937099625e-05, + "loss": 2.4165, + "step": 2479500 + }, + { + "epoch": 12.29, + "learning_rate": 4.385883335067354e-05, + "loss": 2.4039, + "step": 2480000 + }, + { + "epoch": 12.29, + "learning_rate": 4.385759476424746e-05, + "loss": 2.4346, + "step": 2480500 + }, + { + "epoch": 12.29, + "learning_rate": 4.385635865499423e-05, + "loss": 2.427, + "step": 2481000 + }, + { + "epoch": 12.29, + "learning_rate": 4.3855120068568145e-05, + "loss": 2.3986, + "step": 2481500 + }, + { + "epoch": 12.3, + "learning_rate": 4.385388148214206e-05, + "loss": 2.4292, + "step": 2482000 + }, + { + "epoch": 12.3, + "learning_rate": 4.385264537288884e-05, + "loss": 2.4181, + "step": 2482500 + }, + { + "epoch": 12.3, + "learning_rate": 4.385140678646275e-05, + "loss": 2.4078, + "step": 2483000 + }, + { + "epoch": 12.3, + "learning_rate": 4.3850168200036664e-05, + "loss": 2.426, + "step": 2483500 + }, + { + "epoch": 12.31, + "learning_rate": 4.384892961361058e-05, + "loss": 2.4333, + "step": 2484000 + }, + { + "epoch": 12.31, + "learning_rate": 4.38476910271845e-05, + "loss": 2.4103, + "step": 2484500 + }, + { + "epoch": 12.31, + "learning_rate": 4.3846452440758415e-05, + "loss": 2.418, + "step": 2485000 + }, + { + "epoch": 12.31, + "learning_rate": 4.384521385433233e-05, + "loss": 2.399, + "step": 2485500 + }, + { + "epoch": 12.32, + "learning_rate": 4.384397526790624e-05, + "loss": 2.408, + "step": 2486000 + }, + { + "epoch": 12.32, + "learning_rate": 4.384273668148016e-05, + "loss": 2.3786, + "step": 2486500 + }, + { + "epoch": 12.32, + "learning_rate": 4.3841498095054076e-05, + "loss": 2.434, + "step": 2487000 + }, + { + "epoch": 12.32, + "learning_rate": 4.384025950862799e-05, + "loss": 2.4179, + "step": 2487500 + }, + { + "epoch": 12.33, + "learning_rate": 4.383902339937476e-05, + "loss": 2.4173, + "step": 2488000 + }, + { + "epoch": 12.33, + "learning_rate": 4.383778729012154e-05, + "loss": 2.4215, + "step": 2488500 + }, + { + "epoch": 12.33, + "learning_rate": 4.383655365804115e-05, + "loss": 2.4273, + "step": 2489000 + }, + { + "epoch": 12.33, + "learning_rate": 4.383531507161507e-05, + "loss": 2.4262, + "step": 2489500 + }, + { + "epoch": 12.34, + "learning_rate": 4.3834076485188985e-05, + "loss": 2.4236, + "step": 2490000 + }, + { + "epoch": 12.34, + "learning_rate": 4.38328378987629e-05, + "loss": 2.3886, + "step": 2490500 + }, + { + "epoch": 12.34, + "learning_rate": 4.383159931233682e-05, + "loss": 2.4172, + "step": 2491000 + }, + { + "epoch": 12.34, + "learning_rate": 4.3830360725910736e-05, + "loss": 2.4084, + "step": 2491500 + }, + { + "epoch": 12.35, + "learning_rate": 4.382912213948465e-05, + "loss": 2.4236, + "step": 2492000 + }, + { + "epoch": 12.35, + "learning_rate": 4.382788355305857e-05, + "loss": 2.4233, + "step": 2492500 + }, + { + "epoch": 12.35, + "learning_rate": 4.382664496663249e-05, + "loss": 2.4178, + "step": 2493000 + }, + { + "epoch": 12.35, + "learning_rate": 4.3825406380206404e-05, + "loss": 2.3815, + "step": 2493500 + }, + { + "epoch": 12.36, + "learning_rate": 4.382416779378032e-05, + "loss": 2.4237, + "step": 2494000 + }, + { + "epoch": 12.36, + "learning_rate": 4.382292920735424e-05, + "loss": 2.397, + "step": 2494500 + }, + { + "epoch": 12.36, + "learning_rate": 4.3821690620928154e-05, + "loss": 2.4008, + "step": 2495000 + }, + { + "epoch": 12.36, + "learning_rate": 4.3820452034502065e-05, + "loss": 2.4192, + "step": 2495500 + }, + { + "epoch": 12.37, + "learning_rate": 4.3819215925248833e-05, + "loss": 2.3788, + "step": 2496000 + }, + { + "epoch": 12.37, + "learning_rate": 4.381797733882275e-05, + "loss": 2.3979, + "step": 2496500 + }, + { + "epoch": 12.37, + "learning_rate": 4.381673875239667e-05, + "loss": 2.3989, + "step": 2497000 + }, + { + "epoch": 12.37, + "learning_rate": 4.3815500165970584e-05, + "loss": 2.4017, + "step": 2497500 + }, + { + "epoch": 12.38, + "learning_rate": 4.38142615795445e-05, + "loss": 2.4174, + "step": 2498000 + }, + { + "epoch": 12.38, + "learning_rate": 4.381302299311841e-05, + "loss": 2.4145, + "step": 2498500 + }, + { + "epoch": 12.38, + "learning_rate": 4.381178440669233e-05, + "loss": 2.4302, + "step": 2499000 + }, + { + "epoch": 12.38, + "learning_rate": 4.3810545820266245e-05, + "loss": 2.4266, + "step": 2499500 + }, + { + "epoch": 12.39, + "learning_rate": 4.380930971101302e-05, + "loss": 2.3984, + "step": 2500000 + }, + { + "epoch": 12.39, + "learning_rate": 4.380807360175978e-05, + "loss": 2.4286, + "step": 2500500 + }, + { + "epoch": 12.39, + "learning_rate": 4.38068350153337e-05, + "loss": 2.3976, + "step": 2501000 + }, + { + "epoch": 12.39, + "learning_rate": 4.380559890608047e-05, + "loss": 2.4343, + "step": 2501500 + }, + { + "epoch": 12.4, + "learning_rate": 4.3804360319654385e-05, + "loss": 2.4084, + "step": 2502000 + }, + { + "epoch": 12.4, + "learning_rate": 4.38031217332283e-05, + "loss": 2.4167, + "step": 2502500 + }, + { + "epoch": 12.4, + "learning_rate": 4.380188314680222e-05, + "loss": 2.4184, + "step": 2503000 + }, + { + "epoch": 12.4, + "learning_rate": 4.3800644560376136e-05, + "loss": 2.424, + "step": 2503500 + }, + { + "epoch": 12.41, + "learning_rate": 4.3799408451122905e-05, + "loss": 2.4085, + "step": 2504000 + }, + { + "epoch": 12.41, + "learning_rate": 4.379816986469682e-05, + "loss": 2.4307, + "step": 2504500 + }, + { + "epoch": 12.41, + "learning_rate": 4.379693127827074e-05, + "loss": 2.4117, + "step": 2505000 + }, + { + "epoch": 12.41, + "learning_rate": 4.3795692691844656e-05, + "loss": 2.3954, + "step": 2505500 + }, + { + "epoch": 12.42, + "learning_rate": 4.379445658259142e-05, + "loss": 2.4061, + "step": 2506000 + }, + { + "epoch": 12.42, + "learning_rate": 4.3793217996165335e-05, + "loss": 2.4096, + "step": 2506500 + }, + { + "epoch": 12.42, + "learning_rate": 4.379197940973925e-05, + "loss": 2.3931, + "step": 2507000 + }, + { + "epoch": 12.42, + "learning_rate": 4.379074082331317e-05, + "loss": 2.4206, + "step": 2507500 + }, + { + "epoch": 12.43, + "learning_rate": 4.3789502236887086e-05, + "loss": 2.4262, + "step": 2508000 + }, + { + "epoch": 12.43, + "learning_rate": 4.3788263650461e-05, + "loss": 2.3758, + "step": 2508500 + }, + { + "epoch": 12.43, + "learning_rate": 4.378702506403492e-05, + "loss": 2.3879, + "step": 2509000 + }, + { + "epoch": 12.43, + "learning_rate": 4.3785786477608836e-05, + "loss": 2.4278, + "step": 2509500 + }, + { + "epoch": 12.44, + "learning_rate": 4.3784550368355605e-05, + "loss": 2.4015, + "step": 2510000 + }, + { + "epoch": 12.44, + "learning_rate": 4.378331178192952e-05, + "loss": 2.4261, + "step": 2510500 + }, + { + "epoch": 12.44, + "learning_rate": 4.378207319550344e-05, + "loss": 2.4078, + "step": 2511000 + }, + { + "epoch": 12.44, + "learning_rate": 4.3780834609077356e-05, + "loss": 2.3968, + "step": 2511500 + }, + { + "epoch": 12.45, + "learning_rate": 4.377959602265127e-05, + "loss": 2.3812, + "step": 2512000 + }, + { + "epoch": 12.45, + "learning_rate": 4.377835743622519e-05, + "loss": 2.3986, + "step": 2512500 + }, + { + "epoch": 12.45, + "learning_rate": 4.377711884979911e-05, + "loss": 2.4222, + "step": 2513000 + }, + { + "epoch": 12.45, + "learning_rate": 4.377588026337302e-05, + "loss": 2.4022, + "step": 2513500 + }, + { + "epoch": 12.46, + "learning_rate": 4.3774641676946934e-05, + "loss": 2.4151, + "step": 2514000 + }, + { + "epoch": 12.46, + "learning_rate": 4.377340309052085e-05, + "loss": 2.4226, + "step": 2514500 + }, + { + "epoch": 12.46, + "learning_rate": 4.377216450409477e-05, + "loss": 2.3848, + "step": 2515000 + }, + { + "epoch": 12.46, + "learning_rate": 4.3770925917668685e-05, + "loss": 2.4285, + "step": 2515500 + }, + { + "epoch": 12.47, + "learning_rate": 4.37696873312426e-05, + "loss": 2.4243, + "step": 2516000 + }, + { + "epoch": 12.47, + "learning_rate": 4.376845122198937e-05, + "loss": 2.4008, + "step": 2516500 + }, + { + "epoch": 12.47, + "learning_rate": 4.376721263556329e-05, + "loss": 2.4064, + "step": 2517000 + }, + { + "epoch": 12.47, + "learning_rate": 4.3765974049137204e-05, + "loss": 2.4022, + "step": 2517500 + }, + { + "epoch": 12.48, + "learning_rate": 4.376473793988397e-05, + "loss": 2.3923, + "step": 2518000 + }, + { + "epoch": 12.48, + "learning_rate": 4.376349935345789e-05, + "loss": 2.4132, + "step": 2518500 + }, + { + "epoch": 12.48, + "learning_rate": 4.376226076703181e-05, + "loss": 2.4178, + "step": 2519000 + }, + { + "epoch": 12.48, + "learning_rate": 4.3761022180605724e-05, + "loss": 2.4013, + "step": 2519500 + }, + { + "epoch": 12.48, + "learning_rate": 4.375978359417964e-05, + "loss": 2.4077, + "step": 2520000 + }, + { + "epoch": 12.49, + "learning_rate": 4.375854500775355e-05, + "loss": 2.4075, + "step": 2520500 + }, + { + "epoch": 12.49, + "learning_rate": 4.375730642132747e-05, + "loss": 2.4111, + "step": 2521000 + }, + { + "epoch": 12.49, + "learning_rate": 4.375607031207424e-05, + "loss": 2.3895, + "step": 2521500 + }, + { + "epoch": 12.49, + "learning_rate": 4.3754831725648154e-05, + "loss": 2.3763, + "step": 2522000 + }, + { + "epoch": 12.5, + "learning_rate": 4.375359313922207e-05, + "loss": 2.4228, + "step": 2522500 + }, + { + "epoch": 12.5, + "learning_rate": 4.375235455279599e-05, + "loss": 2.3938, + "step": 2523000 + }, + { + "epoch": 12.5, + "learning_rate": 4.3751115966369904e-05, + "loss": 2.404, + "step": 2523500 + }, + { + "epoch": 12.5, + "learning_rate": 4.374987985711667e-05, + "loss": 2.4042, + "step": 2524000 + }, + { + "epoch": 12.51, + "learning_rate": 4.374864127069059e-05, + "loss": 2.4155, + "step": 2524500 + }, + { + "epoch": 12.51, + "learning_rate": 4.374740268426451e-05, + "loss": 2.4128, + "step": 2525000 + }, + { + "epoch": 12.51, + "learning_rate": 4.3746164097838424e-05, + "loss": 2.3995, + "step": 2525500 + }, + { + "epoch": 12.51, + "learning_rate": 4.374492551141234e-05, + "loss": 2.3965, + "step": 2526000 + }, + { + "epoch": 12.52, + "learning_rate": 4.374368692498626e-05, + "loss": 2.426, + "step": 2526500 + }, + { + "epoch": 12.52, + "learning_rate": 4.3742448338560175e-05, + "loss": 2.4275, + "step": 2527000 + }, + { + "epoch": 12.52, + "learning_rate": 4.3741209752134085e-05, + "loss": 2.407, + "step": 2527500 + }, + { + "epoch": 12.52, + "learning_rate": 4.3739973642880854e-05, + "loss": 2.376, + "step": 2528000 + }, + { + "epoch": 12.53, + "learning_rate": 4.373873505645477e-05, + "loss": 2.4029, + "step": 2528500 + }, + { + "epoch": 12.53, + "learning_rate": 4.373749647002869e-05, + "loss": 2.4195, + "step": 2529000 + }, + { + "epoch": 12.53, + "learning_rate": 4.3736257883602604e-05, + "loss": 2.3921, + "step": 2529500 + }, + { + "epoch": 12.53, + "learning_rate": 4.373501929717652e-05, + "loss": 2.4134, + "step": 2530000 + }, + { + "epoch": 12.54, + "learning_rate": 4.3733785665096135e-05, + "loss": 2.4015, + "step": 2530500 + }, + { + "epoch": 12.54, + "learning_rate": 4.373254707867005e-05, + "loss": 2.4044, + "step": 2531000 + }, + { + "epoch": 12.54, + "learning_rate": 4.373130849224397e-05, + "loss": 2.4226, + "step": 2531500 + }, + { + "epoch": 12.54, + "learning_rate": 4.3730069905817886e-05, + "loss": 2.4238, + "step": 2532000 + }, + { + "epoch": 12.55, + "learning_rate": 4.37288313193918e-05, + "loss": 2.3989, + "step": 2532500 + }, + { + "epoch": 12.55, + "learning_rate": 4.372759273296572e-05, + "loss": 2.4198, + "step": 2533000 + }, + { + "epoch": 12.55, + "learning_rate": 4.372635414653964e-05, + "loss": 2.3914, + "step": 2533500 + }, + { + "epoch": 12.55, + "learning_rate": 4.3725115560113554e-05, + "loss": 2.4127, + "step": 2534000 + }, + { + "epoch": 12.56, + "learning_rate": 4.372387697368747e-05, + "loss": 2.4067, + "step": 2534500 + }, + { + "epoch": 12.56, + "learning_rate": 4.372264086443424e-05, + "loss": 2.4096, + "step": 2535000 + }, + { + "epoch": 12.56, + "learning_rate": 4.3721402278008156e-05, + "loss": 2.392, + "step": 2535500 + }, + { + "epoch": 12.56, + "learning_rate": 4.372016369158207e-05, + "loss": 2.4025, + "step": 2536000 + }, + { + "epoch": 12.57, + "learning_rate": 4.371892510515599e-05, + "loss": 2.3878, + "step": 2536500 + }, + { + "epoch": 12.57, + "learning_rate": 4.371768651872991e-05, + "loss": 2.428, + "step": 2537000 + }, + { + "epoch": 12.57, + "learning_rate": 4.3716447932303824e-05, + "loss": 2.3988, + "step": 2537500 + }, + { + "epoch": 12.57, + "learning_rate": 4.371520934587774e-05, + "loss": 2.4164, + "step": 2538000 + }, + { + "epoch": 12.58, + "learning_rate": 4.37139732366245e-05, + "loss": 2.4146, + "step": 2538500 + }, + { + "epoch": 12.58, + "learning_rate": 4.371273465019842e-05, + "loss": 2.4205, + "step": 2539000 + }, + { + "epoch": 12.58, + "learning_rate": 4.3711498540945196e-05, + "loss": 2.4266, + "step": 2539500 + }, + { + "epoch": 12.58, + "learning_rate": 4.3710259954519106e-05, + "loss": 2.405, + "step": 2540000 + }, + { + "epoch": 12.59, + "learning_rate": 4.370902384526588e-05, + "loss": 2.4024, + "step": 2540500 + }, + { + "epoch": 12.59, + "learning_rate": 4.37077852588398e-05, + "loss": 2.4214, + "step": 2541000 + }, + { + "epoch": 12.59, + "learning_rate": 4.370654667241371e-05, + "loss": 2.4359, + "step": 2541500 + }, + { + "epoch": 12.59, + "learning_rate": 4.3705308085987625e-05, + "loss": 2.4429, + "step": 2542000 + }, + { + "epoch": 12.6, + "learning_rate": 4.370406949956154e-05, + "loss": 2.4162, + "step": 2542500 + }, + { + "epoch": 12.6, + "learning_rate": 4.370283339030831e-05, + "loss": 2.4065, + "step": 2543000 + }, + { + "epoch": 12.6, + "learning_rate": 4.370159480388223e-05, + "loss": 2.4123, + "step": 2543500 + }, + { + "epoch": 12.6, + "learning_rate": 4.3700356217456145e-05, + "loss": 2.3914, + "step": 2544000 + }, + { + "epoch": 12.61, + "learning_rate": 4.369911763103006e-05, + "loss": 2.4199, + "step": 2544500 + }, + { + "epoch": 12.61, + "learning_rate": 4.369787904460398e-05, + "loss": 2.4165, + "step": 2545000 + }, + { + "epoch": 12.61, + "learning_rate": 4.3696640458177896e-05, + "loss": 2.4082, + "step": 2545500 + }, + { + "epoch": 12.61, + "learning_rate": 4.3695401871751806e-05, + "loss": 2.4234, + "step": 2546000 + }, + { + "epoch": 12.62, + "learning_rate": 4.369416576249858e-05, + "loss": 2.4354, + "step": 2546500 + }, + { + "epoch": 12.62, + "learning_rate": 4.36929271760725e-05, + "loss": 2.434, + "step": 2547000 + }, + { + "epoch": 12.62, + "learning_rate": 4.3691688589646415e-05, + "loss": 2.3977, + "step": 2547500 + }, + { + "epoch": 12.62, + "learning_rate": 4.369045000322033e-05, + "loss": 2.3974, + "step": 2548000 + }, + { + "epoch": 12.63, + "learning_rate": 4.368921141679424e-05, + "loss": 2.4089, + "step": 2548500 + }, + { + "epoch": 12.63, + "learning_rate": 4.368797283036816e-05, + "loss": 2.4315, + "step": 2549000 + }, + { + "epoch": 12.63, + "learning_rate": 4.3686734243942076e-05, + "loss": 2.4032, + "step": 2549500 + }, + { + "epoch": 12.63, + "learning_rate": 4.368549565751599e-05, + "loss": 2.4246, + "step": 2550000 + }, + { + "epoch": 12.64, + "learning_rate": 4.368425707108991e-05, + "loss": 2.3949, + "step": 2550500 + }, + { + "epoch": 12.64, + "learning_rate": 4.368302096183668e-05, + "loss": 2.4226, + "step": 2551000 + }, + { + "epoch": 12.64, + "learning_rate": 4.3681782375410596e-05, + "loss": 2.4192, + "step": 2551500 + }, + { + "epoch": 12.64, + "learning_rate": 4.368054378898451e-05, + "loss": 2.4099, + "step": 2552000 + }, + { + "epoch": 12.65, + "learning_rate": 4.367930520255842e-05, + "loss": 2.4061, + "step": 2552500 + }, + { + "epoch": 12.65, + "learning_rate": 4.367806661613234e-05, + "loss": 2.4307, + "step": 2553000 + }, + { + "epoch": 12.65, + "learning_rate": 4.367683298405196e-05, + "loss": 2.3988, + "step": 2553500 + }, + { + "epoch": 12.65, + "learning_rate": 4.367559439762588e-05, + "loss": 2.3905, + "step": 2554000 + }, + { + "epoch": 12.66, + "learning_rate": 4.3674355811199794e-05, + "loss": 2.4154, + "step": 2554500 + }, + { + "epoch": 12.66, + "learning_rate": 4.367311722477371e-05, + "loss": 2.4097, + "step": 2555000 + }, + { + "epoch": 12.66, + "learning_rate": 4.367187863834763e-05, + "loss": 2.4109, + "step": 2555500 + }, + { + "epoch": 12.66, + "learning_rate": 4.3670640051921545e-05, + "loss": 2.3961, + "step": 2556000 + }, + { + "epoch": 12.67, + "learning_rate": 4.366940146549546e-05, + "loss": 2.4039, + "step": 2556500 + }, + { + "epoch": 12.67, + "learning_rate": 4.366816287906938e-05, + "loss": 2.3979, + "step": 2557000 + }, + { + "epoch": 12.67, + "learning_rate": 4.366692676981615e-05, + "loss": 2.4154, + "step": 2557500 + }, + { + "epoch": 12.67, + "learning_rate": 4.3665688183390065e-05, + "loss": 2.4105, + "step": 2558000 + }, + { + "epoch": 12.68, + "learning_rate": 4.366444959696398e-05, + "loss": 2.4075, + "step": 2558500 + }, + { + "epoch": 12.68, + "learning_rate": 4.36632110105379e-05, + "loss": 2.4176, + "step": 2559000 + }, + { + "epoch": 12.68, + "learning_rate": 4.3661972424111816e-05, + "loss": 2.4234, + "step": 2559500 + }, + { + "epoch": 12.68, + "learning_rate": 4.366073631485858e-05, + "loss": 2.414, + "step": 2560000 + }, + { + "epoch": 12.69, + "learning_rate": 4.3659497728432495e-05, + "loss": 2.4034, + "step": 2560500 + }, + { + "epoch": 12.69, + "learning_rate": 4.365825914200641e-05, + "loss": 2.4229, + "step": 2561000 + }, + { + "epoch": 12.69, + "learning_rate": 4.365702055558033e-05, + "loss": 2.3905, + "step": 2561500 + }, + { + "epoch": 12.69, + "learning_rate": 4.3655781969154245e-05, + "loss": 2.4277, + "step": 2562000 + }, + { + "epoch": 12.7, + "learning_rate": 4.365454338272816e-05, + "loss": 2.4012, + "step": 2562500 + }, + { + "epoch": 12.7, + "learning_rate": 4.365330479630208e-05, + "loss": 2.4179, + "step": 2563000 + }, + { + "epoch": 12.7, + "learning_rate": 4.365206868704885e-05, + "loss": 2.4339, + "step": 2563500 + }, + { + "epoch": 12.7, + "learning_rate": 4.3650830100622765e-05, + "loss": 2.4012, + "step": 2564000 + }, + { + "epoch": 12.71, + "learning_rate": 4.364959151419668e-05, + "loss": 2.4154, + "step": 2564500 + }, + { + "epoch": 12.71, + "learning_rate": 4.36483529277706e-05, + "loss": 2.3908, + "step": 2565000 + }, + { + "epoch": 12.71, + "learning_rate": 4.3647114341344516e-05, + "loss": 2.4239, + "step": 2565500 + }, + { + "epoch": 12.71, + "learning_rate": 4.364587575491843e-05, + "loss": 2.403, + "step": 2566000 + }, + { + "epoch": 12.72, + "learning_rate": 4.364463716849235e-05, + "loss": 2.3735, + "step": 2566500 + }, + { + "epoch": 12.72, + "learning_rate": 4.3643398582066267e-05, + "loss": 2.4058, + "step": 2567000 + }, + { + "epoch": 12.72, + "learning_rate": 4.364216247281303e-05, + "loss": 2.4092, + "step": 2567500 + }, + { + "epoch": 12.72, + "learning_rate": 4.36409263635598e-05, + "loss": 2.4055, + "step": 2568000 + }, + { + "epoch": 12.73, + "learning_rate": 4.3639687777133714e-05, + "loss": 2.4084, + "step": 2568500 + }, + { + "epoch": 12.73, + "learning_rate": 4.363844919070763e-05, + "loss": 2.4078, + "step": 2569000 + }, + { + "epoch": 12.73, + "learning_rate": 4.363721060428155e-05, + "loss": 2.3979, + "step": 2569500 + }, + { + "epoch": 12.73, + "learning_rate": 4.363597449502832e-05, + "loss": 2.4249, + "step": 2570000 + }, + { + "epoch": 12.74, + "learning_rate": 4.3634735908602234e-05, + "loss": 2.4067, + "step": 2570500 + }, + { + "epoch": 12.74, + "learning_rate": 4.3633499799349e-05, + "loss": 2.4111, + "step": 2571000 + }, + { + "epoch": 12.74, + "learning_rate": 4.363226121292291e-05, + "loss": 2.3941, + "step": 2571500 + }, + { + "epoch": 12.74, + "learning_rate": 4.363102262649683e-05, + "loss": 2.4572, + "step": 2572000 + }, + { + "epoch": 12.75, + "learning_rate": 4.362978404007075e-05, + "loss": 2.428, + "step": 2572500 + }, + { + "epoch": 12.75, + "learning_rate": 4.3628545453644664e-05, + "loss": 2.4121, + "step": 2573000 + }, + { + "epoch": 12.75, + "learning_rate": 4.362730686721858e-05, + "loss": 2.4407, + "step": 2573500 + }, + { + "epoch": 12.75, + "learning_rate": 4.36260682807925e-05, + "loss": 2.4202, + "step": 2574000 + }, + { + "epoch": 12.75, + "learning_rate": 4.3624829694366414e-05, + "loss": 2.3971, + "step": 2574500 + }, + { + "epoch": 12.76, + "learning_rate": 4.362359358511318e-05, + "loss": 2.4459, + "step": 2575000 + }, + { + "epoch": 12.76, + "learning_rate": 4.36223549986871e-05, + "loss": 2.4038, + "step": 2575500 + }, + { + "epoch": 12.76, + "learning_rate": 4.362111641226102e-05, + "loss": 2.4283, + "step": 2576000 + }, + { + "epoch": 12.76, + "learning_rate": 4.3619877825834934e-05, + "loss": 2.4301, + "step": 2576500 + }, + { + "epoch": 12.77, + "learning_rate": 4.361863923940885e-05, + "loss": 2.4261, + "step": 2577000 + }, + { + "epoch": 12.77, + "learning_rate": 4.361740313015562e-05, + "loss": 2.4179, + "step": 2577500 + }, + { + "epoch": 12.77, + "learning_rate": 4.361616702090239e-05, + "loss": 2.4112, + "step": 2578000 + }, + { + "epoch": 12.77, + "learning_rate": 4.3614928434476305e-05, + "loss": 2.4111, + "step": 2578500 + }, + { + "epoch": 12.78, + "learning_rate": 4.361368984805022e-05, + "loss": 2.4339, + "step": 2579000 + }, + { + "epoch": 12.78, + "learning_rate": 4.361245126162414e-05, + "loss": 2.4154, + "step": 2579500 + }, + { + "epoch": 12.78, + "learning_rate": 4.3611212675198056e-05, + "loss": 2.4092, + "step": 2580000 + }, + { + "epoch": 12.78, + "learning_rate": 4.360997408877197e-05, + "loss": 2.4017, + "step": 2580500 + }, + { + "epoch": 12.79, + "learning_rate": 4.3608737979518735e-05, + "loss": 2.3993, + "step": 2581000 + }, + { + "epoch": 12.79, + "learning_rate": 4.360749939309265e-05, + "loss": 2.4092, + "step": 2581500 + }, + { + "epoch": 12.79, + "learning_rate": 4.360626080666657e-05, + "loss": 2.4148, + "step": 2582000 + }, + { + "epoch": 12.79, + "learning_rate": 4.3605022220240486e-05, + "loss": 2.4223, + "step": 2582500 + }, + { + "epoch": 12.8, + "learning_rate": 4.36037836338144e-05, + "loss": 2.4065, + "step": 2583000 + }, + { + "epoch": 12.8, + "learning_rate": 4.360254504738832e-05, + "loss": 2.4275, + "step": 2583500 + }, + { + "epoch": 12.8, + "learning_rate": 4.360130646096223e-05, + "loss": 2.3997, + "step": 2584000 + }, + { + "epoch": 12.8, + "learning_rate": 4.360006787453615e-05, + "loss": 2.4098, + "step": 2584500 + }, + { + "epoch": 12.81, + "learning_rate": 4.3598829288110064e-05, + "loss": 2.4142, + "step": 2585000 + }, + { + "epoch": 12.81, + "learning_rate": 4.359759070168398e-05, + "loss": 2.4112, + "step": 2585500 + }, + { + "epoch": 12.81, + "learning_rate": 4.3596354592430756e-05, + "loss": 2.4065, + "step": 2586000 + }, + { + "epoch": 12.81, + "learning_rate": 4.359511600600467e-05, + "loss": 2.4327, + "step": 2586500 + }, + { + "epoch": 12.82, + "learning_rate": 4.3593879896751435e-05, + "loss": 2.4185, + "step": 2587000 + }, + { + "epoch": 12.82, + "learning_rate": 4.359264131032535e-05, + "loss": 2.3929, + "step": 2587500 + }, + { + "epoch": 12.82, + "learning_rate": 4.359140272389927e-05, + "loss": 2.4129, + "step": 2588000 + }, + { + "epoch": 12.82, + "learning_rate": 4.3590164137473186e-05, + "loss": 2.4261, + "step": 2588500 + }, + { + "epoch": 12.83, + "learning_rate": 4.35889255510471e-05, + "loss": 2.4036, + "step": 2589000 + }, + { + "epoch": 12.83, + "learning_rate": 4.358768696462102e-05, + "loss": 2.4252, + "step": 2589500 + }, + { + "epoch": 12.83, + "learning_rate": 4.358644837819494e-05, + "loss": 2.4257, + "step": 2590000 + }, + { + "epoch": 12.83, + "learning_rate": 4.358520979176885e-05, + "loss": 2.3993, + "step": 2590500 + }, + { + "epoch": 12.84, + "learning_rate": 4.3583971205342764e-05, + "loss": 2.4082, + "step": 2591000 + }, + { + "epoch": 12.84, + "learning_rate": 4.358273261891668e-05, + "loss": 2.417, + "step": 2591500 + }, + { + "epoch": 12.84, + "learning_rate": 4.35814940324906e-05, + "loss": 2.4277, + "step": 2592000 + }, + { + "epoch": 12.84, + "learning_rate": 4.3580257923237373e-05, + "loss": 2.4523, + "step": 2592500 + }, + { + "epoch": 12.85, + "learning_rate": 4.357901933681129e-05, + "loss": 2.4193, + "step": 2593000 + }, + { + "epoch": 12.85, + "learning_rate": 4.35777807503852e-05, + "loss": 2.403, + "step": 2593500 + }, + { + "epoch": 12.85, + "learning_rate": 4.357654216395912e-05, + "loss": 2.4337, + "step": 2594000 + }, + { + "epoch": 12.85, + "learning_rate": 4.3575303577533034e-05, + "loss": 2.4078, + "step": 2594500 + }, + { + "epoch": 12.86, + "learning_rate": 4.35740674682798e-05, + "loss": 2.4119, + "step": 2595000 + }, + { + "epoch": 12.86, + "learning_rate": 4.357282888185372e-05, + "loss": 2.4088, + "step": 2595500 + }, + { + "epoch": 12.86, + "learning_rate": 4.357159029542764e-05, + "loss": 2.4246, + "step": 2596000 + }, + { + "epoch": 12.86, + "learning_rate": 4.357035170900155e-05, + "loss": 2.4001, + "step": 2596500 + }, + { + "epoch": 12.87, + "learning_rate": 4.3569113122575464e-05, + "loss": 2.4112, + "step": 2597000 + }, + { + "epoch": 12.87, + "learning_rate": 4.356787701332224e-05, + "loss": 2.4181, + "step": 2597500 + }, + { + "epoch": 12.87, + "learning_rate": 4.356664090406901e-05, + "loss": 2.43, + "step": 2598000 + }, + { + "epoch": 12.87, + "learning_rate": 4.356540479481577e-05, + "loss": 2.4409, + "step": 2598500 + }, + { + "epoch": 12.88, + "learning_rate": 4.3564168685562546e-05, + "loss": 2.4328, + "step": 2599000 + }, + { + "epoch": 12.88, + "learning_rate": 4.356293009913646e-05, + "loss": 2.4351, + "step": 2599500 + }, + { + "epoch": 12.88, + "learning_rate": 4.356169151271038e-05, + "loss": 2.4067, + "step": 2600000 + }, + { + "epoch": 12.88, + "learning_rate": 4.356045292628429e-05, + "loss": 2.4088, + "step": 2600500 + }, + { + "epoch": 12.89, + "learning_rate": 4.355921433985821e-05, + "loss": 2.4367, + "step": 2601000 + }, + { + "epoch": 12.89, + "learning_rate": 4.3557975753432124e-05, + "loss": 2.4095, + "step": 2601500 + }, + { + "epoch": 12.89, + "learning_rate": 4.355673716700604e-05, + "loss": 2.4283, + "step": 2602000 + }, + { + "epoch": 12.89, + "learning_rate": 4.355549858057996e-05, + "loss": 2.4218, + "step": 2602500 + }, + { + "epoch": 12.9, + "learning_rate": 4.3554259994153875e-05, + "loss": 2.4259, + "step": 2603000 + }, + { + "epoch": 12.9, + "learning_rate": 4.355302140772779e-05, + "loss": 2.3895, + "step": 2603500 + }, + { + "epoch": 12.9, + "learning_rate": 4.355178282130171e-05, + "loss": 2.416, + "step": 2604000 + }, + { + "epoch": 12.9, + "learning_rate": 4.3550544234875626e-05, + "loss": 2.3923, + "step": 2604500 + }, + { + "epoch": 12.91, + "learning_rate": 4.354930812562239e-05, + "loss": 2.3966, + "step": 2605000 + }, + { + "epoch": 12.91, + "learning_rate": 4.3548069539196305e-05, + "loss": 2.4156, + "step": 2605500 + }, + { + "epoch": 12.91, + "learning_rate": 4.354683095277022e-05, + "loss": 2.4116, + "step": 2606000 + }, + { + "epoch": 12.91, + "learning_rate": 4.354559236634414e-05, + "loss": 2.4211, + "step": 2606500 + }, + { + "epoch": 12.92, + "learning_rate": 4.3544353779918055e-05, + "loss": 2.4306, + "step": 2607000 + }, + { + "epoch": 12.92, + "learning_rate": 4.354311519349197e-05, + "loss": 2.3948, + "step": 2607500 + }, + { + "epoch": 12.92, + "learning_rate": 4.354187660706589e-05, + "loss": 2.4124, + "step": 2608000 + }, + { + "epoch": 12.92, + "learning_rate": 4.3540638020639806e-05, + "loss": 2.4227, + "step": 2608500 + }, + { + "epoch": 12.93, + "learning_rate": 4.353939943421372e-05, + "loss": 2.4166, + "step": 2609000 + }, + { + "epoch": 12.93, + "learning_rate": 4.353816084778764e-05, + "loss": 2.4322, + "step": 2609500 + }, + { + "epoch": 12.93, + "learning_rate": 4.353692226136156e-05, + "loss": 2.4191, + "step": 2610000 + }, + { + "epoch": 12.93, + "learning_rate": 4.3535686152108326e-05, + "loss": 2.424, + "step": 2610500 + }, + { + "epoch": 12.94, + "learning_rate": 4.353444756568224e-05, + "loss": 2.4131, + "step": 2611000 + }, + { + "epoch": 12.94, + "learning_rate": 4.353320897925616e-05, + "loss": 2.422, + "step": 2611500 + }, + { + "epoch": 12.94, + "learning_rate": 4.3531970392830076e-05, + "loss": 2.3948, + "step": 2612000 + }, + { + "epoch": 12.94, + "learning_rate": 4.3530731806403993e-05, + "loss": 2.4332, + "step": 2612500 + }, + { + "epoch": 12.95, + "learning_rate": 4.352949321997791e-05, + "loss": 2.4331, + "step": 2613000 + }, + { + "epoch": 12.95, + "learning_rate": 4.352825463355182e-05, + "loss": 2.4107, + "step": 2613500 + }, + { + "epoch": 12.95, + "learning_rate": 4.352701604712574e-05, + "loss": 2.4368, + "step": 2614000 + }, + { + "epoch": 12.95, + "learning_rate": 4.3525779937872506e-05, + "loss": 2.4095, + "step": 2614500 + }, + { + "epoch": 12.96, + "learning_rate": 4.352454135144642e-05, + "loss": 2.3953, + "step": 2615000 + }, + { + "epoch": 12.96, + "learning_rate": 4.352330276502034e-05, + "loss": 2.411, + "step": 2615500 + }, + { + "epoch": 12.96, + "learning_rate": 4.352206417859426e-05, + "loss": 2.3921, + "step": 2616000 + }, + { + "epoch": 12.96, + "learning_rate": 4.3520828069341026e-05, + "loss": 2.4145, + "step": 2616500 + }, + { + "epoch": 12.97, + "learning_rate": 4.351958948291494e-05, + "loss": 2.4006, + "step": 2617000 + }, + { + "epoch": 12.97, + "learning_rate": 4.351835089648886e-05, + "loss": 2.4215, + "step": 2617500 + }, + { + "epoch": 12.97, + "learning_rate": 4.3517112310062777e-05, + "loss": 2.4077, + "step": 2618000 + }, + { + "epoch": 12.97, + "learning_rate": 4.3515873723636694e-05, + "loss": 2.4019, + "step": 2618500 + }, + { + "epoch": 12.98, + "learning_rate": 4.351463513721061e-05, + "loss": 2.4322, + "step": 2619000 + }, + { + "epoch": 12.98, + "learning_rate": 4.351339655078453e-05, + "loss": 2.4438, + "step": 2619500 + }, + { + "epoch": 12.98, + "learning_rate": 4.3512157964358444e-05, + "loss": 2.3985, + "step": 2620000 + }, + { + "epoch": 12.98, + "learning_rate": 4.3510919377932354e-05, + "loss": 2.4106, + "step": 2620500 + }, + { + "epoch": 12.99, + "learning_rate": 4.350968326867912e-05, + "loss": 2.4127, + "step": 2621000 + }, + { + "epoch": 12.99, + "learning_rate": 4.350844468225304e-05, + "loss": 2.3968, + "step": 2621500 + }, + { + "epoch": 12.99, + "learning_rate": 4.350720609582696e-05, + "loss": 2.4192, + "step": 2622000 + }, + { + "epoch": 12.99, + "learning_rate": 4.3505967509400874e-05, + "loss": 2.4349, + "step": 2622500 + }, + { + "epoch": 13.0, + "learning_rate": 4.350472892297479e-05, + "loss": 2.403, + "step": 2623000 + }, + { + "epoch": 13.0, + "learning_rate": 4.350349281372156e-05, + "loss": 2.4137, + "step": 2623500 + }, + { + "epoch": 13.0, + "eval_accuracy": 0.6461218698848585, + "eval_accuracy_mlm": 0.5996847093159505, + "eval_accuracy_nsp": 0.8652920665675657, + "eval_loss": 2.396927833557129, + "eval_runtime": 146.13, + "eval_samples_per_second": 1744.741, + "eval_steps_per_second": 72.702, + "step": 2623959 + }, + { + "epoch": 13.0, + "learning_rate": 4.350225422729548e-05, + "loss": 2.3977, + "step": 2624000 + }, + { + "epoch": 13.0, + "learning_rate": 4.3501015640869394e-05, + "loss": 2.374, + "step": 2624500 + }, + { + "epoch": 13.01, + "learning_rate": 4.3499779531616156e-05, + "loss": 2.3743, + "step": 2625000 + }, + { + "epoch": 13.01, + "learning_rate": 4.349854094519007e-05, + "loss": 2.3774, + "step": 2625500 + }, + { + "epoch": 13.01, + "learning_rate": 4.349730483593684e-05, + "loss": 2.3954, + "step": 2626000 + }, + { + "epoch": 13.01, + "learning_rate": 4.349606624951076e-05, + "loss": 2.3685, + "step": 2626500 + }, + { + "epoch": 13.02, + "learning_rate": 4.3494827663084675e-05, + "loss": 2.3846, + "step": 2627000 + }, + { + "epoch": 13.02, + "learning_rate": 4.349358907665859e-05, + "loss": 2.4169, + "step": 2627500 + }, + { + "epoch": 13.02, + "learning_rate": 4.349235049023251e-05, + "loss": 2.3881, + "step": 2628000 + }, + { + "epoch": 13.02, + "learning_rate": 4.3491111903806426e-05, + "loss": 2.3866, + "step": 2628500 + }, + { + "epoch": 13.02, + "learning_rate": 4.3489875794553195e-05, + "loss": 2.3958, + "step": 2629000 + }, + { + "epoch": 13.03, + "learning_rate": 4.3488639685299964e-05, + "loss": 2.3686, + "step": 2629500 + }, + { + "epoch": 13.03, + "learning_rate": 4.348740109887388e-05, + "loss": 2.3922, + "step": 2630000 + }, + { + "epoch": 13.03, + "learning_rate": 4.34861625124478e-05, + "loss": 2.3662, + "step": 2630500 + }, + { + "epoch": 13.03, + "learning_rate": 4.3484923926021714e-05, + "loss": 2.3793, + "step": 2631000 + }, + { + "epoch": 13.04, + "learning_rate": 4.3483685339595625e-05, + "loss": 2.3712, + "step": 2631500 + }, + { + "epoch": 13.04, + "learning_rate": 4.348244675316954e-05, + "loss": 2.4067, + "step": 2632000 + }, + { + "epoch": 13.04, + "learning_rate": 4.348120816674346e-05, + "loss": 2.3861, + "step": 2632500 + }, + { + "epoch": 13.04, + "learning_rate": 4.3479972057490234e-05, + "loss": 2.4153, + "step": 2633000 + }, + { + "epoch": 13.05, + "learning_rate": 4.347873347106415e-05, + "loss": 2.3681, + "step": 2633500 + }, + { + "epoch": 13.05, + "learning_rate": 4.347749488463807e-05, + "loss": 2.3843, + "step": 2634000 + }, + { + "epoch": 13.05, + "learning_rate": 4.347625629821198e-05, + "loss": 2.3793, + "step": 2634500 + }, + { + "epoch": 13.05, + "learning_rate": 4.3475017711785895e-05, + "loss": 2.3923, + "step": 2635000 + }, + { + "epoch": 13.06, + "learning_rate": 4.347377912535981e-05, + "loss": 2.3799, + "step": 2635500 + }, + { + "epoch": 13.06, + "learning_rate": 4.347254053893373e-05, + "loss": 2.3595, + "step": 2636000 + }, + { + "epoch": 13.06, + "learning_rate": 4.3471301952507646e-05, + "loss": 2.4064, + "step": 2636500 + }, + { + "epoch": 13.06, + "learning_rate": 4.3470063366081556e-05, + "loss": 2.4083, + "step": 2637000 + }, + { + "epoch": 13.07, + "learning_rate": 4.346882477965547e-05, + "loss": 2.3916, + "step": 2637500 + }, + { + "epoch": 13.07, + "learning_rate": 4.346758619322939e-05, + "loss": 2.372, + "step": 2638000 + }, + { + "epoch": 13.07, + "learning_rate": 4.346634760680331e-05, + "loss": 2.3781, + "step": 2638500 + }, + { + "epoch": 13.07, + "learning_rate": 4.3465109020377224e-05, + "loss": 2.408, + "step": 2639000 + }, + { + "epoch": 13.08, + "learning_rate": 4.346387043395114e-05, + "loss": 2.4136, + "step": 2639500 + }, + { + "epoch": 13.08, + "learning_rate": 4.346263432469791e-05, + "loss": 2.3621, + "step": 2640000 + }, + { + "epoch": 13.08, + "learning_rate": 4.3461398215444685e-05, + "loss": 2.3862, + "step": 2640500 + }, + { + "epoch": 13.08, + "learning_rate": 4.3460159629018595e-05, + "loss": 2.3855, + "step": 2641000 + }, + { + "epoch": 13.09, + "learning_rate": 4.345892104259251e-05, + "loss": 2.4046, + "step": 2641500 + }, + { + "epoch": 13.09, + "learning_rate": 4.345768245616643e-05, + "loss": 2.3887, + "step": 2642000 + }, + { + "epoch": 13.09, + "learning_rate": 4.3456443869740346e-05, + "loss": 2.3702, + "step": 2642500 + }, + { + "epoch": 13.09, + "learning_rate": 4.3455207760487115e-05, + "loss": 2.4123, + "step": 2643000 + }, + { + "epoch": 13.1, + "learning_rate": 4.345396917406103e-05, + "loss": 2.3679, + "step": 2643500 + }, + { + "epoch": 13.1, + "learning_rate": 4.345273058763494e-05, + "loss": 2.3806, + "step": 2644000 + }, + { + "epoch": 13.1, + "learning_rate": 4.345149200120886e-05, + "loss": 2.4071, + "step": 2644500 + }, + { + "epoch": 13.1, + "learning_rate": 4.3450253414782776e-05, + "loss": 2.3682, + "step": 2645000 + }, + { + "epoch": 13.11, + "learning_rate": 4.344901482835669e-05, + "loss": 2.375, + "step": 2645500 + }, + { + "epoch": 13.11, + "learning_rate": 4.344777624193061e-05, + "loss": 2.3903, + "step": 2646000 + }, + { + "epoch": 13.11, + "learning_rate": 4.3446537655504526e-05, + "loss": 2.3944, + "step": 2646500 + }, + { + "epoch": 13.11, + "learning_rate": 4.344529906907844e-05, + "loss": 2.3925, + "step": 2647000 + }, + { + "epoch": 13.12, + "learning_rate": 4.344406048265236e-05, + "loss": 2.3984, + "step": 2647500 + }, + { + "epoch": 13.12, + "learning_rate": 4.344282189622628e-05, + "loss": 2.4028, + "step": 2648000 + }, + { + "epoch": 13.12, + "learning_rate": 4.3441583309800194e-05, + "loss": 2.3867, + "step": 2648500 + }, + { + "epoch": 13.12, + "learning_rate": 4.344034472337411e-05, + "loss": 2.4064, + "step": 2649000 + }, + { + "epoch": 13.13, + "learning_rate": 4.343910613694803e-05, + "loss": 2.3914, + "step": 2649500 + }, + { + "epoch": 13.13, + "learning_rate": 4.34378700276948e-05, + "loss": 2.4027, + "step": 2650000 + }, + { + "epoch": 13.13, + "learning_rate": 4.343663391844156e-05, + "loss": 2.4052, + "step": 2650500 + }, + { + "epoch": 13.13, + "learning_rate": 4.3435395332015476e-05, + "loss": 2.3882, + "step": 2651000 + }, + { + "epoch": 13.14, + "learning_rate": 4.3434161699935096e-05, + "loss": 2.3821, + "step": 2651500 + }, + { + "epoch": 13.14, + "learning_rate": 4.3432923113509013e-05, + "loss": 2.4076, + "step": 2652000 + }, + { + "epoch": 13.14, + "learning_rate": 4.343168452708293e-05, + "loss": 2.3935, + "step": 2652500 + }, + { + "epoch": 13.14, + "learning_rate": 4.34304484178297e-05, + "loss": 2.3977, + "step": 2653000 + }, + { + "epoch": 13.15, + "learning_rate": 4.3429209831403616e-05, + "loss": 2.3899, + "step": 2653500 + }, + { + "epoch": 13.15, + "learning_rate": 4.342797124497753e-05, + "loss": 2.3781, + "step": 2654000 + }, + { + "epoch": 13.15, + "learning_rate": 4.342673265855145e-05, + "loss": 2.3914, + "step": 2654500 + }, + { + "epoch": 13.15, + "learning_rate": 4.342549407212537e-05, + "loss": 2.3926, + "step": 2655000 + }, + { + "epoch": 13.16, + "learning_rate": 4.3424255485699284e-05, + "loss": 2.4267, + "step": 2655500 + }, + { + "epoch": 13.16, + "learning_rate": 4.34230168992732e-05, + "loss": 2.3881, + "step": 2656000 + }, + { + "epoch": 13.16, + "learning_rate": 4.342177831284712e-05, + "loss": 2.3844, + "step": 2656500 + }, + { + "epoch": 13.16, + "learning_rate": 4.3420539726421035e-05, + "loss": 2.4005, + "step": 2657000 + }, + { + "epoch": 13.17, + "learning_rate": 4.341930113999495e-05, + "loss": 2.3819, + "step": 2657500 + }, + { + "epoch": 13.17, + "learning_rate": 4.341806255356887e-05, + "loss": 2.395, + "step": 2658000 + }, + { + "epoch": 13.17, + "learning_rate": 4.341682644431563e-05, + "loss": 2.371, + "step": 2658500 + }, + { + "epoch": 13.17, + "learning_rate": 4.341558785788955e-05, + "loss": 2.3832, + "step": 2659000 + }, + { + "epoch": 13.18, + "learning_rate": 4.3414349271463464e-05, + "loss": 2.391, + "step": 2659500 + }, + { + "epoch": 13.18, + "learning_rate": 4.341311068503738e-05, + "loss": 2.3821, + "step": 2660000 + }, + { + "epoch": 13.18, + "learning_rate": 4.34118720986113e-05, + "loss": 2.3738, + "step": 2660500 + }, + { + "epoch": 13.18, + "learning_rate": 4.3410633512185215e-05, + "loss": 2.398, + "step": 2661000 + }, + { + "epoch": 13.19, + "learning_rate": 4.340939492575913e-05, + "loss": 2.395, + "step": 2661500 + }, + { + "epoch": 13.19, + "learning_rate": 4.340815633933305e-05, + "loss": 2.3963, + "step": 2662000 + }, + { + "epoch": 13.19, + "learning_rate": 4.3406917752906966e-05, + "loss": 2.3994, + "step": 2662500 + }, + { + "epoch": 13.19, + "learning_rate": 4.3405679166480876e-05, + "loss": 2.3915, + "step": 2663000 + }, + { + "epoch": 13.2, + "learning_rate": 4.340444058005479e-05, + "loss": 2.4018, + "step": 2663500 + }, + { + "epoch": 13.2, + "learning_rate": 4.340320199362871e-05, + "loss": 2.3685, + "step": 2664000 + }, + { + "epoch": 13.2, + "learning_rate": 4.340196340720263e-05, + "loss": 2.4209, + "step": 2664500 + }, + { + "epoch": 13.2, + "learning_rate": 4.3400724820776544e-05, + "loss": 2.406, + "step": 2665000 + }, + { + "epoch": 13.21, + "learning_rate": 4.339948623435046e-05, + "loss": 2.3968, + "step": 2665500 + }, + { + "epoch": 13.21, + "learning_rate": 4.339824764792438e-05, + "loss": 2.3927, + "step": 2666000 + }, + { + "epoch": 13.21, + "learning_rate": 4.3397011538671146e-05, + "loss": 2.3701, + "step": 2666500 + }, + { + "epoch": 13.21, + "learning_rate": 4.339577295224506e-05, + "loss": 2.364, + "step": 2667000 + }, + { + "epoch": 13.22, + "learning_rate": 4.339453436581898e-05, + "loss": 2.3787, + "step": 2667500 + }, + { + "epoch": 13.22, + "learning_rate": 4.33932957793929e-05, + "loss": 2.3985, + "step": 2668000 + }, + { + "epoch": 13.22, + "learning_rate": 4.3392057192966814e-05, + "loss": 2.379, + "step": 2668500 + }, + { + "epoch": 13.22, + "learning_rate": 4.339081860654073e-05, + "loss": 2.3979, + "step": 2669000 + }, + { + "epoch": 13.23, + "learning_rate": 4.338958002011465e-05, + "loss": 2.3876, + "step": 2669500 + }, + { + "epoch": 13.23, + "learning_rate": 4.3388341433688565e-05, + "loss": 2.4065, + "step": 2670000 + }, + { + "epoch": 13.23, + "learning_rate": 4.338710284726248e-05, + "loss": 2.3983, + "step": 2670500 + }, + { + "epoch": 13.23, + "learning_rate": 4.338586426083639e-05, + "loss": 2.4001, + "step": 2671000 + }, + { + "epoch": 13.24, + "learning_rate": 4.338462567441031e-05, + "loss": 2.4228, + "step": 2671500 + }, + { + "epoch": 13.24, + "learning_rate": 4.338338956515708e-05, + "loss": 2.3995, + "step": 2672000 + }, + { + "epoch": 13.24, + "learning_rate": 4.3382153455903847e-05, + "loss": 2.3867, + "step": 2672500 + }, + { + "epoch": 13.24, + "learning_rate": 4.3380914869477763e-05, + "loss": 2.3989, + "step": 2673000 + }, + { + "epoch": 13.25, + "learning_rate": 4.337967628305168e-05, + "loss": 2.4051, + "step": 2673500 + }, + { + "epoch": 13.25, + "learning_rate": 4.33784376966256e-05, + "loss": 2.4195, + "step": 2674000 + }, + { + "epoch": 13.25, + "learning_rate": 4.3377199110199514e-05, + "loss": 2.4004, + "step": 2674500 + }, + { + "epoch": 13.25, + "learning_rate": 4.337596052377343e-05, + "loss": 2.3939, + "step": 2675000 + }, + { + "epoch": 13.26, + "learning_rate": 4.337472193734735e-05, + "loss": 2.3849, + "step": 2675500 + }, + { + "epoch": 13.26, + "learning_rate": 4.3373483350921265e-05, + "loss": 2.3963, + "step": 2676000 + }, + { + "epoch": 13.26, + "learning_rate": 4.337224476449518e-05, + "loss": 2.4051, + "step": 2676500 + }, + { + "epoch": 13.26, + "learning_rate": 4.3371008655241944e-05, + "loss": 2.3968, + "step": 2677000 + }, + { + "epoch": 13.27, + "learning_rate": 4.336977006881586e-05, + "loss": 2.4036, + "step": 2677500 + }, + { + "epoch": 13.27, + "learning_rate": 4.336853148238978e-05, + "loss": 2.4113, + "step": 2678000 + }, + { + "epoch": 13.27, + "learning_rate": 4.3367292895963695e-05, + "loss": 2.4127, + "step": 2678500 + }, + { + "epoch": 13.27, + "learning_rate": 4.336605430953761e-05, + "loss": 2.3876, + "step": 2679000 + }, + { + "epoch": 13.28, + "learning_rate": 4.336481572311153e-05, + "loss": 2.4134, + "step": 2679500 + }, + { + "epoch": 13.28, + "learning_rate": 4.33635796138583e-05, + "loss": 2.4021, + "step": 2680000 + }, + { + "epoch": 13.28, + "learning_rate": 4.3362343504605066e-05, + "loss": 2.401, + "step": 2680500 + }, + { + "epoch": 13.28, + "learning_rate": 4.336110491817898e-05, + "loss": 2.3873, + "step": 2681000 + }, + { + "epoch": 13.29, + "learning_rate": 4.335986633175289e-05, + "loss": 2.394, + "step": 2681500 + }, + { + "epoch": 13.29, + "learning_rate": 4.335863022249967e-05, + "loss": 2.4049, + "step": 2682000 + }, + { + "epoch": 13.29, + "learning_rate": 4.3357391636073586e-05, + "loss": 2.3955, + "step": 2682500 + }, + { + "epoch": 13.29, + "learning_rate": 4.33561530496475e-05, + "loss": 2.4062, + "step": 2683000 + }, + { + "epoch": 13.29, + "learning_rate": 4.335491446322142e-05, + "loss": 2.427, + "step": 2683500 + }, + { + "epoch": 13.3, + "learning_rate": 4.335367587679534e-05, + "loss": 2.4073, + "step": 2684000 + }, + { + "epoch": 13.3, + "learning_rate": 4.3352437290369254e-05, + "loss": 2.4097, + "step": 2684500 + }, + { + "epoch": 13.3, + "learning_rate": 4.3351198703943164e-05, + "loss": 2.3962, + "step": 2685000 + }, + { + "epoch": 13.3, + "learning_rate": 4.334996011751708e-05, + "loss": 2.3851, + "step": 2685500 + }, + { + "epoch": 13.31, + "learning_rate": 4.3348721531091e-05, + "loss": 2.4041, + "step": 2686000 + }, + { + "epoch": 13.31, + "learning_rate": 4.3347482944664914e-05, + "loss": 2.3944, + "step": 2686500 + }, + { + "epoch": 13.31, + "learning_rate": 4.334624435823883e-05, + "loss": 2.4004, + "step": 2687000 + }, + { + "epoch": 13.31, + "learning_rate": 4.334501072615845e-05, + "loss": 2.3767, + "step": 2687500 + }, + { + "epoch": 13.32, + "learning_rate": 4.334377213973237e-05, + "loss": 2.3833, + "step": 2688000 + }, + { + "epoch": 13.32, + "learning_rate": 4.3342533553306286e-05, + "loss": 2.3789, + "step": 2688500 + }, + { + "epoch": 13.32, + "learning_rate": 4.3341297444053055e-05, + "loss": 2.4131, + "step": 2689000 + }, + { + "epoch": 13.32, + "learning_rate": 4.334005885762697e-05, + "loss": 2.404, + "step": 2689500 + }, + { + "epoch": 13.33, + "learning_rate": 4.333882027120089e-05, + "loss": 2.4251, + "step": 2690000 + }, + { + "epoch": 13.33, + "learning_rate": 4.3337581684774806e-05, + "loss": 2.4047, + "step": 2690500 + }, + { + "epoch": 13.33, + "learning_rate": 4.333634309834872e-05, + "loss": 2.384, + "step": 2691000 + }, + { + "epoch": 13.33, + "learning_rate": 4.333510451192264e-05, + "loss": 2.4168, + "step": 2691500 + }, + { + "epoch": 13.34, + "learning_rate": 4.333386592549655e-05, + "loss": 2.3786, + "step": 2692000 + }, + { + "epoch": 13.34, + "learning_rate": 4.3332627339070467e-05, + "loss": 2.3896, + "step": 2692500 + }, + { + "epoch": 13.34, + "learning_rate": 4.3331388752644383e-05, + "loss": 2.3878, + "step": 2693000 + }, + { + "epoch": 13.34, + "learning_rate": 4.33301501662183e-05, + "loss": 2.3996, + "step": 2693500 + }, + { + "epoch": 13.35, + "learning_rate": 4.332891157979222e-05, + "loss": 2.4324, + "step": 2694000 + }, + { + "epoch": 13.35, + "learning_rate": 4.3327675470538986e-05, + "loss": 2.3555, + "step": 2694500 + }, + { + "epoch": 13.35, + "learning_rate": 4.33264368841129e-05, + "loss": 2.4025, + "step": 2695000 + }, + { + "epoch": 13.35, + "learning_rate": 4.332520077485967e-05, + "loss": 2.4122, + "step": 2695500 + }, + { + "epoch": 13.36, + "learning_rate": 4.332396218843359e-05, + "loss": 2.3975, + "step": 2696000 + }, + { + "epoch": 13.36, + "learning_rate": 4.3322723602007506e-05, + "loss": 2.3986, + "step": 2696500 + }, + { + "epoch": 13.36, + "learning_rate": 4.332148501558142e-05, + "loss": 2.378, + "step": 2697000 + }, + { + "epoch": 13.36, + "learning_rate": 4.332024642915534e-05, + "loss": 2.4197, + "step": 2697500 + }, + { + "epoch": 13.37, + "learning_rate": 4.3319007842729256e-05, + "loss": 2.4088, + "step": 2698000 + }, + { + "epoch": 13.37, + "learning_rate": 4.3317769256303173e-05, + "loss": 2.427, + "step": 2698500 + }, + { + "epoch": 13.37, + "learning_rate": 4.3316530669877084e-05, + "loss": 2.4063, + "step": 2699000 + }, + { + "epoch": 13.37, + "learning_rate": 4.3315292083451e-05, + "loss": 2.4056, + "step": 2699500 + }, + { + "epoch": 13.38, + "learning_rate": 4.331405349702492e-05, + "loss": 2.3971, + "step": 2700000 + }, + { + "epoch": 13.38, + "learning_rate": 4.3312814910598834e-05, + "loss": 2.4082, + "step": 2700500 + }, + { + "epoch": 13.38, + "learning_rate": 4.331157632417275e-05, + "loss": 2.401, + "step": 2701000 + }, + { + "epoch": 13.38, + "learning_rate": 4.331033773774666e-05, + "loss": 2.3885, + "step": 2701500 + }, + { + "epoch": 13.39, + "learning_rate": 4.330909915132058e-05, + "loss": 2.3773, + "step": 2702000 + }, + { + "epoch": 13.39, + "learning_rate": 4.3307860564894495e-05, + "loss": 2.441, + "step": 2702500 + }, + { + "epoch": 13.39, + "learning_rate": 4.330662197846841e-05, + "loss": 2.4094, + "step": 2703000 + }, + { + "epoch": 13.39, + "learning_rate": 4.330538339204233e-05, + "loss": 2.4056, + "step": 2703500 + }, + { + "epoch": 13.4, + "learning_rate": 4.3304144805616246e-05, + "loss": 2.4129, + "step": 2704000 + }, + { + "epoch": 13.4, + "learning_rate": 4.3302908696363015e-05, + "loss": 2.3796, + "step": 2704500 + }, + { + "epoch": 13.4, + "learning_rate": 4.330167010993693e-05, + "loss": 2.3743, + "step": 2705000 + }, + { + "epoch": 13.4, + "learning_rate": 4.330043152351085e-05, + "loss": 2.4091, + "step": 2705500 + }, + { + "epoch": 13.41, + "learning_rate": 4.329919789143047e-05, + "loss": 2.3977, + "step": 2706000 + }, + { + "epoch": 13.41, + "learning_rate": 4.3297959305004386e-05, + "loss": 2.3935, + "step": 2706500 + }, + { + "epoch": 13.41, + "learning_rate": 4.32967207185783e-05, + "loss": 2.4121, + "step": 2707000 + }, + { + "epoch": 13.41, + "learning_rate": 4.329548213215222e-05, + "loss": 2.4118, + "step": 2707500 + }, + { + "epoch": 13.42, + "learning_rate": 4.329424354572614e-05, + "loss": 2.4029, + "step": 2708000 + }, + { + "epoch": 13.42, + "learning_rate": 4.3293007436472906e-05, + "loss": 2.3758, + "step": 2708500 + }, + { + "epoch": 13.42, + "learning_rate": 4.329176885004682e-05, + "loss": 2.3919, + "step": 2709000 + }, + { + "epoch": 13.42, + "learning_rate": 4.329053026362074e-05, + "loss": 2.408, + "step": 2709500 + }, + { + "epoch": 13.43, + "learning_rate": 4.328929167719466e-05, + "loss": 2.3962, + "step": 2710000 + }, + { + "epoch": 13.43, + "learning_rate": 4.3288053090768574e-05, + "loss": 2.3936, + "step": 2710500 + }, + { + "epoch": 13.43, + "learning_rate": 4.328681450434249e-05, + "loss": 2.402, + "step": 2711000 + }, + { + "epoch": 13.43, + "learning_rate": 4.328557839508925e-05, + "loss": 2.3929, + "step": 2711500 + }, + { + "epoch": 13.44, + "learning_rate": 4.328433980866317e-05, + "loss": 2.3941, + "step": 2712000 + }, + { + "epoch": 13.44, + "learning_rate": 4.3283101222237086e-05, + "loss": 2.396, + "step": 2712500 + }, + { + "epoch": 13.44, + "learning_rate": 4.3281862635811003e-05, + "loss": 2.4242, + "step": 2713000 + }, + { + "epoch": 13.44, + "learning_rate": 4.328062404938492e-05, + "loss": 2.4014, + "step": 2713500 + }, + { + "epoch": 13.45, + "learning_rate": 4.327938546295884e-05, + "loss": 2.3839, + "step": 2714000 + }, + { + "epoch": 13.45, + "learning_rate": 4.3278146876532754e-05, + "loss": 2.4211, + "step": 2714500 + }, + { + "epoch": 13.45, + "learning_rate": 4.327691076727952e-05, + "loss": 2.3848, + "step": 2715000 + }, + { + "epoch": 13.45, + "learning_rate": 4.327567218085344e-05, + "loss": 2.4099, + "step": 2715500 + }, + { + "epoch": 13.46, + "learning_rate": 4.327443359442736e-05, + "loss": 2.4064, + "step": 2716000 + }, + { + "epoch": 13.46, + "learning_rate": 4.3273195008001274e-05, + "loss": 2.4105, + "step": 2716500 + }, + { + "epoch": 13.46, + "learning_rate": 4.327195642157519e-05, + "loss": 2.4022, + "step": 2717000 + }, + { + "epoch": 13.46, + "learning_rate": 4.327071783514911e-05, + "loss": 2.3729, + "step": 2717500 + }, + { + "epoch": 13.47, + "learning_rate": 4.3269479248723025e-05, + "loss": 2.4031, + "step": 2718000 + }, + { + "epoch": 13.47, + "learning_rate": 4.326824066229694e-05, + "loss": 2.3839, + "step": 2718500 + }, + { + "epoch": 13.47, + "learning_rate": 4.3267004553043704e-05, + "loss": 2.4294, + "step": 2719000 + }, + { + "epoch": 13.47, + "learning_rate": 4.326576596661762e-05, + "loss": 2.4108, + "step": 2719500 + }, + { + "epoch": 13.48, + "learning_rate": 4.326452738019154e-05, + "loss": 2.4364, + "step": 2720000 + }, + { + "epoch": 13.48, + "learning_rate": 4.3263288793765454e-05, + "loss": 2.4068, + "step": 2720500 + }, + { + "epoch": 13.48, + "learning_rate": 4.326205020733937e-05, + "loss": 2.4131, + "step": 2721000 + }, + { + "epoch": 13.48, + "learning_rate": 4.326081162091329e-05, + "loss": 2.3866, + "step": 2721500 + }, + { + "epoch": 13.49, + "learning_rate": 4.325957551166006e-05, + "loss": 2.388, + "step": 2722000 + }, + { + "epoch": 13.49, + "learning_rate": 4.3258336925233974e-05, + "loss": 2.3977, + "step": 2722500 + }, + { + "epoch": 13.49, + "learning_rate": 4.3257100815980736e-05, + "loss": 2.4153, + "step": 2723000 + }, + { + "epoch": 13.49, + "learning_rate": 4.325586222955465e-05, + "loss": 2.3835, + "step": 2723500 + }, + { + "epoch": 13.5, + "learning_rate": 4.325462364312857e-05, + "loss": 2.4049, + "step": 2724000 + }, + { + "epoch": 13.5, + "learning_rate": 4.325338505670249e-05, + "loss": 2.4043, + "step": 2724500 + }, + { + "epoch": 13.5, + "learning_rate": 4.3252146470276404e-05, + "loss": 2.4012, + "step": 2725000 + }, + { + "epoch": 13.5, + "learning_rate": 4.325090788385032e-05, + "loss": 2.3883, + "step": 2725500 + }, + { + "epoch": 13.51, + "learning_rate": 4.324966929742424e-05, + "loss": 2.4236, + "step": 2726000 + }, + { + "epoch": 13.51, + "learning_rate": 4.3248430710998154e-05, + "loss": 2.3839, + "step": 2726500 + }, + { + "epoch": 13.51, + "learning_rate": 4.324719212457207e-05, + "loss": 2.4157, + "step": 2727000 + }, + { + "epoch": 13.51, + "learning_rate": 4.324595353814599e-05, + "loss": 2.3927, + "step": 2727500 + }, + { + "epoch": 13.52, + "learning_rate": 4.324471742889276e-05, + "loss": 2.388, + "step": 2728000 + }, + { + "epoch": 13.52, + "learning_rate": 4.3243481319639526e-05, + "loss": 2.3996, + "step": 2728500 + }, + { + "epoch": 13.52, + "learning_rate": 4.3242242733213436e-05, + "loss": 2.4144, + "step": 2729000 + }, + { + "epoch": 13.52, + "learning_rate": 4.324100414678735e-05, + "loss": 2.4069, + "step": 2729500 + }, + { + "epoch": 13.53, + "learning_rate": 4.323976556036127e-05, + "loss": 2.4112, + "step": 2730000 + }, + { + "epoch": 13.53, + "learning_rate": 4.323852697393519e-05, + "loss": 2.4076, + "step": 2730500 + }, + { + "epoch": 13.53, + "learning_rate": 4.3237288387509104e-05, + "loss": 2.4015, + "step": 2731000 + }, + { + "epoch": 13.53, + "learning_rate": 4.323604980108302e-05, + "loss": 2.4073, + "step": 2731500 + }, + { + "epoch": 13.54, + "learning_rate": 4.323481369182979e-05, + "loss": 2.428, + "step": 2732000 + }, + { + "epoch": 13.54, + "learning_rate": 4.3233575105403706e-05, + "loss": 2.3869, + "step": 2732500 + }, + { + "epoch": 13.54, + "learning_rate": 4.323233651897762e-05, + "loss": 2.411, + "step": 2733000 + }, + { + "epoch": 13.54, + "learning_rate": 4.323109793255154e-05, + "loss": 2.3869, + "step": 2733500 + }, + { + "epoch": 13.55, + "learning_rate": 4.322985934612546e-05, + "loss": 2.4143, + "step": 2734000 + }, + { + "epoch": 13.55, + "learning_rate": 4.3228620759699374e-05, + "loss": 2.4027, + "step": 2734500 + }, + { + "epoch": 13.55, + "learning_rate": 4.322738217327329e-05, + "loss": 2.3911, + "step": 2735000 + }, + { + "epoch": 13.55, + "learning_rate": 4.322614606402006e-05, + "loss": 2.4308, + "step": 2735500 + }, + { + "epoch": 13.56, + "learning_rate": 4.322490747759397e-05, + "loss": 2.4117, + "step": 2736000 + }, + { + "epoch": 13.56, + "learning_rate": 4.322366889116789e-05, + "loss": 2.4093, + "step": 2736500 + }, + { + "epoch": 13.56, + "learning_rate": 4.3222430304741804e-05, + "loss": 2.372, + "step": 2737000 + }, + { + "epoch": 13.56, + "learning_rate": 4.322119171831572e-05, + "loss": 2.3907, + "step": 2737500 + }, + { + "epoch": 13.56, + "learning_rate": 4.321995313188964e-05, + "loss": 2.3867, + "step": 2738000 + }, + { + "epoch": 13.57, + "learning_rate": 4.3218714545463555e-05, + "loss": 2.3667, + "step": 2738500 + }, + { + "epoch": 13.57, + "learning_rate": 4.3217478436210323e-05, + "loss": 2.3809, + "step": 2739000 + }, + { + "epoch": 13.57, + "learning_rate": 4.321623984978424e-05, + "loss": 2.4062, + "step": 2739500 + }, + { + "epoch": 13.57, + "learning_rate": 4.321500126335816e-05, + "loss": 2.396, + "step": 2740000 + }, + { + "epoch": 13.58, + "learning_rate": 4.3213762676932074e-05, + "loss": 2.4295, + "step": 2740500 + }, + { + "epoch": 13.58, + "learning_rate": 4.321252409050599e-05, + "loss": 2.4223, + "step": 2741000 + }, + { + "epoch": 13.58, + "learning_rate": 4.321128550407991e-05, + "loss": 2.4312, + "step": 2741500 + }, + { + "epoch": 13.58, + "learning_rate": 4.3210046917653825e-05, + "loss": 2.408, + "step": 2742000 + }, + { + "epoch": 13.59, + "learning_rate": 4.320880833122774e-05, + "loss": 2.391, + "step": 2742500 + }, + { + "epoch": 13.59, + "learning_rate": 4.320756974480166e-05, + "loss": 2.4148, + "step": 2743000 + }, + { + "epoch": 13.59, + "learning_rate": 4.320633363554842e-05, + "loss": 2.3847, + "step": 2743500 + }, + { + "epoch": 13.59, + "learning_rate": 4.320509504912234e-05, + "loss": 2.4125, + "step": 2744000 + }, + { + "epoch": 13.6, + "learning_rate": 4.3203856462696255e-05, + "loss": 2.4068, + "step": 2744500 + }, + { + "epoch": 13.6, + "learning_rate": 4.320261787627017e-05, + "loss": 2.3831, + "step": 2745000 + }, + { + "epoch": 13.6, + "learning_rate": 4.320137928984409e-05, + "loss": 2.4067, + "step": 2745500 + }, + { + "epoch": 13.6, + "learning_rate": 4.3200140703418006e-05, + "loss": 2.3968, + "step": 2746000 + }, + { + "epoch": 13.61, + "learning_rate": 4.319890211699192e-05, + "loss": 2.4172, + "step": 2746500 + }, + { + "epoch": 13.61, + "learning_rate": 4.319766353056584e-05, + "loss": 2.4011, + "step": 2747000 + }, + { + "epoch": 13.61, + "learning_rate": 4.319642494413975e-05, + "loss": 2.4083, + "step": 2747500 + }, + { + "epoch": 13.61, + "learning_rate": 4.3195188834886525e-05, + "loss": 2.4248, + "step": 2748000 + }, + { + "epoch": 13.62, + "learning_rate": 4.319395024846044e-05, + "loss": 2.3911, + "step": 2748500 + }, + { + "epoch": 13.62, + "learning_rate": 4.319271166203436e-05, + "loss": 2.3895, + "step": 2749000 + }, + { + "epoch": 13.62, + "learning_rate": 4.3191473075608276e-05, + "loss": 2.4364, + "step": 2749500 + }, + { + "epoch": 13.62, + "learning_rate": 4.319023448918219e-05, + "loss": 2.4193, + "step": 2750000 + }, + { + "epoch": 13.63, + "learning_rate": 4.3188998379928955e-05, + "loss": 2.4033, + "step": 2750500 + }, + { + "epoch": 13.63, + "learning_rate": 4.318775979350287e-05, + "loss": 2.4104, + "step": 2751000 + }, + { + "epoch": 13.63, + "learning_rate": 4.318652120707679e-05, + "loss": 2.4298, + "step": 2751500 + }, + { + "epoch": 13.63, + "learning_rate": 4.3185282620650706e-05, + "loss": 2.3979, + "step": 2752000 + }, + { + "epoch": 13.64, + "learning_rate": 4.318404403422462e-05, + "loss": 2.39, + "step": 2752500 + }, + { + "epoch": 13.64, + "learning_rate": 4.318280792497139e-05, + "loss": 2.4094, + "step": 2753000 + }, + { + "epoch": 13.64, + "learning_rate": 4.318156933854531e-05, + "loss": 2.4306, + "step": 2753500 + }, + { + "epoch": 13.64, + "learning_rate": 4.3180330752119225e-05, + "loss": 2.4091, + "step": 2754000 + }, + { + "epoch": 13.65, + "learning_rate": 4.317909216569314e-05, + "loss": 2.4295, + "step": 2754500 + }, + { + "epoch": 13.65, + "learning_rate": 4.317785357926706e-05, + "loss": 2.4031, + "step": 2755000 + }, + { + "epoch": 13.65, + "learning_rate": 4.317661747001383e-05, + "loss": 2.4377, + "step": 2755500 + }, + { + "epoch": 13.65, + "learning_rate": 4.3175378883587745e-05, + "loss": 2.3773, + "step": 2756000 + }, + { + "epoch": 13.66, + "learning_rate": 4.3174140297161655e-05, + "loss": 2.4163, + "step": 2756500 + }, + { + "epoch": 13.66, + "learning_rate": 4.317290171073557e-05, + "loss": 2.4139, + "step": 2757000 + }, + { + "epoch": 13.66, + "learning_rate": 4.317166312430949e-05, + "loss": 2.4116, + "step": 2757500 + }, + { + "epoch": 13.66, + "learning_rate": 4.3170424537883406e-05, + "loss": 2.391, + "step": 2758000 + }, + { + "epoch": 13.67, + "learning_rate": 4.316918595145732e-05, + "loss": 2.4302, + "step": 2758500 + }, + { + "epoch": 13.67, + "learning_rate": 4.316794984220409e-05, + "loss": 2.4052, + "step": 2759000 + }, + { + "epoch": 13.67, + "learning_rate": 4.316671373295086e-05, + "loss": 2.4154, + "step": 2759500 + }, + { + "epoch": 13.67, + "learning_rate": 4.316547514652478e-05, + "loss": 2.3856, + "step": 2760000 + }, + { + "epoch": 13.68, + "learning_rate": 4.3164236560098694e-05, + "loss": 2.4001, + "step": 2760500 + }, + { + "epoch": 13.68, + "learning_rate": 4.316299797367261e-05, + "loss": 2.4079, + "step": 2761000 + }, + { + "epoch": 13.68, + "learning_rate": 4.316175938724653e-05, + "loss": 2.3876, + "step": 2761500 + }, + { + "epoch": 13.68, + "learning_rate": 4.3160520800820445e-05, + "loss": 2.4033, + "step": 2762000 + }, + { + "epoch": 13.69, + "learning_rate": 4.315928221439436e-05, + "loss": 2.4024, + "step": 2762500 + }, + { + "epoch": 13.69, + "learning_rate": 4.3158046105141124e-05, + "loss": 2.3802, + "step": 2763000 + }, + { + "epoch": 13.69, + "learning_rate": 4.315680751871504e-05, + "loss": 2.4104, + "step": 2763500 + }, + { + "epoch": 13.69, + "learning_rate": 4.315556893228896e-05, + "loss": 2.3841, + "step": 2764000 + }, + { + "epoch": 13.7, + "learning_rate": 4.315433282303573e-05, + "loss": 2.3901, + "step": 2764500 + }, + { + "epoch": 13.7, + "learning_rate": 4.3153094236609644e-05, + "loss": 2.4044, + "step": 2765000 + }, + { + "epoch": 13.7, + "learning_rate": 4.315185565018356e-05, + "loss": 2.4209, + "step": 2765500 + }, + { + "epoch": 13.7, + "learning_rate": 4.315061706375748e-05, + "loss": 2.3937, + "step": 2766000 + }, + { + "epoch": 13.71, + "learning_rate": 4.3149380954504246e-05, + "loss": 2.41, + "step": 2766500 + }, + { + "epoch": 13.71, + "learning_rate": 4.3148142368078156e-05, + "loss": 2.4149, + "step": 2767000 + }, + { + "epoch": 13.71, + "learning_rate": 4.314690378165207e-05, + "loss": 2.4024, + "step": 2767500 + }, + { + "epoch": 13.71, + "learning_rate": 4.314566519522599e-05, + "loss": 2.4355, + "step": 2768000 + }, + { + "epoch": 13.72, + "learning_rate": 4.314442660879991e-05, + "loss": 2.4085, + "step": 2768500 + }, + { + "epoch": 13.72, + "learning_rate": 4.3143188022373824e-05, + "loss": 2.3709, + "step": 2769000 + }, + { + "epoch": 13.72, + "learning_rate": 4.314194943594774e-05, + "loss": 2.397, + "step": 2769500 + }, + { + "epoch": 13.72, + "learning_rate": 4.314071084952166e-05, + "loss": 2.3901, + "step": 2770000 + }, + { + "epoch": 13.73, + "learning_rate": 4.3139472263095575e-05, + "loss": 2.4026, + "step": 2770500 + }, + { + "epoch": 13.73, + "learning_rate": 4.313823367666949e-05, + "loss": 2.4013, + "step": 2771000 + }, + { + "epoch": 13.73, + "learning_rate": 4.313699756741626e-05, + "loss": 2.3871, + "step": 2771500 + }, + { + "epoch": 13.73, + "learning_rate": 4.313575898099018e-05, + "loss": 2.4078, + "step": 2772000 + }, + { + "epoch": 13.74, + "learning_rate": 4.3134520394564094e-05, + "loss": 2.4151, + "step": 2772500 + }, + { + "epoch": 13.74, + "learning_rate": 4.313328180813801e-05, + "loss": 2.4024, + "step": 2773000 + }, + { + "epoch": 13.74, + "learning_rate": 4.313204322171193e-05, + "loss": 2.4182, + "step": 2773500 + }, + { + "epoch": 13.74, + "learning_rate": 4.3130804635285845e-05, + "loss": 2.3841, + "step": 2774000 + }, + { + "epoch": 13.75, + "learning_rate": 4.312956604885976e-05, + "loss": 2.3987, + "step": 2774500 + }, + { + "epoch": 13.75, + "learning_rate": 4.312832746243368e-05, + "loss": 2.3907, + "step": 2775000 + }, + { + "epoch": 13.75, + "learning_rate": 4.3127088876007596e-05, + "loss": 2.4168, + "step": 2775500 + }, + { + "epoch": 13.75, + "learning_rate": 4.312585028958151e-05, + "loss": 2.4074, + "step": 2776000 + }, + { + "epoch": 13.76, + "learning_rate": 4.312461170315542e-05, + "loss": 2.3894, + "step": 2776500 + }, + { + "epoch": 13.76, + "learning_rate": 4.312337311672934e-05, + "loss": 2.3936, + "step": 2777000 + }, + { + "epoch": 13.76, + "learning_rate": 4.312213453030326e-05, + "loss": 2.3887, + "step": 2777500 + }, + { + "epoch": 13.76, + "learning_rate": 4.3120895943877174e-05, + "loss": 2.4014, + "step": 2778000 + }, + { + "epoch": 13.77, + "learning_rate": 4.311965735745109e-05, + "loss": 2.4004, + "step": 2778500 + }, + { + "epoch": 13.77, + "learning_rate": 4.3118418771025e-05, + "loss": 2.4207, + "step": 2779000 + }, + { + "epoch": 13.77, + "learning_rate": 4.3117182661771777e-05, + "loss": 2.4038, + "step": 2779500 + }, + { + "epoch": 13.77, + "learning_rate": 4.3115944075345694e-05, + "loss": 2.417, + "step": 2780000 + }, + { + "epoch": 13.78, + "learning_rate": 4.311470548891961e-05, + "loss": 2.4107, + "step": 2780500 + }, + { + "epoch": 13.78, + "learning_rate": 4.311346690249353e-05, + "loss": 2.4136, + "step": 2781000 + }, + { + "epoch": 13.78, + "learning_rate": 4.3112228316067444e-05, + "loss": 2.3845, + "step": 2781500 + }, + { + "epoch": 13.78, + "learning_rate": 4.311099220681421e-05, + "loss": 2.43, + "step": 2782000 + }, + { + "epoch": 13.79, + "learning_rate": 4.3109756097560975e-05, + "loss": 2.4391, + "step": 2782500 + }, + { + "epoch": 13.79, + "learning_rate": 4.3108519988307744e-05, + "loss": 2.417, + "step": 2783000 + }, + { + "epoch": 13.79, + "learning_rate": 4.310728387905452e-05, + "loss": 2.4119, + "step": 2783500 + }, + { + "epoch": 13.79, + "learning_rate": 4.310604529262843e-05, + "loss": 2.4281, + "step": 2784000 + }, + { + "epoch": 13.8, + "learning_rate": 4.31048091833752e-05, + "loss": 2.4254, + "step": 2784500 + }, + { + "epoch": 13.8, + "learning_rate": 4.3103570596949115e-05, + "loss": 2.3977, + "step": 2785000 + }, + { + "epoch": 13.8, + "learning_rate": 4.310233201052303e-05, + "loss": 2.4255, + "step": 2785500 + }, + { + "epoch": 13.8, + "learning_rate": 4.310109342409695e-05, + "loss": 2.393, + "step": 2786000 + }, + { + "epoch": 13.81, + "learning_rate": 4.3099854837670866e-05, + "loss": 2.4038, + "step": 2786500 + }, + { + "epoch": 13.81, + "learning_rate": 4.309861625124478e-05, + "loss": 2.4045, + "step": 2787000 + }, + { + "epoch": 13.81, + "learning_rate": 4.30973776648187e-05, + "loss": 2.4029, + "step": 2787500 + }, + { + "epoch": 13.81, + "learning_rate": 4.309613907839262e-05, + "loss": 2.4214, + "step": 2788000 + }, + { + "epoch": 13.82, + "learning_rate": 4.3094902969139386e-05, + "loss": 2.3809, + "step": 2788500 + }, + { + "epoch": 13.82, + "learning_rate": 4.30936643827133e-05, + "loss": 2.402, + "step": 2789000 + }, + { + "epoch": 13.82, + "learning_rate": 4.309242579628722e-05, + "loss": 2.3671, + "step": 2789500 + }, + { + "epoch": 13.82, + "learning_rate": 4.3091187209861137e-05, + "loss": 2.391, + "step": 2790000 + }, + { + "epoch": 13.83, + "learning_rate": 4.30899511006079e-05, + "loss": 2.4029, + "step": 2790500 + }, + { + "epoch": 13.83, + "learning_rate": 4.3088712514181816e-05, + "loss": 2.3973, + "step": 2791000 + }, + { + "epoch": 13.83, + "learning_rate": 4.308747392775573e-05, + "loss": 2.3823, + "step": 2791500 + }, + { + "epoch": 13.83, + "learning_rate": 4.308623534132965e-05, + "loss": 2.4324, + "step": 2792000 + }, + { + "epoch": 13.84, + "learning_rate": 4.3084996754903566e-05, + "loss": 2.4068, + "step": 2792500 + }, + { + "epoch": 13.84, + "learning_rate": 4.308375816847748e-05, + "loss": 2.4183, + "step": 2793000 + }, + { + "epoch": 13.84, + "learning_rate": 4.30825195820514e-05, + "loss": 2.4176, + "step": 2793500 + }, + { + "epoch": 13.84, + "learning_rate": 4.308128099562532e-05, + "loss": 2.41, + "step": 2794000 + }, + { + "epoch": 13.84, + "learning_rate": 4.3080042409199234e-05, + "loss": 2.3977, + "step": 2794500 + }, + { + "epoch": 13.85, + "learning_rate": 4.3078806299946e-05, + "loss": 2.3854, + "step": 2795000 + }, + { + "epoch": 13.85, + "learning_rate": 4.3077570190692765e-05, + "loss": 2.4014, + "step": 2795500 + }, + { + "epoch": 13.85, + "learning_rate": 4.307633160426668e-05, + "loss": 2.432, + "step": 2796000 + }, + { + "epoch": 13.85, + "learning_rate": 4.30750930178406e-05, + "loss": 2.4016, + "step": 2796500 + }, + { + "epoch": 13.86, + "learning_rate": 4.3073854431414516e-05, + "loss": 2.4061, + "step": 2797000 + }, + { + "epoch": 13.86, + "learning_rate": 4.307261584498843e-05, + "loss": 2.3972, + "step": 2797500 + }, + { + "epoch": 13.86, + "learning_rate": 4.307137725856235e-05, + "loss": 2.3841, + "step": 2798000 + }, + { + "epoch": 13.86, + "learning_rate": 4.307014114930912e-05, + "loss": 2.3905, + "step": 2798500 + }, + { + "epoch": 13.87, + "learning_rate": 4.3068902562883035e-05, + "loss": 2.412, + "step": 2799000 + }, + { + "epoch": 13.87, + "learning_rate": 4.306766397645695e-05, + "loss": 2.3943, + "step": 2799500 + }, + { + "epoch": 13.87, + "learning_rate": 4.306642539003087e-05, + "loss": 2.3898, + "step": 2800000 + }, + { + "epoch": 13.87, + "learning_rate": 4.3065186803604786e-05, + "loss": 2.3945, + "step": 2800500 + }, + { + "epoch": 13.88, + "learning_rate": 4.30639482171787e-05, + "loss": 2.4051, + "step": 2801000 + }, + { + "epoch": 13.88, + "learning_rate": 4.306270963075262e-05, + "loss": 2.3813, + "step": 2801500 + }, + { + "epoch": 13.88, + "learning_rate": 4.306147104432654e-05, + "loss": 2.3979, + "step": 2802000 + }, + { + "epoch": 13.88, + "learning_rate": 4.3060232457900454e-05, + "loss": 2.4034, + "step": 2802500 + }, + { + "epoch": 13.89, + "learning_rate": 4.305899387147437e-05, + "loss": 2.399, + "step": 2803000 + }, + { + "epoch": 13.89, + "learning_rate": 4.305775528504829e-05, + "loss": 2.3926, + "step": 2803500 + }, + { + "epoch": 13.89, + "learning_rate": 4.3056516698622205e-05, + "loss": 2.3909, + "step": 2804000 + }, + { + "epoch": 13.89, + "learning_rate": 4.3055278112196115e-05, + "loss": 2.4082, + "step": 2804500 + }, + { + "epoch": 13.9, + "learning_rate": 4.305403952577003e-05, + "loss": 2.3697, + "step": 2805000 + }, + { + "epoch": 13.9, + "learning_rate": 4.30528034165168e-05, + "loss": 2.4134, + "step": 2805500 + }, + { + "epoch": 13.9, + "learning_rate": 4.305156483009072e-05, + "loss": 2.3691, + "step": 2806000 + }, + { + "epoch": 13.9, + "learning_rate": 4.3050326243664634e-05, + "loss": 2.388, + "step": 2806500 + }, + { + "epoch": 13.91, + "learning_rate": 4.304908765723855e-05, + "loss": 2.4274, + "step": 2807000 + }, + { + "epoch": 13.91, + "learning_rate": 4.304784907081246e-05, + "loss": 2.4141, + "step": 2807500 + }, + { + "epoch": 13.91, + "learning_rate": 4.304661048438638e-05, + "loss": 2.3835, + "step": 2808000 + }, + { + "epoch": 13.91, + "learning_rate": 4.3045371897960295e-05, + "loss": 2.4, + "step": 2808500 + }, + { + "epoch": 13.92, + "learning_rate": 4.304413331153421e-05, + "loss": 2.4225, + "step": 2809000 + }, + { + "epoch": 13.92, + "learning_rate": 4.304289472510813e-05, + "loss": 2.3942, + "step": 2809500 + }, + { + "epoch": 13.92, + "learning_rate": 4.3041658615854905e-05, + "loss": 2.3977, + "step": 2810000 + }, + { + "epoch": 13.92, + "learning_rate": 4.304042250660167e-05, + "loss": 2.4204, + "step": 2810500 + }, + { + "epoch": 13.93, + "learning_rate": 4.3039183920175584e-05, + "loss": 2.4132, + "step": 2811000 + }, + { + "epoch": 13.93, + "learning_rate": 4.30379453337495e-05, + "loss": 2.4255, + "step": 2811500 + }, + { + "epoch": 13.93, + "learning_rate": 4.303670674732342e-05, + "loss": 2.4183, + "step": 2812000 + }, + { + "epoch": 13.93, + "learning_rate": 4.3035468160897334e-05, + "loss": 2.3954, + "step": 2812500 + }, + { + "epoch": 13.94, + "learning_rate": 4.30342320516441e-05, + "loss": 2.4053, + "step": 2813000 + }, + { + "epoch": 13.94, + "learning_rate": 4.303299594239087e-05, + "loss": 2.3931, + "step": 2813500 + }, + { + "epoch": 13.94, + "learning_rate": 4.303175735596479e-05, + "loss": 2.4367, + "step": 2814000 + }, + { + "epoch": 13.94, + "learning_rate": 4.30305187695387e-05, + "loss": 2.4039, + "step": 2814500 + }, + { + "epoch": 13.95, + "learning_rate": 4.3029280183112616e-05, + "loss": 2.3991, + "step": 2815000 + }, + { + "epoch": 13.95, + "learning_rate": 4.302804159668653e-05, + "loss": 2.4093, + "step": 2815500 + }, + { + "epoch": 13.95, + "learning_rate": 4.302680301026045e-05, + "loss": 2.3957, + "step": 2816000 + }, + { + "epoch": 13.95, + "learning_rate": 4.302556442383437e-05, + "loss": 2.4145, + "step": 2816500 + }, + { + "epoch": 13.96, + "learning_rate": 4.3024325837408284e-05, + "loss": 2.4256, + "step": 2817000 + }, + { + "epoch": 13.96, + "learning_rate": 4.30230872509822e-05, + "loss": 2.412, + "step": 2817500 + }, + { + "epoch": 13.96, + "learning_rate": 4.302184866455612e-05, + "loss": 2.412, + "step": 2818000 + }, + { + "epoch": 13.96, + "learning_rate": 4.3020612555302886e-05, + "loss": 2.4011, + "step": 2818500 + }, + { + "epoch": 13.97, + "learning_rate": 4.3019376446049655e-05, + "loss": 2.4118, + "step": 2819000 + }, + { + "epoch": 13.97, + "learning_rate": 4.301813785962357e-05, + "loss": 2.4149, + "step": 2819500 + }, + { + "epoch": 13.97, + "learning_rate": 4.301689927319749e-05, + "loss": 2.4283, + "step": 2820000 + }, + { + "epoch": 13.97, + "learning_rate": 4.3015660686771406e-05, + "loss": 2.3776, + "step": 2820500 + }, + { + "epoch": 13.98, + "learning_rate": 4.301442457751817e-05, + "loss": 2.4164, + "step": 2821000 + }, + { + "epoch": 13.98, + "learning_rate": 4.3013185991092085e-05, + "loss": 2.3833, + "step": 2821500 + }, + { + "epoch": 13.98, + "learning_rate": 4.3011947404666e-05, + "loss": 2.384, + "step": 2822000 + }, + { + "epoch": 13.98, + "learning_rate": 4.301070881823992e-05, + "loss": 2.4177, + "step": 2822500 + }, + { + "epoch": 13.99, + "learning_rate": 4.3009470231813836e-05, + "loss": 2.4214, + "step": 2823000 + }, + { + "epoch": 13.99, + "learning_rate": 4.300823164538775e-05, + "loss": 2.4005, + "step": 2823500 + }, + { + "epoch": 13.99, + "learning_rate": 4.300699305896167e-05, + "loss": 2.3818, + "step": 2824000 + }, + { + "epoch": 13.99, + "learning_rate": 4.3005754472535587e-05, + "loss": 2.3987, + "step": 2824500 + }, + { + "epoch": 14.0, + "learning_rate": 4.3004515886109503e-05, + "loss": 2.4102, + "step": 2825000 + }, + { + "epoch": 14.0, + "learning_rate": 4.300327729968342e-05, + "loss": 2.4001, + "step": 2825500 + }, + { + "epoch": 14.0, + "eval_accuracy": 0.6461582624983472, + "eval_accuracy_mlm": 0.5998228536990704, + "eval_accuracy_nsp": 0.8650881122062763, + "eval_loss": 2.389559268951416, + "eval_runtime": 145.7903, + "eval_samples_per_second": 1748.807, + "eval_steps_per_second": 72.872, + "step": 2825802 + }, + { + "epoch": 14.0, + "learning_rate": 4.300204119043019e-05, + "loss": 2.3643, + "step": 2826000 + }, + { + "epoch": 14.0, + "learning_rate": 4.3000802604004106e-05, + "loss": 2.3761, + "step": 2826500 + }, + { + "epoch": 14.01, + "learning_rate": 4.299956401757802e-05, + "loss": 2.3749, + "step": 2827000 + }, + { + "epoch": 14.01, + "learning_rate": 4.299832543115194e-05, + "loss": 2.3892, + "step": 2827500 + }, + { + "epoch": 14.01, + "learning_rate": 4.299708684472585e-05, + "loss": 2.3667, + "step": 2828000 + }, + { + "epoch": 14.01, + "learning_rate": 4.299585073547262e-05, + "loss": 2.364, + "step": 2828500 + }, + { + "epoch": 14.02, + "learning_rate": 4.2994612149046536e-05, + "loss": 2.3782, + "step": 2829000 + }, + { + "epoch": 14.02, + "learning_rate": 4.299337356262045e-05, + "loss": 2.371, + "step": 2829500 + }, + { + "epoch": 14.02, + "learning_rate": 4.299213497619437e-05, + "loss": 2.349, + "step": 2830000 + }, + { + "epoch": 14.02, + "learning_rate": 4.299089638976829e-05, + "loss": 2.3903, + "step": 2830500 + }, + { + "epoch": 14.03, + "learning_rate": 4.2989657803342204e-05, + "loss": 2.374, + "step": 2831000 + }, + { + "epoch": 14.03, + "learning_rate": 4.298842169408897e-05, + "loss": 2.378, + "step": 2831500 + }, + { + "epoch": 14.03, + "learning_rate": 4.298718310766289e-05, + "loss": 2.3867, + "step": 2832000 + }, + { + "epoch": 14.03, + "learning_rate": 4.2985944521236806e-05, + "loss": 2.366, + "step": 2832500 + }, + { + "epoch": 14.04, + "learning_rate": 4.298470593481072e-05, + "loss": 2.3584, + "step": 2833000 + }, + { + "epoch": 14.04, + "learning_rate": 4.298346734838464e-05, + "loss": 2.3585, + "step": 2833500 + }, + { + "epoch": 14.04, + "learning_rate": 4.298222876195856e-05, + "loss": 2.3845, + "step": 2834000 + }, + { + "epoch": 14.04, + "learning_rate": 4.2980990175532474e-05, + "loss": 2.3867, + "step": 2834500 + }, + { + "epoch": 14.05, + "learning_rate": 4.2979751589106384e-05, + "loss": 2.3844, + "step": 2835000 + }, + { + "epoch": 14.05, + "learning_rate": 4.29785130026803e-05, + "loss": 2.3945, + "step": 2835500 + }, + { + "epoch": 14.05, + "learning_rate": 4.297727689342707e-05, + "loss": 2.3819, + "step": 2836000 + }, + { + "epoch": 14.05, + "learning_rate": 4.297603830700099e-05, + "loss": 2.3963, + "step": 2836500 + }, + { + "epoch": 14.06, + "learning_rate": 4.2974799720574904e-05, + "loss": 2.3649, + "step": 2837000 + }, + { + "epoch": 14.06, + "learning_rate": 4.297356113414882e-05, + "loss": 2.3759, + "step": 2837500 + }, + { + "epoch": 14.06, + "learning_rate": 4.297232254772274e-05, + "loss": 2.3773, + "step": 2838000 + }, + { + "epoch": 14.06, + "learning_rate": 4.2971083961296655e-05, + "loss": 2.3649, + "step": 2838500 + }, + { + "epoch": 14.07, + "learning_rate": 4.296984785204342e-05, + "loss": 2.3764, + "step": 2839000 + }, + { + "epoch": 14.07, + "learning_rate": 4.296860926561734e-05, + "loss": 2.3793, + "step": 2839500 + }, + { + "epoch": 14.07, + "learning_rate": 4.29673731563641e-05, + "loss": 2.4066, + "step": 2840000 + }, + { + "epoch": 14.07, + "learning_rate": 4.296613704711088e-05, + "loss": 2.377, + "step": 2840500 + }, + { + "epoch": 14.08, + "learning_rate": 4.2964898460684795e-05, + "loss": 2.3808, + "step": 2841000 + }, + { + "epoch": 14.08, + "learning_rate": 4.296365987425871e-05, + "loss": 2.4138, + "step": 2841500 + }, + { + "epoch": 14.08, + "learning_rate": 4.296242128783263e-05, + "loss": 2.3763, + "step": 2842000 + }, + { + "epoch": 14.08, + "learning_rate": 4.296118270140654e-05, + "loss": 2.3747, + "step": 2842500 + }, + { + "epoch": 14.09, + "learning_rate": 4.2959944114980456e-05, + "loss": 2.3829, + "step": 2843000 + }, + { + "epoch": 14.09, + "learning_rate": 4.295870552855437e-05, + "loss": 2.4047, + "step": 2843500 + }, + { + "epoch": 14.09, + "learning_rate": 4.295746694212829e-05, + "loss": 2.3814, + "step": 2844000 + }, + { + "epoch": 14.09, + "learning_rate": 4.2956228355702207e-05, + "loss": 2.387, + "step": 2844500 + }, + { + "epoch": 14.1, + "learning_rate": 4.2954989769276123e-05, + "loss": 2.3661, + "step": 2845000 + }, + { + "epoch": 14.1, + "learning_rate": 4.295375118285004e-05, + "loss": 2.4036, + "step": 2845500 + }, + { + "epoch": 14.1, + "learning_rate": 4.29525150735968e-05, + "loss": 2.3828, + "step": 2846000 + }, + { + "epoch": 14.1, + "learning_rate": 4.295127648717072e-05, + "loss": 2.3651, + "step": 2846500 + }, + { + "epoch": 14.11, + "learning_rate": 4.2950037900744636e-05, + "loss": 2.3889, + "step": 2847000 + }, + { + "epoch": 14.11, + "learning_rate": 4.294879931431855e-05, + "loss": 2.3733, + "step": 2847500 + }, + { + "epoch": 14.11, + "learning_rate": 4.294756072789247e-05, + "loss": 2.3593, + "step": 2848000 + }, + { + "epoch": 14.11, + "learning_rate": 4.294632214146639e-05, + "loss": 2.3742, + "step": 2848500 + }, + { + "epoch": 14.11, + "learning_rate": 4.2945083555040304e-05, + "loss": 2.3745, + "step": 2849000 + }, + { + "epoch": 14.12, + "learning_rate": 4.294384496861422e-05, + "loss": 2.3674, + "step": 2849500 + }, + { + "epoch": 14.12, + "learning_rate": 4.294260638218814e-05, + "loss": 2.3607, + "step": 2850000 + }, + { + "epoch": 14.12, + "learning_rate": 4.294137027293491e-05, + "loss": 2.383, + "step": 2850500 + }, + { + "epoch": 14.12, + "learning_rate": 4.2940131686508824e-05, + "loss": 2.3761, + "step": 2851000 + }, + { + "epoch": 14.13, + "learning_rate": 4.293889310008274e-05, + "loss": 2.3966, + "step": 2851500 + }, + { + "epoch": 14.13, + "learning_rate": 4.293765451365666e-05, + "loss": 2.3912, + "step": 2852000 + }, + { + "epoch": 14.13, + "learning_rate": 4.293641840440342e-05, + "loss": 2.3719, + "step": 2852500 + }, + { + "epoch": 14.13, + "learning_rate": 4.2935179817977336e-05, + "loss": 2.3608, + "step": 2853000 + }, + { + "epoch": 14.14, + "learning_rate": 4.293394123155125e-05, + "loss": 2.3762, + "step": 2853500 + }, + { + "epoch": 14.14, + "learning_rate": 4.293270264512517e-05, + "loss": 2.3916, + "step": 2854000 + }, + { + "epoch": 14.14, + "learning_rate": 4.293146405869909e-05, + "loss": 2.3773, + "step": 2854500 + }, + { + "epoch": 14.14, + "learning_rate": 4.2930225472273004e-05, + "loss": 2.3872, + "step": 2855000 + }, + { + "epoch": 14.15, + "learning_rate": 4.292898688584692e-05, + "loss": 2.3776, + "step": 2855500 + }, + { + "epoch": 14.15, + "learning_rate": 4.292775077659369e-05, + "loss": 2.4074, + "step": 2856000 + }, + { + "epoch": 14.15, + "learning_rate": 4.292651219016761e-05, + "loss": 2.379, + "step": 2856500 + }, + { + "epoch": 14.15, + "learning_rate": 4.2925273603741524e-05, + "loss": 2.3803, + "step": 2857000 + }, + { + "epoch": 14.16, + "learning_rate": 4.292403501731544e-05, + "loss": 2.3807, + "step": 2857500 + }, + { + "epoch": 14.16, + "learning_rate": 4.292279643088936e-05, + "loss": 2.3591, + "step": 2858000 + }, + { + "epoch": 14.16, + "learning_rate": 4.2921557844463274e-05, + "loss": 2.3725, + "step": 2858500 + }, + { + "epoch": 14.16, + "learning_rate": 4.292031925803719e-05, + "loss": 2.3891, + "step": 2859000 + }, + { + "epoch": 14.17, + "learning_rate": 4.291908067161111e-05, + "loss": 2.3872, + "step": 2859500 + }, + { + "epoch": 14.17, + "learning_rate": 4.291784456235787e-05, + "loss": 2.3672, + "step": 2860000 + }, + { + "epoch": 14.17, + "learning_rate": 4.2916608453104646e-05, + "loss": 2.3655, + "step": 2860500 + }, + { + "epoch": 14.17, + "learning_rate": 4.2915369866678556e-05, + "loss": 2.403, + "step": 2861000 + }, + { + "epoch": 14.18, + "learning_rate": 4.291413375742533e-05, + "loss": 2.4014, + "step": 2861500 + }, + { + "epoch": 14.18, + "learning_rate": 4.291289517099925e-05, + "loss": 2.3719, + "step": 2862000 + }, + { + "epoch": 14.18, + "learning_rate": 4.2911656584573166e-05, + "loss": 2.377, + "step": 2862500 + }, + { + "epoch": 14.18, + "learning_rate": 4.2910417998147076e-05, + "loss": 2.3801, + "step": 2863000 + }, + { + "epoch": 14.19, + "learning_rate": 4.290917941172099e-05, + "loss": 2.3656, + "step": 2863500 + }, + { + "epoch": 14.19, + "learning_rate": 4.290794082529491e-05, + "loss": 2.4083, + "step": 2864000 + }, + { + "epoch": 14.19, + "learning_rate": 4.2906702238868826e-05, + "loss": 2.3794, + "step": 2864500 + }, + { + "epoch": 14.19, + "learning_rate": 4.2905463652442743e-05, + "loss": 2.3929, + "step": 2865000 + }, + { + "epoch": 14.2, + "learning_rate": 4.2904225066016654e-05, + "loss": 2.3909, + "step": 2865500 + }, + { + "epoch": 14.2, + "learning_rate": 4.290298647959057e-05, + "loss": 2.4093, + "step": 2866000 + }, + { + "epoch": 14.2, + "learning_rate": 4.290174789316449e-05, + "loss": 2.3792, + "step": 2866500 + }, + { + "epoch": 14.2, + "learning_rate": 4.2900509306738404e-05, + "loss": 2.3973, + "step": 2867000 + }, + { + "epoch": 14.21, + "learning_rate": 4.289927072031232e-05, + "loss": 2.3682, + "step": 2867500 + }, + { + "epoch": 14.21, + "learning_rate": 4.289803213388624e-05, + "loss": 2.3672, + "step": 2868000 + }, + { + "epoch": 14.21, + "learning_rate": 4.2896793547460155e-05, + "loss": 2.3825, + "step": 2868500 + }, + { + "epoch": 14.21, + "learning_rate": 4.289555496103407e-05, + "loss": 2.3766, + "step": 2869000 + }, + { + "epoch": 14.22, + "learning_rate": 4.289431885178084e-05, + "loss": 2.3758, + "step": 2869500 + }, + { + "epoch": 14.22, + "learning_rate": 4.289308026535476e-05, + "loss": 2.37, + "step": 2870000 + }, + { + "epoch": 14.22, + "learning_rate": 4.2891844156101527e-05, + "loss": 2.3608, + "step": 2870500 + }, + { + "epoch": 14.22, + "learning_rate": 4.2890605569675444e-05, + "loss": 2.3819, + "step": 2871000 + }, + { + "epoch": 14.23, + "learning_rate": 4.288936698324936e-05, + "loss": 2.3681, + "step": 2871500 + }, + { + "epoch": 14.23, + "learning_rate": 4.288813087399613e-05, + "loss": 2.3852, + "step": 2872000 + }, + { + "epoch": 14.23, + "learning_rate": 4.2886892287570046e-05, + "loss": 2.3943, + "step": 2872500 + }, + { + "epoch": 14.23, + "learning_rate": 4.288565370114396e-05, + "loss": 2.3918, + "step": 2873000 + }, + { + "epoch": 14.24, + "learning_rate": 4.288441511471787e-05, + "loss": 2.363, + "step": 2873500 + }, + { + "epoch": 14.24, + "learning_rate": 4.288317652829179e-05, + "loss": 2.3689, + "step": 2874000 + }, + { + "epoch": 14.24, + "learning_rate": 4.2881940419038566e-05, + "loss": 2.3561, + "step": 2874500 + }, + { + "epoch": 14.24, + "learning_rate": 4.288070183261248e-05, + "loss": 2.3556, + "step": 2875000 + }, + { + "epoch": 14.25, + "learning_rate": 4.28794632461864e-05, + "loss": 2.3698, + "step": 2875500 + }, + { + "epoch": 14.25, + "learning_rate": 4.2878224659760317e-05, + "loss": 2.3819, + "step": 2876000 + }, + { + "epoch": 14.25, + "learning_rate": 4.287698607333423e-05, + "loss": 2.3529, + "step": 2876500 + }, + { + "epoch": 14.25, + "learning_rate": 4.2875747486908144e-05, + "loss": 2.3704, + "step": 2877000 + }, + { + "epoch": 14.26, + "learning_rate": 4.287450890048206e-05, + "loss": 2.399, + "step": 2877500 + }, + { + "epoch": 14.26, + "learning_rate": 4.287327031405598e-05, + "loss": 2.3887, + "step": 2878000 + }, + { + "epoch": 14.26, + "learning_rate": 4.2872031727629894e-05, + "loss": 2.3905, + "step": 2878500 + }, + { + "epoch": 14.26, + "learning_rate": 4.2870793141203805e-05, + "loss": 2.3566, + "step": 2879000 + }, + { + "epoch": 14.27, + "learning_rate": 4.286955703195058e-05, + "loss": 2.4, + "step": 2879500 + }, + { + "epoch": 14.27, + "learning_rate": 4.286831844552449e-05, + "loss": 2.3926, + "step": 2880000 + }, + { + "epoch": 14.27, + "learning_rate": 4.286707985909841e-05, + "loss": 2.3815, + "step": 2880500 + }, + { + "epoch": 14.27, + "learning_rate": 4.2865841272672324e-05, + "loss": 2.369, + "step": 2881000 + }, + { + "epoch": 14.28, + "learning_rate": 4.286460268624624e-05, + "loss": 2.3836, + "step": 2881500 + }, + { + "epoch": 14.28, + "learning_rate": 4.286336409982016e-05, + "loss": 2.4035, + "step": 2882000 + }, + { + "epoch": 14.28, + "learning_rate": 4.2862127990566934e-05, + "loss": 2.3799, + "step": 2882500 + }, + { + "epoch": 14.28, + "learning_rate": 4.2860889404140844e-05, + "loss": 2.4322, + "step": 2883000 + }, + { + "epoch": 14.29, + "learning_rate": 4.285965081771476e-05, + "loss": 2.4124, + "step": 2883500 + }, + { + "epoch": 14.29, + "learning_rate": 4.285841223128868e-05, + "loss": 2.3728, + "step": 2884000 + }, + { + "epoch": 14.29, + "learning_rate": 4.2857173644862595e-05, + "loss": 2.3791, + "step": 2884500 + }, + { + "epoch": 14.29, + "learning_rate": 4.285593505843651e-05, + "loss": 2.3921, + "step": 2885000 + }, + { + "epoch": 14.3, + "learning_rate": 4.285469647201042e-05, + "loss": 2.3687, + "step": 2885500 + }, + { + "epoch": 14.3, + "learning_rate": 4.285346036275719e-05, + "loss": 2.3897, + "step": 2886000 + }, + { + "epoch": 14.3, + "learning_rate": 4.285222177633111e-05, + "loss": 2.3735, + "step": 2886500 + }, + { + "epoch": 14.3, + "learning_rate": 4.2850983189905024e-05, + "loss": 2.3695, + "step": 2887000 + }, + { + "epoch": 14.31, + "learning_rate": 4.284974460347894e-05, + "loss": 2.3833, + "step": 2887500 + }, + { + "epoch": 14.31, + "learning_rate": 4.284850601705286e-05, + "loss": 2.3874, + "step": 2888000 + }, + { + "epoch": 14.31, + "learning_rate": 4.2847267430626775e-05, + "loss": 2.4175, + "step": 2888500 + }, + { + "epoch": 14.31, + "learning_rate": 4.284603132137355e-05, + "loss": 2.3706, + "step": 2889000 + }, + { + "epoch": 14.32, + "learning_rate": 4.2844797689293165e-05, + "loss": 2.3967, + "step": 2889500 + }, + { + "epoch": 14.32, + "learning_rate": 4.2843561580039933e-05, + "loss": 2.3934, + "step": 2890000 + }, + { + "epoch": 14.32, + "learning_rate": 4.284232299361385e-05, + "loss": 2.3733, + "step": 2890500 + }, + { + "epoch": 14.32, + "learning_rate": 4.284108440718777e-05, + "loss": 2.3728, + "step": 2891000 + }, + { + "epoch": 14.33, + "learning_rate": 4.2839845820761684e-05, + "loss": 2.3741, + "step": 2891500 + }, + { + "epoch": 14.33, + "learning_rate": 4.28386072343356e-05, + "loss": 2.406, + "step": 2892000 + }, + { + "epoch": 14.33, + "learning_rate": 4.283736864790952e-05, + "loss": 2.3878, + "step": 2892500 + }, + { + "epoch": 14.33, + "learning_rate": 4.283613006148343e-05, + "loss": 2.3857, + "step": 2893000 + }, + { + "epoch": 14.34, + "learning_rate": 4.2834891475057345e-05, + "loss": 2.3929, + "step": 2893500 + }, + { + "epoch": 14.34, + "learning_rate": 4.2833655365804114e-05, + "loss": 2.3873, + "step": 2894000 + }, + { + "epoch": 14.34, + "learning_rate": 4.283241925655089e-05, + "loss": 2.3801, + "step": 2894500 + }, + { + "epoch": 14.34, + "learning_rate": 4.2831180670124806e-05, + "loss": 2.3904, + "step": 2895000 + }, + { + "epoch": 14.35, + "learning_rate": 4.282994208369872e-05, + "loss": 2.4079, + "step": 2895500 + }, + { + "epoch": 14.35, + "learning_rate": 4.2828703497272634e-05, + "loss": 2.418, + "step": 2896000 + }, + { + "epoch": 14.35, + "learning_rate": 4.282746491084655e-05, + "loss": 2.3692, + "step": 2896500 + }, + { + "epoch": 14.35, + "learning_rate": 4.282622632442047e-05, + "loss": 2.377, + "step": 2897000 + }, + { + "epoch": 14.36, + "learning_rate": 4.2824987737994384e-05, + "loss": 2.3925, + "step": 2897500 + }, + { + "epoch": 14.36, + "learning_rate": 4.28237491515683e-05, + "loss": 2.3754, + "step": 2898000 + }, + { + "epoch": 14.36, + "learning_rate": 4.282251056514222e-05, + "loss": 2.3814, + "step": 2898500 + }, + { + "epoch": 14.36, + "learning_rate": 4.2821271978716135e-05, + "loss": 2.3872, + "step": 2899000 + }, + { + "epoch": 14.37, + "learning_rate": 4.282003339229005e-05, + "loss": 2.3753, + "step": 2899500 + }, + { + "epoch": 14.37, + "learning_rate": 4.281879480586396e-05, + "loss": 2.4065, + "step": 2900000 + }, + { + "epoch": 14.37, + "learning_rate": 4.281755621943788e-05, + "loss": 2.3957, + "step": 2900500 + }, + { + "epoch": 14.37, + "learning_rate": 4.2816317633011796e-05, + "loss": 2.4159, + "step": 2901000 + }, + { + "epoch": 14.38, + "learning_rate": 4.281507904658571e-05, + "loss": 2.4126, + "step": 2901500 + }, + { + "epoch": 14.38, + "learning_rate": 4.281384046015963e-05, + "loss": 2.3595, + "step": 2902000 + }, + { + "epoch": 14.38, + "learning_rate": 4.281260187373355e-05, + "loss": 2.3574, + "step": 2902500 + }, + { + "epoch": 14.38, + "learning_rate": 4.281136824165317e-05, + "loss": 2.3769, + "step": 2903000 + }, + { + "epoch": 14.38, + "learning_rate": 4.2810129655227084e-05, + "loss": 2.395, + "step": 2903500 + }, + { + "epoch": 14.39, + "learning_rate": 4.2808891068801e-05, + "loss": 2.4074, + "step": 2904000 + }, + { + "epoch": 14.39, + "learning_rate": 4.280765495954777e-05, + "loss": 2.3847, + "step": 2904500 + }, + { + "epoch": 14.39, + "learning_rate": 4.280641637312169e-05, + "loss": 2.4074, + "step": 2905000 + }, + { + "epoch": 14.39, + "learning_rate": 4.28051777866956e-05, + "loss": 2.3715, + "step": 2905500 + }, + { + "epoch": 14.4, + "learning_rate": 4.2803939200269514e-05, + "loss": 2.3871, + "step": 2906000 + }, + { + "epoch": 14.4, + "learning_rate": 4.280270061384343e-05, + "loss": 2.4013, + "step": 2906500 + }, + { + "epoch": 14.4, + "learning_rate": 4.280146202741735e-05, + "loss": 2.3838, + "step": 2907000 + }, + { + "epoch": 14.4, + "learning_rate": 4.2800223440991265e-05, + "loss": 2.4154, + "step": 2907500 + }, + { + "epoch": 14.41, + "learning_rate": 4.279898485456518e-05, + "loss": 2.3687, + "step": 2908000 + }, + { + "epoch": 14.41, + "learning_rate": 4.27977462681391e-05, + "loss": 2.396, + "step": 2908500 + }, + { + "epoch": 14.41, + "learning_rate": 4.2796507681713016e-05, + "loss": 2.3786, + "step": 2909000 + }, + { + "epoch": 14.41, + "learning_rate": 4.2795271572459785e-05, + "loss": 2.3984, + "step": 2909500 + }, + { + "epoch": 14.42, + "learning_rate": 4.279403546320655e-05, + "loss": 2.3943, + "step": 2910000 + }, + { + "epoch": 14.42, + "learning_rate": 4.279279687678047e-05, + "loss": 2.3614, + "step": 2910500 + }, + { + "epoch": 14.42, + "learning_rate": 4.279155829035439e-05, + "loss": 2.3771, + "step": 2911000 + }, + { + "epoch": 14.42, + "learning_rate": 4.2790319703928304e-05, + "loss": 2.3691, + "step": 2911500 + }, + { + "epoch": 14.43, + "learning_rate": 4.2789081117502214e-05, + "loss": 2.392, + "step": 2912000 + }, + { + "epoch": 14.43, + "learning_rate": 4.278784253107613e-05, + "loss": 2.4161, + "step": 2912500 + }, + { + "epoch": 14.43, + "learning_rate": 4.278660394465005e-05, + "loss": 2.3881, + "step": 2913000 + }, + { + "epoch": 14.43, + "learning_rate": 4.2785365358223965e-05, + "loss": 2.3568, + "step": 2913500 + }, + { + "epoch": 14.44, + "learning_rate": 4.278412677179788e-05, + "loss": 2.4166, + "step": 2914000 + }, + { + "epoch": 14.44, + "learning_rate": 4.278289066254466e-05, + "loss": 2.4008, + "step": 2914500 + }, + { + "epoch": 14.44, + "learning_rate": 4.278165207611857e-05, + "loss": 2.3719, + "step": 2915000 + }, + { + "epoch": 14.44, + "learning_rate": 4.2780413489692485e-05, + "loss": 2.4054, + "step": 2915500 + }, + { + "epoch": 14.45, + "learning_rate": 4.27791749032664e-05, + "loss": 2.3981, + "step": 2916000 + }, + { + "epoch": 14.45, + "learning_rate": 4.277793631684032e-05, + "loss": 2.4088, + "step": 2916500 + }, + { + "epoch": 14.45, + "learning_rate": 4.2776697730414235e-05, + "loss": 2.4193, + "step": 2917000 + }, + { + "epoch": 14.45, + "learning_rate": 4.277545914398815e-05, + "loss": 2.3723, + "step": 2917500 + }, + { + "epoch": 14.46, + "learning_rate": 4.277422055756207e-05, + "loss": 2.3896, + "step": 2918000 + }, + { + "epoch": 14.46, + "learning_rate": 4.2772981971135986e-05, + "loss": 2.3889, + "step": 2918500 + }, + { + "epoch": 14.46, + "learning_rate": 4.27717433847099e-05, + "loss": 2.3773, + "step": 2919000 + }, + { + "epoch": 14.46, + "learning_rate": 4.277050479828382e-05, + "loss": 2.4043, + "step": 2919500 + }, + { + "epoch": 14.47, + "learning_rate": 4.276927116620344e-05, + "loss": 2.3883, + "step": 2920000 + }, + { + "epoch": 14.47, + "learning_rate": 4.276803257977736e-05, + "loss": 2.3812, + "step": 2920500 + }, + { + "epoch": 14.47, + "learning_rate": 4.276679894769697e-05, + "loss": 2.4031, + "step": 2921000 + }, + { + "epoch": 14.47, + "learning_rate": 4.276556036127089e-05, + "loss": 2.3853, + "step": 2921500 + }, + { + "epoch": 14.48, + "learning_rate": 4.2764321774844806e-05, + "loss": 2.4171, + "step": 2922000 + }, + { + "epoch": 14.48, + "learning_rate": 4.276308318841872e-05, + "loss": 2.3994, + "step": 2922500 + }, + { + "epoch": 14.48, + "learning_rate": 4.276184460199264e-05, + "loss": 2.3933, + "step": 2923000 + }, + { + "epoch": 14.48, + "learning_rate": 4.276060849273941e-05, + "loss": 2.3946, + "step": 2923500 + }, + { + "epoch": 14.49, + "learning_rate": 4.2759369906313325e-05, + "loss": 2.4314, + "step": 2924000 + }, + { + "epoch": 14.49, + "learning_rate": 4.275813131988724e-05, + "loss": 2.3782, + "step": 2924500 + }, + { + "epoch": 14.49, + "learning_rate": 4.275689273346116e-05, + "loss": 2.3963, + "step": 2925000 + }, + { + "epoch": 14.49, + "learning_rate": 4.2755654147035076e-05, + "loss": 2.4168, + "step": 2925500 + }, + { + "epoch": 14.5, + "learning_rate": 4.275441556060899e-05, + "loss": 2.3955, + "step": 2926000 + }, + { + "epoch": 14.5, + "learning_rate": 4.275317697418291e-05, + "loss": 2.4109, + "step": 2926500 + }, + { + "epoch": 14.5, + "learning_rate": 4.275193838775683e-05, + "loss": 2.3643, + "step": 2927000 + }, + { + "epoch": 14.5, + "learning_rate": 4.2750699801330744e-05, + "loss": 2.3692, + "step": 2927500 + }, + { + "epoch": 14.51, + "learning_rate": 4.2749461214904654e-05, + "loss": 2.4234, + "step": 2928000 + }, + { + "epoch": 14.51, + "learning_rate": 4.274822262847857e-05, + "loss": 2.4136, + "step": 2928500 + }, + { + "epoch": 14.51, + "learning_rate": 4.274698404205249e-05, + "loss": 2.3882, + "step": 2929000 + }, + { + "epoch": 14.51, + "learning_rate": 4.2745745455626405e-05, + "loss": 2.3713, + "step": 2929500 + }, + { + "epoch": 14.52, + "learning_rate": 4.274450686920032e-05, + "loss": 2.3898, + "step": 2930000 + }, + { + "epoch": 14.52, + "learning_rate": 4.274326828277423e-05, + "loss": 2.4151, + "step": 2930500 + }, + { + "epoch": 14.52, + "learning_rate": 4.274202969634815e-05, + "loss": 2.3902, + "step": 2931000 + }, + { + "epoch": 14.52, + "learning_rate": 4.2740793587094924e-05, + "loss": 2.3814, + "step": 2931500 + }, + { + "epoch": 14.53, + "learning_rate": 4.273955500066884e-05, + "loss": 2.3855, + "step": 2932000 + }, + { + "epoch": 14.53, + "learning_rate": 4.273831641424276e-05, + "loss": 2.4006, + "step": 2932500 + }, + { + "epoch": 14.53, + "learning_rate": 4.2737077827816675e-05, + "loss": 2.3651, + "step": 2933000 + }, + { + "epoch": 14.53, + "learning_rate": 4.2735839241390585e-05, + "loss": 2.3846, + "step": 2933500 + }, + { + "epoch": 14.54, + "learning_rate": 4.273460313213736e-05, + "loss": 2.3763, + "step": 2934000 + }, + { + "epoch": 14.54, + "learning_rate": 4.273336454571127e-05, + "loss": 2.382, + "step": 2934500 + }, + { + "epoch": 14.54, + "learning_rate": 4.273212595928519e-05, + "loss": 2.3847, + "step": 2935000 + }, + { + "epoch": 14.54, + "learning_rate": 4.2730887372859105e-05, + "loss": 2.3934, + "step": 2935500 + }, + { + "epoch": 14.55, + "learning_rate": 4.2729651263605873e-05, + "loss": 2.3807, + "step": 2936000 + }, + { + "epoch": 14.55, + "learning_rate": 4.272841515435264e-05, + "loss": 2.3737, + "step": 2936500 + }, + { + "epoch": 14.55, + "learning_rate": 4.272717656792656e-05, + "loss": 2.396, + "step": 2937000 + }, + { + "epoch": 14.55, + "learning_rate": 4.2725937981500476e-05, + "loss": 2.397, + "step": 2937500 + }, + { + "epoch": 14.56, + "learning_rate": 4.272469939507439e-05, + "loss": 2.3834, + "step": 2938000 + }, + { + "epoch": 14.56, + "learning_rate": 4.272346080864831e-05, + "loss": 2.383, + "step": 2938500 + }, + { + "epoch": 14.56, + "learning_rate": 4.272222222222223e-05, + "loss": 2.3652, + "step": 2939000 + }, + { + "epoch": 14.56, + "learning_rate": 4.2720983635796144e-05, + "loss": 2.3763, + "step": 2939500 + }, + { + "epoch": 14.57, + "learning_rate": 4.271974504937006e-05, + "loss": 2.4087, + "step": 2940000 + }, + { + "epoch": 14.57, + "learning_rate": 4.271850646294398e-05, + "loss": 2.3843, + "step": 2940500 + }, + { + "epoch": 14.57, + "learning_rate": 4.2717267876517895e-05, + "loss": 2.3971, + "step": 2941000 + }, + { + "epoch": 14.57, + "learning_rate": 4.2716029290091805e-05, + "loss": 2.3815, + "step": 2941500 + }, + { + "epoch": 14.58, + "learning_rate": 4.271479070366572e-05, + "loss": 2.4035, + "step": 2942000 + }, + { + "epoch": 14.58, + "learning_rate": 4.271355211723964e-05, + "loss": 2.3709, + "step": 2942500 + }, + { + "epoch": 14.58, + "learning_rate": 4.2712313530813556e-05, + "loss": 2.3951, + "step": 2943000 + }, + { + "epoch": 14.58, + "learning_rate": 4.271107494438747e-05, + "loss": 2.3758, + "step": 2943500 + }, + { + "epoch": 14.59, + "learning_rate": 4.270983635796138e-05, + "loss": 2.4007, + "step": 2944000 + }, + { + "epoch": 14.59, + "learning_rate": 4.270860024870816e-05, + "loss": 2.4097, + "step": 2944500 + }, + { + "epoch": 14.59, + "learning_rate": 4.2707361662282075e-05, + "loss": 2.4025, + "step": 2945000 + }, + { + "epoch": 14.59, + "learning_rate": 4.2706125553028844e-05, + "loss": 2.3816, + "step": 2945500 + }, + { + "epoch": 14.6, + "learning_rate": 4.270488696660276e-05, + "loss": 2.3982, + "step": 2946000 + }, + { + "epoch": 14.6, + "learning_rate": 4.270364838017668e-05, + "loss": 2.3793, + "step": 2946500 + }, + { + "epoch": 14.6, + "learning_rate": 4.2702409793750595e-05, + "loss": 2.389, + "step": 2947000 + }, + { + "epoch": 14.6, + "learning_rate": 4.270117120732451e-05, + "loss": 2.362, + "step": 2947500 + }, + { + "epoch": 14.61, + "learning_rate": 4.2699935098071274e-05, + "loss": 2.3914, + "step": 2948000 + }, + { + "epoch": 14.61, + "learning_rate": 4.269869651164519e-05, + "loss": 2.3902, + "step": 2948500 + }, + { + "epoch": 14.61, + "learning_rate": 4.269745792521911e-05, + "loss": 2.3968, + "step": 2949000 + }, + { + "epoch": 14.61, + "learning_rate": 4.2696219338793024e-05, + "loss": 2.3966, + "step": 2949500 + }, + { + "epoch": 14.62, + "learning_rate": 4.269498075236694e-05, + "loss": 2.395, + "step": 2950000 + }, + { + "epoch": 14.62, + "learning_rate": 4.269374216594086e-05, + "loss": 2.3799, + "step": 2950500 + }, + { + "epoch": 14.62, + "learning_rate": 4.2692503579514775e-05, + "loss": 2.3837, + "step": 2951000 + }, + { + "epoch": 14.62, + "learning_rate": 4.2691267470261544e-05, + "loss": 2.3736, + "step": 2951500 + }, + { + "epoch": 14.63, + "learning_rate": 4.269002888383546e-05, + "loss": 2.3967, + "step": 2952000 + }, + { + "epoch": 14.63, + "learning_rate": 4.268879029740938e-05, + "loss": 2.4221, + "step": 2952500 + }, + { + "epoch": 14.63, + "learning_rate": 4.2687551710983295e-05, + "loss": 2.3803, + "step": 2953000 + }, + { + "epoch": 14.63, + "learning_rate": 4.268631560173006e-05, + "loss": 2.3863, + "step": 2953500 + }, + { + "epoch": 14.64, + "learning_rate": 4.2685077015303974e-05, + "loss": 2.391, + "step": 2954000 + }, + { + "epoch": 14.64, + "learning_rate": 4.268383842887789e-05, + "loss": 2.374, + "step": 2954500 + }, + { + "epoch": 14.64, + "learning_rate": 4.268259984245181e-05, + "loss": 2.3949, + "step": 2955000 + }, + { + "epoch": 14.64, + "learning_rate": 4.2681363733198577e-05, + "loss": 2.3759, + "step": 2955500 + }, + { + "epoch": 14.65, + "learning_rate": 4.2680125146772493e-05, + "loss": 2.387, + "step": 2956000 + }, + { + "epoch": 14.65, + "learning_rate": 4.267888656034641e-05, + "loss": 2.3696, + "step": 2956500 + }, + { + "epoch": 14.65, + "learning_rate": 4.267764797392033e-05, + "loss": 2.4066, + "step": 2957000 + }, + { + "epoch": 14.65, + "learning_rate": 4.2676409387494244e-05, + "loss": 2.4101, + "step": 2957500 + }, + { + "epoch": 14.65, + "learning_rate": 4.267517327824101e-05, + "loss": 2.3882, + "step": 2958000 + }, + { + "epoch": 14.66, + "learning_rate": 4.267393716898778e-05, + "loss": 2.4137, + "step": 2958500 + }, + { + "epoch": 14.66, + "learning_rate": 4.26726985825617e-05, + "loss": 2.3827, + "step": 2959000 + }, + { + "epoch": 14.66, + "learning_rate": 4.267145999613561e-05, + "loss": 2.4004, + "step": 2959500 + }, + { + "epoch": 14.66, + "learning_rate": 4.2670221409709526e-05, + "loss": 2.3833, + "step": 2960000 + }, + { + "epoch": 14.67, + "learning_rate": 4.26689853004563e-05, + "loss": 2.402, + "step": 2960500 + }, + { + "epoch": 14.67, + "learning_rate": 4.266774671403022e-05, + "loss": 2.4208, + "step": 2961000 + }, + { + "epoch": 14.67, + "learning_rate": 4.2666508127604135e-05, + "loss": 2.4217, + "step": 2961500 + }, + { + "epoch": 14.67, + "learning_rate": 4.266526954117805e-05, + "loss": 2.3977, + "step": 2962000 + }, + { + "epoch": 14.68, + "learning_rate": 4.266403095475196e-05, + "loss": 2.398, + "step": 2962500 + }, + { + "epoch": 14.68, + "learning_rate": 4.266279236832588e-05, + "loss": 2.3978, + "step": 2963000 + }, + { + "epoch": 14.68, + "learning_rate": 4.2661553781899796e-05, + "loss": 2.4067, + "step": 2963500 + }, + { + "epoch": 14.68, + "learning_rate": 4.266031519547371e-05, + "loss": 2.4094, + "step": 2964000 + }, + { + "epoch": 14.69, + "learning_rate": 4.265907660904763e-05, + "loss": 2.3881, + "step": 2964500 + }, + { + "epoch": 14.69, + "learning_rate": 4.265783802262154e-05, + "loss": 2.3953, + "step": 2965000 + }, + { + "epoch": 14.69, + "learning_rate": 4.265659943619546e-05, + "loss": 2.3919, + "step": 2965500 + }, + { + "epoch": 14.69, + "learning_rate": 4.2655363326942226e-05, + "loss": 2.3778, + "step": 2966000 + }, + { + "epoch": 14.7, + "learning_rate": 4.265412474051614e-05, + "loss": 2.3833, + "step": 2966500 + }, + { + "epoch": 14.7, + "learning_rate": 4.265288615409006e-05, + "loss": 2.3947, + "step": 2967000 + }, + { + "epoch": 14.7, + "learning_rate": 4.265164756766398e-05, + "loss": 2.3884, + "step": 2967500 + }, + { + "epoch": 14.7, + "learning_rate": 4.2650408981237894e-05, + "loss": 2.3881, + "step": 2968000 + }, + { + "epoch": 14.71, + "learning_rate": 4.264917039481181e-05, + "loss": 2.4038, + "step": 2968500 + }, + { + "epoch": 14.71, + "learning_rate": 4.264793180838573e-05, + "loss": 2.412, + "step": 2969000 + }, + { + "epoch": 14.71, + "learning_rate": 4.2646693221959644e-05, + "loss": 2.3893, + "step": 2969500 + }, + { + "epoch": 14.71, + "learning_rate": 4.264545463553356e-05, + "loss": 2.38, + "step": 2970000 + }, + { + "epoch": 14.72, + "learning_rate": 4.264421604910748e-05, + "loss": 2.4093, + "step": 2970500 + }, + { + "epoch": 14.72, + "learning_rate": 4.264297993985425e-05, + "loss": 2.3932, + "step": 2971000 + }, + { + "epoch": 14.72, + "learning_rate": 4.2641741353428164e-05, + "loss": 2.4133, + "step": 2971500 + }, + { + "epoch": 14.72, + "learning_rate": 4.2640502767002074e-05, + "loss": 2.3936, + "step": 2972000 + }, + { + "epoch": 14.73, + "learning_rate": 4.263926418057599e-05, + "loss": 2.405, + "step": 2972500 + }, + { + "epoch": 14.73, + "learning_rate": 4.263802559414991e-05, + "loss": 2.3882, + "step": 2973000 + }, + { + "epoch": 14.73, + "learning_rate": 4.2636787007723825e-05, + "loss": 2.3862, + "step": 2973500 + }, + { + "epoch": 14.73, + "learning_rate": 4.263554842129774e-05, + "loss": 2.3741, + "step": 2974000 + }, + { + "epoch": 14.74, + "learning_rate": 4.263430983487166e-05, + "loss": 2.4191, + "step": 2974500 + }, + { + "epoch": 14.74, + "learning_rate": 4.2633071248445576e-05, + "loss": 2.4142, + "step": 2975000 + }, + { + "epoch": 14.74, + "learning_rate": 4.263183266201949e-05, + "loss": 2.3749, + "step": 2975500 + }, + { + "epoch": 14.74, + "learning_rate": 4.263059407559341e-05, + "loss": 2.3808, + "step": 2976000 + }, + { + "epoch": 14.75, + "learning_rate": 4.262935796634018e-05, + "loss": 2.3935, + "step": 2976500 + }, + { + "epoch": 14.75, + "learning_rate": 4.2628119379914095e-05, + "loss": 2.3867, + "step": 2977000 + }, + { + "epoch": 14.75, + "learning_rate": 4.2626883270660864e-05, + "loss": 2.3977, + "step": 2977500 + }, + { + "epoch": 14.75, + "learning_rate": 4.262564468423478e-05, + "loss": 2.3814, + "step": 2978000 + }, + { + "epoch": 14.76, + "learning_rate": 4.262440609780869e-05, + "loss": 2.3813, + "step": 2978500 + }, + { + "epoch": 14.76, + "learning_rate": 4.262316751138261e-05, + "loss": 2.3986, + "step": 2979000 + }, + { + "epoch": 14.76, + "learning_rate": 4.262193140212938e-05, + "loss": 2.3936, + "step": 2979500 + }, + { + "epoch": 14.76, + "learning_rate": 4.2620692815703294e-05, + "loss": 2.403, + "step": 2980000 + }, + { + "epoch": 14.77, + "learning_rate": 4.261945422927721e-05, + "loss": 2.391, + "step": 2980500 + }, + { + "epoch": 14.77, + "learning_rate": 4.261821564285113e-05, + "loss": 2.4022, + "step": 2981000 + }, + { + "epoch": 14.77, + "learning_rate": 4.2616977056425045e-05, + "loss": 2.3994, + "step": 2981500 + }, + { + "epoch": 14.77, + "learning_rate": 4.261573846999896e-05, + "loss": 2.4071, + "step": 2982000 + }, + { + "epoch": 14.78, + "learning_rate": 4.261449988357288e-05, + "loss": 2.402, + "step": 2982500 + }, + { + "epoch": 14.78, + "learning_rate": 4.2613261297146795e-05, + "loss": 2.377, + "step": 2983000 + }, + { + "epoch": 14.78, + "learning_rate": 4.261202271072071e-05, + "loss": 2.3869, + "step": 2983500 + }, + { + "epoch": 14.78, + "learning_rate": 4.261078412429463e-05, + "loss": 2.3832, + "step": 2984000 + }, + { + "epoch": 14.79, + "learning_rate": 4.26095480150414e-05, + "loss": 2.4119, + "step": 2984500 + }, + { + "epoch": 14.79, + "learning_rate": 4.2608309428615315e-05, + "loss": 2.4104, + "step": 2985000 + }, + { + "epoch": 14.79, + "learning_rate": 4.2607070842189225e-05, + "loss": 2.3959, + "step": 2985500 + }, + { + "epoch": 14.79, + "learning_rate": 4.260583225576314e-05, + "loss": 2.4221, + "step": 2986000 + }, + { + "epoch": 14.8, + "learning_rate": 4.260459366933706e-05, + "loss": 2.4011, + "step": 2986500 + }, + { + "epoch": 14.8, + "learning_rate": 4.2603355082910976e-05, + "loss": 2.3937, + "step": 2987000 + }, + { + "epoch": 14.8, + "learning_rate": 4.260211649648489e-05, + "loss": 2.3794, + "step": 2987500 + }, + { + "epoch": 14.8, + "learning_rate": 4.260088038723166e-05, + "loss": 2.4179, + "step": 2988000 + }, + { + "epoch": 14.81, + "learning_rate": 4.259964180080558e-05, + "loss": 2.4193, + "step": 2988500 + }, + { + "epoch": 14.81, + "learning_rate": 4.2598403214379496e-05, + "loss": 2.3833, + "step": 2989000 + }, + { + "epoch": 14.81, + "learning_rate": 4.2597167105126264e-05, + "loss": 2.3923, + "step": 2989500 + }, + { + "epoch": 14.81, + "learning_rate": 4.259592851870018e-05, + "loss": 2.394, + "step": 2990000 + }, + { + "epoch": 14.82, + "learning_rate": 4.25946899322741e-05, + "loss": 2.3941, + "step": 2990500 + }, + { + "epoch": 14.82, + "learning_rate": 4.2593451345848015e-05, + "loss": 2.4062, + "step": 2991000 + }, + { + "epoch": 14.82, + "learning_rate": 4.259221275942193e-05, + "loss": 2.3848, + "step": 2991500 + }, + { + "epoch": 14.82, + "learning_rate": 4.259097417299584e-05, + "loss": 2.3924, + "step": 2992000 + }, + { + "epoch": 14.83, + "learning_rate": 4.258973558656976e-05, + "loss": 2.4166, + "step": 2992500 + }, + { + "epoch": 14.83, + "learning_rate": 4.2588497000143676e-05, + "loss": 2.3901, + "step": 2993000 + }, + { + "epoch": 14.83, + "learning_rate": 4.258725841371759e-05, + "loss": 2.4053, + "step": 2993500 + }, + { + "epoch": 14.83, + "learning_rate": 4.258601982729151e-05, + "loss": 2.3861, + "step": 2994000 + }, + { + "epoch": 14.84, + "learning_rate": 4.258478124086543e-05, + "loss": 2.3877, + "step": 2994500 + }, + { + "epoch": 14.84, + "learning_rate": 4.2583542654439344e-05, + "loss": 2.4219, + "step": 2995000 + }, + { + "epoch": 14.84, + "learning_rate": 4.258230654518611e-05, + "loss": 2.4288, + "step": 2995500 + }, + { + "epoch": 14.84, + "learning_rate": 4.258106795876003e-05, + "loss": 2.388, + "step": 2996000 + }, + { + "epoch": 14.85, + "learning_rate": 4.2579829372333947e-05, + "loss": 2.391, + "step": 2996500 + }, + { + "epoch": 14.85, + "learning_rate": 4.2578590785907863e-05, + "loss": 2.4086, + "step": 2997000 + }, + { + "epoch": 14.85, + "learning_rate": 4.257735219948178e-05, + "loss": 2.3844, + "step": 2997500 + }, + { + "epoch": 14.85, + "learning_rate": 4.25761136130557e-05, + "loss": 2.3956, + "step": 2998000 + }, + { + "epoch": 14.86, + "learning_rate": 4.2574875026629614e-05, + "loss": 2.3777, + "step": 2998500 + }, + { + "epoch": 14.86, + "learning_rate": 4.257363644020353e-05, + "loss": 2.3943, + "step": 2999000 + }, + { + "epoch": 14.86, + "learning_rate": 4.2572402808123145e-05, + "loss": 2.3853, + "step": 2999500 + }, + { + "epoch": 14.86, + "learning_rate": 4.257116422169706e-05, + "loss": 2.388, + "step": 3000000 + }, + { + "epoch": 14.87, + "learning_rate": 4.256992563527098e-05, + "loss": 2.4066, + "step": 3000500 + }, + { + "epoch": 14.87, + "learning_rate": 4.2568687048844896e-05, + "loss": 2.3979, + "step": 3001000 + }, + { + "epoch": 14.87, + "learning_rate": 4.256744846241881e-05, + "loss": 2.392, + "step": 3001500 + }, + { + "epoch": 14.87, + "learning_rate": 4.256620987599273e-05, + "loss": 2.4233, + "step": 3002000 + }, + { + "epoch": 14.88, + "learning_rate": 4.256497128956665e-05, + "loss": 2.3834, + "step": 3002500 + }, + { + "epoch": 14.88, + "learning_rate": 4.2563732703140564e-05, + "loss": 2.3635, + "step": 3003000 + }, + { + "epoch": 14.88, + "learning_rate": 4.256249659388733e-05, + "loss": 2.4039, + "step": 3003500 + }, + { + "epoch": 14.88, + "learning_rate": 4.256125800746125e-05, + "loss": 2.3884, + "step": 3004000 + }, + { + "epoch": 14.89, + "learning_rate": 4.2560019421035166e-05, + "loss": 2.4051, + "step": 3004500 + }, + { + "epoch": 14.89, + "learning_rate": 4.255878083460908e-05, + "loss": 2.3943, + "step": 3005000 + }, + { + "epoch": 14.89, + "learning_rate": 4.255754224818299e-05, + "loss": 2.4152, + "step": 3005500 + }, + { + "epoch": 14.89, + "learning_rate": 4.255630613892976e-05, + "loss": 2.3811, + "step": 3006000 + }, + { + "epoch": 14.9, + "learning_rate": 4.255506755250368e-05, + "loss": 2.393, + "step": 3006500 + }, + { + "epoch": 14.9, + "learning_rate": 4.2553828966077596e-05, + "loss": 2.4022, + "step": 3007000 + }, + { + "epoch": 14.9, + "learning_rate": 4.255259037965151e-05, + "loss": 2.3587, + "step": 3007500 + }, + { + "epoch": 14.9, + "learning_rate": 4.255135179322543e-05, + "loss": 2.4104, + "step": 3008000 + }, + { + "epoch": 14.91, + "learning_rate": 4.255011320679935e-05, + "loss": 2.3866, + "step": 3008500 + }, + { + "epoch": 14.91, + "learning_rate": 4.2548874620373264e-05, + "loss": 2.4138, + "step": 3009000 + }, + { + "epoch": 14.91, + "learning_rate": 4.254763603394718e-05, + "loss": 2.3889, + "step": 3009500 + }, + { + "epoch": 14.91, + "learning_rate": 4.254639992469395e-05, + "loss": 2.4053, + "step": 3010000 + }, + { + "epoch": 14.92, + "learning_rate": 4.2545161338267866e-05, + "loss": 2.3929, + "step": 3010500 + }, + { + "epoch": 14.92, + "learning_rate": 4.254392275184178e-05, + "loss": 2.401, + "step": 3011000 + }, + { + "epoch": 14.92, + "learning_rate": 4.25426841654157e-05, + "loss": 2.3921, + "step": 3011500 + }, + { + "epoch": 14.92, + "learning_rate": 4.254144557898962e-05, + "loss": 2.4238, + "step": 3012000 + }, + { + "epoch": 14.92, + "learning_rate": 4.254020699256353e-05, + "loss": 2.3746, + "step": 3012500 + }, + { + "epoch": 14.93, + "learning_rate": 4.2538968406137444e-05, + "loss": 2.3839, + "step": 3013000 + }, + { + "epoch": 14.93, + "learning_rate": 4.253773229688421e-05, + "loss": 2.3947, + "step": 3013500 + }, + { + "epoch": 14.93, + "learning_rate": 4.253649371045813e-05, + "loss": 2.402, + "step": 3014000 + }, + { + "epoch": 14.93, + "learning_rate": 4.253525512403205e-05, + "loss": 2.3758, + "step": 3014500 + }, + { + "epoch": 14.94, + "learning_rate": 4.2534016537605964e-05, + "loss": 2.3933, + "step": 3015000 + }, + { + "epoch": 14.94, + "learning_rate": 4.253278290552558e-05, + "loss": 2.3876, + "step": 3015500 + }, + { + "epoch": 14.94, + "learning_rate": 4.2531544319099495e-05, + "loss": 2.3971, + "step": 3016000 + }, + { + "epoch": 14.94, + "learning_rate": 4.253030573267341e-05, + "loss": 2.4107, + "step": 3016500 + }, + { + "epoch": 14.95, + "learning_rate": 4.252906714624733e-05, + "loss": 2.4113, + "step": 3017000 + }, + { + "epoch": 14.95, + "learning_rate": 4.2527831036994104e-05, + "loss": 2.3868, + "step": 3017500 + }, + { + "epoch": 14.95, + "learning_rate": 4.252659245056802e-05, + "loss": 2.3966, + "step": 3018000 + }, + { + "epoch": 14.95, + "learning_rate": 4.252535386414194e-05, + "loss": 2.3974, + "step": 3018500 + }, + { + "epoch": 14.96, + "learning_rate": 4.252411527771585e-05, + "loss": 2.3664, + "step": 3019000 + }, + { + "epoch": 14.96, + "learning_rate": 4.2522876691289765e-05, + "loss": 2.3885, + "step": 3019500 + }, + { + "epoch": 14.96, + "learning_rate": 4.252163810486368e-05, + "loss": 2.3988, + "step": 3020000 + }, + { + "epoch": 14.96, + "learning_rate": 4.252040199561045e-05, + "loss": 2.3894, + "step": 3020500 + }, + { + "epoch": 14.97, + "learning_rate": 4.251916340918437e-05, + "loss": 2.3897, + "step": 3021000 + }, + { + "epoch": 14.97, + "learning_rate": 4.2517924822758285e-05, + "loss": 2.3855, + "step": 3021500 + }, + { + "epoch": 14.97, + "learning_rate": 4.2516688713505053e-05, + "loss": 2.3951, + "step": 3022000 + }, + { + "epoch": 14.97, + "learning_rate": 4.251545260425182e-05, + "loss": 2.438, + "step": 3022500 + }, + { + "epoch": 14.98, + "learning_rate": 4.251421401782574e-05, + "loss": 2.3995, + "step": 3023000 + }, + { + "epoch": 14.98, + "learning_rate": 4.2512975431399656e-05, + "loss": 2.4106, + "step": 3023500 + }, + { + "epoch": 14.98, + "learning_rate": 4.251173684497357e-05, + "loss": 2.3756, + "step": 3024000 + }, + { + "epoch": 14.98, + "learning_rate": 4.2510500735720335e-05, + "loss": 2.3789, + "step": 3024500 + }, + { + "epoch": 14.99, + "learning_rate": 4.250926214929425e-05, + "loss": 2.4049, + "step": 3025000 + }, + { + "epoch": 14.99, + "learning_rate": 4.250802356286817e-05, + "loss": 2.363, + "step": 3025500 + }, + { + "epoch": 14.99, + "learning_rate": 4.2506784976442086e-05, + "loss": 2.4208, + "step": 3026000 + }, + { + "epoch": 14.99, + "learning_rate": 4.2505546390016e-05, + "loss": 2.3791, + "step": 3026500 + }, + { + "epoch": 15.0, + "learning_rate": 4.250430780358992e-05, + "loss": 2.3832, + "step": 3027000 + }, + { + "epoch": 15.0, + "learning_rate": 4.250306921716384e-05, + "loss": 2.3903, + "step": 3027500 + }, + { + "epoch": 15.0, + "eval_accuracy": 0.6470159402014201, + "eval_accuracy_mlm": 0.6010111516244826, + "eval_accuracy_nsp": 0.8638683082377951, + "eval_loss": 2.399909496307373, + "eval_runtime": 145.758, + "eval_samples_per_second": 1749.194, + "eval_steps_per_second": 72.888, + "step": 3027645 + }, + { + "epoch": 15.0, + "learning_rate": 4.2501830630737754e-05, + "loss": 2.3627, + "step": 3028000 + }, + { + "epoch": 15.0, + "learning_rate": 4.250059204431167e-05, + "loss": 2.3456, + "step": 3028500 + }, + { + "epoch": 15.01, + "learning_rate": 4.249935593505844e-05, + "loss": 2.3451, + "step": 3029000 + }, + { + "epoch": 15.01, + "learning_rate": 4.2498117348632356e-05, + "loss": 2.3651, + "step": 3029500 + }, + { + "epoch": 15.01, + "learning_rate": 4.249687876220627e-05, + "loss": 2.3518, + "step": 3030000 + }, + { + "epoch": 15.01, + "learning_rate": 4.249564017578019e-05, + "loss": 2.3489, + "step": 3030500 + }, + { + "epoch": 15.02, + "learning_rate": 4.249440158935411e-05, + "loss": 2.3564, + "step": 3031000 + }, + { + "epoch": 15.02, + "learning_rate": 4.2493163002928024e-05, + "loss": 2.3755, + "step": 3031500 + }, + { + "epoch": 15.02, + "learning_rate": 4.249192441650194e-05, + "loss": 2.3875, + "step": 3032000 + }, + { + "epoch": 15.02, + "learning_rate": 4.249068583007586e-05, + "loss": 2.3706, + "step": 3032500 + }, + { + "epoch": 15.03, + "learning_rate": 4.2489447243649775e-05, + "loss": 2.3564, + "step": 3033000 + }, + { + "epoch": 15.03, + "learning_rate": 4.2488208657223685e-05, + "loss": 2.3672, + "step": 3033500 + }, + { + "epoch": 15.03, + "learning_rate": 4.24869700707976e-05, + "loss": 2.3532, + "step": 3034000 + }, + { + "epoch": 15.03, + "learning_rate": 4.248573148437152e-05, + "loss": 2.3531, + "step": 3034500 + }, + { + "epoch": 15.04, + "learning_rate": 4.2484492897945436e-05, + "loss": 2.3578, + "step": 3035000 + }, + { + "epoch": 15.04, + "learning_rate": 4.2483256788692204e-05, + "loss": 2.4104, + "step": 3035500 + }, + { + "epoch": 15.04, + "learning_rate": 4.248201820226612e-05, + "loss": 2.3724, + "step": 3036000 + }, + { + "epoch": 15.04, + "learning_rate": 4.248077961584004e-05, + "loss": 2.3303, + "step": 3036500 + }, + { + "epoch": 15.05, + "learning_rate": 4.2479541029413955e-05, + "loss": 2.3409, + "step": 3037000 + }, + { + "epoch": 15.05, + "learning_rate": 4.2478302442987865e-05, + "loss": 2.3438, + "step": 3037500 + }, + { + "epoch": 15.05, + "learning_rate": 4.247706633373464e-05, + "loss": 2.3988, + "step": 3038000 + }, + { + "epoch": 15.05, + "learning_rate": 4.247582774730856e-05, + "loss": 2.3621, + "step": 3038500 + }, + { + "epoch": 15.06, + "learning_rate": 4.247459163805532e-05, + "loss": 2.4002, + "step": 3039000 + }, + { + "epoch": 15.06, + "learning_rate": 4.247335305162924e-05, + "loss": 2.3571, + "step": 3039500 + }, + { + "epoch": 15.06, + "learning_rate": 4.2472114465203154e-05, + "loss": 2.3597, + "step": 3040000 + }, + { + "epoch": 15.06, + "learning_rate": 4.247087587877707e-05, + "loss": 2.3572, + "step": 3040500 + }, + { + "epoch": 15.07, + "learning_rate": 4.246963729235099e-05, + "loss": 2.391, + "step": 3041000 + }, + { + "epoch": 15.07, + "learning_rate": 4.2468398705924905e-05, + "loss": 2.3717, + "step": 3041500 + }, + { + "epoch": 15.07, + "learning_rate": 4.246716011949882e-05, + "loss": 2.3924, + "step": 3042000 + }, + { + "epoch": 15.07, + "learning_rate": 4.246592153307274e-05, + "loss": 2.3618, + "step": 3042500 + }, + { + "epoch": 15.08, + "learning_rate": 4.2464682946646655e-05, + "loss": 2.3624, + "step": 3043000 + }, + { + "epoch": 15.08, + "learning_rate": 4.246344436022057e-05, + "loss": 2.368, + "step": 3043500 + }, + { + "epoch": 15.08, + "learning_rate": 4.246220577379448e-05, + "loss": 2.3958, + "step": 3044000 + }, + { + "epoch": 15.08, + "learning_rate": 4.24609671873684e-05, + "loss": 2.3759, + "step": 3044500 + }, + { + "epoch": 15.09, + "learning_rate": 4.2459728600942316e-05, + "loss": 2.3554, + "step": 3045000 + }, + { + "epoch": 15.09, + "learning_rate": 4.245849496886194e-05, + "loss": 2.3552, + "step": 3045500 + }, + { + "epoch": 15.09, + "learning_rate": 4.2457256382435854e-05, + "loss": 2.3811, + "step": 3046000 + }, + { + "epoch": 15.09, + "learning_rate": 4.245601779600977e-05, + "loss": 2.381, + "step": 3046500 + }, + { + "epoch": 15.1, + "learning_rate": 4.245477920958369e-05, + "loss": 2.3725, + "step": 3047000 + }, + { + "epoch": 15.1, + "learning_rate": 4.245354310033046e-05, + "loss": 2.3783, + "step": 3047500 + }, + { + "epoch": 15.1, + "learning_rate": 4.2452304513904374e-05, + "loss": 2.3869, + "step": 3048000 + }, + { + "epoch": 15.1, + "learning_rate": 4.245106592747829e-05, + "loss": 2.3689, + "step": 3048500 + }, + { + "epoch": 15.11, + "learning_rate": 4.244982734105221e-05, + "loss": 2.3686, + "step": 3049000 + }, + { + "epoch": 15.11, + "learning_rate": 4.2448588754626124e-05, + "loss": 2.3877, + "step": 3049500 + }, + { + "epoch": 15.11, + "learning_rate": 4.244735016820004e-05, + "loss": 2.3803, + "step": 3050000 + }, + { + "epoch": 15.11, + "learning_rate": 4.244611158177396e-05, + "loss": 2.3927, + "step": 3050500 + }, + { + "epoch": 15.12, + "learning_rate": 4.2444872995347875e-05, + "loss": 2.3387, + "step": 3051000 + }, + { + "epoch": 15.12, + "learning_rate": 4.244363688609464e-05, + "loss": 2.3727, + "step": 3051500 + }, + { + "epoch": 15.12, + "learning_rate": 4.2442398299668554e-05, + "loss": 2.3679, + "step": 3052000 + }, + { + "epoch": 15.12, + "learning_rate": 4.244115971324247e-05, + "loss": 2.3728, + "step": 3052500 + }, + { + "epoch": 15.13, + "learning_rate": 4.243992112681639e-05, + "loss": 2.3888, + "step": 3053000 + }, + { + "epoch": 15.13, + "learning_rate": 4.2438682540390305e-05, + "loss": 2.3701, + "step": 3053500 + }, + { + "epoch": 15.13, + "learning_rate": 4.243744395396422e-05, + "loss": 2.363, + "step": 3054000 + }, + { + "epoch": 15.13, + "learning_rate": 4.243620784471099e-05, + "loss": 2.3638, + "step": 3054500 + }, + { + "epoch": 15.14, + "learning_rate": 4.243496925828491e-05, + "loss": 2.3703, + "step": 3055000 + }, + { + "epoch": 15.14, + "learning_rate": 4.2433730671858824e-05, + "loss": 2.3799, + "step": 3055500 + }, + { + "epoch": 15.14, + "learning_rate": 4.243249208543274e-05, + "loss": 2.3817, + "step": 3056000 + }, + { + "epoch": 15.14, + "learning_rate": 4.243125349900666e-05, + "loss": 2.3796, + "step": 3056500 + }, + { + "epoch": 15.15, + "learning_rate": 4.243001738975342e-05, + "loss": 2.3622, + "step": 3057000 + }, + { + "epoch": 15.15, + "learning_rate": 4.242877880332734e-05, + "loss": 2.3801, + "step": 3057500 + }, + { + "epoch": 15.15, + "learning_rate": 4.2427542694074106e-05, + "loss": 2.3878, + "step": 3058000 + }, + { + "epoch": 15.15, + "learning_rate": 4.242630410764802e-05, + "loss": 2.3574, + "step": 3058500 + }, + { + "epoch": 15.16, + "learning_rate": 4.242506552122194e-05, + "loss": 2.3479, + "step": 3059000 + }, + { + "epoch": 15.16, + "learning_rate": 4.242382693479586e-05, + "loss": 2.3529, + "step": 3059500 + }, + { + "epoch": 15.16, + "learning_rate": 4.2422588348369774e-05, + "loss": 2.3862, + "step": 3060000 + }, + { + "epoch": 15.16, + "learning_rate": 4.242134976194369e-05, + "loss": 2.3654, + "step": 3060500 + }, + { + "epoch": 15.17, + "learning_rate": 4.242011117551761e-05, + "loss": 2.3518, + "step": 3061000 + }, + { + "epoch": 15.17, + "learning_rate": 4.2418875066264376e-05, + "loss": 2.374, + "step": 3061500 + }, + { + "epoch": 15.17, + "learning_rate": 4.241763647983829e-05, + "loss": 2.3655, + "step": 3062000 + }, + { + "epoch": 15.17, + "learning_rate": 4.241639789341221e-05, + "loss": 2.3658, + "step": 3062500 + }, + { + "epoch": 15.18, + "learning_rate": 4.241515930698613e-05, + "loss": 2.3602, + "step": 3063000 + }, + { + "epoch": 15.18, + "learning_rate": 4.2413920720560044e-05, + "loss": 2.3609, + "step": 3063500 + }, + { + "epoch": 15.18, + "learning_rate": 4.2412682134133954e-05, + "loss": 2.367, + "step": 3064000 + }, + { + "epoch": 15.18, + "learning_rate": 4.241144354770787e-05, + "loss": 2.3744, + "step": 3064500 + }, + { + "epoch": 15.19, + "learning_rate": 4.241020496128179e-05, + "loss": 2.3696, + "step": 3065000 + }, + { + "epoch": 15.19, + "learning_rate": 4.2408966374855705e-05, + "loss": 2.3808, + "step": 3065500 + }, + { + "epoch": 15.19, + "learning_rate": 4.2407730265602474e-05, + "loss": 2.3886, + "step": 3066000 + }, + { + "epoch": 15.19, + "learning_rate": 4.240649167917639e-05, + "loss": 2.3662, + "step": 3066500 + }, + { + "epoch": 15.19, + "learning_rate": 4.240525309275031e-05, + "loss": 2.3931, + "step": 3067000 + }, + { + "epoch": 15.2, + "learning_rate": 4.2404014506324225e-05, + "loss": 2.3764, + "step": 3067500 + }, + { + "epoch": 15.2, + "learning_rate": 4.240277591989814e-05, + "loss": 2.3745, + "step": 3068000 + }, + { + "epoch": 15.2, + "learning_rate": 4.240153733347206e-05, + "loss": 2.3827, + "step": 3068500 + }, + { + "epoch": 15.2, + "learning_rate": 4.240030122421883e-05, + "loss": 2.3736, + "step": 3069000 + }, + { + "epoch": 15.21, + "learning_rate": 4.2399062637792744e-05, + "loss": 2.3926, + "step": 3069500 + }, + { + "epoch": 15.21, + "learning_rate": 4.239782405136666e-05, + "loss": 2.3844, + "step": 3070000 + }, + { + "epoch": 15.21, + "learning_rate": 4.239658546494057e-05, + "loss": 2.3614, + "step": 3070500 + }, + { + "epoch": 15.21, + "learning_rate": 4.239534687851449e-05, + "loss": 2.3672, + "step": 3071000 + }, + { + "epoch": 15.22, + "learning_rate": 4.2394108292088405e-05, + "loss": 2.3647, + "step": 3071500 + }, + { + "epoch": 15.22, + "learning_rate": 4.239286970566232e-05, + "loss": 2.3832, + "step": 3072000 + }, + { + "epoch": 15.22, + "learning_rate": 4.239163111923624e-05, + "loss": 2.3962, + "step": 3072500 + }, + { + "epoch": 15.22, + "learning_rate": 4.239039500998301e-05, + "loss": 2.361, + "step": 3073000 + }, + { + "epoch": 15.23, + "learning_rate": 4.238915890072978e-05, + "loss": 2.3841, + "step": 3073500 + }, + { + "epoch": 15.23, + "learning_rate": 4.2387920314303694e-05, + "loss": 2.3729, + "step": 3074000 + }, + { + "epoch": 15.23, + "learning_rate": 4.238668172787761e-05, + "loss": 2.3962, + "step": 3074500 + }, + { + "epoch": 15.23, + "learning_rate": 4.238544314145153e-05, + "loss": 2.3965, + "step": 3075000 + }, + { + "epoch": 15.24, + "learning_rate": 4.2384204555025444e-05, + "loss": 2.3928, + "step": 3075500 + }, + { + "epoch": 15.24, + "learning_rate": 4.238296596859936e-05, + "loss": 2.3878, + "step": 3076000 + }, + { + "epoch": 15.24, + "learning_rate": 4.238172985934612e-05, + "loss": 2.3677, + "step": 3076500 + }, + { + "epoch": 15.24, + "learning_rate": 4.238049127292004e-05, + "loss": 2.3718, + "step": 3077000 + }, + { + "epoch": 15.25, + "learning_rate": 4.237925268649396e-05, + "loss": 2.3981, + "step": 3077500 + }, + { + "epoch": 15.25, + "learning_rate": 4.2378014100067874e-05, + "loss": 2.3709, + "step": 3078000 + }, + { + "epoch": 15.25, + "learning_rate": 4.237677551364179e-05, + "loss": 2.4063, + "step": 3078500 + }, + { + "epoch": 15.25, + "learning_rate": 4.237553940438856e-05, + "loss": 2.3766, + "step": 3079000 + }, + { + "epoch": 15.26, + "learning_rate": 4.237430081796248e-05, + "loss": 2.3836, + "step": 3079500 + }, + { + "epoch": 15.26, + "learning_rate": 4.2373064708709246e-05, + "loss": 2.3741, + "step": 3080000 + }, + { + "epoch": 15.26, + "learning_rate": 4.237182612228316e-05, + "loss": 2.3866, + "step": 3080500 + }, + { + "epoch": 15.26, + "learning_rate": 4.237058753585708e-05, + "loss": 2.3761, + "step": 3081000 + }, + { + "epoch": 15.27, + "learning_rate": 4.2369348949430996e-05, + "loss": 2.3606, + "step": 3081500 + }, + { + "epoch": 15.27, + "learning_rate": 4.2368110363004907e-05, + "loss": 2.3544, + "step": 3082000 + }, + { + "epoch": 15.27, + "learning_rate": 4.2366871776578824e-05, + "loss": 2.3828, + "step": 3082500 + }, + { + "epoch": 15.27, + "learning_rate": 4.236563319015274e-05, + "loss": 2.3494, + "step": 3083000 + }, + { + "epoch": 15.28, + "learning_rate": 4.236439460372666e-05, + "loss": 2.3749, + "step": 3083500 + }, + { + "epoch": 15.28, + "learning_rate": 4.2363156017300574e-05, + "loss": 2.384, + "step": 3084000 + }, + { + "epoch": 15.28, + "learning_rate": 4.236191990804735e-05, + "loss": 2.3721, + "step": 3084500 + }, + { + "epoch": 15.28, + "learning_rate": 4.236068132162126e-05, + "loss": 2.367, + "step": 3085000 + }, + { + "epoch": 15.29, + "learning_rate": 4.235944273519518e-05, + "loss": 2.3508, + "step": 3085500 + }, + { + "epoch": 15.29, + "learning_rate": 4.2358204148769094e-05, + "loss": 2.3788, + "step": 3086000 + }, + { + "epoch": 15.29, + "learning_rate": 4.235696556234301e-05, + "loss": 2.3785, + "step": 3086500 + }, + { + "epoch": 15.29, + "learning_rate": 4.235572697591693e-05, + "loss": 2.3908, + "step": 3087000 + }, + { + "epoch": 15.3, + "learning_rate": 4.2354488389490845e-05, + "loss": 2.3881, + "step": 3087500 + }, + { + "epoch": 15.3, + "learning_rate": 4.235324980306476e-05, + "loss": 2.3729, + "step": 3088000 + }, + { + "epoch": 15.3, + "learning_rate": 4.2352013693811524e-05, + "loss": 2.3611, + "step": 3088500 + }, + { + "epoch": 15.3, + "learning_rate": 4.235077510738544e-05, + "loss": 2.3854, + "step": 3089000 + }, + { + "epoch": 15.31, + "learning_rate": 4.2349538998132216e-05, + "loss": 2.3698, + "step": 3089500 + }, + { + "epoch": 15.31, + "learning_rate": 4.234830041170613e-05, + "loss": 2.3766, + "step": 3090000 + }, + { + "epoch": 15.31, + "learning_rate": 4.234706182528005e-05, + "loss": 2.366, + "step": 3090500 + }, + { + "epoch": 15.31, + "learning_rate": 4.234582323885397e-05, + "loss": 2.3723, + "step": 3091000 + }, + { + "epoch": 15.32, + "learning_rate": 4.234458465242788e-05, + "loss": 2.378, + "step": 3091500 + }, + { + "epoch": 15.32, + "learning_rate": 4.2343346066001794e-05, + "loss": 2.3886, + "step": 3092000 + }, + { + "epoch": 15.32, + "learning_rate": 4.234210747957571e-05, + "loss": 2.3767, + "step": 3092500 + }, + { + "epoch": 15.32, + "learning_rate": 4.234087137032248e-05, + "loss": 2.3718, + "step": 3093000 + }, + { + "epoch": 15.33, + "learning_rate": 4.23396327838964e-05, + "loss": 2.3711, + "step": 3093500 + }, + { + "epoch": 15.33, + "learning_rate": 4.2338394197470314e-05, + "loss": 2.3911, + "step": 3094000 + }, + { + "epoch": 15.33, + "learning_rate": 4.2337155611044224e-05, + "loss": 2.3506, + "step": 3094500 + }, + { + "epoch": 15.33, + "learning_rate": 4.2335919501791e-05, + "loss": 2.3763, + "step": 3095000 + }, + { + "epoch": 15.34, + "learning_rate": 4.2334680915364916e-05, + "loss": 2.3951, + "step": 3095500 + }, + { + "epoch": 15.34, + "learning_rate": 4.233344232893883e-05, + "loss": 2.3904, + "step": 3096000 + }, + { + "epoch": 15.34, + "learning_rate": 4.233220374251275e-05, + "loss": 2.3658, + "step": 3096500 + }, + { + "epoch": 15.34, + "learning_rate": 4.233096515608667e-05, + "loss": 2.3671, + "step": 3097000 + }, + { + "epoch": 15.35, + "learning_rate": 4.232972656966058e-05, + "loss": 2.3921, + "step": 3097500 + }, + { + "epoch": 15.35, + "learning_rate": 4.2328487983234494e-05, + "loss": 2.3858, + "step": 3098000 + }, + { + "epoch": 15.35, + "learning_rate": 4.232724939680841e-05, + "loss": 2.3708, + "step": 3098500 + }, + { + "epoch": 15.35, + "learning_rate": 4.232601081038233e-05, + "loss": 2.3545, + "step": 3099000 + }, + { + "epoch": 15.36, + "learning_rate": 4.2324772223956245e-05, + "loss": 2.3699, + "step": 3099500 + }, + { + "epoch": 15.36, + "learning_rate": 4.232353363753016e-05, + "loss": 2.3685, + "step": 3100000 + }, + { + "epoch": 15.36, + "learning_rate": 4.232229505110408e-05, + "loss": 2.3597, + "step": 3100500 + }, + { + "epoch": 15.36, + "learning_rate": 4.2321056464677996e-05, + "loss": 2.4052, + "step": 3101000 + }, + { + "epoch": 15.37, + "learning_rate": 4.231981787825191e-05, + "loss": 2.3693, + "step": 3101500 + }, + { + "epoch": 15.37, + "learning_rate": 4.231857929182583e-05, + "loss": 2.3889, + "step": 3102000 + }, + { + "epoch": 15.37, + "learning_rate": 4.2317340705399746e-05, + "loss": 2.3852, + "step": 3102500 + }, + { + "epoch": 15.37, + "learning_rate": 4.2316102118973663e-05, + "loss": 2.3749, + "step": 3103000 + }, + { + "epoch": 15.38, + "learning_rate": 4.2314866009720425e-05, + "loss": 2.3614, + "step": 3103500 + }, + { + "epoch": 15.38, + "learning_rate": 4.231362742329434e-05, + "loss": 2.3763, + "step": 3104000 + }, + { + "epoch": 15.38, + "learning_rate": 4.231238883686826e-05, + "loss": 2.3818, + "step": 3104500 + }, + { + "epoch": 15.38, + "learning_rate": 4.2311150250442176e-05, + "loss": 2.3593, + "step": 3105000 + }, + { + "epoch": 15.39, + "learning_rate": 4.2309914141188945e-05, + "loss": 2.3944, + "step": 3105500 + }, + { + "epoch": 15.39, + "learning_rate": 4.2308678031935714e-05, + "loss": 2.3794, + "step": 3106000 + }, + { + "epoch": 15.39, + "learning_rate": 4.230743944550963e-05, + "loss": 2.3482, + "step": 3106500 + }, + { + "epoch": 15.39, + "learning_rate": 4.230620085908355e-05, + "loss": 2.3852, + "step": 3107000 + }, + { + "epoch": 15.4, + "learning_rate": 4.2304962272657465e-05, + "loss": 2.3996, + "step": 3107500 + }, + { + "epoch": 15.4, + "learning_rate": 4.2303723686231375e-05, + "loss": 2.3387, + "step": 3108000 + }, + { + "epoch": 15.4, + "learning_rate": 4.230248509980529e-05, + "loss": 2.3473, + "step": 3108500 + }, + { + "epoch": 15.4, + "learning_rate": 4.230124899055207e-05, + "loss": 2.3551, + "step": 3109000 + }, + { + "epoch": 15.41, + "learning_rate": 4.2300012881298836e-05, + "loss": 2.3676, + "step": 3109500 + }, + { + "epoch": 15.41, + "learning_rate": 4.229877429487275e-05, + "loss": 2.3606, + "step": 3110000 + }, + { + "epoch": 15.41, + "learning_rate": 4.229753570844667e-05, + "loss": 2.3807, + "step": 3110500 + }, + { + "epoch": 15.41, + "learning_rate": 4.229629712202059e-05, + "loss": 2.3688, + "step": 3111000 + }, + { + "epoch": 15.42, + "learning_rate": 4.2295058535594504e-05, + "loss": 2.4027, + "step": 3111500 + }, + { + "epoch": 15.42, + "learning_rate": 4.2293819949168414e-05, + "loss": 2.3676, + "step": 3112000 + }, + { + "epoch": 15.42, + "learning_rate": 4.229258136274233e-05, + "loss": 2.3845, + "step": 3112500 + }, + { + "epoch": 15.42, + "learning_rate": 4.229134277631625e-05, + "loss": 2.3844, + "step": 3113000 + }, + { + "epoch": 15.43, + "learning_rate": 4.2290104189890165e-05, + "loss": 2.3842, + "step": 3113500 + }, + { + "epoch": 15.43, + "learning_rate": 4.2288868080636934e-05, + "loss": 2.3777, + "step": 3114000 + }, + { + "epoch": 15.43, + "learning_rate": 4.228762949421085e-05, + "loss": 2.3698, + "step": 3114500 + }, + { + "epoch": 15.43, + "learning_rate": 4.228639090778477e-05, + "loss": 2.3828, + "step": 3115000 + }, + { + "epoch": 15.44, + "learning_rate": 4.2285154798531536e-05, + "loss": 2.3369, + "step": 3115500 + }, + { + "epoch": 15.44, + "learning_rate": 4.228391621210545e-05, + "loss": 2.3914, + "step": 3116000 + }, + { + "epoch": 15.44, + "learning_rate": 4.228267762567937e-05, + "loss": 2.3863, + "step": 3116500 + }, + { + "epoch": 15.44, + "learning_rate": 4.228144151642613e-05, + "loss": 2.3908, + "step": 3117000 + }, + { + "epoch": 15.45, + "learning_rate": 4.228020293000005e-05, + "loss": 2.3652, + "step": 3117500 + }, + { + "epoch": 15.45, + "learning_rate": 4.2278964343573966e-05, + "loss": 2.3764, + "step": 3118000 + }, + { + "epoch": 15.45, + "learning_rate": 4.227772575714788e-05, + "loss": 2.4005, + "step": 3118500 + }, + { + "epoch": 15.45, + "learning_rate": 4.22764871707218e-05, + "loss": 2.3834, + "step": 3119000 + }, + { + "epoch": 15.46, + "learning_rate": 4.227525106146857e-05, + "loss": 2.3881, + "step": 3119500 + }, + { + "epoch": 15.46, + "learning_rate": 4.2274012475042486e-05, + "loss": 2.3676, + "step": 3120000 + }, + { + "epoch": 15.46, + "learning_rate": 4.22727738886164e-05, + "loss": 2.3671, + "step": 3120500 + }, + { + "epoch": 15.46, + "learning_rate": 4.227153530219032e-05, + "loss": 2.3702, + "step": 3121000 + }, + { + "epoch": 15.46, + "learning_rate": 4.2270296715764236e-05, + "loss": 2.3961, + "step": 3121500 + }, + { + "epoch": 15.47, + "learning_rate": 4.226905812933815e-05, + "loss": 2.3922, + "step": 3122000 + }, + { + "epoch": 15.47, + "learning_rate": 4.226781954291207e-05, + "loss": 2.3649, + "step": 3122500 + }, + { + "epoch": 15.47, + "learning_rate": 4.226658095648599e-05, + "loss": 2.3649, + "step": 3123000 + }, + { + "epoch": 15.47, + "learning_rate": 4.2265342370059904e-05, + "loss": 2.393, + "step": 3123500 + }, + { + "epoch": 15.48, + "learning_rate": 4.226410378363382e-05, + "loss": 2.3866, + "step": 3124000 + }, + { + "epoch": 15.48, + "learning_rate": 4.226286519720774e-05, + "loss": 2.3817, + "step": 3124500 + }, + { + "epoch": 15.48, + "learning_rate": 4.2261626610781655e-05, + "loss": 2.3549, + "step": 3125000 + }, + { + "epoch": 15.48, + "learning_rate": 4.2260388024355565e-05, + "loss": 2.3664, + "step": 3125500 + }, + { + "epoch": 15.49, + "learning_rate": 4.225914943792948e-05, + "loss": 2.3663, + "step": 3126000 + }, + { + "epoch": 15.49, + "learning_rate": 4.22579158058491e-05, + "loss": 2.3709, + "step": 3126500 + }, + { + "epoch": 15.49, + "learning_rate": 4.225667721942302e-05, + "loss": 2.368, + "step": 3127000 + }, + { + "epoch": 15.49, + "learning_rate": 4.2255438632996936e-05, + "loss": 2.3716, + "step": 3127500 + }, + { + "epoch": 15.5, + "learning_rate": 4.2254200046570853e-05, + "loss": 2.4181, + "step": 3128000 + }, + { + "epoch": 15.5, + "learning_rate": 4.225296146014477e-05, + "loss": 2.3836, + "step": 3128500 + }, + { + "epoch": 15.5, + "learning_rate": 4.225172287371869e-05, + "loss": 2.3661, + "step": 3129000 + }, + { + "epoch": 15.5, + "learning_rate": 4.225048676446545e-05, + "loss": 2.3753, + "step": 3129500 + }, + { + "epoch": 15.51, + "learning_rate": 4.2249248178039366e-05, + "loss": 2.3983, + "step": 3130000 + }, + { + "epoch": 15.51, + "learning_rate": 4.224800959161328e-05, + "loss": 2.3943, + "step": 3130500 + }, + { + "epoch": 15.51, + "learning_rate": 4.22467710051872e-05, + "loss": 2.3889, + "step": 3131000 + }, + { + "epoch": 15.51, + "learning_rate": 4.224553241876112e-05, + "loss": 2.3742, + "step": 3131500 + }, + { + "epoch": 15.52, + "learning_rate": 4.2244293832335034e-05, + "loss": 2.3783, + "step": 3132000 + }, + { + "epoch": 15.52, + "learning_rate": 4.224305524590895e-05, + "loss": 2.4068, + "step": 3132500 + }, + { + "epoch": 15.52, + "learning_rate": 4.224181665948287e-05, + "loss": 2.3924, + "step": 3133000 + }, + { + "epoch": 15.52, + "learning_rate": 4.2240580550229637e-05, + "loss": 2.386, + "step": 3133500 + }, + { + "epoch": 15.53, + "learning_rate": 4.2239341963803554e-05, + "loss": 2.3976, + "step": 3134000 + }, + { + "epoch": 15.53, + "learning_rate": 4.223810337737747e-05, + "loss": 2.369, + "step": 3134500 + }, + { + "epoch": 15.53, + "learning_rate": 4.223686479095139e-05, + "loss": 2.3854, + "step": 3135000 + }, + { + "epoch": 15.53, + "learning_rate": 4.2235626204525304e-05, + "loss": 2.392, + "step": 3135500 + }, + { + "epoch": 15.54, + "learning_rate": 4.223438761809922e-05, + "loss": 2.3792, + "step": 3136000 + }, + { + "epoch": 15.54, + "learning_rate": 4.223314903167314e-05, + "loss": 2.357, + "step": 3136500 + }, + { + "epoch": 15.54, + "learning_rate": 4.2231910445247055e-05, + "loss": 2.3669, + "step": 3137000 + }, + { + "epoch": 15.54, + "learning_rate": 4.223067185882097e-05, + "loss": 2.3775, + "step": 3137500 + }, + { + "epoch": 15.55, + "learning_rate": 4.2229435749567734e-05, + "loss": 2.3768, + "step": 3138000 + }, + { + "epoch": 15.55, + "learning_rate": 4.222819716314165e-05, + "loss": 2.3868, + "step": 3138500 + }, + { + "epoch": 15.55, + "learning_rate": 4.222695857671557e-05, + "loss": 2.3805, + "step": 3139000 + }, + { + "epoch": 15.55, + "learning_rate": 4.2225719990289485e-05, + "loss": 2.3855, + "step": 3139500 + }, + { + "epoch": 15.56, + "learning_rate": 4.22244814038634e-05, + "loss": 2.364, + "step": 3140000 + }, + { + "epoch": 15.56, + "learning_rate": 4.222324281743732e-05, + "loss": 2.3673, + "step": 3140500 + }, + { + "epoch": 15.56, + "learning_rate": 4.222200670818409e-05, + "loss": 2.3848, + "step": 3141000 + }, + { + "epoch": 15.56, + "learning_rate": 4.2220768121758004e-05, + "loss": 2.3857, + "step": 3141500 + }, + { + "epoch": 15.57, + "learning_rate": 4.221953201250477e-05, + "loss": 2.3872, + "step": 3142000 + }, + { + "epoch": 15.57, + "learning_rate": 4.2218293426078683e-05, + "loss": 2.3764, + "step": 3142500 + }, + { + "epoch": 15.57, + "learning_rate": 4.22170548396526e-05, + "loss": 2.3927, + "step": 3143000 + }, + { + "epoch": 15.57, + "learning_rate": 4.221581625322652e-05, + "loss": 2.3675, + "step": 3143500 + }, + { + "epoch": 15.58, + "learning_rate": 4.2214577666800434e-05, + "loss": 2.3898, + "step": 3144000 + }, + { + "epoch": 15.58, + "learning_rate": 4.221333908037435e-05, + "loss": 2.3677, + "step": 3144500 + }, + { + "epoch": 15.58, + "learning_rate": 4.221210049394827e-05, + "loss": 2.4042, + "step": 3145000 + }, + { + "epoch": 15.58, + "learning_rate": 4.2210861907522185e-05, + "loss": 2.3829, + "step": 3145500 + }, + { + "epoch": 15.59, + "learning_rate": 4.2209625798268954e-05, + "loss": 2.3929, + "step": 3146000 + }, + { + "epoch": 15.59, + "learning_rate": 4.220838721184287e-05, + "loss": 2.3803, + "step": 3146500 + }, + { + "epoch": 15.59, + "learning_rate": 4.220714862541679e-05, + "loss": 2.3651, + "step": 3147000 + }, + { + "epoch": 15.59, + "learning_rate": 4.2205910038990705e-05, + "loss": 2.4064, + "step": 3147500 + }, + { + "epoch": 15.6, + "learning_rate": 4.220467392973747e-05, + "loss": 2.3791, + "step": 3148000 + }, + { + "epoch": 15.6, + "learning_rate": 4.220343534331139e-05, + "loss": 2.402, + "step": 3148500 + }, + { + "epoch": 15.6, + "learning_rate": 4.22021967568853e-05, + "loss": 2.3714, + "step": 3149000 + }, + { + "epoch": 15.6, + "learning_rate": 4.220095817045922e-05, + "loss": 2.3842, + "step": 3149500 + }, + { + "epoch": 15.61, + "learning_rate": 4.2199719584033134e-05, + "loss": 2.3855, + "step": 3150000 + }, + { + "epoch": 15.61, + "learning_rate": 4.21984834747799e-05, + "loss": 2.4002, + "step": 3150500 + }, + { + "epoch": 15.61, + "learning_rate": 4.219724488835382e-05, + "loss": 2.3798, + "step": 3151000 + }, + { + "epoch": 15.61, + "learning_rate": 4.219600630192774e-05, + "loss": 2.3679, + "step": 3151500 + }, + { + "epoch": 15.62, + "learning_rate": 4.2194770192674506e-05, + "loss": 2.3694, + "step": 3152000 + }, + { + "epoch": 15.62, + "learning_rate": 4.219353160624842e-05, + "loss": 2.3997, + "step": 3152500 + }, + { + "epoch": 15.62, + "learning_rate": 4.219229301982234e-05, + "loss": 2.3808, + "step": 3153000 + }, + { + "epoch": 15.62, + "learning_rate": 4.2191054433396257e-05, + "loss": 2.3822, + "step": 3153500 + }, + { + "epoch": 15.63, + "learning_rate": 4.2189815846970173e-05, + "loss": 2.3799, + "step": 3154000 + }, + { + "epoch": 15.63, + "learning_rate": 4.218857726054409e-05, + "loss": 2.3874, + "step": 3154500 + }, + { + "epoch": 15.63, + "learning_rate": 4.218733867411801e-05, + "loss": 2.387, + "step": 3155000 + }, + { + "epoch": 15.63, + "learning_rate": 4.2186100087691924e-05, + "loss": 2.3953, + "step": 3155500 + }, + { + "epoch": 15.64, + "learning_rate": 4.2184861501265834e-05, + "loss": 2.3654, + "step": 3156000 + }, + { + "epoch": 15.64, + "learning_rate": 4.218362291483975e-05, + "loss": 2.3823, + "step": 3156500 + }, + { + "epoch": 15.64, + "learning_rate": 4.218238432841367e-05, + "loss": 2.3889, + "step": 3157000 + }, + { + "epoch": 15.64, + "learning_rate": 4.218114821916044e-05, + "loss": 2.3922, + "step": 3157500 + }, + { + "epoch": 15.65, + "learning_rate": 4.2179909632734354e-05, + "loss": 2.395, + "step": 3158000 + }, + { + "epoch": 15.65, + "learning_rate": 4.217867352348112e-05, + "loss": 2.3761, + "step": 3158500 + }, + { + "epoch": 15.65, + "learning_rate": 4.217743493705504e-05, + "loss": 2.4031, + "step": 3159000 + }, + { + "epoch": 15.65, + "learning_rate": 4.217619635062896e-05, + "loss": 2.3753, + "step": 3159500 + }, + { + "epoch": 15.66, + "learning_rate": 4.2174957764202874e-05, + "loss": 2.3735, + "step": 3160000 + }, + { + "epoch": 15.66, + "learning_rate": 4.217371917777679e-05, + "loss": 2.3785, + "step": 3160500 + }, + { + "epoch": 15.66, + "learning_rate": 4.217248059135071e-05, + "loss": 2.3862, + "step": 3161000 + }, + { + "epoch": 15.66, + "learning_rate": 4.217124448209747e-05, + "loss": 2.3566, + "step": 3161500 + }, + { + "epoch": 15.67, + "learning_rate": 4.2170008372844245e-05, + "loss": 2.3831, + "step": 3162000 + }, + { + "epoch": 15.67, + "learning_rate": 4.216876978641816e-05, + "loss": 2.3836, + "step": 3162500 + }, + { + "epoch": 15.67, + "learning_rate": 4.216753119999208e-05, + "loss": 2.3862, + "step": 3163000 + }, + { + "epoch": 15.67, + "learning_rate": 4.2166292613565996e-05, + "loss": 2.3876, + "step": 3163500 + }, + { + "epoch": 15.68, + "learning_rate": 4.2165054027139906e-05, + "loss": 2.3782, + "step": 3164000 + }, + { + "epoch": 15.68, + "learning_rate": 4.216381544071382e-05, + "loss": 2.3743, + "step": 3164500 + }, + { + "epoch": 15.68, + "learning_rate": 4.216257685428774e-05, + "loss": 2.3789, + "step": 3165000 + }, + { + "epoch": 15.68, + "learning_rate": 4.216133826786166e-05, + "loss": 2.3919, + "step": 3165500 + }, + { + "epoch": 15.69, + "learning_rate": 4.2160099681435574e-05, + "loss": 2.3701, + "step": 3166000 + }, + { + "epoch": 15.69, + "learning_rate": 4.215886109500949e-05, + "loss": 2.3719, + "step": 3166500 + }, + { + "epoch": 15.69, + "learning_rate": 4.215762250858341e-05, + "loss": 2.3928, + "step": 3167000 + }, + { + "epoch": 15.69, + "learning_rate": 4.2156383922157325e-05, + "loss": 2.3984, + "step": 3167500 + }, + { + "epoch": 15.7, + "learning_rate": 4.215514533573124e-05, + "loss": 2.3644, + "step": 3168000 + }, + { + "epoch": 15.7, + "learning_rate": 4.215390674930516e-05, + "loss": 2.3855, + "step": 3168500 + }, + { + "epoch": 15.7, + "learning_rate": 4.2152668162879075e-05, + "loss": 2.3751, + "step": 3169000 + }, + { + "epoch": 15.7, + "learning_rate": 4.2151429576452985e-05, + "loss": 2.3593, + "step": 3169500 + }, + { + "epoch": 15.71, + "learning_rate": 4.2150193467199754e-05, + "loss": 2.3574, + "step": 3170000 + }, + { + "epoch": 15.71, + "learning_rate": 4.214895488077367e-05, + "loss": 2.3706, + "step": 3170500 + }, + { + "epoch": 15.71, + "learning_rate": 4.214771629434759e-05, + "loss": 2.3809, + "step": 3171000 + }, + { + "epoch": 15.71, + "learning_rate": 4.214648018509436e-05, + "loss": 2.3843, + "step": 3171500 + }, + { + "epoch": 15.72, + "learning_rate": 4.2145241598668274e-05, + "loss": 2.3904, + "step": 3172000 + }, + { + "epoch": 15.72, + "learning_rate": 4.214400301224219e-05, + "loss": 2.38, + "step": 3172500 + }, + { + "epoch": 15.72, + "learning_rate": 4.214276442581611e-05, + "loss": 2.3769, + "step": 3173000 + }, + { + "epoch": 15.72, + "learning_rate": 4.2141525839390025e-05, + "loss": 2.3672, + "step": 3173500 + }, + { + "epoch": 15.73, + "learning_rate": 4.214028725296394e-05, + "loss": 2.4197, + "step": 3174000 + }, + { + "epoch": 15.73, + "learning_rate": 4.213904866653786e-05, + "loss": 2.3741, + "step": 3174500 + }, + { + "epoch": 15.73, + "learning_rate": 4.2137810080111775e-05, + "loss": 2.3853, + "step": 3175000 + }, + { + "epoch": 15.73, + "learning_rate": 4.213657149368569e-05, + "loss": 2.3921, + "step": 3175500 + }, + { + "epoch": 15.74, + "learning_rate": 4.213533290725961e-05, + "loss": 2.3486, + "step": 3176000 + }, + { + "epoch": 15.74, + "learning_rate": 4.213409679800637e-05, + "loss": 2.3594, + "step": 3176500 + }, + { + "epoch": 15.74, + "learning_rate": 4.213285821158029e-05, + "loss": 2.3819, + "step": 3177000 + }, + { + "epoch": 15.74, + "learning_rate": 4.2131619625154205e-05, + "loss": 2.3789, + "step": 3177500 + }, + { + "epoch": 15.74, + "learning_rate": 4.213038103872812e-05, + "loss": 2.3747, + "step": 3178000 + }, + { + "epoch": 15.75, + "learning_rate": 4.212914245230204e-05, + "loss": 2.3711, + "step": 3178500 + }, + { + "epoch": 15.75, + "learning_rate": 4.2127903865875956e-05, + "loss": 2.3928, + "step": 3179000 + }, + { + "epoch": 15.75, + "learning_rate": 4.2126667756622725e-05, + "loss": 2.3843, + "step": 3179500 + }, + { + "epoch": 15.75, + "learning_rate": 4.212543164736949e-05, + "loss": 2.4232, + "step": 3180000 + }, + { + "epoch": 15.76, + "learning_rate": 4.2124193060943404e-05, + "loss": 2.3803, + "step": 3180500 + }, + { + "epoch": 15.76, + "learning_rate": 4.212295447451732e-05, + "loss": 2.3913, + "step": 3181000 + }, + { + "epoch": 15.76, + "learning_rate": 4.212171588809124e-05, + "loss": 2.3512, + "step": 3181500 + }, + { + "epoch": 15.76, + "learning_rate": 4.2120477301665155e-05, + "loss": 2.3715, + "step": 3182000 + }, + { + "epoch": 15.77, + "learning_rate": 4.211923871523907e-05, + "loss": 2.3803, + "step": 3182500 + }, + { + "epoch": 15.77, + "learning_rate": 4.211800012881299e-05, + "loss": 2.3765, + "step": 3183000 + }, + { + "epoch": 15.77, + "learning_rate": 4.211676401955976e-05, + "loss": 2.3654, + "step": 3183500 + }, + { + "epoch": 15.77, + "learning_rate": 4.2115525433133674e-05, + "loss": 2.3893, + "step": 3184000 + }, + { + "epoch": 15.78, + "learning_rate": 4.211428684670759e-05, + "loss": 2.3841, + "step": 3184500 + }, + { + "epoch": 15.78, + "learning_rate": 4.211304826028151e-05, + "loss": 2.3901, + "step": 3185000 + }, + { + "epoch": 15.78, + "learning_rate": 4.2111809673855425e-05, + "loss": 2.3614, + "step": 3185500 + }, + { + "epoch": 15.78, + "learning_rate": 4.211057108742934e-05, + "loss": 2.3858, + "step": 3186000 + }, + { + "epoch": 15.79, + "learning_rate": 4.210933250100326e-05, + "loss": 2.3817, + "step": 3186500 + }, + { + "epoch": 15.79, + "learning_rate": 4.210809886892288e-05, + "loss": 2.3566, + "step": 3187000 + }, + { + "epoch": 15.79, + "learning_rate": 4.2106860282496796e-05, + "loss": 2.3557, + "step": 3187500 + }, + { + "epoch": 15.79, + "learning_rate": 4.210562169607071e-05, + "loss": 2.3717, + "step": 3188000 + }, + { + "epoch": 15.8, + "learning_rate": 4.210438310964463e-05, + "loss": 2.3864, + "step": 3188500 + }, + { + "epoch": 15.8, + "learning_rate": 4.210314452321854e-05, + "loss": 2.3699, + "step": 3189000 + }, + { + "epoch": 15.8, + "learning_rate": 4.210190593679246e-05, + "loss": 2.4, + "step": 3189500 + }, + { + "epoch": 15.8, + "learning_rate": 4.2100667350366374e-05, + "loss": 2.3853, + "step": 3190000 + }, + { + "epoch": 15.81, + "learning_rate": 4.209942876394029e-05, + "loss": 2.3952, + "step": 3190500 + }, + { + "epoch": 15.81, + "learning_rate": 4.209819017751421e-05, + "loss": 2.3948, + "step": 3191000 + }, + { + "epoch": 15.81, + "learning_rate": 4.2096951591088125e-05, + "loss": 2.3522, + "step": 3191500 + }, + { + "epoch": 15.81, + "learning_rate": 4.209571300466204e-05, + "loss": 2.3915, + "step": 3192000 + }, + { + "epoch": 15.82, + "learning_rate": 4.209447441823596e-05, + "loss": 2.3876, + "step": 3192500 + }, + { + "epoch": 15.82, + "learning_rate": 4.2093235831809876e-05, + "loss": 2.4019, + "step": 3193000 + }, + { + "epoch": 15.82, + "learning_rate": 4.209199724538379e-05, + "loss": 2.3881, + "step": 3193500 + }, + { + "epoch": 15.82, + "learning_rate": 4.209075865895771e-05, + "loss": 2.4025, + "step": 3194000 + }, + { + "epoch": 15.83, + "learning_rate": 4.208952254970447e-05, + "loss": 2.3753, + "step": 3194500 + }, + { + "epoch": 15.83, + "learning_rate": 4.208828644045124e-05, + "loss": 2.4118, + "step": 3195000 + }, + { + "epoch": 15.83, + "learning_rate": 4.208704785402516e-05, + "loss": 2.3851, + "step": 3195500 + }, + { + "epoch": 15.83, + "learning_rate": 4.2085809267599074e-05, + "loss": 2.3801, + "step": 3196000 + }, + { + "epoch": 15.84, + "learning_rate": 4.208457315834585e-05, + "loss": 2.394, + "step": 3196500 + }, + { + "epoch": 15.84, + "learning_rate": 4.208333457191977e-05, + "loss": 2.4099, + "step": 3197000 + }, + { + "epoch": 15.84, + "learning_rate": 4.208209598549368e-05, + "loss": 2.3875, + "step": 3197500 + }, + { + "epoch": 15.84, + "learning_rate": 4.2080857399067594e-05, + "loss": 2.3896, + "step": 3198000 + }, + { + "epoch": 15.85, + "learning_rate": 4.207961881264151e-05, + "loss": 2.3944, + "step": 3198500 + }, + { + "epoch": 15.85, + "learning_rate": 4.207838022621543e-05, + "loss": 2.3874, + "step": 3199000 + }, + { + "epoch": 15.85, + "learning_rate": 4.2077141639789345e-05, + "loss": 2.3929, + "step": 3199500 + }, + { + "epoch": 15.85, + "learning_rate": 4.2075903053363255e-05, + "loss": 2.3987, + "step": 3200000 + }, + { + "epoch": 15.86, + "learning_rate": 4.207466694411003e-05, + "loss": 2.3844, + "step": 3200500 + }, + { + "epoch": 15.86, + "learning_rate": 4.207342835768395e-05, + "loss": 2.3909, + "step": 3201000 + }, + { + "epoch": 15.86, + "learning_rate": 4.207218977125786e-05, + "loss": 2.3669, + "step": 3201500 + }, + { + "epoch": 15.86, + "learning_rate": 4.2070951184831774e-05, + "loss": 2.3722, + "step": 3202000 + }, + { + "epoch": 15.87, + "learning_rate": 4.206971259840569e-05, + "loss": 2.3922, + "step": 3202500 + }, + { + "epoch": 15.87, + "learning_rate": 4.206847401197961e-05, + "loss": 2.3647, + "step": 3203000 + }, + { + "epoch": 15.87, + "learning_rate": 4.2067235425553525e-05, + "loss": 2.3798, + "step": 3203500 + }, + { + "epoch": 15.87, + "learning_rate": 4.206599683912744e-05, + "loss": 2.3789, + "step": 3204000 + }, + { + "epoch": 15.88, + "learning_rate": 4.206475825270136e-05, + "loss": 2.3835, + "step": 3204500 + }, + { + "epoch": 15.88, + "learning_rate": 4.2063519666275276e-05, + "loss": 2.4027, + "step": 3205000 + }, + { + "epoch": 15.88, + "learning_rate": 4.206228107984919e-05, + "loss": 2.3981, + "step": 3205500 + }, + { + "epoch": 15.88, + "learning_rate": 4.206104249342311e-05, + "loss": 2.3786, + "step": 3206000 + }, + { + "epoch": 15.89, + "learning_rate": 4.205980390699703e-05, + "loss": 2.3917, + "step": 3206500 + }, + { + "epoch": 15.89, + "learning_rate": 4.2058565320570944e-05, + "loss": 2.381, + "step": 3207000 + }, + { + "epoch": 15.89, + "learning_rate": 4.2057329211317706e-05, + "loss": 2.4097, + "step": 3207500 + }, + { + "epoch": 15.89, + "learning_rate": 4.205609062489162e-05, + "loss": 2.37, + "step": 3208000 + }, + { + "epoch": 15.9, + "learning_rate": 4.205485203846554e-05, + "loss": 2.3886, + "step": 3208500 + }, + { + "epoch": 15.9, + "learning_rate": 4.2053613452039457e-05, + "loss": 2.3526, + "step": 3209000 + }, + { + "epoch": 15.9, + "learning_rate": 4.2052374865613374e-05, + "loss": 2.3746, + "step": 3209500 + }, + { + "epoch": 15.9, + "learning_rate": 4.205113875636014e-05, + "loss": 2.397, + "step": 3210000 + }, + { + "epoch": 15.91, + "learning_rate": 4.204990016993406e-05, + "loss": 2.3853, + "step": 3210500 + }, + { + "epoch": 15.91, + "learning_rate": 4.2048661583507976e-05, + "loss": 2.4041, + "step": 3211000 + }, + { + "epoch": 15.91, + "learning_rate": 4.204742299708189e-05, + "loss": 2.3686, + "step": 3211500 + }, + { + "epoch": 15.91, + "learning_rate": 4.204618441065581e-05, + "loss": 2.3785, + "step": 3212000 + }, + { + "epoch": 15.92, + "learning_rate": 4.204494582422973e-05, + "loss": 2.4122, + "step": 3212500 + }, + { + "epoch": 15.92, + "learning_rate": 4.2043707237803644e-05, + "loss": 2.3736, + "step": 3213000 + }, + { + "epoch": 15.92, + "learning_rate": 4.204246865137756e-05, + "loss": 2.3955, + "step": 3213500 + }, + { + "epoch": 15.92, + "learning_rate": 4.204123006495148e-05, + "loss": 2.4139, + "step": 3214000 + }, + { + "epoch": 15.93, + "learning_rate": 4.2039991478525395e-05, + "loss": 2.385, + "step": 3214500 + }, + { + "epoch": 15.93, + "learning_rate": 4.203875536927216e-05, + "loss": 2.3506, + "step": 3215000 + }, + { + "epoch": 15.93, + "learning_rate": 4.2037516782846074e-05, + "loss": 2.3841, + "step": 3215500 + }, + { + "epoch": 15.93, + "learning_rate": 4.203627819641999e-05, + "loss": 2.3781, + "step": 3216000 + }, + { + "epoch": 15.94, + "learning_rate": 4.203503960999391e-05, + "loss": 2.3828, + "step": 3216500 + }, + { + "epoch": 15.94, + "learning_rate": 4.2033801023567824e-05, + "loss": 2.402, + "step": 3217000 + }, + { + "epoch": 15.94, + "learning_rate": 4.203256243714174e-05, + "loss": 2.3728, + "step": 3217500 + }, + { + "epoch": 15.94, + "learning_rate": 4.203132632788851e-05, + "loss": 2.3832, + "step": 3218000 + }, + { + "epoch": 15.95, + "learning_rate": 4.203008774146243e-05, + "loss": 2.3773, + "step": 3218500 + }, + { + "epoch": 15.95, + "learning_rate": 4.2028849155036344e-05, + "loss": 2.3783, + "step": 3219000 + }, + { + "epoch": 15.95, + "learning_rate": 4.202761056861026e-05, + "loss": 2.401, + "step": 3219500 + }, + { + "epoch": 15.95, + "learning_rate": 4.202637445935702e-05, + "loss": 2.4113, + "step": 3220000 + }, + { + "epoch": 15.96, + "learning_rate": 4.202513587293094e-05, + "loss": 2.3905, + "step": 3220500 + }, + { + "epoch": 15.96, + "learning_rate": 4.202389728650486e-05, + "loss": 2.3798, + "step": 3221000 + }, + { + "epoch": 15.96, + "learning_rate": 4.2022658700078774e-05, + "loss": 2.3802, + "step": 3221500 + }, + { + "epoch": 15.96, + "learning_rate": 4.202142011365269e-05, + "loss": 2.3822, + "step": 3222000 + }, + { + "epoch": 15.97, + "learning_rate": 4.202018152722661e-05, + "loss": 2.3768, + "step": 3222500 + }, + { + "epoch": 15.97, + "learning_rate": 4.2018942940800525e-05, + "loss": 2.373, + "step": 3223000 + }, + { + "epoch": 15.97, + "learning_rate": 4.201770683154729e-05, + "loss": 2.3979, + "step": 3223500 + }, + { + "epoch": 15.97, + "learning_rate": 4.201646824512121e-05, + "loss": 2.4065, + "step": 3224000 + }, + { + "epoch": 15.98, + "learning_rate": 4.201522965869513e-05, + "loss": 2.3761, + "step": 3224500 + }, + { + "epoch": 15.98, + "learning_rate": 4.2013991072269044e-05, + "loss": 2.3741, + "step": 3225000 + }, + { + "epoch": 15.98, + "learning_rate": 4.201275248584296e-05, + "loss": 2.354, + "step": 3225500 + }, + { + "epoch": 15.98, + "learning_rate": 4.201151637658973e-05, + "loss": 2.3871, + "step": 3226000 + }, + { + "epoch": 15.99, + "learning_rate": 4.201027779016365e-05, + "loss": 2.3685, + "step": 3226500 + }, + { + "epoch": 15.99, + "learning_rate": 4.200903920373756e-05, + "loss": 2.3788, + "step": 3227000 + }, + { + "epoch": 15.99, + "learning_rate": 4.2007805571657184e-05, + "loss": 2.365, + "step": 3227500 + }, + { + "epoch": 15.99, + "learning_rate": 4.20065669852311e-05, + "loss": 2.3865, + "step": 3228000 + }, + { + "epoch": 16.0, + "learning_rate": 4.200532839880502e-05, + "loss": 2.3693, + "step": 3228500 + }, + { + "epoch": 16.0, + "learning_rate": 4.200409228955178e-05, + "loss": 2.3664, + "step": 3229000 + }, + { + "epoch": 16.0, + "eval_accuracy": 0.6492868439902124, + "eval_accuracy_mlm": 0.6034055016175841, + "eval_accuracy_nsp": 0.8654450323385329, + "eval_loss": 2.379127025604248, + "eval_runtime": 145.9487, + "eval_samples_per_second": 1746.908, + "eval_steps_per_second": 72.793, + "step": 3229488 + }, + { + "epoch": 16.0, + "learning_rate": 4.20028537031257e-05, + "loss": 2.3991, + "step": 3229500 + }, + { + "epoch": 16.0, + "learning_rate": 4.2001615116699614e-05, + "loss": 2.3311, + "step": 3230000 + }, + { + "epoch": 16.01, + "learning_rate": 4.200037653027353e-05, + "loss": 2.379, + "step": 3230500 + }, + { + "epoch": 16.01, + "learning_rate": 4.199913794384745e-05, + "loss": 2.3483, + "step": 3231000 + }, + { + "epoch": 16.01, + "learning_rate": 4.1997899357421365e-05, + "loss": 2.3625, + "step": 3231500 + }, + { + "epoch": 16.01, + "learning_rate": 4.199666077099528e-05, + "loss": 2.3739, + "step": 3232000 + }, + { + "epoch": 16.01, + "learning_rate": 4.199542218456919e-05, + "loss": 2.3595, + "step": 3232500 + }, + { + "epoch": 16.02, + "learning_rate": 4.199418359814311e-05, + "loss": 2.3672, + "step": 3233000 + }, + { + "epoch": 16.02, + "learning_rate": 4.1992947488889885e-05, + "loss": 2.3369, + "step": 3233500 + }, + { + "epoch": 16.02, + "learning_rate": 4.19917089024638e-05, + "loss": 2.3675, + "step": 3234000 + }, + { + "epoch": 16.02, + "learning_rate": 4.199047031603772e-05, + "loss": 2.3456, + "step": 3234500 + }, + { + "epoch": 16.03, + "learning_rate": 4.1989231729611635e-05, + "loss": 2.345, + "step": 3235000 + }, + { + "epoch": 16.03, + "learning_rate": 4.1987993143185545e-05, + "loss": 2.3621, + "step": 3235500 + }, + { + "epoch": 16.03, + "learning_rate": 4.198675455675946e-05, + "loss": 2.3692, + "step": 3236000 + }, + { + "epoch": 16.03, + "learning_rate": 4.198551597033338e-05, + "loss": 2.3583, + "step": 3236500 + }, + { + "epoch": 16.04, + "learning_rate": 4.1984277383907296e-05, + "loss": 2.3604, + "step": 3237000 + }, + { + "epoch": 16.04, + "learning_rate": 4.198303879748121e-05, + "loss": 2.3367, + "step": 3237500 + }, + { + "epoch": 16.04, + "learning_rate": 4.198180268822798e-05, + "loss": 2.3517, + "step": 3238000 + }, + { + "epoch": 16.04, + "learning_rate": 4.19805641018019e-05, + "loss": 2.3562, + "step": 3238500 + }, + { + "epoch": 16.05, + "learning_rate": 4.197932551537581e-05, + "loss": 2.3516, + "step": 3239000 + }, + { + "epoch": 16.05, + "learning_rate": 4.1978086928949726e-05, + "loss": 2.3629, + "step": 3239500 + }, + { + "epoch": 16.05, + "learning_rate": 4.197684834252364e-05, + "loss": 2.3508, + "step": 3240000 + }, + { + "epoch": 16.05, + "learning_rate": 4.197560975609756e-05, + "loss": 2.3546, + "step": 3240500 + }, + { + "epoch": 16.06, + "learning_rate": 4.1974373646844335e-05, + "loss": 2.3317, + "step": 3241000 + }, + { + "epoch": 16.06, + "learning_rate": 4.19731375375911e-05, + "loss": 2.3723, + "step": 3241500 + }, + { + "epoch": 16.06, + "learning_rate": 4.1971898951165014e-05, + "loss": 2.3662, + "step": 3242000 + }, + { + "epoch": 16.06, + "learning_rate": 4.197066036473893e-05, + "loss": 2.3793, + "step": 3242500 + }, + { + "epoch": 16.07, + "learning_rate": 4.196942177831285e-05, + "loss": 2.3432, + "step": 3243000 + }, + { + "epoch": 16.07, + "learning_rate": 4.196818566905962e-05, + "loss": 2.357, + "step": 3243500 + }, + { + "epoch": 16.07, + "learning_rate": 4.1966947082633534e-05, + "loss": 2.3268, + "step": 3244000 + }, + { + "epoch": 16.07, + "learning_rate": 4.196570849620745e-05, + "loss": 2.3552, + "step": 3244500 + }, + { + "epoch": 16.08, + "learning_rate": 4.196446990978137e-05, + "loss": 2.3761, + "step": 3245000 + }, + { + "epoch": 16.08, + "learning_rate": 4.1963231323355285e-05, + "loss": 2.3717, + "step": 3245500 + }, + { + "epoch": 16.08, + "learning_rate": 4.19619927369292e-05, + "loss": 2.3475, + "step": 3246000 + }, + { + "epoch": 16.08, + "learning_rate": 4.196075415050312e-05, + "loss": 2.3493, + "step": 3246500 + }, + { + "epoch": 16.09, + "learning_rate": 4.1959515564077036e-05, + "loss": 2.3632, + "step": 3247000 + }, + { + "epoch": 16.09, + "learning_rate": 4.1958279454823804e-05, + "loss": 2.3624, + "step": 3247500 + }, + { + "epoch": 16.09, + "learning_rate": 4.1957040868397715e-05, + "loss": 2.3572, + "step": 3248000 + }, + { + "epoch": 16.09, + "learning_rate": 4.195580228197163e-05, + "loss": 2.3751, + "step": 3248500 + }, + { + "epoch": 16.1, + "learning_rate": 4.195456369554555e-05, + "loss": 2.3589, + "step": 3249000 + }, + { + "epoch": 16.1, + "learning_rate": 4.195332758629232e-05, + "loss": 2.3459, + "step": 3249500 + }, + { + "epoch": 16.1, + "learning_rate": 4.1952088999866234e-05, + "loss": 2.3791, + "step": 3250000 + }, + { + "epoch": 16.1, + "learning_rate": 4.195085041344015e-05, + "loss": 2.3528, + "step": 3250500 + }, + { + "epoch": 16.11, + "learning_rate": 4.194961182701407e-05, + "loss": 2.3381, + "step": 3251000 + }, + { + "epoch": 16.11, + "learning_rate": 4.1948373240587985e-05, + "loss": 2.3431, + "step": 3251500 + }, + { + "epoch": 16.11, + "learning_rate": 4.19471346541619e-05, + "loss": 2.3478, + "step": 3252000 + }, + { + "epoch": 16.11, + "learning_rate": 4.194589606773582e-05, + "loss": 2.3681, + "step": 3252500 + }, + { + "epoch": 16.12, + "learning_rate": 4.1944657481309736e-05, + "loss": 2.3455, + "step": 3253000 + }, + { + "epoch": 16.12, + "learning_rate": 4.194341889488365e-05, + "loss": 2.3379, + "step": 3253500 + }, + { + "epoch": 16.12, + "learning_rate": 4.194218030845757e-05, + "loss": 2.3439, + "step": 3254000 + }, + { + "epoch": 16.12, + "learning_rate": 4.194094172203148e-05, + "loss": 2.3919, + "step": 3254500 + }, + { + "epoch": 16.13, + "learning_rate": 4.193970561277825e-05, + "loss": 2.3615, + "step": 3255000 + }, + { + "epoch": 16.13, + "learning_rate": 4.1938467026352165e-05, + "loss": 2.3455, + "step": 3255500 + }, + { + "epoch": 16.13, + "learning_rate": 4.193722843992608e-05, + "loss": 2.3646, + "step": 3256000 + }, + { + "epoch": 16.13, + "learning_rate": 4.19359898535e-05, + "loss": 2.3457, + "step": 3256500 + }, + { + "epoch": 16.14, + "learning_rate": 4.1934751267073916e-05, + "loss": 2.364, + "step": 3257000 + }, + { + "epoch": 16.14, + "learning_rate": 4.1933512680647826e-05, + "loss": 2.3615, + "step": 3257500 + }, + { + "epoch": 16.14, + "learning_rate": 4.193227409422174e-05, + "loss": 2.3757, + "step": 3258000 + }, + { + "epoch": 16.14, + "learning_rate": 4.193104046214137e-05, + "loss": 2.3399, + "step": 3258500 + }, + { + "epoch": 16.15, + "learning_rate": 4.192980187571529e-05, + "loss": 2.3593, + "step": 3259000 + }, + { + "epoch": 16.15, + "learning_rate": 4.192856576646205e-05, + "loss": 2.378, + "step": 3259500 + }, + { + "epoch": 16.15, + "learning_rate": 4.192732718003597e-05, + "loss": 2.362, + "step": 3260000 + }, + { + "epoch": 16.15, + "learning_rate": 4.1926088593609884e-05, + "loss": 2.381, + "step": 3260500 + }, + { + "epoch": 16.16, + "learning_rate": 4.19248500071838e-05, + "loss": 2.3708, + "step": 3261000 + }, + { + "epoch": 16.16, + "learning_rate": 4.192361142075772e-05, + "loss": 2.3763, + "step": 3261500 + }, + { + "epoch": 16.16, + "learning_rate": 4.1922372834331634e-05, + "loss": 2.3587, + "step": 3262000 + }, + { + "epoch": 16.16, + "learning_rate": 4.192113424790555e-05, + "loss": 2.3468, + "step": 3262500 + }, + { + "epoch": 16.17, + "learning_rate": 4.191989566147947e-05, + "loss": 2.3629, + "step": 3263000 + }, + { + "epoch": 16.17, + "learning_rate": 4.1918657075053385e-05, + "loss": 2.3905, + "step": 3263500 + }, + { + "epoch": 16.17, + "learning_rate": 4.1917423442973006e-05, + "loss": 2.3469, + "step": 3264000 + }, + { + "epoch": 16.17, + "learning_rate": 4.191618485654692e-05, + "loss": 2.3685, + "step": 3264500 + }, + { + "epoch": 16.18, + "learning_rate": 4.191494627012083e-05, + "loss": 2.3798, + "step": 3265000 + }, + { + "epoch": 16.18, + "learning_rate": 4.191370768369475e-05, + "loss": 2.368, + "step": 3265500 + }, + { + "epoch": 16.18, + "learning_rate": 4.191246909726867e-05, + "loss": 2.3569, + "step": 3266000 + }, + { + "epoch": 16.18, + "learning_rate": 4.1911230510842584e-05, + "loss": 2.3642, + "step": 3266500 + }, + { + "epoch": 16.19, + "learning_rate": 4.19099919244165e-05, + "loss": 2.339, + "step": 3267000 + }, + { + "epoch": 16.19, + "learning_rate": 4.190875333799042e-05, + "loss": 2.392, + "step": 3267500 + }, + { + "epoch": 16.19, + "learning_rate": 4.1907514751564335e-05, + "loss": 2.3535, + "step": 3268000 + }, + { + "epoch": 16.19, + "learning_rate": 4.190627616513825e-05, + "loss": 2.3608, + "step": 3268500 + }, + { + "epoch": 16.2, + "learning_rate": 4.190504005588502e-05, + "loss": 2.3881, + "step": 3269000 + }, + { + "epoch": 16.2, + "learning_rate": 4.190380146945894e-05, + "loss": 2.354, + "step": 3269500 + }, + { + "epoch": 16.2, + "learning_rate": 4.1902562883032854e-05, + "loss": 2.3587, + "step": 3270000 + }, + { + "epoch": 16.2, + "learning_rate": 4.190132429660677e-05, + "loss": 2.375, + "step": 3270500 + }, + { + "epoch": 16.21, + "learning_rate": 4.190008571018069e-05, + "loss": 2.3826, + "step": 3271000 + }, + { + "epoch": 16.21, + "learning_rate": 4.1898847123754605e-05, + "loss": 2.3526, + "step": 3271500 + }, + { + "epoch": 16.21, + "learning_rate": 4.189760853732852e-05, + "loss": 2.3849, + "step": 3272000 + }, + { + "epoch": 16.21, + "learning_rate": 4.189636995090244e-05, + "loss": 2.3827, + "step": 3272500 + }, + { + "epoch": 16.22, + "learning_rate": 4.1895131364476356e-05, + "loss": 2.3505, + "step": 3273000 + }, + { + "epoch": 16.22, + "learning_rate": 4.189389277805027e-05, + "loss": 2.3436, + "step": 3273500 + }, + { + "epoch": 16.22, + "learning_rate": 4.189265419162419e-05, + "loss": 2.3404, + "step": 3274000 + }, + { + "epoch": 16.22, + "learning_rate": 4.1891415605198106e-05, + "loss": 2.3904, + "step": 3274500 + }, + { + "epoch": 16.23, + "learning_rate": 4.189017701877202e-05, + "loss": 2.3878, + "step": 3275000 + }, + { + "epoch": 16.23, + "learning_rate": 4.188894338669164e-05, + "loss": 2.372, + "step": 3275500 + }, + { + "epoch": 16.23, + "learning_rate": 4.1887704800265554e-05, + "loss": 2.3541, + "step": 3276000 + }, + { + "epoch": 16.23, + "learning_rate": 4.188646621383947e-05, + "loss": 2.3633, + "step": 3276500 + }, + { + "epoch": 16.24, + "learning_rate": 4.188522762741339e-05, + "loss": 2.3915, + "step": 3277000 + }, + { + "epoch": 16.24, + "learning_rate": 4.1883989040987305e-05, + "loss": 2.3363, + "step": 3277500 + }, + { + "epoch": 16.24, + "learning_rate": 4.188275045456122e-05, + "loss": 2.3568, + "step": 3278000 + }, + { + "epoch": 16.24, + "learning_rate": 4.188151186813514e-05, + "loss": 2.3588, + "step": 3278500 + }, + { + "epoch": 16.25, + "learning_rate": 4.18802757588819e-05, + "loss": 2.3645, + "step": 3279000 + }, + { + "epoch": 16.25, + "learning_rate": 4.187903717245582e-05, + "loss": 2.3591, + "step": 3279500 + }, + { + "epoch": 16.25, + "learning_rate": 4.1877798586029735e-05, + "loss": 2.3448, + "step": 3280000 + }, + { + "epoch": 16.25, + "learning_rate": 4.187655999960365e-05, + "loss": 2.3665, + "step": 3280500 + }, + { + "epoch": 16.26, + "learning_rate": 4.187532389035042e-05, + "loss": 2.3366, + "step": 3281000 + }, + { + "epoch": 16.26, + "learning_rate": 4.187408530392434e-05, + "loss": 2.3512, + "step": 3281500 + }, + { + "epoch": 16.26, + "learning_rate": 4.1872846717498254e-05, + "loss": 2.3426, + "step": 3282000 + }, + { + "epoch": 16.26, + "learning_rate": 4.187160813107217e-05, + "loss": 2.3714, + "step": 3282500 + }, + { + "epoch": 16.27, + "learning_rate": 4.187036954464609e-05, + "loss": 2.3823, + "step": 3283000 + }, + { + "epoch": 16.27, + "learning_rate": 4.186913343539286e-05, + "loss": 2.3549, + "step": 3283500 + }, + { + "epoch": 16.27, + "learning_rate": 4.1867894848966774e-05, + "loss": 2.3734, + "step": 3284000 + }, + { + "epoch": 16.27, + "learning_rate": 4.186665626254069e-05, + "loss": 2.372, + "step": 3284500 + }, + { + "epoch": 16.28, + "learning_rate": 4.18654176761146e-05, + "loss": 2.3525, + "step": 3285000 + }, + { + "epoch": 16.28, + "learning_rate": 4.186417908968852e-05, + "loss": 2.3635, + "step": 3285500 + }, + { + "epoch": 16.28, + "learning_rate": 4.1862940503262435e-05, + "loss": 2.3709, + "step": 3286000 + }, + { + "epoch": 16.28, + "learning_rate": 4.186170191683635e-05, + "loss": 2.364, + "step": 3286500 + }, + { + "epoch": 16.28, + "learning_rate": 4.186046333041027e-05, + "loss": 2.3777, + "step": 3287000 + }, + { + "epoch": 16.29, + "learning_rate": 4.185922722115704e-05, + "loss": 2.3548, + "step": 3287500 + }, + { + "epoch": 16.29, + "learning_rate": 4.1857988634730954e-05, + "loss": 2.3503, + "step": 3288000 + }, + { + "epoch": 16.29, + "learning_rate": 4.185675004830487e-05, + "loss": 2.3702, + "step": 3288500 + }, + { + "epoch": 16.29, + "learning_rate": 4.185551146187879e-05, + "loss": 2.3839, + "step": 3289000 + }, + { + "epoch": 16.3, + "learning_rate": 4.185427535262556e-05, + "loss": 2.3612, + "step": 3289500 + }, + { + "epoch": 16.3, + "learning_rate": 4.1853039243372326e-05, + "loss": 2.3803, + "step": 3290000 + }, + { + "epoch": 16.3, + "learning_rate": 4.185180065694624e-05, + "loss": 2.3574, + "step": 3290500 + }, + { + "epoch": 16.3, + "learning_rate": 4.185056207052016e-05, + "loss": 2.3609, + "step": 3291000 + }, + { + "epoch": 16.31, + "learning_rate": 4.184932348409408e-05, + "loss": 2.3628, + "step": 3291500 + }, + { + "epoch": 16.31, + "learning_rate": 4.1848087374840846e-05, + "loss": 2.3595, + "step": 3292000 + }, + { + "epoch": 16.31, + "learning_rate": 4.1846851265587614e-05, + "loss": 2.3629, + "step": 3292500 + }, + { + "epoch": 16.31, + "learning_rate": 4.1845612679161525e-05, + "loss": 2.3337, + "step": 3293000 + }, + { + "epoch": 16.32, + "learning_rate": 4.184437409273544e-05, + "loss": 2.3881, + "step": 3293500 + }, + { + "epoch": 16.32, + "learning_rate": 4.184313550630936e-05, + "loss": 2.3534, + "step": 3294000 + }, + { + "epoch": 16.32, + "learning_rate": 4.184189939705613e-05, + "loss": 2.4006, + "step": 3294500 + }, + { + "epoch": 16.32, + "learning_rate": 4.1840660810630044e-05, + "loss": 2.351, + "step": 3295000 + }, + { + "epoch": 16.33, + "learning_rate": 4.183942222420396e-05, + "loss": 2.3661, + "step": 3295500 + }, + { + "epoch": 16.33, + "learning_rate": 4.183818363777788e-05, + "loss": 2.3705, + "step": 3296000 + }, + { + "epoch": 16.33, + "learning_rate": 4.1836945051351795e-05, + "loss": 2.3444, + "step": 3296500 + }, + { + "epoch": 16.33, + "learning_rate": 4.183570646492571e-05, + "loss": 2.3706, + "step": 3297000 + }, + { + "epoch": 16.34, + "learning_rate": 4.183446787849963e-05, + "loss": 2.3554, + "step": 3297500 + }, + { + "epoch": 16.34, + "learning_rate": 4.1833229292073546e-05, + "loss": 2.372, + "step": 3298000 + }, + { + "epoch": 16.34, + "learning_rate": 4.183199070564746e-05, + "loss": 2.3748, + "step": 3298500 + }, + { + "epoch": 16.34, + "learning_rate": 4.183075211922138e-05, + "loss": 2.3779, + "step": 3299000 + }, + { + "epoch": 16.35, + "learning_rate": 4.1829513532795296e-05, + "loss": 2.3628, + "step": 3299500 + }, + { + "epoch": 16.35, + "learning_rate": 4.182827494636921e-05, + "loss": 2.3664, + "step": 3300000 + }, + { + "epoch": 16.35, + "learning_rate": 4.182703635994313e-05, + "loss": 2.3823, + "step": 3300500 + }, + { + "epoch": 16.35, + "learning_rate": 4.182579777351705e-05, + "loss": 2.3607, + "step": 3301000 + }, + { + "epoch": 16.36, + "learning_rate": 4.1824559187090964e-05, + "loss": 2.3748, + "step": 3301500 + }, + { + "epoch": 16.36, + "learning_rate": 4.1823320600664874e-05, + "loss": 2.3491, + "step": 3302000 + }, + { + "epoch": 16.36, + "learning_rate": 4.182208201423879e-05, + "loss": 2.3577, + "step": 3302500 + }, + { + "epoch": 16.36, + "learning_rate": 4.182084342781271e-05, + "loss": 2.3337, + "step": 3303000 + }, + { + "epoch": 16.37, + "learning_rate": 4.1819604841386625e-05, + "loss": 2.3717, + "step": 3303500 + }, + { + "epoch": 16.37, + "learning_rate": 4.181836625496054e-05, + "loss": 2.3637, + "step": 3304000 + }, + { + "epoch": 16.37, + "learning_rate": 4.181712766853446e-05, + "loss": 2.3689, + "step": 3304500 + }, + { + "epoch": 16.37, + "learning_rate": 4.181589155928122e-05, + "loss": 2.3704, + "step": 3305000 + }, + { + "epoch": 16.38, + "learning_rate": 4.181465297285514e-05, + "loss": 2.3826, + "step": 3305500 + }, + { + "epoch": 16.38, + "learning_rate": 4.1813414386429055e-05, + "loss": 2.3344, + "step": 3306000 + }, + { + "epoch": 16.38, + "learning_rate": 4.181217580000297e-05, + "loss": 2.3534, + "step": 3306500 + }, + { + "epoch": 16.38, + "learning_rate": 4.181093721357689e-05, + "loss": 2.3628, + "step": 3307000 + }, + { + "epoch": 16.39, + "learning_rate": 4.1809698627150806e-05, + "loss": 2.3458, + "step": 3307500 + }, + { + "epoch": 16.39, + "learning_rate": 4.180846251789758e-05, + "loss": 2.3597, + "step": 3308000 + }, + { + "epoch": 16.39, + "learning_rate": 4.180722640864434e-05, + "loss": 2.3612, + "step": 3308500 + }, + { + "epoch": 16.39, + "learning_rate": 4.180598782221826e-05, + "loss": 2.348, + "step": 3309000 + }, + { + "epoch": 16.4, + "learning_rate": 4.180474923579218e-05, + "loss": 2.3849, + "step": 3309500 + }, + { + "epoch": 16.4, + "learning_rate": 4.1803510649366094e-05, + "loss": 2.3843, + "step": 3310000 + }, + { + "epoch": 16.4, + "learning_rate": 4.180227206294001e-05, + "loss": 2.3771, + "step": 3310500 + }, + { + "epoch": 16.4, + "learning_rate": 4.180103347651393e-05, + "loss": 2.3771, + "step": 3311000 + }, + { + "epoch": 16.41, + "learning_rate": 4.179979489008784e-05, + "loss": 2.3497, + "step": 3311500 + }, + { + "epoch": 16.41, + "learning_rate": 4.1798556303661755e-05, + "loss": 2.348, + "step": 3312000 + }, + { + "epoch": 16.41, + "learning_rate": 4.179732019440853e-05, + "loss": 2.3576, + "step": 3312500 + }, + { + "epoch": 16.41, + "learning_rate": 4.179608160798245e-05, + "loss": 2.3733, + "step": 3313000 + }, + { + "epoch": 16.42, + "learning_rate": 4.1794843021556364e-05, + "loss": 2.3712, + "step": 3313500 + }, + { + "epoch": 16.42, + "learning_rate": 4.179360443513028e-05, + "loss": 2.3685, + "step": 3314000 + }, + { + "epoch": 16.42, + "learning_rate": 4.179236832587704e-05, + "loss": 2.3851, + "step": 3314500 + }, + { + "epoch": 16.42, + "learning_rate": 4.179112973945096e-05, + "loss": 2.3961, + "step": 3315000 + }, + { + "epoch": 16.43, + "learning_rate": 4.178989115302488e-05, + "loss": 2.4049, + "step": 3315500 + }, + { + "epoch": 16.43, + "learning_rate": 4.1788652566598794e-05, + "loss": 2.3543, + "step": 3316000 + }, + { + "epoch": 16.43, + "learning_rate": 4.178741398017271e-05, + "loss": 2.3707, + "step": 3316500 + }, + { + "epoch": 16.43, + "learning_rate": 4.178617539374663e-05, + "loss": 2.3482, + "step": 3317000 + }, + { + "epoch": 16.44, + "learning_rate": 4.178493680732054e-05, + "loss": 2.3832, + "step": 3317500 + }, + { + "epoch": 16.44, + "learning_rate": 4.1783700698067314e-05, + "loss": 2.4093, + "step": 3318000 + }, + { + "epoch": 16.44, + "learning_rate": 4.178246458881408e-05, + "loss": 2.3686, + "step": 3318500 + }, + { + "epoch": 16.44, + "learning_rate": 4.1781226002388e-05, + "loss": 2.3364, + "step": 3319000 + }, + { + "epoch": 16.45, + "learning_rate": 4.177998989313476e-05, + "loss": 2.3651, + "step": 3319500 + }, + { + "epoch": 16.45, + "learning_rate": 4.177875130670868e-05, + "loss": 2.3806, + "step": 3320000 + }, + { + "epoch": 16.45, + "learning_rate": 4.1777512720282595e-05, + "loss": 2.3776, + "step": 3320500 + }, + { + "epoch": 16.45, + "learning_rate": 4.177627413385651e-05, + "loss": 2.3673, + "step": 3321000 + }, + { + "epoch": 16.46, + "learning_rate": 4.177503554743043e-05, + "loss": 2.3572, + "step": 3321500 + }, + { + "epoch": 16.46, + "learning_rate": 4.1773796961004346e-05, + "loss": 2.3464, + "step": 3322000 + }, + { + "epoch": 16.46, + "learning_rate": 4.177255837457826e-05, + "loss": 2.3861, + "step": 3322500 + }, + { + "epoch": 16.46, + "learning_rate": 4.177131978815218e-05, + "loss": 2.3693, + "step": 3323000 + }, + { + "epoch": 16.47, + "learning_rate": 4.177008367889895e-05, + "loss": 2.362, + "step": 3323500 + }, + { + "epoch": 16.47, + "learning_rate": 4.1768845092472866e-05, + "loss": 2.3583, + "step": 3324000 + }, + { + "epoch": 16.47, + "learning_rate": 4.176760650604678e-05, + "loss": 2.4084, + "step": 3324500 + }, + { + "epoch": 16.47, + "learning_rate": 4.17663679196207e-05, + "loss": 2.3874, + "step": 3325000 + }, + { + "epoch": 16.48, + "learning_rate": 4.1765129333194617e-05, + "loss": 2.4014, + "step": 3325500 + }, + { + "epoch": 16.48, + "learning_rate": 4.1763890746768533e-05, + "loss": 2.3739, + "step": 3326000 + }, + { + "epoch": 16.48, + "learning_rate": 4.1762652160342444e-05, + "loss": 2.362, + "step": 3326500 + }, + { + "epoch": 16.48, + "learning_rate": 4.176141357391636e-05, + "loss": 2.3732, + "step": 3327000 + }, + { + "epoch": 16.49, + "learning_rate": 4.176017498749028e-05, + "loss": 2.3626, + "step": 3327500 + }, + { + "epoch": 16.49, + "learning_rate": 4.1758936401064194e-05, + "loss": 2.3443, + "step": 3328000 + }, + { + "epoch": 16.49, + "learning_rate": 4.175770029181096e-05, + "loss": 2.3804, + "step": 3328500 + }, + { + "epoch": 16.49, + "learning_rate": 4.175646170538488e-05, + "loss": 2.3758, + "step": 3329000 + }, + { + "epoch": 16.5, + "learning_rate": 4.17552231189588e-05, + "loss": 2.3673, + "step": 3329500 + }, + { + "epoch": 16.5, + "learning_rate": 4.1753984532532714e-05, + "loss": 2.3549, + "step": 3330000 + }, + { + "epoch": 16.5, + "learning_rate": 4.175274594610663e-05, + "loss": 2.362, + "step": 3330500 + }, + { + "epoch": 16.5, + "learning_rate": 4.175150735968055e-05, + "loss": 2.3912, + "step": 3331000 + }, + { + "epoch": 16.51, + "learning_rate": 4.1750268773254465e-05, + "loss": 2.3859, + "step": 3331500 + }, + { + "epoch": 16.51, + "learning_rate": 4.174903018682838e-05, + "loss": 2.3581, + "step": 3332000 + }, + { + "epoch": 16.51, + "learning_rate": 4.17477916004023e-05, + "loss": 2.3842, + "step": 3332500 + }, + { + "epoch": 16.51, + "learning_rate": 4.1746553013976216e-05, + "loss": 2.3812, + "step": 3333000 + }, + { + "epoch": 16.52, + "learning_rate": 4.1745314427550126e-05, + "loss": 2.3868, + "step": 3333500 + }, + { + "epoch": 16.52, + "learning_rate": 4.174407584112404e-05, + "loss": 2.3852, + "step": 3334000 + }, + { + "epoch": 16.52, + "learning_rate": 4.174283725469796e-05, + "loss": 2.3561, + "step": 3334500 + }, + { + "epoch": 16.52, + "learning_rate": 4.1741598668271877e-05, + "loss": 2.3751, + "step": 3335000 + }, + { + "epoch": 16.53, + "learning_rate": 4.1740362559018645e-05, + "loss": 2.3717, + "step": 3335500 + }, + { + "epoch": 16.53, + "learning_rate": 4.173912397259256e-05, + "loss": 2.3852, + "step": 3336000 + }, + { + "epoch": 16.53, + "learning_rate": 4.173788786333933e-05, + "loss": 2.4165, + "step": 3336500 + }, + { + "epoch": 16.53, + "learning_rate": 4.173664927691325e-05, + "loss": 2.3534, + "step": 3337000 + }, + { + "epoch": 16.54, + "learning_rate": 4.1735410690487165e-05, + "loss": 2.3673, + "step": 3337500 + }, + { + "epoch": 16.54, + "learning_rate": 4.173417210406108e-05, + "loss": 2.3468, + "step": 3338000 + }, + { + "epoch": 16.54, + "learning_rate": 4.1732933517635e-05, + "loss": 2.3575, + "step": 3338500 + }, + { + "epoch": 16.54, + "learning_rate": 4.1731694931208916e-05, + "loss": 2.4043, + "step": 3339000 + }, + { + "epoch": 16.55, + "learning_rate": 4.1730456344782826e-05, + "loss": 2.3744, + "step": 3339500 + }, + { + "epoch": 16.55, + "learning_rate": 4.172921775835674e-05, + "loss": 2.3968, + "step": 3340000 + }, + { + "epoch": 16.55, + "learning_rate": 4.172797917193066e-05, + "loss": 2.389, + "step": 3340500 + }, + { + "epoch": 16.55, + "learning_rate": 4.172674306267743e-05, + "loss": 2.3549, + "step": 3341000 + }, + { + "epoch": 16.55, + "learning_rate": 4.17255069534242e-05, + "loss": 2.3684, + "step": 3341500 + }, + { + "epoch": 16.56, + "learning_rate": 4.1724268366998114e-05, + "loss": 2.379, + "step": 3342000 + }, + { + "epoch": 16.56, + "learning_rate": 4.172302978057203e-05, + "loss": 2.3729, + "step": 3342500 + }, + { + "epoch": 16.56, + "learning_rate": 4.172179119414595e-05, + "loss": 2.3568, + "step": 3343000 + }, + { + "epoch": 16.56, + "learning_rate": 4.172055508489272e-05, + "loss": 2.3773, + "step": 3343500 + }, + { + "epoch": 16.57, + "learning_rate": 4.1719316498466634e-05, + "loss": 2.3581, + "step": 3344000 + }, + { + "epoch": 16.57, + "learning_rate": 4.171807791204055e-05, + "loss": 2.368, + "step": 3344500 + }, + { + "epoch": 16.57, + "learning_rate": 4.171683932561447e-05, + "loss": 2.3668, + "step": 3345000 + }, + { + "epoch": 16.57, + "learning_rate": 4.1715600739188385e-05, + "loss": 2.3714, + "step": 3345500 + }, + { + "epoch": 16.58, + "learning_rate": 4.17143621527623e-05, + "loss": 2.3788, + "step": 3346000 + }, + { + "epoch": 16.58, + "learning_rate": 4.171312356633622e-05, + "loss": 2.3737, + "step": 3346500 + }, + { + "epoch": 16.58, + "learning_rate": 4.171188497991013e-05, + "loss": 2.3877, + "step": 3347000 + }, + { + "epoch": 16.58, + "learning_rate": 4.1710646393484046e-05, + "loss": 2.359, + "step": 3347500 + }, + { + "epoch": 16.59, + "learning_rate": 4.170940780705796e-05, + "loss": 2.3579, + "step": 3348000 + }, + { + "epoch": 16.59, + "learning_rate": 4.170817169780473e-05, + "loss": 2.3688, + "step": 3348500 + }, + { + "epoch": 16.59, + "learning_rate": 4.170693311137865e-05, + "loss": 2.3799, + "step": 3349000 + }, + { + "epoch": 16.59, + "learning_rate": 4.170569700212542e-05, + "loss": 2.3597, + "step": 3349500 + }, + { + "epoch": 16.6, + "learning_rate": 4.1704458415699334e-05, + "loss": 2.3678, + "step": 3350000 + }, + { + "epoch": 16.6, + "learning_rate": 4.170321982927325e-05, + "loss": 2.3693, + "step": 3350500 + }, + { + "epoch": 16.6, + "learning_rate": 4.170198124284717e-05, + "loss": 2.3535, + "step": 3351000 + }, + { + "epoch": 16.6, + "learning_rate": 4.1700742656421085e-05, + "loss": 2.3655, + "step": 3351500 + }, + { + "epoch": 16.61, + "learning_rate": 4.169950654716785e-05, + "loss": 2.3376, + "step": 3352000 + }, + { + "epoch": 16.61, + "learning_rate": 4.1698267960741764e-05, + "loss": 2.3532, + "step": 3352500 + }, + { + "epoch": 16.61, + "learning_rate": 4.169702937431568e-05, + "loss": 2.3747, + "step": 3353000 + }, + { + "epoch": 16.61, + "learning_rate": 4.16957907878896e-05, + "loss": 2.3603, + "step": 3353500 + }, + { + "epoch": 16.62, + "learning_rate": 4.1694552201463515e-05, + "loss": 2.377, + "step": 3354000 + }, + { + "epoch": 16.62, + "learning_rate": 4.169331361503743e-05, + "loss": 2.3917, + "step": 3354500 + }, + { + "epoch": 16.62, + "learning_rate": 4.16920775057842e-05, + "loss": 2.3773, + "step": 3355000 + }, + { + "epoch": 16.62, + "learning_rate": 4.169083891935812e-05, + "loss": 2.3885, + "step": 3355500 + }, + { + "epoch": 16.63, + "learning_rate": 4.1689600332932034e-05, + "loss": 2.3602, + "step": 3356000 + }, + { + "epoch": 16.63, + "learning_rate": 4.16883642236788e-05, + "loss": 2.3732, + "step": 3356500 + }, + { + "epoch": 16.63, + "learning_rate": 4.168712563725271e-05, + "loss": 2.3674, + "step": 3357000 + }, + { + "epoch": 16.63, + "learning_rate": 4.168588705082663e-05, + "loss": 2.3854, + "step": 3357500 + }, + { + "epoch": 16.64, + "learning_rate": 4.168464846440055e-05, + "loss": 2.3712, + "step": 3358000 + }, + { + "epoch": 16.64, + "learning_rate": 4.1683409877974464e-05, + "loss": 2.3552, + "step": 3358500 + }, + { + "epoch": 16.64, + "learning_rate": 4.168217129154838e-05, + "loss": 2.3662, + "step": 3359000 + }, + { + "epoch": 16.64, + "learning_rate": 4.168093518229515e-05, + "loss": 2.3816, + "step": 3359500 + }, + { + "epoch": 16.65, + "learning_rate": 4.1679696595869067e-05, + "loss": 2.3567, + "step": 3360000 + }, + { + "epoch": 16.65, + "learning_rate": 4.1678458009442983e-05, + "loss": 2.3709, + "step": 3360500 + }, + { + "epoch": 16.65, + "learning_rate": 4.16772194230169e-05, + "loss": 2.3764, + "step": 3361000 + }, + { + "epoch": 16.65, + "learning_rate": 4.167598083659082e-05, + "loss": 2.375, + "step": 3361500 + }, + { + "epoch": 16.66, + "learning_rate": 4.1674742250164734e-05, + "loss": 2.3625, + "step": 3362000 + }, + { + "epoch": 16.66, + "learning_rate": 4.167350366373865e-05, + "loss": 2.3537, + "step": 3362500 + }, + { + "epoch": 16.66, + "learning_rate": 4.167226507731257e-05, + "loss": 2.3476, + "step": 3363000 + }, + { + "epoch": 16.66, + "learning_rate": 4.1671026490886485e-05, + "loss": 2.4053, + "step": 3363500 + }, + { + "epoch": 16.67, + "learning_rate": 4.16697879044604e-05, + "loss": 2.3621, + "step": 3364000 + }, + { + "epoch": 16.67, + "learning_rate": 4.166854931803432e-05, + "loss": 2.3565, + "step": 3364500 + }, + { + "epoch": 16.67, + "learning_rate": 4.1667310731608236e-05, + "loss": 2.4182, + "step": 3365000 + }, + { + "epoch": 16.67, + "learning_rate": 4.1666074622355e-05, + "loss": 2.378, + "step": 3365500 + }, + { + "epoch": 16.68, + "learning_rate": 4.1664836035928915e-05, + "loss": 2.3553, + "step": 3366000 + }, + { + "epoch": 16.68, + "learning_rate": 4.166359744950283e-05, + "loss": 2.3641, + "step": 3366500 + }, + { + "epoch": 16.68, + "learning_rate": 4.166235886307675e-05, + "loss": 2.3704, + "step": 3367000 + }, + { + "epoch": 16.68, + "learning_rate": 4.1661120276650666e-05, + "loss": 2.3547, + "step": 3367500 + }, + { + "epoch": 16.69, + "learning_rate": 4.165988169022458e-05, + "loss": 2.3821, + "step": 3368000 + }, + { + "epoch": 16.69, + "learning_rate": 4.16586431037985e-05, + "loss": 2.3792, + "step": 3368500 + }, + { + "epoch": 16.69, + "learning_rate": 4.165740699454527e-05, + "loss": 2.3716, + "step": 3369000 + }, + { + "epoch": 16.69, + "learning_rate": 4.1656168408119185e-05, + "loss": 2.3796, + "step": 3369500 + }, + { + "epoch": 16.7, + "learning_rate": 4.16549298216931e-05, + "loss": 2.3907, + "step": 3370000 + }, + { + "epoch": 16.7, + "learning_rate": 4.165369123526702e-05, + "loss": 2.3926, + "step": 3370500 + }, + { + "epoch": 16.7, + "learning_rate": 4.165245512601378e-05, + "loss": 2.3875, + "step": 3371000 + }, + { + "epoch": 16.7, + "learning_rate": 4.16512165395877e-05, + "loss": 2.3632, + "step": 3371500 + }, + { + "epoch": 16.71, + "learning_rate": 4.1649977953161615e-05, + "loss": 2.3904, + "step": 3372000 + }, + { + "epoch": 16.71, + "learning_rate": 4.1648741843908384e-05, + "loss": 2.3739, + "step": 3372500 + }, + { + "epoch": 16.71, + "learning_rate": 4.16475032574823e-05, + "loss": 2.3697, + "step": 3373000 + }, + { + "epoch": 16.71, + "learning_rate": 4.164626467105622e-05, + "loss": 2.3728, + "step": 3373500 + }, + { + "epoch": 16.72, + "learning_rate": 4.164502856180299e-05, + "loss": 2.3865, + "step": 3374000 + }, + { + "epoch": 16.72, + "learning_rate": 4.16437899753769e-05, + "loss": 2.3485, + "step": 3374500 + }, + { + "epoch": 16.72, + "learning_rate": 4.164255138895082e-05, + "loss": 2.3721, + "step": 3375000 + }, + { + "epoch": 16.72, + "learning_rate": 4.164131280252474e-05, + "loss": 2.3634, + "step": 3375500 + }, + { + "epoch": 16.73, + "learning_rate": 4.1640076693271506e-05, + "loss": 2.3643, + "step": 3376000 + }, + { + "epoch": 16.73, + "learning_rate": 4.163883810684542e-05, + "loss": 2.3706, + "step": 3376500 + }, + { + "epoch": 16.73, + "learning_rate": 4.163759952041934e-05, + "loss": 2.3708, + "step": 3377000 + }, + { + "epoch": 16.73, + "learning_rate": 4.163636093399325e-05, + "loss": 2.3637, + "step": 3377500 + }, + { + "epoch": 16.74, + "learning_rate": 4.163512234756717e-05, + "loss": 2.378, + "step": 3378000 + }, + { + "epoch": 16.74, + "learning_rate": 4.1633883761141084e-05, + "loss": 2.3886, + "step": 3378500 + }, + { + "epoch": 16.74, + "learning_rate": 4.1632645174715e-05, + "loss": 2.3935, + "step": 3379000 + }, + { + "epoch": 16.74, + "learning_rate": 4.163140658828892e-05, + "loss": 2.3653, + "step": 3379500 + }, + { + "epoch": 16.75, + "learning_rate": 4.1630168001862835e-05, + "loss": 2.3607, + "step": 3380000 + }, + { + "epoch": 16.75, + "learning_rate": 4.162892941543675e-05, + "loss": 2.3704, + "step": 3380500 + }, + { + "epoch": 16.75, + "learning_rate": 4.162769082901067e-05, + "loss": 2.3722, + "step": 3381000 + }, + { + "epoch": 16.75, + "learning_rate": 4.1626452242584585e-05, + "loss": 2.3535, + "step": 3381500 + }, + { + "epoch": 16.76, + "learning_rate": 4.16252136561585e-05, + "loss": 2.3634, + "step": 3382000 + }, + { + "epoch": 16.76, + "learning_rate": 4.162397754690527e-05, + "loss": 2.3759, + "step": 3382500 + }, + { + "epoch": 16.76, + "learning_rate": 4.162273896047919e-05, + "loss": 2.3764, + "step": 3383000 + }, + { + "epoch": 16.76, + "learning_rate": 4.1621500374053105e-05, + "loss": 2.3739, + "step": 3383500 + }, + { + "epoch": 16.77, + "learning_rate": 4.1620261787627015e-05, + "loss": 2.3762, + "step": 3384000 + }, + { + "epoch": 16.77, + "learning_rate": 4.161902320120093e-05, + "loss": 2.3656, + "step": 3384500 + }, + { + "epoch": 16.77, + "learning_rate": 4.16177870919477e-05, + "loss": 2.3756, + "step": 3385000 + }, + { + "epoch": 16.77, + "learning_rate": 4.161654850552162e-05, + "loss": 2.3657, + "step": 3385500 + }, + { + "epoch": 16.78, + "learning_rate": 4.1615309919095535e-05, + "loss": 2.3836, + "step": 3386000 + }, + { + "epoch": 16.78, + "learning_rate": 4.161407133266945e-05, + "loss": 2.3893, + "step": 3386500 + }, + { + "epoch": 16.78, + "learning_rate": 4.161283274624337e-05, + "loss": 2.3778, + "step": 3387000 + }, + { + "epoch": 16.78, + "learning_rate": 4.1611594159817286e-05, + "loss": 2.3606, + "step": 3387500 + }, + { + "epoch": 16.79, + "learning_rate": 4.16103555733912e-05, + "loss": 2.3618, + "step": 3388000 + }, + { + "epoch": 16.79, + "learning_rate": 4.160911698696512e-05, + "loss": 2.3565, + "step": 3388500 + }, + { + "epoch": 16.79, + "learning_rate": 4.160788087771189e-05, + "loss": 2.3636, + "step": 3389000 + }, + { + "epoch": 16.79, + "learning_rate": 4.1606642291285805e-05, + "loss": 2.3672, + "step": 3389500 + }, + { + "epoch": 16.8, + "learning_rate": 4.160540370485972e-05, + "loss": 2.3587, + "step": 3390000 + }, + { + "epoch": 16.8, + "learning_rate": 4.160416511843364e-05, + "loss": 2.3782, + "step": 3390500 + }, + { + "epoch": 16.8, + "learning_rate": 4.160292653200755e-05, + "loss": 2.3666, + "step": 3391000 + }, + { + "epoch": 16.8, + "learning_rate": 4.160169042275432e-05, + "loss": 2.3514, + "step": 3391500 + }, + { + "epoch": 16.81, + "learning_rate": 4.1600451836328235e-05, + "loss": 2.3646, + "step": 3392000 + }, + { + "epoch": 16.81, + "learning_rate": 4.159921324990215e-05, + "loss": 2.3601, + "step": 3392500 + }, + { + "epoch": 16.81, + "learning_rate": 4.159797466347607e-05, + "loss": 2.3792, + "step": 3393000 + }, + { + "epoch": 16.81, + "learning_rate": 4.1596736077049986e-05, + "loss": 2.355, + "step": 3393500 + }, + { + "epoch": 16.82, + "learning_rate": 4.1595499967796754e-05, + "loss": 2.3727, + "step": 3394000 + }, + { + "epoch": 16.82, + "learning_rate": 4.159426138137067e-05, + "loss": 2.3774, + "step": 3394500 + }, + { + "epoch": 16.82, + "learning_rate": 4.159302279494459e-05, + "loss": 2.3607, + "step": 3395000 + }, + { + "epoch": 16.82, + "learning_rate": 4.1591784208518505e-05, + "loss": 2.3917, + "step": 3395500 + }, + { + "epoch": 16.82, + "learning_rate": 4.159054562209242e-05, + "loss": 2.3518, + "step": 3396000 + }, + { + "epoch": 16.83, + "learning_rate": 4.158930703566634e-05, + "loss": 2.3856, + "step": 3396500 + }, + { + "epoch": 16.83, + "learning_rate": 4.1588068449240256e-05, + "loss": 2.387, + "step": 3397000 + }, + { + "epoch": 16.83, + "learning_rate": 4.1586829862814166e-05, + "loss": 2.3707, + "step": 3397500 + }, + { + "epoch": 16.83, + "learning_rate": 4.158559127638808e-05, + "loss": 2.3603, + "step": 3398000 + }, + { + "epoch": 16.84, + "learning_rate": 4.158435764430771e-05, + "loss": 2.3707, + "step": 3398500 + }, + { + "epoch": 16.84, + "learning_rate": 4.158311905788163e-05, + "loss": 2.3861, + "step": 3399000 + }, + { + "epoch": 16.84, + "learning_rate": 4.158188047145554e-05, + "loss": 2.3695, + "step": 3399500 + }, + { + "epoch": 16.84, + "learning_rate": 4.1580641885029455e-05, + "loss": 2.3408, + "step": 3400000 + }, + { + "epoch": 16.85, + "learning_rate": 4.157940329860337e-05, + "loss": 2.3676, + "step": 3400500 + }, + { + "epoch": 16.85, + "learning_rate": 4.157816718935014e-05, + "loss": 2.3638, + "step": 3401000 + }, + { + "epoch": 16.85, + "learning_rate": 4.157692860292406e-05, + "loss": 2.3828, + "step": 3401500 + }, + { + "epoch": 16.85, + "learning_rate": 4.1575690016497974e-05, + "loss": 2.3853, + "step": 3402000 + }, + { + "epoch": 16.86, + "learning_rate": 4.1574451430071884e-05, + "loss": 2.3722, + "step": 3402500 + }, + { + "epoch": 16.86, + "learning_rate": 4.15732128436458e-05, + "loss": 2.3818, + "step": 3403000 + }, + { + "epoch": 16.86, + "learning_rate": 4.157197425721972e-05, + "loss": 2.3493, + "step": 3403500 + }, + { + "epoch": 16.86, + "learning_rate": 4.1570735670793635e-05, + "loss": 2.3795, + "step": 3404000 + }, + { + "epoch": 16.87, + "learning_rate": 4.156949708436755e-05, + "loss": 2.3624, + "step": 3404500 + }, + { + "epoch": 16.87, + "learning_rate": 4.156826097511433e-05, + "loss": 2.3783, + "step": 3405000 + }, + { + "epoch": 16.87, + "learning_rate": 4.1567022388688245e-05, + "loss": 2.379, + "step": 3405500 + }, + { + "epoch": 16.87, + "learning_rate": 4.1565783802262155e-05, + "loss": 2.3758, + "step": 3406000 + }, + { + "epoch": 16.88, + "learning_rate": 4.156454521583607e-05, + "loss": 2.3549, + "step": 3406500 + }, + { + "epoch": 16.88, + "learning_rate": 4.156330662940999e-05, + "loss": 2.3986, + "step": 3407000 + }, + { + "epoch": 16.88, + "learning_rate": 4.1562068042983905e-05, + "loss": 2.3615, + "step": 3407500 + }, + { + "epoch": 16.88, + "learning_rate": 4.156082945655782e-05, + "loss": 2.3583, + "step": 3408000 + }, + { + "epoch": 16.89, + "learning_rate": 4.155959334730459e-05, + "loss": 2.3854, + "step": 3408500 + }, + { + "epoch": 16.89, + "learning_rate": 4.15583547608785e-05, + "loss": 2.3673, + "step": 3409000 + }, + { + "epoch": 16.89, + "learning_rate": 4.155711617445242e-05, + "loss": 2.3642, + "step": 3409500 + }, + { + "epoch": 16.89, + "learning_rate": 4.1555877588026335e-05, + "loss": 2.3299, + "step": 3410000 + }, + { + "epoch": 16.9, + "learning_rate": 4.155463900160025e-05, + "loss": 2.3807, + "step": 3410500 + }, + { + "epoch": 16.9, + "learning_rate": 4.155340041517417e-05, + "loss": 2.3767, + "step": 3411000 + }, + { + "epoch": 16.9, + "learning_rate": 4.1552161828748086e-05, + "loss": 2.3789, + "step": 3411500 + }, + { + "epoch": 16.9, + "learning_rate": 4.1550923242322e-05, + "loss": 2.3876, + "step": 3412000 + }, + { + "epoch": 16.91, + "learning_rate": 4.154968713306877e-05, + "loss": 2.3517, + "step": 3412500 + }, + { + "epoch": 16.91, + "learning_rate": 4.154844854664269e-05, + "loss": 2.4007, + "step": 3413000 + }, + { + "epoch": 16.91, + "learning_rate": 4.1547209960216606e-05, + "loss": 2.3944, + "step": 3413500 + }, + { + "epoch": 16.91, + "learning_rate": 4.154597137379052e-05, + "loss": 2.3647, + "step": 3414000 + }, + { + "epoch": 16.92, + "learning_rate": 4.154473278736444e-05, + "loss": 2.3501, + "step": 3414500 + }, + { + "epoch": 16.92, + "learning_rate": 4.1543494200938356e-05, + "loss": 2.3941, + "step": 3415000 + }, + { + "epoch": 16.92, + "learning_rate": 4.154225561451227e-05, + "loss": 2.3809, + "step": 3415500 + }, + { + "epoch": 16.92, + "learning_rate": 4.154101702808619e-05, + "loss": 2.3663, + "step": 3416000 + }, + { + "epoch": 16.93, + "learning_rate": 4.153978091883295e-05, + "loss": 2.3529, + "step": 3416500 + }, + { + "epoch": 16.93, + "learning_rate": 4.153854233240687e-05, + "loss": 2.3712, + "step": 3417000 + }, + { + "epoch": 16.93, + "learning_rate": 4.1537306223153645e-05, + "loss": 2.3917, + "step": 3417500 + }, + { + "epoch": 16.93, + "learning_rate": 4.153606763672756e-05, + "loss": 2.3671, + "step": 3418000 + }, + { + "epoch": 16.94, + "learning_rate": 4.153482905030147e-05, + "loss": 2.3691, + "step": 3418500 + }, + { + "epoch": 16.94, + "learning_rate": 4.153359046387539e-05, + "loss": 2.3647, + "step": 3419000 + }, + { + "epoch": 16.94, + "learning_rate": 4.1532351877449306e-05, + "loss": 2.3684, + "step": 3419500 + }, + { + "epoch": 16.94, + "learning_rate": 4.153111329102322e-05, + "loss": 2.3566, + "step": 3420000 + }, + { + "epoch": 16.95, + "learning_rate": 4.152987718176999e-05, + "loss": 2.3672, + "step": 3420500 + }, + { + "epoch": 16.95, + "learning_rate": 4.152863859534391e-05, + "loss": 2.383, + "step": 3421000 + }, + { + "epoch": 16.95, + "learning_rate": 4.152740248609068e-05, + "loss": 2.3671, + "step": 3421500 + }, + { + "epoch": 16.95, + "learning_rate": 4.1526163899664594e-05, + "loss": 2.3578, + "step": 3422000 + }, + { + "epoch": 16.96, + "learning_rate": 4.152492531323851e-05, + "loss": 2.3837, + "step": 3422500 + }, + { + "epoch": 16.96, + "learning_rate": 4.152368672681243e-05, + "loss": 2.3824, + "step": 3423000 + }, + { + "epoch": 16.96, + "learning_rate": 4.1522448140386345e-05, + "loss": 2.3624, + "step": 3423500 + }, + { + "epoch": 16.96, + "learning_rate": 4.152120955396026e-05, + "loss": 2.3848, + "step": 3424000 + }, + { + "epoch": 16.97, + "learning_rate": 4.151997096753417e-05, + "loss": 2.379, + "step": 3424500 + }, + { + "epoch": 16.97, + "learning_rate": 4.151873238110809e-05, + "loss": 2.3548, + "step": 3425000 + }, + { + "epoch": 16.97, + "learning_rate": 4.1517493794682006e-05, + "loss": 2.363, + "step": 3425500 + }, + { + "epoch": 16.97, + "learning_rate": 4.151625520825592e-05, + "loss": 2.3791, + "step": 3426000 + }, + { + "epoch": 16.98, + "learning_rate": 4.151501662182984e-05, + "loss": 2.3776, + "step": 3426500 + }, + { + "epoch": 16.98, + "learning_rate": 4.151378051257661e-05, + "loss": 2.3708, + "step": 3427000 + }, + { + "epoch": 16.98, + "learning_rate": 4.1512541926150525e-05, + "loss": 2.3777, + "step": 3427500 + }, + { + "epoch": 16.98, + "learning_rate": 4.1511303339724436e-05, + "loss": 2.3577, + "step": 3428000 + }, + { + "epoch": 16.99, + "learning_rate": 4.151006475329835e-05, + "loss": 2.3726, + "step": 3428500 + }, + { + "epoch": 16.99, + "learning_rate": 4.150882616687227e-05, + "loss": 2.393, + "step": 3429000 + }, + { + "epoch": 16.99, + "learning_rate": 4.1507590057619045e-05, + "loss": 2.356, + "step": 3429500 + }, + { + "epoch": 16.99, + "learning_rate": 4.150635147119296e-05, + "loss": 2.3563, + "step": 3430000 + }, + { + "epoch": 17.0, + "learning_rate": 4.150511288476688e-05, + "loss": 2.3842, + "step": 3430500 + }, + { + "epoch": 17.0, + "learning_rate": 4.150387429834079e-05, + "loss": 2.3678, + "step": 3431000 + }, + { + "epoch": 17.0, + "eval_accuracy": 0.6498075948776978, + "eval_accuracy_mlm": 0.6043656048690601, + "eval_accuracy_nsp": 0.8643428943477186, + "eval_loss": 2.376680374145508, + "eval_runtime": 145.8417, + "eval_samples_per_second": 1748.189, + "eval_steps_per_second": 72.846, + "step": 3431331 } ], "max_steps": 20184300, "num_train_epochs": 100, - "total_flos": 2.6129529363964503e+18, + "total_flos": 4.441905477808464e+18, "trial_name": null, "trial_params": null }