diff --git "a/trainer_state.json" "b/trainer_state.json" --- "a/trainer_state.json" +++ "b/trainer_state.json" @@ -1,15391 +1,7936 @@ { - "best_metric": 1.0445035696029663, - "best_model_checkpoint": "/home/stephan/code/molreactgen/checkpoints/2023-05-09_21-49-53_experiment/checkpoint-248650", - "epoch": 49.99748655305886, - "global_step": 248650, + "best_metric": 1.1810568571090698, + "best_model_checkpoint": "/home/stephan/code/molreactgen/checkpoints/2023-10-31_05-26-52_experiment/checkpoint-124300", + "epoch": 49.98492007640495, + "eval_steps": 500, + "global_step": 124300, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { - "epoch": 0.02, - "learning_rate": 1.0054293183189222e-05, - "loss": 4.306, + "epoch": 0.04, + "learning_rate": 2.011263073209976e-05, + "loss": 4.9407, "step": 100 }, { - "epoch": 0.04, - "learning_rate": 2.0108586366378444e-05, - "loss": 3.8922, + "epoch": 0.08, + "learning_rate": 4.022526146419952e-05, + "loss": 4.4413, "step": 200 }, { - "epoch": 0.06, - "learning_rate": 3.016287954956767e-05, - "loss": 3.6512, + "epoch": 0.12, + "learning_rate": 6.033789219629928e-05, + "loss": 4.048, "step": 300 }, { - "epoch": 0.08, - "learning_rate": 4.021717273275689e-05, - "loss": 3.44, + "epoch": 0.16, + "learning_rate": 8.045052292839905e-05, + "loss": 3.6757, "step": 400 }, { - "epoch": 0.1, - "learning_rate": 5.027146591594611e-05, - "loss": 3.2164, + "epoch": 0.2, + "learning_rate": 0.0001005631536604988, + "loss": 3.3122, "step": 500 }, { - "epoch": 0.12, - "learning_rate": 6.032575909913534e-05, - "loss": 3.0072, + "epoch": 0.24, + "learning_rate": 0.00012067578439259856, + "loss": 2.9853, "step": 600 }, { - "epoch": 0.14, - "learning_rate": 7.038005228232456e-05, - "loss": 2.8143, + "epoch": 0.28, + "learning_rate": 0.0001407884151246983, + "loss": 2.7396, "step": 700 }, { - "epoch": 0.16, - "learning_rate": 8.043434546551378e-05, - "loss": 2.6472, + "epoch": 0.32, + "learning_rate": 0.0001609010458567981, + "loss": 2.5627, "step": 800 }, { - "epoch": 0.18, - "learning_rate": 9.048863864870299e-05, - "loss": 2.51, + "epoch": 0.36, + "learning_rate": 0.00018101367658889783, + "loss": 2.4278, "step": 900 }, { - "epoch": 0.2, - "learning_rate": 0.00010054293183189222, - "loss": 2.3891, + "epoch": 0.4, + "learning_rate": 0.0002011263073209976, + "loss": 2.3221, "step": 1000 }, { - "epoch": 0.22, - "learning_rate": 0.00011059722501508144, - "loss": 2.2905, + "epoch": 0.44, + "learning_rate": 0.00022123893805309734, + "loss": 2.2408, "step": 1100 }, { - "epoch": 0.24, - "learning_rate": 0.00012065151819827068, - "loss": 2.2056, + "epoch": 0.48, + "learning_rate": 0.0002413515687851971, + "loss": 2.1777, "step": 1200 }, { - "epoch": 0.26, - "learning_rate": 0.00013070581138145988, - "loss": 2.137, + "epoch": 0.52, + "learning_rate": 0.0002614641995172969, + "loss": 2.1231, "step": 1300 }, { - "epoch": 0.28, - "learning_rate": 0.00014076010456464912, - "loss": 2.0792, + "epoch": 0.56, + "learning_rate": 0.0002815768302493966, + "loss": 2.0787, "step": 1400 }, { - "epoch": 0.3, - "learning_rate": 0.00015081439774783834, - "loss": 2.0301, + "epoch": 0.6, + "learning_rate": 0.00030168946098149636, + "loss": 2.039, "step": 1500 }, { - "epoch": 0.32, - "learning_rate": 0.00016086869093102755, - "loss": 1.9879, + "epoch": 0.64, + "learning_rate": 0.0003218020917135962, + "loss": 2.0056, "step": 1600 }, { - "epoch": 0.34, - "learning_rate": 0.00017092298411421677, - "loss": 1.9499, + "epoch": 0.68, + "learning_rate": 0.0003419147224456959, + "loss": 1.9805, "step": 1700 }, { - "epoch": 0.36, - "learning_rate": 0.00018097727729740598, - "loss": 1.9279, + "epoch": 0.72, + "learning_rate": 0.00036202735317779567, + "loss": 1.9495, "step": 1800 }, { - "epoch": 0.38, - "learning_rate": 0.0001910315704805952, - "loss": 1.8925, + "epoch": 0.76, + "learning_rate": 0.0003821399839098954, + "loss": 1.927, "step": 1900 }, { - "epoch": 0.4, - "learning_rate": 0.00020108586366378444, - "loss": 1.8673, + "epoch": 0.8, + "learning_rate": 0.0004022526146419952, + "loss": 1.9033, "step": 2000 }, { - "epoch": 0.42, - "learning_rate": 0.00021114015684697366, - "loss": 1.8431, + "epoch": 0.84, + "learning_rate": 0.00042236524537409497, + "loss": 1.8861, "step": 2100 }, { - "epoch": 0.44, - "learning_rate": 0.00022119445003016287, - "loss": 1.8236, + "epoch": 0.88, + "learning_rate": 0.0004424778761061947, + "loss": 1.8659, "step": 2200 }, { - "epoch": 0.46, - "learning_rate": 0.0002312487432133521, - "loss": 1.8023, + "epoch": 0.92, + "learning_rate": 0.0004625905068382945, + "loss": 1.849, "step": 2300 }, { - "epoch": 0.48, - "learning_rate": 0.00024130303639654136, - "loss": 1.7889, + "epoch": 0.97, + "learning_rate": 0.0004827031375703942, + "loss": 1.8349, "step": 2400 }, { - "epoch": 0.5, - "learning_rate": 0.00025135732957973054, - "loss": 1.7671, + "epoch": 1.0, + "eval_accuracy": 0.3811347119515808, + "eval_loss": 1.7538774013519287, + "eval_runtime": 21.4451, + "eval_samples_per_second": 3710.317, + "eval_steps_per_second": 14.502, + "step": 2486 + }, + { + "epoch": 1.01, + "learning_rate": 0.000502815768302494, + "loss": 1.8161, "step": 2500 }, { - "epoch": 0.52, - "learning_rate": 0.00026141162276291976, - "loss": 1.7506, + "epoch": 1.05, + "learning_rate": 0.0005229283990345938, + "loss": 1.8011, "step": 2600 }, { - "epoch": 0.54, - "learning_rate": 0.00027146591594610903, - "loss": 1.737, + "epoch": 1.09, + "learning_rate": 0.0005430410297666935, + "loss": 1.7851, "step": 2700 }, { - "epoch": 0.56, - "learning_rate": 0.00028152020912929824, - "loss": 1.7272, + "epoch": 1.13, + "learning_rate": 0.0005631536604987932, + "loss": 1.7762, "step": 2800 }, { - "epoch": 0.58, - "learning_rate": 0.00029157450231248746, - "loss": 1.7112, + "epoch": 1.17, + "learning_rate": 0.0005832662912308931, + "loss": 1.7612, "step": 2900 }, { - "epoch": 0.6, - "learning_rate": 0.0003016287954956767, - "loss": 1.699, + "epoch": 1.21, + "learning_rate": 0.0006033789219629927, + "loss": 1.7521, "step": 3000 }, { - "epoch": 0.62, - "learning_rate": 0.0003116830886788659, - "loss": 1.6933, + "epoch": 1.25, + "learning_rate": 0.0006234915526950925, + "loss": 1.739, "step": 3100 }, { - "epoch": 0.64, - "learning_rate": 0.0003217373818620551, - "loss": 1.6788, + "epoch": 1.29, + "learning_rate": 0.0006436041834271924, + "loss": 1.7296, "step": 3200 }, { - "epoch": 0.66, - "learning_rate": 0.0003317916750452443, - "loss": 1.6694, + "epoch": 1.33, + "learning_rate": 0.0006637168141592921, + "loss": 1.7195, "step": 3300 }, { - "epoch": 0.68, - "learning_rate": 0.00034184596822843354, - "loss": 1.6607, + "epoch": 1.37, + "learning_rate": 0.0006838294448913918, + "loss": 1.7106, "step": 3400 }, { - "epoch": 0.7, - "learning_rate": 0.00035190026141162275, - "loss": 1.6534, + "epoch": 1.41, + "learning_rate": 0.0007039420756234916, + "loss": 1.7033, "step": 3500 }, { - "epoch": 0.72, - "learning_rate": 0.00036195455459481197, - "loss": 1.645, + "epoch": 1.45, + "learning_rate": 0.0007240547063555913, + "loss": 1.6935, "step": 3600 }, { - "epoch": 0.74, - "learning_rate": 0.0003720088477780012, - "loss": 1.632, + "epoch": 1.49, + "learning_rate": 0.0007441673370876911, + "loss": 1.6846, "step": 3700 }, { - "epoch": 0.76, - "learning_rate": 0.0003820631409611904, - "loss": 1.6229, + "epoch": 1.53, + "learning_rate": 0.0007642799678197908, + "loss": 1.6774, "step": 3800 }, { - "epoch": 0.78, - "learning_rate": 0.00039211743414437967, - "loss": 1.6161, + "epoch": 1.57, + "learning_rate": 0.0007843925985518905, + "loss": 1.6706, "step": 3900 }, { - "epoch": 0.8, - "learning_rate": 0.0004021717273275689, - "loss": 1.6061, + "epoch": 1.61, + "learning_rate": 0.0008045052292839904, + "loss": 1.6594, "step": 4000 }, { - "epoch": 0.82, - "learning_rate": 0.0004122260205107581, - "loss": 1.6006, + "epoch": 1.65, + "learning_rate": 0.0008246178600160902, + "loss": 1.6572, "step": 4100 }, { - "epoch": 0.84, - "learning_rate": 0.0004222803136939473, - "loss": 1.5875, + "epoch": 1.69, + "learning_rate": 0.0008447304907481899, + "loss": 1.649, "step": 4200 }, { - "epoch": 0.86, - "learning_rate": 0.00043233460687713653, - "loss": 1.5828, + "epoch": 1.73, + "learning_rate": 0.0008648431214802896, + "loss": 1.6442, "step": 4300 }, { - "epoch": 0.88, - "learning_rate": 0.00044238890006032574, - "loss": 1.5849, + "epoch": 1.77, + "learning_rate": 0.0008849557522123894, + "loss": 1.6377, "step": 4400 }, { - "epoch": 0.9, - "learning_rate": 0.00045244319324351496, - "loss": 1.572, + "epoch": 1.81, + "learning_rate": 0.0009050683829444891, + "loss": 1.6332, "step": 4500 }, { - "epoch": 0.92, - "learning_rate": 0.0004624974864267042, - "loss": 1.565, + "epoch": 1.85, + "learning_rate": 0.000925181013676589, + "loss": 1.6288, "step": 4600 }, { - "epoch": 0.95, - "learning_rate": 0.0004725517796098934, - "loss": 1.5589, + "epoch": 1.89, + "learning_rate": 0.0009452936444086887, + "loss": 1.6221, "step": 4700 }, { - "epoch": 0.97, - "learning_rate": 0.0004826060727930827, - "loss": 1.5519, + "epoch": 1.93, + "learning_rate": 0.0009654062751407884, + "loss": 1.6152, "step": 4800 }, { - "epoch": 0.99, - "learning_rate": 0.0004926603659762719, - "loss": 1.5489, + "epoch": 1.97, + "learning_rate": 0.000985518905872888, + "loss": 1.6122, "step": 4900 }, { - "epoch": 1.0, - "eval_accuracy": 0.38639571407117423, - "eval_loss": 1.490514874458313, - "eval_runtime": 19.6663, - "eval_samples_per_second": 4045.908, - "eval_steps_per_second": 15.814, + "epoch": 2.0, + "eval_accuracy": 0.3949071464783068, + "eval_loss": 1.5559048652648926, + "eval_runtime": 17.9618, + "eval_samples_per_second": 4429.835, + "eval_steps_per_second": 17.314, "step": 4973 }, { - "epoch": 1.01, - "learning_rate": 0.0005027146591594611, - "loss": 1.5386, + "epoch": 2.01, + "learning_rate": 0.001005631536604988, + "loss": 1.6094, "step": 5000 }, { - "epoch": 1.03, - "learning_rate": 0.0005127689523426503, - "loss": 1.5361, + "epoch": 2.05, + "learning_rate": 0.0010257441673370879, + "loss": 1.6001, "step": 5100 }, { - "epoch": 1.05, - "learning_rate": 0.0005228232455258395, - "loss": 1.5279, + "epoch": 2.09, + "learning_rate": 0.0010458567980691875, + "loss": 1.5933, "step": 5200 }, { - "epoch": 1.07, - "learning_rate": 0.0005328775387090288, - "loss": 1.5273, + "epoch": 2.13, + "learning_rate": 0.0010659694288012872, + "loss": 1.5933, "step": 5300 }, { - "epoch": 1.09, - "learning_rate": 0.0005429318318922181, - "loss": 1.5192, + "epoch": 2.17, + "learning_rate": 0.001086082059533387, + "loss": 1.5913, "step": 5400 }, { - "epoch": 1.11, - "learning_rate": 0.0005529861250754073, - "loss": 1.5175, + "epoch": 2.21, + "learning_rate": 0.0011061946902654867, + "loss": 1.5864, "step": 5500 }, { - "epoch": 1.13, - "learning_rate": 0.0005630404182585965, - "loss": 1.5165, + "epoch": 2.25, + "learning_rate": 0.0011263073209975864, + "loss": 1.5847, "step": 5600 }, { - "epoch": 1.15, - "learning_rate": 0.0005730947114417857, - "loss": 1.5102, + "epoch": 2.29, + "learning_rate": 0.0011464199517296862, + "loss": 1.5822, "step": 5700 }, { - "epoch": 1.17, - "learning_rate": 0.0005831490046249749, - "loss": 1.5006, + "epoch": 2.33, + "learning_rate": 0.0011665325824617861, + "loss": 1.5794, "step": 5800 }, { - "epoch": 1.19, - "learning_rate": 0.0005932032978081641, - "loss": 1.4952, + "epoch": 2.37, + "learning_rate": 0.0011864440868865648, + "loss": 1.5771, "step": 5900 }, { - "epoch": 1.21, - "learning_rate": 0.0006032575909913533, - "loss": 1.4956, + "epoch": 2.41, + "learning_rate": 0.0012065567176186645, + "loss": 1.5719, "step": 6000 }, { - "epoch": 1.23, - "learning_rate": 0.0006133118841745426, - "loss": 1.4906, + "epoch": 2.45, + "learning_rate": 0.0012266693483507644, + "loss": 1.5731, "step": 6100 }, { - "epoch": 1.25, - "learning_rate": 0.0006233661773577318, - "loss": 1.4845, + "epoch": 2.49, + "learning_rate": 0.001246781979082864, + "loss": 1.5661, "step": 6200 }, { - "epoch": 1.27, - "learning_rate": 0.0006334204705409209, - "loss": 1.4826, + "epoch": 2.53, + "learning_rate": 0.001266894609814964, + "loss": 1.5704, "step": 6300 }, { - "epoch": 1.29, - "learning_rate": 0.0006434747637241102, - "loss": 1.4844, + "epoch": 2.57, + "learning_rate": 0.0012870072405470638, + "loss": 1.5659, "step": 6400 }, { - "epoch": 1.31, - "learning_rate": 0.0006535290569072995, - "loss": 1.4789, + "epoch": 2.61, + "learning_rate": 0.0013071198712791632, + "loss": 1.5616, "step": 6500 }, { - "epoch": 1.33, - "learning_rate": 0.0006635833500904886, - "loss": 1.474, + "epoch": 2.65, + "learning_rate": 0.001327232502011263, + "loss": 1.5614, "step": 6600 }, { - "epoch": 1.35, - "learning_rate": 0.000673637643273678, - "loss": 1.4716, + "epoch": 2.69, + "learning_rate": 0.0013473451327433628, + "loss": 1.5582, "step": 6700 }, { - "epoch": 1.37, - "learning_rate": 0.0006836919364568671, - "loss": 1.471, + "epoch": 2.73, + "learning_rate": 0.0013674577634754626, + "loss": 1.5545, "step": 6800 }, { - "epoch": 1.39, - "learning_rate": 0.0006937462296400564, - "loss": 1.4675, + "epoch": 2.77, + "learning_rate": 0.0013875703942075623, + "loss": 1.5547, "step": 6900 }, { - "epoch": 1.41, - "learning_rate": 0.0007038005228232455, - "loss": 1.4627, + "epoch": 2.81, + "learning_rate": 0.0014076830249396622, + "loss": 1.5548, "step": 7000 }, { - "epoch": 1.43, - "learning_rate": 0.0007138548160064348, - "loss": 1.4575, + "epoch": 2.86, + "learning_rate": 0.001427795655671762, + "loss": 1.55, "step": 7100 }, { - "epoch": 1.45, - "learning_rate": 0.0007239091091896239, - "loss": 1.4615, + "epoch": 2.9, + "learning_rate": 0.0014479082864038617, + "loss": 1.5533, "step": 7200 }, { - "epoch": 1.47, - "learning_rate": 0.0007339634023728133, - "loss": 1.451, + "epoch": 2.94, + "learning_rate": 0.0014680209171359616, + "loss": 1.5483, "step": 7300 }, { - "epoch": 1.49, - "learning_rate": 0.0007440176955560024, - "loss": 1.4533, + "epoch": 2.98, + "learning_rate": 0.001488133547868061, + "loss": 1.5457, "step": 7400 }, { - "epoch": 1.51, - "learning_rate": 0.0007540719887391917, - "loss": 1.4469, + "epoch": 3.0, + "eval_accuracy": 0.39967479317894095, + "eval_loss": 1.4914867877960205, + "eval_runtime": 18.1883, + "eval_samples_per_second": 4374.679, + "eval_steps_per_second": 17.099, + "step": 7460 + }, + { + "epoch": 3.02, + "learning_rate": 0.001508246178600161, + "loss": 1.5441, "step": 7500 }, { - "epoch": 1.53, - "learning_rate": 0.0007641262819223808, - "loss": 1.4471, + "epoch": 3.06, + "learning_rate": 0.0015283588093322606, + "loss": 1.5386, "step": 7600 }, { - "epoch": 1.55, - "learning_rate": 0.0007741805751055701, - "loss": 1.4433, + "epoch": 3.1, + "learning_rate": 0.0015484714400643604, + "loss": 1.5364, "step": 7700 }, { - "epoch": 1.57, - "learning_rate": 0.0007842348682887593, - "loss": 1.4447, + "epoch": 3.14, + "learning_rate": 0.0015685840707964603, + "loss": 1.536, "step": 7800 }, { - "epoch": 1.59, - "learning_rate": 0.0007942891614719485, - "loss": 1.4389, + "epoch": 3.18, + "learning_rate": 0.00158869670152856, + "loss": 1.5351, "step": 7900 }, { - "epoch": 1.61, - "learning_rate": 0.0008043434546551378, - "loss": 1.4352, + "epoch": 3.22, + "learning_rate": 0.0016088093322606598, + "loss": 1.5372, "step": 8000 }, { - "epoch": 1.63, - "learning_rate": 0.000814397747838327, - "loss": 1.432, + "epoch": 3.26, + "learning_rate": 0.0016289219629927595, + "loss": 1.5407, "step": 8100 }, { - "epoch": 1.65, - "learning_rate": 0.0008244520410215162, - "loss": 1.4366, + "epoch": 3.3, + "learning_rate": 0.0016490345937248594, + "loss": 1.5338, "step": 8200 }, { - "epoch": 1.67, - "learning_rate": 0.0008345063342047054, - "loss": 1.4331, + "epoch": 3.34, + "learning_rate": 0.0016691472244569588, + "loss": 1.534, "step": 8300 }, { - "epoch": 1.69, - "learning_rate": 0.0008445606273878946, - "loss": 1.4331, + "epoch": 3.38, + "learning_rate": 0.0016892598551890587, + "loss": 1.5331, "step": 8400 }, { - "epoch": 1.71, - "learning_rate": 0.0008546149205710838, - "loss": 1.4229, + "epoch": 3.42, + "learning_rate": 0.0017093724859211584, + "loss": 1.534, "step": 8500 }, { - "epoch": 1.73, - "learning_rate": 0.0008646692137542731, - "loss": 1.4255, + "epoch": 3.46, + "learning_rate": 0.0017294851166532582, + "loss": 1.5347, "step": 8600 }, { - "epoch": 1.75, - "learning_rate": 0.0008747235069374624, - "loss": 1.4223, + "epoch": 3.5, + "learning_rate": 0.0017495977473853581, + "loss": 1.5271, "step": 8700 }, { - "epoch": 1.77, - "learning_rate": 0.0008847778001206515, - "loss": 1.4239, + "epoch": 3.54, + "learning_rate": 0.0017697103781174578, + "loss": 1.5301, "step": 8800 }, { - "epoch": 1.79, - "learning_rate": 0.0008948320933038408, - "loss": 1.4214, + "epoch": 3.58, + "learning_rate": 0.0017898230088495577, + "loss": 1.5301, "step": 8900 }, { - "epoch": 1.81, - "learning_rate": 0.0009048863864870299, - "loss": 1.4187, + "epoch": 3.62, + "learning_rate": 0.0018099356395816573, + "loss": 1.5266, "step": 9000 }, { - "epoch": 1.83, - "learning_rate": 0.0009149406796702192, - "loss": 1.4181, + "epoch": 3.66, + "learning_rate": 0.0018300482703137572, + "loss": 1.5283, "step": 9100 }, { - "epoch": 1.85, - "learning_rate": 0.0009249949728534083, - "loss": 1.4188, + "epoch": 3.7, + "learning_rate": 0.0018501609010458566, + "loss": 1.5276, "step": 9200 }, { - "epoch": 1.87, - "learning_rate": 0.0009350492660365977, - "loss": 1.4166, + "epoch": 3.74, + "learning_rate": 0.0018702735317779565, + "loss": 1.5259, "step": 9300 }, { - "epoch": 1.89, - "learning_rate": 0.0009451035592197868, - "loss": 1.4089, + "epoch": 3.78, + "learning_rate": 0.0018903861625100564, + "loss": 1.5223, "step": 9400 }, { - "epoch": 1.91, - "learning_rate": 0.0009551578524029761, - "loss": 1.4085, + "epoch": 3.82, + "learning_rate": 0.001910498793242156, + "loss": 1.5289, "step": 9500 }, { - "epoch": 1.93, - "learning_rate": 0.0009652121455861654, - "loss": 1.4111, + "epoch": 3.86, + "learning_rate": 0.001930611423974256, + "loss": 1.5279, "step": 9600 }, { - "epoch": 1.95, - "learning_rate": 0.0009752664387693545, - "loss": 1.4065, + "epoch": 3.9, + "learning_rate": 0.0019507240547063556, + "loss": 1.5226, "step": 9700 }, { - "epoch": 1.97, - "learning_rate": 0.0009853207319525437, - "loss": 1.4065, + "epoch": 3.94, + "learning_rate": 0.0019708366854384552, + "loss": 1.5237, "step": 9800 }, { - "epoch": 1.99, - "learning_rate": 0.000995375025135733, - "loss": 1.4079, + "epoch": 3.98, + "learning_rate": 0.001990949316170555, + "loss": 1.5245, "step": 9900 }, { - "epoch": 2.0, - "eval_accuracy": 0.39677088556687123, - "eval_loss": 1.357865333557129, - "eval_runtime": 19.5955, - "eval_samples_per_second": 4060.518, - "eval_steps_per_second": 15.871, - "step": 9946 + "epoch": 4.0, + "eval_accuracy": 0.40115904050888884, + "eval_loss": 1.47209894657135, + "eval_runtime": 18.1424, + "eval_samples_per_second": 4385.747, + "eval_steps_per_second": 17.142, + "step": 9947 }, { - "epoch": 2.01, - "learning_rate": 0.0010054293183189222, - "loss": 1.4024, + "epoch": 4.02, + "learning_rate": 0.002011061946902655, + "loss": 1.5211, "step": 10000 }, { - "epoch": 2.03, - "learning_rate": 0.0010154836115021115, - "loss": 1.3983, + "epoch": 4.06, + "learning_rate": 0.0020311745776347544, + "loss": 1.5108, "step": 10100 }, { - "epoch": 2.05, - "learning_rate": 0.0010255379046853006, - "loss": 1.3965, + "epoch": 4.1, + "learning_rate": 0.0020512872083668543, + "loss": 1.5157, "step": 10200 }, { - "epoch": 2.07, - "learning_rate": 0.00103559219786849, - "loss": 1.3945, + "epoch": 4.14, + "learning_rate": 0.002071399839098954, + "loss": 1.5155, "step": 10300 }, { - "epoch": 2.09, - "learning_rate": 0.0010455459481198471, - "loss": 1.3989, + "epoch": 4.18, + "learning_rate": 0.002091512469831054, + "loss": 1.5171, "step": 10400 }, { - "epoch": 2.11, - "learning_rate": 0.0010556002413030364, - "loss": 1.3975, + "epoch": 4.22, + "learning_rate": 0.0021116251005631535, + "loss": 1.5195, "step": 10500 }, { - "epoch": 2.13, - "learning_rate": 0.0010655539915543938, - "loss": 1.3924, + "epoch": 4.26, + "learning_rate": 0.0021317377312952534, + "loss": 1.5162, "step": 10600 }, { - "epoch": 2.15, - "learning_rate": 0.001075608284737583, - "loss": 1.3933, + "epoch": 4.3, + "learning_rate": 0.0021518503620273533, + "loss": 1.5174, "step": 10700 }, { - "epoch": 2.17, - "learning_rate": 0.0010856625779207723, - "loss": 1.394, + "epoch": 4.34, + "learning_rate": 0.002171962992759453, + "loss": 1.5163, "step": 10800 }, { - "epoch": 2.19, - "learning_rate": 0.0010957168711039614, - "loss": 1.3945, + "epoch": 4.38, + "learning_rate": 0.002192075623491553, + "loss": 1.5158, "step": 10900 }, { - "epoch": 2.21, - "learning_rate": 0.0011057711642871507, - "loss": 1.3929, + "epoch": 4.42, + "learning_rate": 0.0022121882542236525, + "loss": 1.516, "step": 11000 }, { - "epoch": 2.23, - "learning_rate": 0.0011158254574703398, - "loss": 1.3943, + "epoch": 4.46, + "learning_rate": 0.0022323008849557523, + "loss": 1.5143, "step": 11100 }, { - "epoch": 2.25, - "learning_rate": 0.0011258797506535291, - "loss": 1.3893, + "epoch": 4.5, + "learning_rate": 0.0022524135156878518, + "loss": 1.5163, "step": 11200 }, { - "epoch": 2.27, - "learning_rate": 0.0011359340438367182, - "loss": 1.3908, + "epoch": 4.54, + "learning_rate": 0.0022725261464199517, + "loss": 1.5128, "step": 11300 }, { - "epoch": 2.29, - "learning_rate": 0.0011459883370199076, - "loss": 1.3887, + "epoch": 4.58, + "learning_rate": 0.0022926387771520515, + "loss": 1.5193, "step": 11400 }, { - "epoch": 2.31, - "learning_rate": 0.0011560426302030967, - "loss": 1.3902, + "epoch": 4.62, + "learning_rate": 0.0023127514078841514, + "loss": 1.5113, "step": 11500 }, { - "epoch": 2.33, - "learning_rate": 0.001166096923386286, - "loss": 1.3934, + "epoch": 4.66, + "learning_rate": 0.0023328640386162513, + "loss": 1.5149, "step": 11600 }, { - "epoch": 2.35, - "learning_rate": 0.001176151216569475, - "loss": 1.3857, + "epoch": 4.7, + "learning_rate": 0.0023529766693483507, + "loss": 1.5127, "step": 11700 }, { - "epoch": 2.37, - "learning_rate": 0.0011862055097526644, - "loss": 1.3885, + "epoch": 4.75, + "learning_rate": 0.0023730893000804506, + "loss": 1.5126, "step": 11800 }, { - "epoch": 2.39, - "learning_rate": 0.0011962598029358535, - "loss": 1.3847, + "epoch": 4.79, + "learning_rate": 0.00239320193081255, + "loss": 1.5118, "step": 11900 }, { - "epoch": 2.41, - "learning_rate": 0.0012063140961190428, - "loss": 1.3863, + "epoch": 4.83, + "learning_rate": 0.00241331456154465, + "loss": 1.5127, "step": 12000 }, { - "epoch": 2.43, - "learning_rate": 0.001216368389302232, - "loss": 1.3846, + "epoch": 4.87, + "learning_rate": 0.00243342719227675, + "loss": 1.5136, "step": 12100 }, { - "epoch": 2.45, - "learning_rate": 0.0012264226824854213, - "loss": 1.3815, + "epoch": 4.91, + "learning_rate": 0.0024535398230088497, + "loss": 1.513, "step": 12200 }, { - "epoch": 2.47, - "learning_rate": 0.0012364769756686104, - "loss": 1.3876, + "epoch": 4.95, + "learning_rate": 0.0024736524537409496, + "loss": 1.5142, "step": 12300 }, { - "epoch": 2.49, - "learning_rate": 0.0012465312688517997, - "loss": 1.3812, + "epoch": 4.99, + "learning_rate": 0.002493765084473049, + "loss": 1.5146, "step": 12400 }, { - "epoch": 2.51, - "learning_rate": 0.0012565855620349888, - "loss": 1.3838, + "epoch": 5.0, + "eval_accuracy": 0.4013220687487078, + "eval_loss": 1.4687080383300781, + "eval_runtime": 18.9065, + "eval_samples_per_second": 4208.5, + "eval_steps_per_second": 16.449, + "step": 12433 + }, + { + "epoch": 5.03, + "learning_rate": 0.0024999976533380463, + "loss": 1.5061, "step": 12500 }, { - "epoch": 2.53, - "learning_rate": 0.0012666398552181781, - "loss": 1.3891, + "epoch": 5.07, + "learning_rate": 0.0024999859225147613, + "loss": 1.502, "step": 12600 }, { - "epoch": 2.55, - "learning_rate": 0.0012766941484013675, - "loss": 1.3852, + "epoch": 5.11, + "learning_rate": 0.0024999643339526922, + "loss": 1.5045, "step": 12700 }, { - "epoch": 2.57, - "learning_rate": 0.0012867484415845566, - "loss": 1.3856, + "epoch": 5.15, + "learning_rate": 0.002499932887822093, + "loss": 1.5039, "step": 12800 }, { - "epoch": 2.59, - "learning_rate": 0.0012968027347677459, - "loss": 1.3816, + "epoch": 5.19, + "learning_rate": 0.0024998915843709568, + "loss": 1.5011, "step": 12900 }, { - "epoch": 2.61, - "learning_rate": 0.001306857027950935, - "loss": 1.3808, + "epoch": 5.23, + "learning_rate": 0.0024998404239250133, + "loss": 1.5024, "step": 13000 }, { - "epoch": 2.63, - "learning_rate": 0.0013169113211341243, - "loss": 1.3808, + "epoch": 5.27, + "learning_rate": 0.002499779406887729, + "loss": 1.5024, "step": 13100 }, { - "epoch": 2.65, - "learning_rate": 0.0013269656143173134, - "loss": 1.3802, + "epoch": 5.31, + "learning_rate": 0.0024997085337403013, + "loss": 1.5007, "step": 13200 }, { - "epoch": 2.67, - "learning_rate": 0.0013370199075005028, - "loss": 1.3781, + "epoch": 5.35, + "learning_rate": 0.0024996278050416552, + "loss": 1.5031, "step": 13300 }, { - "epoch": 2.69, - "learning_rate": 0.0013470742006836919, - "loss": 1.3813, + "epoch": 5.39, + "learning_rate": 0.0024995372214284403, + "loss": 1.4963, "step": 13400 }, { - "epoch": 2.71, - "learning_rate": 0.0013571284938668812, - "loss": 1.3789, + "epoch": 5.43, + "learning_rate": 0.002499436783615024, + "loss": 1.4998, "step": 13500 }, { - "epoch": 2.73, - "learning_rate": 0.0013671827870500705, - "loss": 1.3808, + "epoch": 5.47, + "learning_rate": 0.0024993264923934867, + "loss": 1.5001, "step": 13600 }, { - "epoch": 2.75, - "learning_rate": 0.0013772370802332596, - "loss": 1.3793, + "epoch": 5.51, + "learning_rate": 0.0024992063486336162, + "loss": 1.4964, "step": 13700 }, { - "epoch": 2.77, - "learning_rate": 0.001387291373416449, - "loss": 1.3839, + "epoch": 5.55, + "learning_rate": 0.0024990763532829, + "loss": 1.4961, "step": 13800 }, { - "epoch": 2.79, - "learning_rate": 0.001397345666599638, - "loss": 1.3746, + "epoch": 5.59, + "learning_rate": 0.0024989365073665175, + "loss": 1.4939, "step": 13900 }, { - "epoch": 2.82, - "learning_rate": 0.0014072994168509952, - "loss": 1.3719, + "epoch": 5.63, + "learning_rate": 0.002498786811987333, + "loss": 1.4958, "step": 14000 }, { - "epoch": 2.84, - "learning_rate": 0.0014173537100341845, - "loss": 1.3761, + "epoch": 5.67, + "learning_rate": 0.002498627268325886, + "loss": 1.4941, "step": 14100 }, { - "epoch": 2.86, - "learning_rate": 0.0014274080032173739, - "loss": 1.3775, + "epoch": 5.71, + "learning_rate": 0.0024984578776403826, + "loss": 1.4901, "step": 14200 }, { - "epoch": 2.88, - "learning_rate": 0.0014374622964005632, - "loss": 1.3781, + "epoch": 5.75, + "learning_rate": 0.0024982786412666848, + "loss": 1.4942, "step": 14300 }, { - "epoch": 2.9, - "learning_rate": 0.0014475165895837523, - "loss": 1.376, + "epoch": 5.79, + "learning_rate": 0.002498089560618301, + "loss": 1.4931, "step": 14400 }, { - "epoch": 2.92, - "learning_rate": 0.0014575708827669414, - "loss": 1.3759, + "epoch": 5.83, + "learning_rate": 0.002497890637186374, + "loss": 1.4936, "step": 14500 }, { - "epoch": 2.94, - "learning_rate": 0.0014676251759501307, - "loss": 1.3709, + "epoch": 5.87, + "learning_rate": 0.00249768187253967, + "loss": 1.4899, "step": 14600 }, { - "epoch": 2.96, - "learning_rate": 0.00147767946913332, - "loss": 1.3724, + "epoch": 5.91, + "learning_rate": 0.0024974632683245654, + "loss": 1.4883, "step": 14700 }, { - "epoch": 2.98, - "learning_rate": 0.0014877337623165092, - "loss": 1.3792, + "epoch": 5.95, + "learning_rate": 0.002497234826265034, + "loss": 1.4887, "step": 14800 }, { - "epoch": 3.0, - "learning_rate": 0.0014977880554996985, - "loss": 1.3705, + "epoch": 5.99, + "learning_rate": 0.0024969965481626334, + "loss": 1.4858, "step": 14900 }, { - "epoch": 3.0, - "eval_accuracy": 0.3990531464846882, - "eval_loss": 1.3305182456970215, - "eval_runtime": 19.8107, - "eval_samples_per_second": 4016.413, - "eval_steps_per_second": 15.699, - "step": 14919 + "epoch": 6.0, + "eval_accuracy": 0.40323114536808624, + "eval_loss": 1.439610242843628, + "eval_runtime": 20.1557, + "eval_samples_per_second": 3947.668, + "eval_steps_per_second": 15.43, + "step": 14920 }, { - "epoch": 3.02, - "learning_rate": 0.0015078423486828876, - "loss": 1.3671, + "epoch": 6.03, + "learning_rate": 0.002496748435896492, + "loss": 1.4802, "step": 15000 }, { - "epoch": 3.04, - "learning_rate": 0.001517896641866077, - "loss": 1.3701, + "epoch": 6.07, + "learning_rate": 0.0024964904914232923, + "loss": 1.4769, "step": 15100 }, { - "epoch": 3.06, - "learning_rate": 0.001527950935049266, - "loss": 1.3686, + "epoch": 6.11, + "learning_rate": 0.002496222716777257, + "loss": 1.479, "step": 15200 }, { - "epoch": 3.08, - "learning_rate": 0.0015380052282324553, - "loss": 1.3682, + "epoch": 6.15, + "learning_rate": 0.0024959479387389807, + "loss": 1.4803, "step": 15300 }, { - "epoch": 3.1, - "learning_rate": 0.0015480595214156445, - "loss": 1.3661, + "epoch": 6.19, + "learning_rate": 0.002495660608407645, + "loss": 1.4815, "step": 15400 }, { - "epoch": 3.12, - "learning_rate": 0.0015581138145988338, - "loss": 1.3605, + "epoch": 6.23, + "learning_rate": 0.0024953634544481627, + "loss": 1.4808, "step": 15500 }, { - "epoch": 3.14, - "learning_rate": 0.0015681681077820229, - "loss": 1.3689, + "epoch": 6.27, + "learning_rate": 0.0024950564792039736, + "loss": 1.4785, "step": 15600 }, { - "epoch": 3.16, - "learning_rate": 0.0015782224009652122, - "loss": 1.3741, + "epoch": 6.31, + "learning_rate": 0.00249473968509597, + "loss": 1.4752, "step": 15700 }, { - "epoch": 3.18, - "learning_rate": 0.0015882766941484015, - "loss": 1.3674, + "epoch": 6.35, + "learning_rate": 0.002494413074622479, + "loss": 1.4801, "step": 15800 }, { - "epoch": 3.2, - "learning_rate": 0.0015983309873315906, - "loss": 1.367, + "epoch": 6.39, + "learning_rate": 0.002494076650359243, + "loss": 1.4795, "step": 15900 }, { - "epoch": 3.22, - "learning_rate": 0.0016083852805147797, - "loss": 1.3694, + "epoch": 6.43, + "learning_rate": 0.0024937304149593967, + "loss": 1.4828, "step": 16000 }, { - "epoch": 3.24, - "learning_rate": 0.001618439573697969, - "loss": 1.3683, + "epoch": 6.47, + "learning_rate": 0.00249337437115345, + "loss": 1.4783, "step": 16100 }, { - "epoch": 3.26, - "learning_rate": 0.0016284938668811584, - "loss": 1.3669, + "epoch": 6.51, + "learning_rate": 0.0024930085217492637, + "loss": 1.4761, "step": 16200 }, { - "epoch": 3.28, - "learning_rate": 0.0016385481600643475, - "loss": 1.3713, + "epoch": 6.55, + "learning_rate": 0.002492632869632029, + "loss": 1.4757, "step": 16300 }, { - "epoch": 3.3, - "learning_rate": 0.0016486024532475366, - "loss": 1.3663, + "epoch": 6.59, + "learning_rate": 0.0024922474177642433, + "loss": 1.4782, "step": 16400 }, { - "epoch": 3.32, - "learning_rate": 0.001658656746430726, - "loss": 1.3708, + "epoch": 6.64, + "learning_rate": 0.0024918521691856877, + "loss": 1.4783, "step": 16500 }, { - "epoch": 3.34, - "learning_rate": 0.0016687110396139153, - "loss": 1.3693, + "epoch": 6.68, + "learning_rate": 0.0024914471270134036, + "loss": 1.4739, "step": 16600 }, { - "epoch": 3.36, - "learning_rate": 0.0016787653327971046, - "loss": 1.3685, + "epoch": 6.72, + "learning_rate": 0.0024910322944416666, + "loss": 1.4727, "step": 16700 }, { - "epoch": 3.38, - "learning_rate": 0.0016888196259802935, - "loss": 1.3692, + "epoch": 6.76, + "learning_rate": 0.0024906076747419625, + "loss": 1.4728, "step": 16800 }, { - "epoch": 3.4, - "learning_rate": 0.0016988739191634828, - "loss": 1.3682, + "epoch": 6.8, + "learning_rate": 0.002490173271262961, + "loss": 1.4753, "step": 16900 }, { - "epoch": 3.42, - "learning_rate": 0.0017089282123466721, - "loss": 1.3657, + "epoch": 6.84, + "learning_rate": 0.0024897290874304895, + "loss": 1.4742, "step": 17000 }, { - "epoch": 3.44, - "learning_rate": 0.0017189825055298614, - "loss": 1.3685, + "epoch": 6.88, + "learning_rate": 0.0024892751267475066, + "loss": 1.4719, "step": 17100 }, { - "epoch": 3.46, - "learning_rate": 0.0017290367987130505, - "loss": 1.3683, + "epoch": 6.92, + "learning_rate": 0.002488811392794072, + "loss": 1.4748, "step": 17200 }, { - "epoch": 3.48, - "learning_rate": 0.0017390910918962397, - "loss": 1.368, + "epoch": 6.96, + "learning_rate": 0.002488337889227323, + "loss": 1.4709, "step": 17300 }, { - "epoch": 3.5, - "learning_rate": 0.001749145385079429, - "loss": 1.368, + "epoch": 7.0, + "learning_rate": 0.00248785461978144, + "loss": 1.4754, "step": 17400 }, { - "epoch": 3.52, - "learning_rate": 0.0017591996782626183, - "loss": 1.3668, + "epoch": 7.0, + "eval_accuracy": 0.40480212867795956, + "eval_loss": 1.4229631423950195, + "eval_runtime": 17.9608, + "eval_samples_per_second": 4430.085, + "eval_steps_per_second": 17.315, + "step": 17407 + }, + { + "epoch": 7.04, + "learning_rate": 0.0024873615882676217, + "loss": 1.4627, "step": 17500 }, { - "epoch": 3.54, - "learning_rate": 0.0017692539714458076, - "loss": 1.363, + "epoch": 7.08, + "learning_rate": 0.002486858798574052, + "loss": 1.4651, "step": 17600 }, { - "epoch": 3.56, - "learning_rate": 0.0017793082646289965, - "loss": 1.3651, + "epoch": 7.12, + "learning_rate": 0.002486346254665872, + "loss": 1.4677, "step": 17700 }, { - "epoch": 3.58, - "learning_rate": 0.0017893625578121858, - "loss": 1.363, + "epoch": 7.16, + "learning_rate": 0.002485823960585146, + "loss": 1.4716, "step": 17800 }, { - "epoch": 3.6, - "learning_rate": 0.0017994168509953752, - "loss": 1.3652, + "epoch": 7.2, + "learning_rate": 0.0024852919204508307, + "loss": 1.4668, "step": 17900 }, { - "epoch": 3.62, - "learning_rate": 0.0018094711441785645, - "loss": 1.3666, + "epoch": 7.24, + "learning_rate": 0.0024847501384587444, + "loss": 1.4655, "step": 18000 }, { - "epoch": 3.64, - "learning_rate": 0.0018194248944299217, - "loss": 1.3653, + "epoch": 7.28, + "learning_rate": 0.0024841986188815315, + "loss": 1.4662, "step": 18100 }, { - "epoch": 3.66, - "learning_rate": 0.001829479187613111, - "loss": 1.3685, + "epoch": 7.32, + "learning_rate": 0.0024836373660686284, + "loss": 1.4645, "step": 18200 }, { - "epoch": 3.68, - "learning_rate": 0.0018395334807963, - "loss": 1.3641, + "epoch": 7.36, + "learning_rate": 0.0024830663844462334, + "loss": 1.4642, "step": 18300 }, { - "epoch": 3.7, - "learning_rate": 0.0018495877739794892, - "loss": 1.3642, + "epoch": 7.4, + "learning_rate": 0.0024824856785172667, + "loss": 1.4639, "step": 18400 }, { - "epoch": 3.72, - "learning_rate": 0.0018596420671626785, - "loss": 1.3668, + "epoch": 7.44, + "learning_rate": 0.0024818952528613374, + "loss": 1.4637, "step": 18500 }, { - "epoch": 3.74, - "learning_rate": 0.0018696963603458678, - "loss": 1.3662, + "epoch": 7.48, + "learning_rate": 0.0024812951121347083, + "loss": 1.4662, "step": 18600 }, { - "epoch": 3.76, - "learning_rate": 0.001879650110597225, - "loss": 1.3659, + "epoch": 7.52, + "learning_rate": 0.0024806852610702564, + "loss": 1.4635, "step": 18700 }, { - "epoch": 3.78, - "learning_rate": 0.0018897044037804143, - "loss": 1.368, + "epoch": 7.56, + "learning_rate": 0.0024800657044774382, + "loss": 1.4648, "step": 18800 }, { - "epoch": 3.8, - "learning_rate": 0.0018997586969636035, - "loss": 1.3631, + "epoch": 7.6, + "learning_rate": 0.0024794364472422504, + "loss": 1.4616, "step": 18900 }, { - "epoch": 3.82, - "learning_rate": 0.0019098129901467928, - "loss": 1.3629, + "epoch": 7.64, + "learning_rate": 0.002478797494327191, + "loss": 1.4658, "step": 19000 }, { - "epoch": 3.84, - "learning_rate": 0.0019198672833299819, - "loss": 1.3609, + "epoch": 7.68, + "learning_rate": 0.0024781488507712225, + "loss": 1.4603, "step": 19100 }, { - "epoch": 3.86, - "learning_rate": 0.0019299215765131712, - "loss": 1.368, + "epoch": 7.72, + "learning_rate": 0.0024774905216897293, + "loss": 1.4614, "step": 19200 }, { - "epoch": 3.88, - "learning_rate": 0.0019399758696963603, - "loss": 1.3663, + "epoch": 7.76, + "learning_rate": 0.002476829240269048, + "loss": 1.4645, "step": 19300 }, { - "epoch": 3.9, - "learning_rate": 0.0019500301628795496, - "loss": 1.3684, + "epoch": 7.8, + "learning_rate": 0.002476158476264398, + "loss": 1.4623, "step": 19400 }, { - "epoch": 3.92, - "learning_rate": 0.0019600844560627387, - "loss": 1.3653, + "epoch": 7.84, + "learning_rate": 0.0024754713154038154, + "loss": 1.4618, "step": 19500 }, { - "epoch": 3.94, - "learning_rate": 0.001970138749245928, - "loss": 1.3657, + "epoch": 7.88, + "learning_rate": 0.0024747744901335014, + "loss": 1.4615, "step": 19600 }, { - "epoch": 3.96, - "learning_rate": 0.0019801930424291174, - "loss": 1.3619, + "epoch": 7.92, + "learning_rate": 0.0024740680059488146, + "loss": 1.4611, "step": 19700 }, { - "epoch": 3.98, - "learning_rate": 0.0019902473356123067, - "loss": 1.3648, + "epoch": 7.96, + "learning_rate": 0.002473351868421287, + "loss": 1.4627, "step": 19800 }, { - "epoch": 4.0, - "eval_accuracy": 0.39993003357370305, - "eval_loss": 1.3229814767837524, - "eval_runtime": 19.7901, - "eval_samples_per_second": 4020.592, - "eval_steps_per_second": 15.715, - "step": 19893 + "epoch": 8.0, + "eval_accuracy": 0.40544716114907825, + "eval_loss": 1.414832353591919, + "eval_runtime": 18.0495, + "eval_samples_per_second": 4408.317, + "eval_steps_per_second": 17.23, + "step": 19894 }, { - "epoch": 4.0, - "learning_rate": 0.0020003016287954956, - "loss": 1.3611, + "epoch": 8.0, + "learning_rate": 0.00247262608319858, + "loss": 1.463, "step": 19900 }, { - "epoch": 4.02, - "learning_rate": 0.002010355921978685, - "loss": 1.3592, + "epoch": 8.04, + "learning_rate": 0.0024718906560044383, + "loss": 1.454, "step": 20000 }, { - "epoch": 4.04, - "learning_rate": 0.0020204102151618743, - "loss": 1.3543, + "epoch": 8.08, + "learning_rate": 0.0024711455926386466, + "loss": 1.4563, "step": 20100 }, { - "epoch": 4.06, - "learning_rate": 0.0020304645083450636, - "loss": 1.3601, + "epoch": 8.12, + "learning_rate": 0.0024703908989769843, + "loss": 1.4566, "step": 20200 }, { - "epoch": 4.08, - "learning_rate": 0.0020405188015282525, - "loss": 1.3539, + "epoch": 8.16, + "learning_rate": 0.0024696265809711748, + "loss": 1.4524, "step": 20300 }, { - "epoch": 4.1, - "learning_rate": 0.002050573094711442, - "loss": 1.3579, + "epoch": 8.2, + "learning_rate": 0.0024688526446488453, + "loss": 1.4551, "step": 20400 }, { - "epoch": 4.12, - "learning_rate": 0.002060627387894631, - "loss": 1.3596, + "epoch": 8.24, + "learning_rate": 0.0024680690961134738, + "loss": 1.4555, "step": 20500 }, { - "epoch": 4.14, - "learning_rate": 0.0020706816810778204, - "loss": 1.3603, + "epoch": 8.28, + "learning_rate": 0.0024672759415443435, + "loss": 1.4587, "step": 20600 }, { - "epoch": 4.16, - "learning_rate": 0.0020807359742610093, - "loss": 1.3594, + "epoch": 8.32, + "learning_rate": 0.002466473187196493, + "loss": 1.4553, "step": 20700 }, { - "epoch": 4.18, - "learning_rate": 0.0020907902674441987, - "loss": 1.3609, + "epoch": 8.36, + "learning_rate": 0.0024656608394006676, + "loss": 1.4581, "step": 20800 }, { - "epoch": 4.2, - "learning_rate": 0.002100844560627388, - "loss": 1.3648, + "epoch": 8.4, + "learning_rate": 0.00246483890456327, + "loss": 1.4556, "step": 20900 }, { - "epoch": 4.22, - "learning_rate": 0.0021108988538105773, - "loss": 1.3616, + "epoch": 8.44, + "learning_rate": 0.002464007389166307, + "loss": 1.4559, "step": 21000 }, { - "epoch": 4.24, - "learning_rate": 0.0021209531469937666, - "loss": 1.3631, + "epoch": 8.48, + "learning_rate": 0.0024631662997673435, + "loss": 1.455, "step": 21100 }, { - "epoch": 4.26, - "learning_rate": 0.0021310074401769555, - "loss": 1.3639, + "epoch": 8.53, + "learning_rate": 0.0024623156429994446, + "loss": 1.4568, "step": 21200 }, { - "epoch": 4.28, - "learning_rate": 0.002140961190428313, - "loss": 1.3646, + "epoch": 8.57, + "learning_rate": 0.002461455425571128, + "loss": 1.4531, "step": 21300 }, { - "epoch": 4.3, - "learning_rate": 0.0021510154836115025, - "loss": 1.3593, + "epoch": 8.61, + "learning_rate": 0.0024605856542663095, + "loss": 1.4568, "step": 21400 }, { - "epoch": 4.32, - "learning_rate": 0.0021610697767946913, - "loss": 1.3604, + "epoch": 8.65, + "learning_rate": 0.002459715176362497, + "loss": 1.455, "step": 21500 }, { - "epoch": 4.34, - "learning_rate": 0.0021711240699778807, - "loss": 1.362, + "epoch": 8.69, + "learning_rate": 0.0024588264133239973, + "loss": 1.4526, "step": 21600 }, { - "epoch": 4.36, - "learning_rate": 0.00218117836316107, - "loss": 1.3583, + "epoch": 8.73, + "learning_rate": 0.002457928117142122, + "loss": 1.4506, "step": 21700 }, { - "epoch": 4.38, - "learning_rate": 0.0021912326563442593, - "loss": 1.364, + "epoch": 8.77, + "learning_rate": 0.0024570202949010856, + "loss": 1.4506, "step": 21800 }, { - "epoch": 4.4, - "learning_rate": 0.002201286949527448, - "loss": 1.3625, + "epoch": 8.81, + "learning_rate": 0.0024561029537602283, + "loss": 1.4561, "step": 21900 }, { - "epoch": 4.42, - "learning_rate": 0.0022113412427106375, - "loss": 1.367, + "epoch": 8.85, + "learning_rate": 0.0024551761009539595, + "loss": 1.4556, "step": 22000 }, { - "epoch": 4.44, - "learning_rate": 0.002221395535893827, - "loss": 1.3646, + "epoch": 8.89, + "learning_rate": 0.0024542397437916992, + "loss": 1.4535, "step": 22100 }, { - "epoch": 4.46, - "learning_rate": 0.002231449829077016, - "loss": 1.3625, + "epoch": 8.93, + "learning_rate": 0.0024532938896578227, + "loss": 1.4519, "step": 22200 }, { - "epoch": 4.48, - "learning_rate": 0.002241504122260205, - "loss": 1.3617, + "epoch": 8.97, + "learning_rate": 0.0024523385460115997, + "loss": 1.454, "step": 22300 }, { - "epoch": 4.5, - "learning_rate": 0.0022515584154433944, - "loss": 1.3616, + "epoch": 9.0, + "eval_accuracy": 0.4060033334938244, + "eval_loss": 1.4071589708328247, + "eval_runtime": 18.1007, + "eval_samples_per_second": 4395.863, + "eval_steps_per_second": 17.182, + "step": 22380 + }, + { + "epoch": 9.01, + "learning_rate": 0.002451373720387138, + "loss": 1.4531, "step": 22400 }, { - "epoch": 4.52, - "learning_rate": 0.0022616127086265837, - "loss": 1.3624, + "epoch": 9.05, + "learning_rate": 0.0024503994203933224, + "loss": 1.4438, "step": 22500 }, { - "epoch": 4.54, - "learning_rate": 0.002271667001809773, - "loss": 1.3635, + "epoch": 9.09, + "learning_rate": 0.0024494156537137554, + "loss": 1.4447, "step": 22600 }, { - "epoch": 4.56, - "learning_rate": 0.002281721294992962, - "loss": 1.36, + "epoch": 9.13, + "learning_rate": 0.002448422428106696, + "loss": 1.4519, "step": 22700 }, { - "epoch": 4.58, - "learning_rate": 0.0022917755881761512, - "loss": 1.3599, + "epoch": 9.17, + "learning_rate": 0.002447419751404999, + "loss": 1.446, "step": 22800 }, { - "epoch": 4.6, - "learning_rate": 0.0023018298813593406, - "loss": 1.3674, + "epoch": 9.21, + "learning_rate": 0.0024464076315160546, + "loss": 1.4473, "step": 22900 }, { - "epoch": 4.62, - "learning_rate": 0.00231188417454253, - "loss": 1.3667, + "epoch": 9.25, + "learning_rate": 0.002445386076421723, + "loss": 1.4464, "step": 23000 }, { - "epoch": 4.64, - "learning_rate": 0.002321938467725719, - "loss": 1.3555, + "epoch": 9.29, + "learning_rate": 0.002444355094178273, + "loss": 1.4468, "step": 23100 }, { - "epoch": 4.66, - "learning_rate": 0.002331992760908908, - "loss": 1.3594, + "epoch": 9.33, + "learning_rate": 0.002443314692916319, + "loss": 1.4458, "step": 23200 }, { - "epoch": 4.69, - "learning_rate": 0.0023420470540920974, - "loss": 1.3629, + "epoch": 9.37, + "learning_rate": 0.002442264880840757, + "loss": 1.4466, "step": 23300 }, { - "epoch": 4.71, - "learning_rate": 0.0023521013472752868, - "loss": 1.3641, + "epoch": 9.41, + "learning_rate": 0.002441205666230699, + "loss": 1.4503, "step": 23400 }, { - "epoch": 4.73, - "learning_rate": 0.0023621556404584756, - "loss": 1.3594, + "epoch": 9.45, + "learning_rate": 0.002440137057439408, + "loss": 1.4497, "step": 23500 }, { - "epoch": 4.75, - "learning_rate": 0.002372209933641665, - "loss": 1.3594, + "epoch": 9.49, + "learning_rate": 0.002439069889271312, + "loss": 1.4424, "step": 23600 }, { - "epoch": 4.77, - "learning_rate": 0.0023821636838930226, - "loss": 1.3639, + "epoch": 9.53, + "learning_rate": 0.0024379826112038167, + "loss": 1.4455, "step": 23700 }, { - "epoch": 4.79, - "learning_rate": 0.0023922179770762115, - "loss": 1.3609, + "epoch": 9.57, + "learning_rate": 0.002436885964373002, + "loss": 1.4484, "step": 23800 }, { - "epoch": 4.81, - "learning_rate": 0.002402272270259401, - "loss": 1.3605, + "epoch": 9.61, + "learning_rate": 0.0024357799574273318, + "loss": 1.4497, "step": 23900 }, { - "epoch": 4.83, - "learning_rate": 0.00241232656344259, - "loss": 1.3608, + "epoch": 9.65, + "learning_rate": 0.002434664599089086, + "loss": 1.4468, "step": 24000 }, { - "epoch": 4.85, - "learning_rate": 0.0024223808566257794, - "loss": 1.3646, + "epoch": 9.69, + "learning_rate": 0.0024335398981542924, + "loss": 1.4449, "step": 24100 }, { - "epoch": 4.87, - "learning_rate": 0.0024324351498089683, - "loss": 1.3599, + "epoch": 9.73, + "learning_rate": 0.0024324058634926583, + "loss": 1.4449, "step": 24200 }, { - "epoch": 4.89, - "learning_rate": 0.0024424894429921577, - "loss": 1.3586, + "epoch": 9.77, + "learning_rate": 0.0024312625040474973, + "loss": 1.4472, "step": 24300 }, { - "epoch": 4.91, - "learning_rate": 0.002452543736175347, - "loss": 1.3608, + "epoch": 9.81, + "learning_rate": 0.002430109828835662, + "loss": 1.449, "step": 24400 }, { - "epoch": 4.93, - "learning_rate": 0.0024625980293585363, - "loss": 1.3615, + "epoch": 9.85, + "learning_rate": 0.0024289478469474725, + "loss": 1.4505, "step": 24500 }, { - "epoch": 4.95, - "learning_rate": 0.002472652322541725, - "loss": 1.3629, + "epoch": 9.89, + "learning_rate": 0.002427776567546643, + "loss": 1.4471, "step": 24600 }, { - "epoch": 4.97, - "learning_rate": 0.0024827066157249145, - "loss": 1.3638, + "epoch": 9.93, + "learning_rate": 0.0024265959998702098, + "loss": 1.4465, "step": 24700 }, { - "epoch": 4.99, - "learning_rate": 0.002492760908908104, - "loss": 1.3652, + "epoch": 9.97, + "learning_rate": 0.0024254061532284605, + "loss": 1.4466, "step": 24800 }, { - "epoch": 5.0, - "eval_accuracy": 0.39963747273250233, - "eval_loss": 1.3215824365615845, - "eval_runtime": 19.5099, - "eval_samples_per_second": 4078.343, - "eval_steps_per_second": 15.941, - "step": 24866 + "epoch": 10.0, + "eval_accuracy": 0.406620929073334, + "eval_loss": 1.3988122940063477, + "eval_runtime": 18.1787, + "eval_samples_per_second": 4376.995, + "eval_steps_per_second": 17.108, + "step": 24867 }, { - "epoch": 5.01, - "learning_rate": 0.002499999903431858, - "loss": 1.3594, + "epoch": 10.01, + "learning_rate": 0.002424207037004859, + "loss": 1.4474, "step": 24900 }, { - "epoch": 5.03, - "learning_rate": 0.0024999979819234353, - "loss": 1.3548, + "epoch": 10.05, + "learning_rate": 0.002422998660655971, + "loss": 1.4368, "step": 25000 }, { - "epoch": 5.05, - "learning_rate": 0.0024999935969463615, - "loss": 1.3572, + "epoch": 10.09, + "learning_rate": 0.0024217810337113905, + "loss": 1.4385, "step": 25100 }, { - "epoch": 5.07, - "learning_rate": 0.0024999867485092793, - "loss": 1.3579, + "epoch": 10.13, + "learning_rate": 0.0024205541657736644, + "loss": 1.4393, "step": 25200 }, { - "epoch": 5.09, - "learning_rate": 0.002499977436625685, - "loss": 1.3553, + "epoch": 10.17, + "learning_rate": 0.002419318066518217, + "loss": 1.4421, "step": 25300 }, { - "epoch": 5.11, - "learning_rate": 0.0024999656613139305, - "loss": 1.3558, + "epoch": 10.21, + "learning_rate": 0.002418072745693272, + "loss": 1.4386, "step": 25400 }, { - "epoch": 5.13, - "learning_rate": 0.0024999514225972227, - "loss": 1.3556, + "epoch": 10.25, + "learning_rate": 0.002416818213119779, + "loss": 1.4415, "step": 25500 }, { - "epoch": 5.15, - "learning_rate": 0.002499934720503622, - "loss": 1.356, + "epoch": 10.29, + "learning_rate": 0.002415554478691332, + "loss": 1.4401, "step": 25600 }, { - "epoch": 5.17, - "learning_rate": 0.0024999155550660458, - "loss": 1.3537, + "epoch": 10.33, + "learning_rate": 0.002414281552374095, + "loss": 1.4384, "step": 25700 }, { - "epoch": 5.19, - "learning_rate": 0.002499893926322264, - "loss": 1.3582, + "epoch": 10.37, + "learning_rate": 0.002412999444206721, + "loss": 1.441, "step": 25800 }, { - "epoch": 5.21, - "learning_rate": 0.0024998698343149024, - "loss": 1.3528, + "epoch": 10.42, + "learning_rate": 0.0024117081643002737, + "loss": 1.4446, "step": 25900 }, { - "epoch": 5.23, - "learning_rate": 0.0024998432790914404, - "loss": 1.3531, + "epoch": 10.46, + "learning_rate": 0.002410407722838148, + "loss": 1.4396, "step": 26000 }, { - "epoch": 5.25, - "learning_rate": 0.0024998145630805626, - "loss": 1.3578, + "epoch": 10.5, + "learning_rate": 0.002409098130075989, + "loss": 1.4394, "step": 26100 }, { - "epoch": 5.27, - "learning_rate": 0.002499783106217526, - "loss": 1.3548, + "epoch": 10.54, + "learning_rate": 0.0024077793963416115, + "loss": 1.4417, "step": 26200 }, { - "epoch": 5.29, - "learning_rate": 0.0024997491863093103, - "loss": 1.3516, + "epoch": 10.58, + "learning_rate": 0.002406451532034919, + "loss": 1.4395, "step": 26300 }, { - "epoch": 5.31, - "learning_rate": 0.002499712803422766, - "loss": 1.3549, + "epoch": 10.62, + "learning_rate": 0.0024051145476278214, + "loss": 1.4405, "step": 26400 }, { - "epoch": 5.33, - "learning_rate": 0.0024996739576295945, - "loss": 1.3539, + "epoch": 10.66, + "learning_rate": 0.0024037684536641515, + "loss": 1.4384, "step": 26500 }, { - "epoch": 5.35, - "learning_rate": 0.0024996326490063525, - "loss": 1.356, + "epoch": 10.7, + "learning_rate": 0.0024024132607595823, + "loss": 1.4413, "step": 26600 }, { - "epoch": 5.37, - "learning_rate": 0.0024995888776344504, - "loss": 1.3513, + "epoch": 10.74, + "learning_rate": 0.0024010489796015455, + "loss": 1.4423, "step": 26700 }, { - "epoch": 5.39, - "learning_rate": 0.002499542643600152, - "loss": 1.354, + "epoch": 10.78, + "learning_rate": 0.0023996756209491432, + "loss": 1.4449, "step": 26800 }, { - "epoch": 5.41, - "learning_rate": 0.0024994939469945737, - "loss": 1.3535, + "epoch": 10.82, + "learning_rate": 0.002398293195633067, + "loss": 1.4401, "step": 26900 }, { - "epoch": 5.43, - "learning_rate": 0.0024994427879136854, - "loss": 1.3483, + "epoch": 10.86, + "learning_rate": 0.002396901714555509, + "loss": 1.4412, "step": 27000 }, { - "epoch": 5.45, - "learning_rate": 0.002499389166458312, - "loss": 1.354, + "epoch": 10.9, + "learning_rate": 0.0023955011886900783, + "loss": 1.4428, "step": 27100 }, { - "epoch": 5.47, - "learning_rate": 0.0024993330827341276, - "loss": 1.3512, + "epoch": 10.94, + "learning_rate": 0.0023940916290817143, + "loss": 1.4398, "step": 27200 }, { - "epoch": 5.49, - "learning_rate": 0.0024992745368516618, - "loss": 1.3492, + "epoch": 10.98, + "learning_rate": 0.0023926730468465983, + "loss": 1.4389, "step": 27300 }, { - "epoch": 5.51, - "learning_rate": 0.0024992135289262953, - "loss": 1.3521, + "epoch": 11.0, + "eval_accuracy": 0.4070477054970078, + "eval_loss": 1.3937705755233765, + "eval_runtime": 18.2285, + "eval_samples_per_second": 4365.035, + "eval_steps_per_second": 17.061, + "step": 27354 + }, + { + "epoch": 11.02, + "learning_rate": 0.002391245453172067, + "loss": 1.4391, "step": 27400 }, { - "epoch": 5.53, - "learning_rate": 0.002499150059078261, - "loss": 1.3494, + "epoch": 11.06, + "learning_rate": 0.0023898232697688333, + "loss": 1.4313, "step": 27500 }, { - "epoch": 5.55, - "learning_rate": 0.0024990841274326442, - "loss": 1.3497, + "epoch": 11.1, + "learning_rate": 0.0023883922762806178, + "loss": 1.4304, "step": 27600 }, { - "epoch": 5.57, - "learning_rate": 0.0024990157341193814, - "loss": 1.3494, + "epoch": 11.14, + "learning_rate": 0.002386937895558928, + "loss": 1.4324, "step": 27700 }, { - "epoch": 5.59, - "learning_rate": 0.0024989448792732604, - "loss": 1.3487, + "epoch": 11.18, + "learning_rate": 0.0023854745486268304, + "loss": 1.4352, "step": 27800 }, { - "epoch": 5.61, - "learning_rate": 0.0024988715630339213, - "loss": 1.3468, + "epoch": 11.22, + "learning_rate": 0.002384002247024689, + "loss": 1.4361, "step": 27900 }, { - "epoch": 5.63, - "learning_rate": 0.002498795785545853, - "loss": 1.3465, + "epoch": 11.26, + "learning_rate": 0.0023825210023634864, + "loss": 1.4335, "step": 28000 }, { - "epoch": 5.65, - "learning_rate": 0.002498718341526211, - "loss": 1.3505, + "epoch": 11.3, + "learning_rate": 0.0023810308263247314, + "loss": 1.4349, "step": 28100 }, { - "epoch": 5.67, - "learning_rate": 0.00249863766660223, - "loss": 1.3512, + "epoch": 11.34, + "learning_rate": 0.00237953173066037, + "loss": 1.4324, "step": 28200 }, { - "epoch": 5.69, - "learning_rate": 0.0024985545308904788, - "loss": 1.3497, + "epoch": 11.38, + "learning_rate": 0.002378023727192691, + "loss": 1.4355, "step": 28300 }, { - "epoch": 5.71, - "learning_rate": 0.0024984689345547983, - "loss": 1.3495, + "epoch": 11.42, + "learning_rate": 0.00237650682781423, + "loss": 1.4348, "step": 28400 }, { - "epoch": 5.73, - "learning_rate": 0.002498380877763881, - "loss": 1.3484, + "epoch": 11.46, + "learning_rate": 0.002374981044487681, + "loss": 1.4363, "step": 28500 }, { - "epoch": 5.75, - "learning_rate": 0.0024982903606912666, - "loss": 1.3465, + "epoch": 11.5, + "learning_rate": 0.0023734463892457975, + "loss": 1.4323, "step": 28600 }, { - "epoch": 5.77, - "learning_rate": 0.0024981973835153442, - "loss": 1.3494, + "epoch": 11.54, + "learning_rate": 0.0023719028741913013, + "loss": 1.436, "step": 28700 }, { - "epoch": 5.79, - "learning_rate": 0.0024981019464193513, - "loss": 1.3448, + "epoch": 11.58, + "learning_rate": 0.0023703505114967835, + "loss": 1.437, "step": 28800 }, { - "epoch": 5.81, - "learning_rate": 0.002498004049591373, - "loss": 1.3447, + "epoch": 11.62, + "learning_rate": 0.0023687893134046105, + "loss": 1.4345, "step": 28900 }, { - "epoch": 5.83, - "learning_rate": 0.002497903693224343, - "loss": 1.3489, + "epoch": 11.66, + "learning_rate": 0.002367219292226828, + "loss": 1.4345, "step": 29000 }, { - "epoch": 5.85, - "learning_rate": 0.0024978008775160404, - "loss": 1.3491, + "epoch": 11.7, + "learning_rate": 0.002365640460345062, + "loss": 1.4343, "step": 29100 }, { - "epoch": 5.87, - "learning_rate": 0.002497695602669093, - "loss": 1.3511, + "epoch": 11.74, + "learning_rate": 0.0023640528302104223, + "loss": 1.4328, "step": 29200 }, { - "epoch": 5.89, - "learning_rate": 0.002497587868890974, - "loss": 1.344, + "epoch": 11.78, + "learning_rate": 0.002362456414343405, + "loss": 1.4336, "step": 29300 }, { - "epoch": 5.91, - "learning_rate": 0.002497477676394002, - "loss": 1.3442, + "epoch": 11.82, + "learning_rate": 0.0023608512253337913, + "loss": 1.4337, "step": 29400 }, { - "epoch": 5.93, - "learning_rate": 0.002497365025395343, - "loss": 1.3518, + "epoch": 11.86, + "learning_rate": 0.002359237275840552, + "loss": 1.4344, "step": 29500 }, { - "epoch": 5.95, - "learning_rate": 0.0024972499161170065, - "loss": 1.3396, + "epoch": 11.9, + "learning_rate": 0.002357614578591744, + "loss": 1.4398, "step": 29600 }, { - "epoch": 5.97, - "learning_rate": 0.0024971323487858474, - "loss": 1.3427, + "epoch": 11.94, + "learning_rate": 0.0023559831463844123, + "loss": 1.4358, "step": 29700 }, { - "epoch": 5.99, - "learning_rate": 0.002497012323633565, - "loss": 1.3434, + "epoch": 11.98, + "learning_rate": 0.002354342992084487, + "loss": 1.4345, "step": 29800 }, { - "epoch": 6.0, - "eval_accuracy": 0.4013078853657854, - "eval_loss": 1.3029085397720337, - "eval_runtime": 20.0428, - "eval_samples_per_second": 3969.911, - "eval_steps_per_second": 15.517, - "step": 29839 + "epoch": 12.0, + "eval_accuracy": 0.4074224403327263, + "eval_loss": 1.3885866403579712, + "eval_runtime": 18.0852, + "eval_samples_per_second": 4399.614, + "eval_steps_per_second": 17.196, + "step": 29841 }, { - "epoch": 6.01, - "learning_rate": 0.002496889840896702, - "loss": 1.3447, + "epoch": 12.02, + "learning_rate": 0.002352694128626685, + "loss": 1.4304, "step": 29900 }, { - "epoch": 6.03, - "learning_rate": 0.0024967649008166455, - "loss": 1.3366, + "epoch": 12.06, + "learning_rate": 0.002351036569014404, + "loss": 1.4256, "step": 30000 }, { - "epoch": 6.05, - "learning_rate": 0.0024966375036396234, - "loss": 1.3399, + "epoch": 12.1, + "learning_rate": 0.0023493703263196236, + "loss": 1.4274, "step": 30100 }, { - "epoch": 6.07, - "learning_rate": 0.0024965076496167083, - "loss": 1.3375, + "epoch": 12.14, + "learning_rate": 0.0023476954136827997, + "loss": 1.4275, "step": 30200 }, { - "epoch": 6.09, - "learning_rate": 0.00249637667426921, - "loss": 1.3349, + "epoch": 12.18, + "learning_rate": 0.002346011844312762, + "loss": 1.4272, "step": 30300 }, { - "epoch": 6.11, - "learning_rate": 0.0024962419318890756, - "loss": 1.3401, + "epoch": 12.22, + "learning_rate": 0.0023443196314866096, + "loss": 1.4314, "step": 30400 }, { - "epoch": 6.13, - "learning_rate": 0.0024961047334426316, - "loss": 1.339, + "epoch": 12.27, + "learning_rate": 0.002342618788549607, + "loss": 1.4298, "step": 30500 }, { - "epoch": 6.15, - "learning_rate": 0.0024959650792002663, - "loss": 1.3457, + "epoch": 12.31, + "learning_rate": 0.0023409264661198655, + "loss": 1.4311, "step": 30600 }, { - "epoch": 6.17, - "learning_rate": 0.0024958229694372063, - "loss": 1.34, + "epoch": 12.35, + "learning_rate": 0.0023392084892342917, + "loss": 1.4278, "step": 30700 }, { - "epoch": 6.19, - "learning_rate": 0.00249567840443352, - "loss": 1.3389, + "epoch": 12.39, + "learning_rate": 0.0023374819225457665, + "loss": 1.432, "step": 30800 }, { - "epoch": 6.21, - "learning_rate": 0.0024955313844741115, - "loss": 1.3385, + "epoch": 12.43, + "learning_rate": 0.0023357467796704763, + "loss": 1.4299, "step": 30900 }, { - "epoch": 6.23, - "learning_rate": 0.002495381909848725, - "loss": 1.3409, + "epoch": 12.47, + "learning_rate": 0.002334003074292244, + "loss": 1.43, "step": 31000 }, { - "epoch": 6.25, - "learning_rate": 0.002495229980851942, - "loss": 1.3394, + "epoch": 12.51, + "learning_rate": 0.002332250820162418, + "loss": 1.431, "step": 31100 }, { - "epoch": 6.27, - "learning_rate": 0.002495075597783181, - "loss": 1.3392, + "epoch": 12.55, + "learning_rate": 0.0023304900310997653, + "loss": 1.4272, "step": 31200 }, { - "epoch": 6.29, - "learning_rate": 0.0024949187609466963, - "loss": 1.3401, + "epoch": 12.59, + "learning_rate": 0.0023287207209903606, + "loss": 1.4301, "step": 31300 }, { - "epoch": 6.31, - "learning_rate": 0.002494759470651578, - "loss": 1.3375, + "epoch": 12.63, + "learning_rate": 0.0023269429037874783, + "loss": 1.4328, "step": 31400 }, { - "epoch": 6.33, - "learning_rate": 0.0024945977272117534, - "loss": 1.342, + "epoch": 12.67, + "learning_rate": 0.002325156593511483, + "loss": 1.4304, "step": 31500 }, { - "epoch": 6.35, - "learning_rate": 0.002494433530945982, - "loss": 1.335, + "epoch": 12.71, + "learning_rate": 0.0023233618042497167, + "loss": 1.4286, "step": 31600 }, { - "epoch": 6.37, - "learning_rate": 0.002494266882177858, - "loss": 1.3347, + "epoch": 12.75, + "learning_rate": 0.0023215585501563905, + "loss": 1.4319, "step": 31700 }, { - "epoch": 6.39, - "learning_rate": 0.0024940977812358094, - "loss": 1.3392, + "epoch": 12.79, + "learning_rate": 0.002319746845452471, + "loss": 1.4311, "step": 31800 }, { - "epoch": 6.41, - "learning_rate": 0.002493926228453097, - "loss": 1.3365, + "epoch": 12.83, + "learning_rate": 0.0023179267044255675, + "loss": 1.4293, "step": 31900 }, { - "epoch": 6.43, - "learning_rate": 0.0024937522241678133, - "loss": 1.3374, + "epoch": 12.87, + "learning_rate": 0.0023160981414298222, + "loss": 1.4296, "step": 32000 }, { - "epoch": 6.45, - "learning_rate": 0.0024935757687228814, - "loss": 1.3372, + "epoch": 12.91, + "learning_rate": 0.0023142611708857944, + "loss": 1.4288, "step": 32100 }, { - "epoch": 6.47, - "learning_rate": 0.002493396862466056, - "loss": 1.3375, + "epoch": 12.95, + "learning_rate": 0.002312415807280348, + "loss": 1.4231, "step": 32200 }, { - "epoch": 6.49, - "learning_rate": 0.0024932173314456876, - "loss": 1.3388, + "epoch": 12.99, + "learning_rate": 0.0023105620651665366, + "loss": 1.4286, "step": 32300 }, { - "epoch": 6.51, - "learning_rate": 0.002493033549126893, - "loss": 1.3394, + "epoch": 13.0, + "eval_accuracy": 0.4075583857053441, + "eval_loss": 1.3839157819747925, + "eval_runtime": 18.2394, + "eval_samples_per_second": 4362.423, + "eval_steps_per_second": 17.051, + "step": 32327 + }, + { + "epoch": 13.03, + "learning_rate": 0.002308699959163489, + "loss": 1.4222, "step": 32400 }, { - "epoch": 6.53, - "learning_rate": 0.002492847317064799, - "loss": 1.3353, + "epoch": 13.07, + "learning_rate": 0.002306829503956295, + "loss": 1.4208, "step": 32500 }, { - "epoch": 6.56, - "learning_rate": 0.002492658635626429, - "loss": 1.334, + "epoch": 13.11, + "learning_rate": 0.0023049507142958872, + "loss": 1.4234, "step": 32600 }, { - "epoch": 6.58, - "learning_rate": 0.0024924675051836314, - "loss": 1.3385, + "epoch": 13.15, + "learning_rate": 0.0023030636049989265, + "loss": 1.4199, "step": 32700 }, { - "epoch": 6.6, - "learning_rate": 0.0024922739261130818, - "loss": 1.3378, + "epoch": 13.19, + "learning_rate": 0.002301168190947686, + "loss": 1.4248, "step": 32800 }, { - "epoch": 6.62, - "learning_rate": 0.002492077898796282, - "loss": 1.3403, + "epoch": 13.23, + "learning_rate": 0.00229926448708993, + "loss": 1.4215, "step": 32900 }, { - "epoch": 6.64, - "learning_rate": 0.002491879423619558, - "loss": 1.3408, + "epoch": 13.27, + "learning_rate": 0.002297352508438801, + "loss": 1.4243, "step": 33000 }, { - "epoch": 6.66, - "learning_rate": 0.002491678500974061, - "loss": 1.3345, + "epoch": 13.31, + "learning_rate": 0.0022954322700726964, + "loss": 1.421, "step": 33100 }, { - "epoch": 6.68, - "learning_rate": 0.002491475131255764, - "loss": 1.3331, + "epoch": 13.35, + "learning_rate": 0.002293503787135154, + "loss": 1.4235, "step": 33200 }, { - "epoch": 6.7, - "learning_rate": 0.002491269314865464, - "loss": 1.3382, + "epoch": 13.39, + "learning_rate": 0.0022915670748347304, + "loss": 1.4255, "step": 33300 }, { - "epoch": 6.72, - "learning_rate": 0.00249106105220878, - "loss": 1.3372, + "epoch": 13.43, + "learning_rate": 0.00228962214844488, + "loss": 1.4246, "step": 33400 }, { - "epoch": 6.74, - "learning_rate": 0.0024908503436961503, - "loss": 1.3308, + "epoch": 13.47, + "learning_rate": 0.0022876690233038367, + "loss": 1.4283, "step": 33500 }, { - "epoch": 6.76, - "learning_rate": 0.002490637189742836, - "loss": 1.3346, + "epoch": 13.51, + "learning_rate": 0.0022857077148144924, + "loss": 1.4174, "step": 33600 }, { - "epoch": 6.78, - "learning_rate": 0.002490421590768915, - "loss": 1.3356, + "epoch": 13.55, + "learning_rate": 0.0022837382384442747, + "loss": 1.4262, "step": 33700 }, { - "epoch": 6.8, - "learning_rate": 0.0024902035471992857, - "loss": 1.3314, + "epoch": 13.59, + "learning_rate": 0.002281780426315188, + "loss": 1.4258, "step": 33800 }, { - "epoch": 6.82, - "learning_rate": 0.002489983059463664, - "loss": 1.3342, + "epoch": 13.63, + "learning_rate": 0.0022797947421331574, + "loss": 1.422, "step": 33900 }, { - "epoch": 6.84, - "learning_rate": 0.002489760127996581, - "loss": 1.3349, + "epoch": 13.67, + "learning_rate": 0.00227780093670161, + "loss": 1.4279, "step": 34000 }, { - "epoch": 6.86, - "learning_rate": 0.0024895347532373864, - "loss": 1.336, + "epoch": 13.71, + "learning_rate": 0.0022757990257442533, + "loss": 1.4264, "step": 34100 }, { - "epoch": 6.88, - "learning_rate": 0.002489306935630243, - "loss": 1.3345, + "epoch": 13.75, + "learning_rate": 0.0022737890250487188, + "loss": 1.4249, "step": 34200 }, { - "epoch": 6.9, - "learning_rate": 0.0024890766756241293, - "loss": 1.3351, + "epoch": 13.79, + "learning_rate": 0.0022717709504664323, + "loss": 1.4252, "step": 34300 }, { - "epoch": 6.92, - "learning_rate": 0.0024888463127784766, - "loss": 1.3308, + "epoch": 13.83, + "learning_rate": 0.002269744817912497, + "loss": 1.4198, "step": 34400 }, { - "epoch": 6.94, - "learning_rate": 0.0024886111937531884, - "loss": 1.3369, + "epoch": 13.87, + "learning_rate": 0.0022677106433655597, + "loss": 1.4259, "step": 34500 }, { - "epoch": 6.96, - "learning_rate": 0.0024883736337000827, - "loss": 1.3351, + "epoch": 13.91, + "learning_rate": 0.00226566844286769, + "loss": 1.4243, "step": 34600 }, { - "epoch": 6.98, - "learning_rate": 0.0024881336330873373, - "loss": 1.333, + "epoch": 13.95, + "learning_rate": 0.002263618232524254, + "loss": 1.4211, "step": 34700 }, { - "epoch": 7.0, - "learning_rate": 0.0024878911923879405, - "loss": 1.3314, + "epoch": 13.99, + "learning_rate": 0.0022615600285037824, + "loss": 1.4259, "step": 34800 }, { - "epoch": 7.0, - "eval_accuracy": 0.4019466193393693, - "eval_loss": 1.2937954664230347, - "eval_runtime": 19.8606, - "eval_samples_per_second": 4006.315, - "eval_steps_per_second": 15.659, - "step": 34812 + "epoch": 14.0, + "eval_accuracy": 0.40785895242761616, + "eval_loss": 1.3789013624191284, + "eval_runtime": 18.1893, + "eval_samples_per_second": 4374.436, + "eval_steps_per_second": 17.098, + "step": 34814 }, { - "epoch": 7.02, - "learning_rate": 0.0024876463120796894, - "loss": 1.3277, + "epoch": 14.03, + "learning_rate": 0.002259493847037849, + "loss": 1.4158, "step": 34900 }, { - "epoch": 7.04, - "learning_rate": 0.002487398992645188, - "loss": 1.3304, + "epoch": 14.07, + "learning_rate": 0.002257419704420939, + "loss": 1.4142, "step": 35000 }, { - "epoch": 7.06, - "learning_rate": 0.002487149234571848, - "loss": 1.327, + "epoch": 14.11, + "learning_rate": 0.002255337617010322, + "loss": 1.4167, "step": 35100 }, { - "epoch": 7.08, - "learning_rate": 0.002486897038351888, - "loss": 1.3317, + "epoch": 14.16, + "learning_rate": 0.0022532476012259205, + "loss": 1.4164, "step": 35200 }, { - "epoch": 7.1, - "learning_rate": 0.0024866424044823303, - "loss": 1.3264, + "epoch": 14.2, + "learning_rate": 0.0022511496735501853, + "loss": 1.4176, "step": 35300 }, { - "epoch": 7.12, - "learning_rate": 0.0024863853334650015, - "loss": 1.3287, + "epoch": 14.24, + "learning_rate": 0.0022490438505279606, + "loss": 1.4169, "step": 35400 }, { - "epoch": 7.14, - "learning_rate": 0.002486125825806532, - "loss": 1.3266, + "epoch": 14.28, + "learning_rate": 0.0022469301487663563, + "loss": 1.4199, "step": 35500 }, { - "epoch": 7.16, - "learning_rate": 0.002485863882018354, - "loss": 1.3311, + "epoch": 14.32, + "learning_rate": 0.002244808584934615, + "loss": 1.4194, "step": 35600 }, { - "epoch": 7.18, - "learning_rate": 0.0024855995026166994, - "loss": 1.3303, + "epoch": 14.36, + "learning_rate": 0.0022426791757639846, + "loss": 1.418, "step": 35700 }, { - "epoch": 7.2, - "learning_rate": 0.0024853326881226026, - "loss": 1.328, + "epoch": 14.4, + "learning_rate": 0.00224054193804758, + "loss": 1.4175, "step": 35800 }, { - "epoch": 7.22, - "learning_rate": 0.002485063439061895, - "loss": 1.3339, + "epoch": 14.44, + "learning_rate": 0.0022383968886402566, + "loss": 1.4203, "step": 35900 }, { - "epoch": 7.24, - "learning_rate": 0.0024847917559652067, - "loss": 1.3296, + "epoch": 14.48, + "learning_rate": 0.00223626561142873, + "loss": 1.422, "step": 36000 }, { - "epoch": 7.26, - "learning_rate": 0.002484517639367966, - "loss": 1.3294, + "epoch": 14.52, + "learning_rate": 0.0022341050671441437, + "loss": 1.4194, "step": 36100 }, { - "epoch": 7.28, - "learning_rate": 0.0024842410898103947, - "loss": 1.3311, + "epoch": 14.56, + "learning_rate": 0.0022319367619316037, + "loss": 1.4161, "step": 36200 }, { - "epoch": 7.3, - "learning_rate": 0.0024839621078375103, - "loss": 1.3282, + "epoch": 14.6, + "learning_rate": 0.002229760712890972, + "loss": 1.4183, "step": 36300 }, { - "epoch": 7.32, - "learning_rate": 0.002483680693999126, - "loss": 1.3248, + "epoch": 14.64, + "learning_rate": 0.00222757693718318, + "loss": 1.4212, "step": 36400 }, { - "epoch": 7.34, - "learning_rate": 0.0024833996993344934, - "loss": 1.3279, + "epoch": 14.68, + "learning_rate": 0.0022253854520300938, + "loss": 1.42, "step": 36500 }, { - "epoch": 7.36, - "learning_rate": 0.002483113447738441, - "loss": 1.3267, + "epoch": 14.72, + "learning_rate": 0.0022231862747143795, + "loss": 1.4214, "step": 36600 }, { - "epoch": 7.38, - "learning_rate": 0.0024828247659494087, - "loss": 1.3265, + "epoch": 14.76, + "learning_rate": 0.0022209794225793644, + "loss": 1.4172, "step": 36700 }, { - "epoch": 7.4, - "learning_rate": 0.0024825336545363243, - "loss": 1.3287, + "epoch": 14.8, + "learning_rate": 0.002218764913028901, + "loss": 1.4243, "step": 36800 }, { - "epoch": 7.42, - "learning_rate": 0.0024822401140729027, - "loss": 1.3274, + "epoch": 14.84, + "learning_rate": 0.002216542763527233, + "loss": 1.4162, "step": 36900 }, { - "epoch": 7.44, - "learning_rate": 0.0024819441451376482, - "loss": 1.3317, + "epoch": 14.88, + "learning_rate": 0.0022143129915988525, + "loss": 1.4195, "step": 37000 }, { - "epoch": 7.46, - "learning_rate": 0.00248164574831385, - "loss": 1.3274, + "epoch": 14.92, + "learning_rate": 0.0022120756148283644, + "loss": 1.4206, "step": 37100 }, { - "epoch": 7.48, - "learning_rate": 0.002481344924189581, - "loss": 1.3275, + "epoch": 14.96, + "learning_rate": 0.002209830650860349, + "loss": 1.4193, "step": 37200 }, { - "epoch": 7.5, - "learning_rate": 0.0024810416733576997, - "loss": 1.3314, + "epoch": 15.0, + "learning_rate": 0.0022075781173992192, + "loss": 1.4177, "step": 37300 }, { - "epoch": 7.52, - "learning_rate": 0.002480735996415845, - "loss": 1.3308, + "epoch": 15.0, + "eval_accuracy": 0.40866896027279703, + "eval_loss": 1.3724240064620972, + "eval_runtime": 18.1531, + "eval_samples_per_second": 4383.159, + "eval_steps_per_second": 17.132, + "step": 37301 + }, + { + "epoch": 15.04, + "learning_rate": 0.0022053180322090856, + "loss": 1.4072, "step": 37400 }, { - "epoch": 7.54, - "learning_rate": 0.00248042789396644, - "loss": 1.3268, + "epoch": 15.08, + "learning_rate": 0.002203050413113611, + "loss": 1.4091, "step": 37500 }, { - "epoch": 7.56, - "learning_rate": 0.002480117366616685, - "loss": 1.3238, + "epoch": 15.12, + "learning_rate": 0.0022007752779958753, + "loss": 1.4126, "step": 37600 }, { - "epoch": 7.58, - "learning_rate": 0.002479804414978561, - "loss": 1.3264, + "epoch": 15.16, + "learning_rate": 0.0021984926447982302, + "loss": 1.4133, "step": 37700 }, { - "epoch": 7.6, - "learning_rate": 0.0024794890396688256, - "loss": 1.3302, + "epoch": 15.2, + "learning_rate": 0.00219620253152216, + "loss": 1.4127, "step": 37800 }, { - "epoch": 7.62, - "learning_rate": 0.002479171241309015, - "loss": 1.3292, + "epoch": 15.24, + "learning_rate": 0.002193904956228139, + "loss": 1.4174, "step": 37900 }, { - "epoch": 7.64, - "learning_rate": 0.002478851020525438, - "loss": 1.3292, + "epoch": 15.28, + "learning_rate": 0.0021915999370354894, + "loss": 1.4141, "step": 38000 }, { - "epoch": 7.66, - "learning_rate": 0.0024785283779491787, - "loss": 1.3286, + "epoch": 15.32, + "learning_rate": 0.002189287492122236, + "loss": 1.4127, "step": 38100 }, { - "epoch": 7.68, - "learning_rate": 0.0024782033142160946, - "loss": 1.3253, + "epoch": 15.36, + "learning_rate": 0.0021869676397249685, + "loss": 1.4144, "step": 38200 }, { - "epoch": 7.7, - "learning_rate": 0.002477875829966814, - "loss": 1.3284, + "epoch": 15.4, + "learning_rate": 0.0021846403981386903, + "loss": 1.4158, "step": 38300 }, { - "epoch": 7.72, - "learning_rate": 0.0024775459258467355, - "loss": 1.3302, + "epoch": 15.44, + "learning_rate": 0.002182305785716681, + "loss": 1.4143, "step": 38400 }, { - "epoch": 7.74, - "learning_rate": 0.00247721693771243, - "loss": 1.3224, + "epoch": 15.48, + "learning_rate": 0.002179963820870347, + "loss": 1.4132, "step": 38500 }, { - "epoch": 7.76, - "learning_rate": 0.0024768822199884265, - "loss": 1.3232, + "epoch": 15.52, + "learning_rate": 0.0021776145220690785, + "loss": 1.418, "step": 38600 }, { - "epoch": 7.78, - "learning_rate": 0.00247654508435181, - "loss": 1.3267, + "epoch": 15.56, + "learning_rate": 0.0021752579078401038, + "loss": 1.4117, "step": 38700 }, { - "epoch": 7.8, - "learning_rate": 0.002476205531467, - "loss": 1.3268, + "epoch": 15.6, + "learning_rate": 0.002172893996768341, + "loss": 1.4159, "step": 38800 }, { - "epoch": 7.82, - "learning_rate": 0.0024758635620031806, - "loss": 1.319, + "epoch": 15.64, + "learning_rate": 0.002170522807496255, + "loss": 1.4136, "step": 38900 }, { - "epoch": 7.84, - "learning_rate": 0.0024755191766342964, - "loss": 1.3272, + "epoch": 15.68, + "learning_rate": 0.0021681443587237086, + "loss": 1.4158, "step": 39000 }, { - "epoch": 7.86, - "learning_rate": 0.0024751723760390552, - "loss": 1.3269, + "epoch": 15.72, + "learning_rate": 0.002165758669207814, + "loss": 1.4137, "step": 39100 }, { - "epoch": 7.88, - "learning_rate": 0.0024748231609009247, - "loss": 1.3272, + "epoch": 15.76, + "learning_rate": 0.0021633657577627857, + "loss": 1.4131, "step": 39200 }, { - "epoch": 7.9, - "learning_rate": 0.0024744715319081293, - "loss": 1.3293, + "epoch": 15.8, + "learning_rate": 0.0021609656432597935, + "loss": 1.415, "step": 39300 }, { - "epoch": 7.92, - "learning_rate": 0.002474117489753652, - "loss": 1.3235, + "epoch": 15.84, + "learning_rate": 0.002158558344626812, + "loss": 1.4097, "step": 39400 }, { - "epoch": 7.94, - "learning_rate": 0.002473761035135232, - "loss": 1.3279, + "epoch": 15.88, + "learning_rate": 0.002156143880848472, + "loss": 1.4134, "step": 39500 }, { - "epoch": 7.96, - "learning_rate": 0.0024734021687553617, - "loss": 1.3226, + "epoch": 15.92, + "learning_rate": 0.0021537465223752917, + "loss": 1.4143, "step": 39600 }, { - "epoch": 7.98, - "learning_rate": 0.002473040891321287, - "loss": 1.3244, + "epoch": 15.96, + "learning_rate": 0.002151317856661352, + "loss": 1.4132, "step": 39700 }, { - "epoch": 8.0, - "eval_accuracy": 0.4026586923819002, - "eval_loss": 1.2859399318695068, - "eval_runtime": 19.8804, - "eval_samples_per_second": 4002.332, - "eval_steps_per_second": 15.644, - "step": 39786 + "epoch": 16.0, + "eval_accuracy": 0.40897395230016736, + "eval_loss": 1.3687959909439087, + "eval_runtime": 21.4564, + "eval_samples_per_second": 3708.364, + "eval_steps_per_second": 14.495, + "step": 39788 }, { - "epoch": 8.0, - "learning_rate": 0.002472677203545006, - "loss": 1.3265, + "epoch": 16.0, + "learning_rate": 0.0021488820829025676, + "loss": 1.4126, "step": 39800 }, { - "epoch": 8.02, - "learning_rate": 0.002472311106143266, - "loss": 1.3222, + "epoch": 16.05, + "learning_rate": 0.0021464392203081324, + "loss": 1.4066, "step": 39900 }, { - "epoch": 8.04, - "learning_rate": 0.0024719425998375646, - "loss": 1.3229, + "epoch": 16.09, + "learning_rate": 0.0021439892881431437, + "loss": 1.404, "step": 40000 }, { - "epoch": 8.06, - "learning_rate": 0.002471571685354145, - "loss": 1.3203, + "epoch": 16.13, + "learning_rate": 0.00214155691038776, + "loss": 1.4058, "step": 40100 }, { - "epoch": 8.08, - "learning_rate": 0.002471198363423998, - "loss": 1.324, + "epoch": 16.17, + "learning_rate": 0.0021390929673124554, + "loss": 1.407, "step": 40200 }, { - "epoch": 8.1, - "learning_rate": 0.002470822634782858, - "loss": 1.3205, + "epoch": 16.21, + "learning_rate": 0.002136622012601205, + "loss": 1.409, "step": 40300 }, { - "epoch": 8.12, - "learning_rate": 0.0024704445001712027, - "loss": 1.3261, + "epoch": 16.25, + "learning_rate": 0.002134144065740649, + "loss": 1.4099, "step": 40400 }, { - "epoch": 8.14, - "learning_rate": 0.002470067777636028, - "loss": 1.3198, + "epoch": 16.29, + "learning_rate": 0.0021316591462725697, + "loss": 1.4068, "step": 40500 }, { - "epoch": 8.16, - "learning_rate": 0.0024696848573647666, - "loss": 1.319, + "epoch": 16.33, + "learning_rate": 0.002129167273793738, + "loss": 1.4062, "step": 40600 }, { - "epoch": 8.18, - "learning_rate": 0.0024692995333652967, - "loss": 1.3251, + "epoch": 16.37, + "learning_rate": 0.002126668467955756, + "loss": 1.4082, "step": 40700 }, { - "epoch": 8.2, - "learning_rate": 0.0024689118063970067, - "loss": 1.3224, + "epoch": 16.41, + "learning_rate": 0.002124162748464908, + "loss": 1.4089, "step": 40800 }, { - "epoch": 8.22, - "learning_rate": 0.0024685216772240203, - "loss": 1.3196, + "epoch": 16.45, + "learning_rate": 0.0021216501350819973, + "loss": 1.4106, "step": 40900 }, { - "epoch": 8.24, - "learning_rate": 0.0024681291466151956, - "loss": 1.3172, + "epoch": 16.49, + "learning_rate": 0.0021191306476221975, + "loss": 1.4105, "step": 41000 }, { - "epoch": 8.26, - "learning_rate": 0.0024677342153441232, - "loss": 1.3211, + "epoch": 16.53, + "learning_rate": 0.002116604305954891, + "loss": 1.407, "step": 41100 }, { - "epoch": 8.28, - "learning_rate": 0.002467336884189125, - "loss": 1.3166, + "epoch": 16.57, + "learning_rate": 0.0021140711300035153, + "loss": 1.4089, "step": 41200 }, { - "epoch": 8.3, - "learning_rate": 0.0024669371539332526, - "loss": 1.3225, + "epoch": 16.61, + "learning_rate": 0.0021115311397454047, + "loss": 1.4097, "step": 41300 }, { - "epoch": 8.32, - "learning_rate": 0.0024665350253642855, - "loss": 1.3187, + "epoch": 16.65, + "learning_rate": 0.0021089843552116335, + "loss": 1.4132, "step": 41400 }, { - "epoch": 8.34, - "learning_rate": 0.00246613049927473, - "loss": 1.3188, + "epoch": 16.69, + "learning_rate": 0.0021064307964868572, + "loss": 1.4076, "step": 41500 }, { - "epoch": 8.36, - "learning_rate": 0.002465723576461818, - "loss": 1.3194, + "epoch": 16.73, + "learning_rate": 0.002103870483709154, + "loss": 1.4095, "step": 41600 }, { - "epoch": 8.38, - "learning_rate": 0.0024653142577275022, - "loss": 1.322, + "epoch": 16.77, + "learning_rate": 0.0021013034370698683, + "loss": 1.4111, "step": 41700 }, { - "epoch": 8.4, - "learning_rate": 0.002464902543878461, - "loss": 1.3192, + "epoch": 16.81, + "learning_rate": 0.002098729676813446, + "loss": 1.4066, "step": 41800 }, { - "epoch": 8.43, - "learning_rate": 0.00246448843572609, - "loss": 1.3229, + "epoch": 16.85, + "learning_rate": 0.002096149223237283, + "loss": 1.4072, "step": 41900 }, { - "epoch": 8.45, - "learning_rate": 0.0024640719340865043, - "loss": 1.3235, + "epoch": 16.89, + "learning_rate": 0.002093562096691557, + "loss": 1.4083, "step": 42000 }, { - "epoch": 8.47, - "learning_rate": 0.0024636530397805366, - "loss": 1.3213, + "epoch": 16.93, + "learning_rate": 0.0020909683175790723, + "loss": 1.4105, "step": 42100 }, { - "epoch": 8.49, - "learning_rate": 0.002463231753633735, - "loss": 1.3187, + "epoch": 16.97, + "learning_rate": 0.002088367906355097, + "loss": 1.4091, "step": 42200 }, { - "epoch": 8.51, - "learning_rate": 0.0024628080764763603, - "loss": 1.3226, + "epoch": 17.0, + "eval_accuracy": 0.40902723297355015, + "eval_loss": 1.3661445379257202, + "eval_runtime": 18.1703, + "eval_samples_per_second": 4379.015, + "eval_steps_per_second": 17.116, + "step": 42274 + }, + { + "epoch": 17.01, + "learning_rate": 0.002085760883527201, + "loss": 1.4069, "step": 42300 }, { - "epoch": 8.53, - "learning_rate": 0.002462382009143387, - "loss": 1.3192, + "epoch": 17.05, + "learning_rate": 0.002083147269655097, + "loss": 1.3983, "step": 42400 }, { - "epoch": 8.55, - "learning_rate": 0.002461953552474498, - "loss": 1.3222, + "epoch": 17.09, + "learning_rate": 0.0020805270853504745, + "loss": 1.4009, "step": 42500 }, { - "epoch": 8.57, - "learning_rate": 0.0024615270275859403, - "loss": 1.3172, + "epoch": 17.13, + "learning_rate": 0.0020779003512768402, + "loss": 1.3998, "step": 42600 }, { - "epoch": 8.59, - "learning_rate": 0.002461093818655314, - "loss": 1.3191, + "epoch": 17.17, + "learning_rate": 0.0020752670881493546, + "loss": 1.4026, "step": 42700 }, { - "epoch": 8.61, - "learning_rate": 0.0024606582229275095, - "loss": 1.3203, + "epoch": 17.21, + "learning_rate": 0.002072627316734667, + "loss": 1.4042, "step": 42800 }, { - "epoch": 8.63, - "learning_rate": 0.0024602202412609907, - "loss": 1.3171, + "epoch": 17.25, + "learning_rate": 0.0020699810578507544, + "loss": 1.4057, "step": 42900 }, { - "epoch": 8.65, - "learning_rate": 0.0024597798745189217, - "loss": 1.3239, + "epoch": 17.29, + "learning_rate": 0.002067328332366754, + "loss": 1.404, "step": 43000 }, { - "epoch": 8.67, - "learning_rate": 0.0024593371235691673, - "loss": 1.3189, + "epoch": 17.33, + "learning_rate": 0.002064669161202802, + "loss": 1.4059, "step": 43100 }, { - "epoch": 8.69, - "learning_rate": 0.0024588919892842924, - "loss": 1.3168, + "epoch": 17.37, + "learning_rate": 0.0020620035653298656, + "loss": 1.4007, "step": 43200 }, { - "epoch": 8.71, - "learning_rate": 0.002458444472541557, - "loss": 1.3214, + "epoch": 17.41, + "learning_rate": 0.0020593315657695807, + "loss": 1.4018, "step": 43300 }, { - "epoch": 8.73, - "learning_rate": 0.0024579945742229177, - "loss": 1.321, + "epoch": 17.45, + "learning_rate": 0.0020566531835940825, + "loss": 1.4044, "step": 43400 }, { - "epoch": 8.75, - "learning_rate": 0.0024575422952150235, - "loss": 1.3202, + "epoch": 17.49, + "learning_rate": 0.002053995318782436, + "loss": 1.4034, "step": 43500 }, { - "epoch": 8.77, - "learning_rate": 0.002457087636409217, - "loss": 1.3188, + "epoch": 17.53, + "learning_rate": 0.0020513042980923276, + "loss": 1.4043, "step": 43600 }, { - "epoch": 8.79, - "learning_rate": 0.0024566305987015298, - "loss": 1.3166, + "epoch": 17.57, + "learning_rate": 0.0020486069580922853, + "loss": 1.4042, "step": 43700 }, { - "epoch": 8.81, - "learning_rate": 0.002456171182992681, - "loss": 1.3244, + "epoch": 17.61, + "learning_rate": 0.0020459033200542877, + "loss": 1.4043, "step": 43800 }, { - "epoch": 8.83, - "learning_rate": 0.002455709390188077, - "loss": 1.3186, + "epoch": 17.65, + "learning_rate": 0.002043193405299981, + "loss": 1.4043, "step": 43900 }, { - "epoch": 8.85, - "learning_rate": 0.00245524522119781, - "loss": 1.3191, + "epoch": 17.69, + "learning_rate": 0.002040477235200511, + "loss": 1.4028, "step": 44000 }, { - "epoch": 8.87, - "learning_rate": 0.002454778676936653, - "loss": 1.3218, + "epoch": 17.73, + "learning_rate": 0.0020377548311763553, + "loss": 1.4029, "step": 44100 }, { - "epoch": 8.89, - "learning_rate": 0.0024543097583240615, - "loss": 1.3217, + "epoch": 17.77, + "learning_rate": 0.0020350262146971543, + "loss": 1.4024, "step": 44200 }, { - "epoch": 8.91, - "learning_rate": 0.0024538384662841704, - "loss": 1.3169, + "epoch": 17.81, + "learning_rate": 0.002032291407281541, + "loss": 1.4044, "step": 44300 }, { - "epoch": 8.93, - "learning_rate": 0.0024533648017457917, - "loss": 1.3201, + "epoch": 17.85, + "learning_rate": 0.0020295504304969716, + "loss": 1.4015, "step": 44400 }, { - "epoch": 8.95, - "learning_rate": 0.002452888765642413, - "loss": 1.3189, + "epoch": 17.89, + "learning_rate": 0.002026803305959556, + "loss": 1.4073, "step": 44500 }, { - "epoch": 8.97, - "learning_rate": 0.0024524103589121955, - "loss": 1.3206, + "epoch": 17.94, + "learning_rate": 0.002024050055333887, + "loss": 1.4031, "step": 44600 }, { - "epoch": 8.99, - "learning_rate": 0.002451934401988944, - "loss": 1.3163, + "epoch": 17.98, + "learning_rate": 0.00202129070033287, + "loss": 1.4017, "step": 44700 }, { - "epoch": 9.0, - "eval_accuracy": 0.4035565789440192, - "eval_loss": 1.2766544818878174, - "eval_runtime": 19.7806, - "eval_samples_per_second": 4022.534, - "eval_steps_per_second": 15.723, - "step": 44759 + "epoch": 18.0, + "eval_accuracy": 0.4093614320145688, + "eval_loss": 1.3598405122756958, + "eval_runtime": 18.1233, + "eval_samples_per_second": 4390.377, + "eval_steps_per_second": 17.16, + "step": 44761 }, { - "epoch": 9.01, - "learning_rate": 0.0024514512805208794, - "loss": 1.3158, + "epoch": 18.02, + "learning_rate": 0.002018525262717551, + "loss": 1.4004, "step": 44800 }, { - "epoch": 9.03, - "learning_rate": 0.0024509657912589406, - "loss": 1.3109, + "epoch": 18.06, + "learning_rate": 0.002015753764296944, + "loss": 1.3946, "step": 44900 }, { - "epoch": 9.05, - "learning_rate": 0.0024504779351599195, - "loss": 1.312, + "epoch": 18.1, + "learning_rate": 0.0020129762269278624, + "loss": 1.3968, "step": 45000 }, { - "epoch": 9.07, - "learning_rate": 0.002449987713185271, - "loss": 1.3134, + "epoch": 18.14, + "learning_rate": 0.0020101926725147414, + "loss": 1.395, "step": 45100 }, { - "epoch": 9.09, - "learning_rate": 0.002449495126301115, - "loss": 1.3156, + "epoch": 18.18, + "learning_rate": 0.002007403123009471, + "loss": 1.3964, "step": 45200 }, { - "epoch": 9.11, - "learning_rate": 0.002449000175478231, - "loss": 1.3188, + "epoch": 18.22, + "learning_rate": 0.00200460760041122, + "loss": 1.3958, "step": 45300 }, { - "epoch": 9.13, - "learning_rate": 0.0024485028616920562, - "loss": 1.3159, + "epoch": 18.26, + "learning_rate": 0.0020018061267662608, + "loss": 1.3974, "step": 45400 }, { - "epoch": 9.15, - "learning_rate": 0.0024480031859226863, - "loss": 1.3154, + "epoch": 18.3, + "learning_rate": 0.0019989987241677987, + "loss": 1.3964, "step": 45500 }, { - "epoch": 9.17, - "learning_rate": 0.0024475011491548715, - "loss": 1.3151, + "epoch": 18.34, + "learning_rate": 0.0019961854147557967, + "loss": 1.3991, "step": 45600 }, { - "epoch": 9.19, - "learning_rate": 0.002446996752378015, - "loss": 1.3152, + "epoch": 18.38, + "learning_rate": 0.0019933662207167998, + "loss": 1.3967, "step": 45700 }, { - "epoch": 9.21, - "learning_rate": 0.0024464899965861704, - "loss": 1.3159, + "epoch": 18.42, + "learning_rate": 0.001990541164283761, + "loss": 1.3996, "step": 45800 }, { - "epoch": 9.23, - "learning_rate": 0.002445980882778041, - "loss": 1.3189, + "epoch": 18.46, + "learning_rate": 0.001987710267735866, + "loss": 1.4007, "step": 45900 }, { - "epoch": 9.25, - "learning_rate": 0.0024454694119569777, - "loss": 1.3132, + "epoch": 18.5, + "learning_rate": 0.0019848735533983574, + "loss": 1.3999, "step": 46000 }, { - "epoch": 9.27, - "learning_rate": 0.0024449555851309753, - "loss": 1.3136, + "epoch": 18.54, + "learning_rate": 0.001982031043642358, + "loss": 1.3994, "step": 46100 }, { - "epoch": 9.29, - "learning_rate": 0.0024444394033126733, - "loss": 1.3154, + "epoch": 18.58, + "learning_rate": 0.001979182760884695, + "loss": 1.397, "step": 46200 }, { - "epoch": 9.31, - "learning_rate": 0.0024439208675193502, - "loss": 1.3148, + "epoch": 18.62, + "learning_rate": 0.001976328727587724, + "loss": 1.3993, "step": 46300 }, { - "epoch": 9.33, - "learning_rate": 0.0024433999787729266, - "loss": 1.3119, + "epoch": 18.66, + "learning_rate": 0.00197346896625915, + "loss": 1.3997, "step": 46400 }, { - "epoch": 9.35, - "learning_rate": 0.002442876738099957, - "loss": 1.3143, + "epoch": 18.7, + "learning_rate": 0.0019706034994518506, + "loss": 1.3972, "step": 46500 }, { - "epoch": 9.37, - "learning_rate": 0.002442351146531634, - "loss": 1.316, + "epoch": 18.74, + "learning_rate": 0.0019677323497636996, + "loss": 1.3987, "step": 46600 }, { - "epoch": 9.39, - "learning_rate": 0.0024418284961464525, - "loss": 1.3157, + "epoch": 18.78, + "learning_rate": 0.0019648555398373868, + "loss": 1.3996, "step": 46700 }, { - "epoch": 9.41, - "learning_rate": 0.002441298229382551, - "loss": 1.3142, + "epoch": 18.82, + "learning_rate": 0.0019619730923602394, + "loss": 1.3993, "step": 46800 }, { - "epoch": 9.43, - "learning_rate": 0.002440765614834186, - "loss": 1.3125, + "epoch": 18.86, + "learning_rate": 0.001959085030064046, + "loss": 1.4007, "step": 46900 }, { - "epoch": 9.45, - "learning_rate": 0.0024402306535510226, - "loss": 1.3187, + "epoch": 18.9, + "learning_rate": 0.001956191375724874, + "loss": 1.4001, "step": 47000 }, { - "epoch": 9.47, - "learning_rate": 0.0024396933465873506, - "loss": 1.316, + "epoch": 18.94, + "learning_rate": 0.0019532921521628905, + "loss": 1.3984, "step": 47100 }, { - "epoch": 9.49, - "learning_rate": 0.002439153695002083, - "loss": 1.3129, + "epoch": 18.98, + "learning_rate": 0.0019503873822421841, + "loss": 1.4005, "step": 47200 }, { - "epoch": 9.51, - "learning_rate": 0.0024386116998587537, - "loss": 1.3164, + "epoch": 19.0, + "eval_accuracy": 0.4099590252150344, + "eval_loss": 1.353163480758667, + "eval_runtime": 18.0162, + "eval_samples_per_second": 4416.478, + "eval_steps_per_second": 17.262, + "step": 47248 + }, + { + "epoch": 19.02, + "learning_rate": 0.001947477088870584, + "loss": 1.3927, "step": 47300 }, { - "epoch": 9.53, - "learning_rate": 0.002438067362225514, - "loss": 1.3156, + "epoch": 19.06, + "learning_rate": 0.0019445612949994774, + "loss": 1.39, "step": 47400 }, { - "epoch": 9.55, - "learning_rate": 0.0024375206831751335, - "loss": 1.3151, + "epoch": 19.1, + "learning_rate": 0.0019416400236236303, + "loss": 1.3912, "step": 47500 }, { - "epoch": 9.57, - "learning_rate": 0.0024369716637849944, - "loss": 1.3131, + "epoch": 19.14, + "learning_rate": 0.0019387132977810076, + "loss": 1.3908, "step": 47600 }, { - "epoch": 9.59, - "learning_rate": 0.0024364203051370926, - "loss": 1.3154, + "epoch": 19.18, + "learning_rate": 0.0019357811405525877, + "loss": 1.3917, "step": 47700 }, { - "epoch": 9.61, - "learning_rate": 0.0024358666083180336, - "loss": 1.3141, + "epoch": 19.22, + "learning_rate": 0.0019328435750621822, + "loss": 1.3978, "step": 47800 }, { - "epoch": 9.63, - "learning_rate": 0.0024353105744190314, - "loss": 1.3131, + "epoch": 19.26, + "learning_rate": 0.0019299006244762559, + "loss": 1.3935, "step": 47900 }, { - "epoch": 9.65, - "learning_rate": 0.0024347522045359044, - "loss": 1.3147, + "epoch": 19.3, + "learning_rate": 0.0019269523120037401, + "loss": 1.3905, "step": 48000 }, { - "epoch": 9.67, - "learning_rate": 0.0024341914997690764, - "loss": 1.3159, + "epoch": 19.34, + "learning_rate": 0.001923998660895852, + "loss": 1.3952, "step": 48100 }, { - "epoch": 9.69, - "learning_rate": 0.002433628461223572, - "loss": 1.3135, + "epoch": 19.38, + "learning_rate": 0.001921039694445911, + "loss": 1.3941, "step": 48200 }, { - "epoch": 9.71, - "learning_rate": 0.0024330630900090155, - "loss": 1.3133, + "epoch": 19.42, + "learning_rate": 0.0019180754359891545, + "loss": 1.3909, "step": 48300 }, { - "epoch": 9.73, - "learning_rate": 0.0024324953872396277, - "loss": 1.3126, + "epoch": 19.46, + "learning_rate": 0.0019151059089025538, + "loss": 1.3919, "step": 48400 }, { - "epoch": 9.75, - "learning_rate": 0.0024319253540342253, - "loss": 1.3144, + "epoch": 19.5, + "learning_rate": 0.0019121311366046307, + "loss": 1.3894, "step": 48500 }, { - "epoch": 9.77, - "learning_rate": 0.0024313529915162163, - "loss": 1.315, + "epoch": 19.54, + "learning_rate": 0.0019091511425552727, + "loss": 1.3954, "step": 48600 }, { - "epoch": 9.79, - "learning_rate": 0.002430784059241426, - "loss": 1.315, + "epoch": 19.58, + "learning_rate": 0.001906165950255546, + "loss": 1.3925, "step": 48700 }, { - "epoch": 9.81, - "learning_rate": 0.0024302070647516908, - "loss": 1.3129, + "epoch": 19.62, + "learning_rate": 0.0019031755832475124, + "loss": 1.3963, "step": 48800 }, { - "epoch": 9.83, - "learning_rate": 0.002429627744335717, - "loss": 1.3163, + "epoch": 19.66, + "learning_rate": 0.0019001800651140435, + "loss": 1.3937, "step": 48900 }, { - "epoch": 9.85, - "learning_rate": 0.0024290460991352155, - "loss": 1.3101, + "epoch": 19.7, + "learning_rate": 0.0018971794194786334, + "loss": 1.3949, "step": 49000 }, { - "epoch": 9.87, - "learning_rate": 0.0024284621302964804, - "loss": 1.3152, + "epoch": 19.74, + "learning_rate": 0.001894173670005213, + "loss": 1.3935, "step": 49100 }, { - "epoch": 9.89, - "learning_rate": 0.0024278758389703853, - "loss": 1.3163, + "epoch": 19.78, + "learning_rate": 0.0018911628403979648, + "loss": 1.3952, "step": 49200 }, { - "epoch": 9.91, - "learning_rate": 0.002427287226312379, - "loss": 1.3089, + "epoch": 19.83, + "learning_rate": 0.0018882676866254673, + "loss": 1.4002, "step": 49300 }, { - "epoch": 9.93, - "learning_rate": 0.002426696293482488, - "loss": 1.3141, + "epoch": 19.87, + "learning_rate": 0.0018852469688702364, + "loss": 1.3918, "step": 49400 }, { - "epoch": 9.95, - "learning_rate": 0.002426103041645309, - "loss": 1.3113, + "epoch": 19.91, + "learning_rate": 0.0018822212413796408, + "loss": 1.3927, "step": 49500 }, { - "epoch": 9.97, - "learning_rate": 0.0024255074719700094, - "loss": 1.3098, + "epoch": 19.95, + "learning_rate": 0.001879190528015413, + "loss": 1.3968, "step": 49600 }, { - "epoch": 9.99, - "learning_rate": 0.0024249095856303253, - "loss": 1.3112, + "epoch": 19.99, + "learning_rate": 0.001876154852678608, + "loss": 1.3949, "step": 49700 }, { - "epoch": 10.0, - "eval_accuracy": 0.4034808535713104, - "eval_loss": 1.2766634225845337, - "eval_runtime": 19.8931, - "eval_samples_per_second": 3999.773, - "eval_steps_per_second": 15.634, - "step": 49732 + "epoch": 20.0, + "eval_accuracy": 0.41007195900114135, + "eval_loss": 1.3496698141098022, + "eval_runtime": 18.2296, + "eval_samples_per_second": 4364.763, + "eval_steps_per_second": 17.06, + "step": 49735 }, { - "epoch": 10.01, - "learning_rate": 0.0024243093838045584, - "loss": 1.3131, + "epoch": 20.03, + "learning_rate": 0.0018731142393094097, + "loss": 1.3862, "step": 49800 }, { - "epoch": 10.03, - "learning_rate": 0.002423706867675572, - "loss": 1.3102, + "epoch": 20.07, + "learning_rate": 0.0018700687118869469, + "loss": 1.3861, "step": 49900 }, { - "epoch": 10.05, - "learning_rate": 0.002423102038430793, - "loss": 1.3043, + "epoch": 20.11, + "learning_rate": 0.0018670182944291003, + "loss": 1.3836, "step": 50000 }, { - "epoch": 10.07, - "learning_rate": 0.0024224948972622054, - "loss": 1.3073, + "epoch": 20.15, + "learning_rate": 0.0018639630109923166, + "loss": 1.385, "step": 50100 }, { - "epoch": 10.09, - "learning_rate": 0.002421885445366349, - "loss": 1.3097, + "epoch": 20.19, + "learning_rate": 0.0018609028856714155, + "loss": 1.3868, "step": 50200 }, { - "epoch": 10.11, - "learning_rate": 0.002421273683944319, - "loss": 1.3065, + "epoch": 20.23, + "learning_rate": 0.0018578379425994015, + "loss": 1.387, "step": 50300 }, { - "epoch": 10.13, - "learning_rate": 0.002420659614201761, - "loss": 1.3096, + "epoch": 20.27, + "learning_rate": 0.0018547682059472737, + "loss": 1.3889, "step": 50400 }, { - "epoch": 10.15, - "learning_rate": 0.002420043237348871, - "loss": 1.3118, + "epoch": 20.31, + "learning_rate": 0.0018516936999238337, + "loss": 1.3892, "step": 50500 }, { - "epoch": 10.17, - "learning_rate": 0.0024194245546003894, - "loss": 1.3081, + "epoch": 20.35, + "learning_rate": 0.0018486144487754972, + "loss": 1.3875, "step": 50600 }, { - "epoch": 10.19, - "learning_rate": 0.002418803567175604, - "loss": 1.3087, + "epoch": 20.39, + "learning_rate": 0.0018455304767861, + "loss": 1.3896, "step": 50700 }, { - "epoch": 10.21, - "learning_rate": 0.002418186520605176, - "loss": 1.3084, + "epoch": 20.43, + "learning_rate": 0.0018424418082767084, + "loss": 1.3891, "step": 50800 }, { - "epoch": 10.23, - "learning_rate": 0.0024175609505199523, - "loss": 1.3071, + "epoch": 20.47, + "learning_rate": 0.0018393484676054264, + "loss": 1.3918, "step": 50900 }, { - "epoch": 10.25, - "learning_rate": 0.002416933079431175, - "loss": 1.3071, + "epoch": 20.51, + "learning_rate": 0.0018362504791672044, + "loss": 1.3882, "step": 51000 }, { - "epoch": 10.27, - "learning_rate": 0.0024163029085762376, - "loss": 1.3126, + "epoch": 20.55, + "learning_rate": 0.0018331478673936463, + "loss": 1.3859, "step": 51100 }, { - "epoch": 10.3, - "learning_rate": 0.0024156704391970684, - "loss": 1.3117, + "epoch": 20.59, + "learning_rate": 0.0018300406567528164, + "loss": 1.3901, "step": 51200 }, { - "epoch": 10.32, - "learning_rate": 0.0024150356725401233, - "loss": 1.3088, + "epoch": 20.63, + "learning_rate": 0.0018269288717490475, + "loss": 1.3866, "step": 51300 }, { - "epoch": 10.34, - "learning_rate": 0.002414404991844438, - "loss": 1.3101, + "epoch": 20.67, + "learning_rate": 0.001823812536922747, + "loss": 1.388, "step": 51400 }, { - "epoch": 10.36, - "learning_rate": 0.0024137656573309015, - "loss": 1.3126, + "epoch": 20.71, + "learning_rate": 0.0018206916768502036, + "loss": 1.3902, "step": 51500 }, { - "epoch": 10.38, - "learning_rate": 0.002413124029293492, - "loss": 1.3098, + "epoch": 20.75, + "learning_rate": 0.0018175663161433928, + "loss": 1.3894, "step": 51600 }, { - "epoch": 10.4, - "learning_rate": 0.0024124801089967156, - "loss": 1.3148, + "epoch": 20.79, + "learning_rate": 0.001814436479449784, + "loss": 1.3872, "step": 51700 }, { - "epoch": 10.42, - "learning_rate": 0.0024118338977095963, - "loss": 1.3099, + "epoch": 20.83, + "learning_rate": 0.0018113021914521452, + "loss": 1.3891, "step": 51800 }, { - "epoch": 10.44, - "learning_rate": 0.0024111853967056732, - "loss": 1.3077, + "epoch": 20.87, + "learning_rate": 0.0018081634768683486, + "loss": 1.3886, "step": 51900 }, { - "epoch": 10.46, - "learning_rate": 0.002410534607262998, - "loss": 1.3099, + "epoch": 20.91, + "learning_rate": 0.0018050203604511755, + "loss": 1.3868, "step": 52000 }, { - "epoch": 10.48, - "learning_rate": 0.0024098815306641316, - "loss": 1.3082, + "epoch": 20.95, + "learning_rate": 0.001801872866988122, + "loss": 1.3863, "step": 52100 }, { - "epoch": 10.5, - "learning_rate": 0.002409226168196144, - "loss": 1.3068, + "epoch": 20.99, + "learning_rate": 0.0017987210213012022, + "loss": 1.3885, "step": 52200 }, { - "epoch": 10.52, - "learning_rate": 0.0024085685211506086, - "loss": 1.3094, + "epoch": 21.0, + "eval_accuracy": 0.41060529677158064, + "eval_loss": 1.3434184789657593, + "eval_runtime": 17.9588, + "eval_samples_per_second": 4430.592, + "eval_steps_per_second": 17.317, + "step": 52221 + }, + { + "epoch": 21.03, + "learning_rate": 0.0017955648482467531, + "loss": 1.3787, "step": 52300 }, { - "epoch": 10.54, - "learning_rate": 0.002407908590823602, - "loss": 1.3076, + "epoch": 21.07, + "learning_rate": 0.001792404372715239, + "loss": 1.3773, "step": 52400 }, { - "epoch": 10.56, - "learning_rate": 0.0024072463785157006, - "loss": 1.3075, + "epoch": 21.11, + "learning_rate": 0.0017892396196310535, + "loss": 1.3854, "step": 52500 }, { - "epoch": 10.58, - "learning_rate": 0.0024065818855319773, - "loss": 1.3118, + "epoch": 21.15, + "learning_rate": 0.0017860706139523259, + "loss": 1.3818, "step": 52600 }, { - "epoch": 10.6, - "learning_rate": 0.0024059151131820013, - "loss": 1.3101, + "epoch": 21.19, + "learning_rate": 0.0017828973806707216, + "loss": 1.3794, "step": 52700 }, { - "epoch": 10.62, - "learning_rate": 0.002405246062779832, - "loss": 1.3058, + "epoch": 21.23, + "learning_rate": 0.0017797199448112463, + "loss": 1.3801, "step": 52800 }, { - "epoch": 10.64, - "learning_rate": 0.0024045747356440197, - "loss": 1.3079, + "epoch": 21.27, + "learning_rate": 0.0017765383314320483, + "loss": 1.3782, "step": 52900 }, { - "epoch": 10.66, - "learning_rate": 0.0024039011330976015, - "loss": 1.309, + "epoch": 21.31, + "learning_rate": 0.0017733525656242206, + "loss": 1.3825, "step": 53000 }, { - "epoch": 10.68, - "learning_rate": 0.0024032252564680978, - "loss": 1.3076, + "epoch": 21.35, + "learning_rate": 0.0017701626725116044, + "loss": 1.3808, "step": 53100 }, { - "epoch": 10.7, - "learning_rate": 0.0024025471070875117, - "loss": 1.3102, + "epoch": 21.39, + "learning_rate": 0.0017669686772505894, + "loss": 1.3836, "step": 53200 }, { - "epoch": 10.72, - "learning_rate": 0.002401866686292326, - "loss": 1.3088, + "epoch": 21.43, + "learning_rate": 0.0017637706050299164, + "loss": 1.3839, "step": 53300 }, { - "epoch": 10.74, - "learning_rate": 0.0024011839954234983, - "loss": 1.3089, + "epoch": 21.47, + "learning_rate": 0.001760568481070477, + "loss": 1.3851, "step": 53400 }, { - "epoch": 10.76, - "learning_rate": 0.0024004990358264607, - "loss": 1.3063, + "epoch": 21.51, + "learning_rate": 0.001757362330625117, + "loss": 1.3832, "step": 53500 }, { - "epoch": 10.78, - "learning_rate": 0.0023998118088511167, - "loss": 1.3066, + "epoch": 21.55, + "learning_rate": 0.001754152178978437, + "loss": 1.3836, "step": 53600 }, { - "epoch": 10.8, - "learning_rate": 0.0023991223158518387, - "loss": 1.3071, + "epoch": 21.59, + "learning_rate": 0.0017509380514465903, + "loss": 1.3857, "step": 53700 }, { - "epoch": 10.82, - "learning_rate": 0.0023984305581874637, - "loss": 1.3097, + "epoch": 21.63, + "learning_rate": 0.0017477199733770866, + "loss": 1.3842, "step": 53800 }, { - "epoch": 10.84, - "learning_rate": 0.002397736537221293, - "loss": 1.3097, + "epoch": 21.67, + "learning_rate": 0.0017444979701485893, + "loss": 1.3831, "step": 53900 }, { - "epoch": 10.86, - "learning_rate": 0.002397040254321088, - "loss": 1.3088, + "epoch": 21.72, + "learning_rate": 0.0017412720671707183, + "loss": 1.3824, "step": 54000 }, { - "epoch": 10.88, - "learning_rate": 0.0023963417108590675, - "loss": 1.3071, + "epoch": 21.76, + "learning_rate": 0.001738042289883847, + "loss": 1.3849, "step": 54100 }, { - "epoch": 10.9, - "learning_rate": 0.0023956409082119055, - "loss": 1.3055, + "epoch": 21.8, + "learning_rate": 0.0017348086637589029, + "loss": 1.3819, "step": 54200 }, { - "epoch": 10.92, - "learning_rate": 0.002394937847760729, - "loss": 1.3059, + "epoch": 21.84, + "learning_rate": 0.0017315712142971665, + "loss": 1.3827, "step": 54300 }, { - "epoch": 10.94, - "learning_rate": 0.002394232530891114, - "loss": 1.3016, + "epoch": 21.88, + "learning_rate": 0.00172832996703007, + "loss": 1.3791, "step": 54400 }, { - "epoch": 10.96, - "learning_rate": 0.0023935249589930835, - "loss": 1.308, + "epoch": 21.92, + "learning_rate": 0.0017250849475189966, + "loss": 1.3841, "step": 54500 }, { - "epoch": 10.98, - "learning_rate": 0.0023928151334611045, - "loss": 1.3063, + "epoch": 21.96, + "learning_rate": 0.0017218361813550788, + "loss": 1.385, "step": 54600 }, { - "epoch": 11.0, - "learning_rate": 0.002392103055694086, - "loss": 1.3058, + "epoch": 22.0, + "learning_rate": 0.001718583694158995, + "loss": 1.3819, "step": 54700 }, { - "epoch": 11.0, - "eval_accuracy": 0.4042048581326505, - "eval_loss": 1.2673618793487549, - "eval_runtime": 19.7081, - "eval_samples_per_second": 4037.327, - "eval_steps_per_second": 15.78, - "step": 54705 + "epoch": 22.0, + "eval_accuracy": 0.4112035980208619, + "eval_loss": 1.3387858867645264, + "eval_runtime": 18.0468, + "eval_samples_per_second": 4408.97, + "eval_steps_per_second": 17.233, + "step": 54708 }, { - "epoch": 11.02, - "learning_rate": 0.0023913887270953743, - "loss": 1.2989, + "epoch": 22.04, + "learning_rate": 0.0017153275115807693, + "loss": 1.3717, "step": 54800 }, { - "epoch": 11.04, - "learning_rate": 0.002390672149072754, - "loss": 1.302, + "epoch": 22.08, + "learning_rate": 0.0017120676592995692, + "loss": 1.3738, "step": 54900 }, { - "epoch": 11.06, - "learning_rate": 0.0023899605224217933, - "loss": 1.2996, + "epoch": 22.12, + "learning_rate": 0.0017088041630235018, + "loss": 1.3753, "step": 55000 }, { - "epoch": 11.08, - "learning_rate": 0.002389239472251357, - "loss": 1.2986, + "epoch": 22.16, + "learning_rate": 0.0017055370484894122, + "loss": 1.3748, "step": 55100 }, { - "epoch": 11.1, - "learning_rate": 0.0023885161768927156, - "loss": 1.3003, + "epoch": 22.2, + "learning_rate": 0.0017022663414626796, + "loss": 1.3736, "step": 55200 }, { - "epoch": 11.12, - "learning_rate": 0.002387790637771324, - "loss": 1.3006, + "epoch": 22.24, + "learning_rate": 0.0016989920677370153, + "loss": 1.3806, "step": 55300 }, { - "epoch": 11.14, - "learning_rate": 0.0023870628563170586, - "loss": 1.3063, + "epoch": 22.28, + "learning_rate": 0.0016957142531342578, + "loss": 1.3772, "step": 55400 }, { - "epoch": 11.16, - "learning_rate": 0.0023863328339642155, - "loss": 1.3082, + "epoch": 22.32, + "learning_rate": 0.0016924329235041703, + "loss": 1.3804, "step": 55500 }, { - "epoch": 11.18, - "learning_rate": 0.002385600572151506, - "loss": 1.3049, + "epoch": 22.36, + "learning_rate": 0.0016891481047242368, + "loss": 1.3765, "step": 55600 }, { - "epoch": 11.2, - "learning_rate": 0.002384866072322057, - "loss": 1.3041, + "epoch": 22.4, + "learning_rate": 0.0016858598226994572, + "loss": 1.3762, "step": 55700 }, { - "epoch": 11.22, - "learning_rate": 0.0023841293359234033, - "loss": 1.305, + "epoch": 22.44, + "learning_rate": 0.0016825681033621443, + "loss": 1.3767, "step": 55800 }, { - "epoch": 11.24, - "learning_rate": 0.00238339036440749, - "loss": 1.3048, + "epoch": 22.48, + "learning_rate": 0.0016792729726717171, + "loss": 1.375, "step": 55900 }, { - "epoch": 11.26, - "learning_rate": 0.002382649159230665, - "loss": 1.3019, + "epoch": 22.52, + "learning_rate": 0.0016759744566144986, + "loss": 1.3782, "step": 56000 }, { - "epoch": 11.28, - "learning_rate": 0.00238190572185368, - "loss": 1.3049, + "epoch": 22.56, + "learning_rate": 0.0016726725812035098, + "loss": 1.3767, "step": 56100 }, { - "epoch": 11.3, - "learning_rate": 0.002381160053741684, - "loss": 1.306, + "epoch": 22.6, + "learning_rate": 0.0016693673724782643, + "loss": 1.378, "step": 56200 }, { - "epoch": 11.32, - "learning_rate": 0.0023804121563642247, - "loss": 1.3058, + "epoch": 22.64, + "learning_rate": 0.0016660588565045626, + "loss": 1.3797, "step": 56300 }, { - "epoch": 11.34, - "learning_rate": 0.0023796620311952415, - "loss": 1.305, + "epoch": 22.68, + "learning_rate": 0.0016627470593742876, + "loss": 1.376, "step": 56400 }, { - "epoch": 11.36, - "learning_rate": 0.0023789096797130643, - "loss": 1.3037, + "epoch": 22.72, + "learning_rate": 0.0016594320072051982, + "loss": 1.3778, "step": 56500 }, { - "epoch": 11.38, - "learning_rate": 0.002378155103400411, - "loss": 1.3064, + "epoch": 22.76, + "learning_rate": 0.0016561137261407242, + "loss": 1.3776, "step": 56600 }, { - "epoch": 11.4, - "learning_rate": 0.002377398303744384, - "loss": 1.3038, + "epoch": 22.8, + "learning_rate": 0.001652792242349758, + "loss": 1.3759, "step": 56700 }, { - "epoch": 11.42, - "learning_rate": 0.002376639282236468, - "loss": 1.3057, + "epoch": 22.84, + "learning_rate": 0.0016494675820264502, + "loss": 1.3764, "step": 56800 }, { - "epoch": 11.44, - "learning_rate": 0.0023758780403725257, - "loss": 1.3044, + "epoch": 22.88, + "learning_rate": 0.001646139771390003, + "loss": 1.3788, "step": 56900 }, { - "epoch": 11.46, - "learning_rate": 0.0023751145796527956, - "loss": 1.3057, + "epoch": 22.92, + "learning_rate": 0.0016428088366844615, + "loss": 1.3767, "step": 57000 }, { - "epoch": 11.48, - "learning_rate": 0.00237434890158189, - "loss": 1.3034, + "epoch": 22.96, + "learning_rate": 0.0016394748041785093, + "loss": 1.3793, "step": 57100 }, { - "epoch": 11.5, - "learning_rate": 0.0023735810076687893, - "loss": 1.3055, + "epoch": 23.0, + "eval_accuracy": 0.4114731876074464, + "eval_loss": 1.3357292413711548, + "eval_runtime": 18.1868, + "eval_samples_per_second": 4375.034, + "eval_steps_per_second": 17.1, + "step": 57195 + }, + { + "epoch": 23.0, + "learning_rate": 0.0016361377001652606, + "loss": 1.3783, "step": 57200 }, { - "epoch": 11.52, - "learning_rate": 0.0023728108994268433, - "loss": 1.3021, + "epoch": 23.04, + "learning_rate": 0.0016327975509620501, + "loss": 1.3643, "step": 57300 }, { - "epoch": 11.54, - "learning_rate": 0.0023720385783737637, - "loss": 1.3051, + "epoch": 23.08, + "learning_rate": 0.0016294543829102304, + "loss": 1.3689, "step": 57400 }, { - "epoch": 11.56, - "learning_rate": 0.0023712640460316244, - "loss": 1.3066, + "epoch": 23.12, + "learning_rate": 0.0016261082223749596, + "loss": 1.371, "step": 57500 }, { - "epoch": 11.58, - "learning_rate": 0.0023704873039268565, - "loss": 1.306, + "epoch": 23.16, + "learning_rate": 0.0016227590957449958, + "loss": 1.3684, "step": 57600 }, { - "epoch": 11.6, - "learning_rate": 0.0023697083535902464, - "loss": 1.2988, + "epoch": 23.2, + "learning_rate": 0.0016194070294324897, + "loss": 1.3739, "step": 57700 }, { - "epoch": 11.62, - "learning_rate": 0.0023689271965569325, - "loss": 1.3044, + "epoch": 23.24, + "learning_rate": 0.0016160520498727739, + "loss": 1.3706, "step": 57800 }, { - "epoch": 11.64, - "learning_rate": 0.0023681438343664016, - "loss": 1.3054, + "epoch": 23.28, + "learning_rate": 0.001612694183524156, + "loss": 1.3708, "step": 57900 }, { - "epoch": 11.66, - "learning_rate": 0.002367358268562487, - "loss": 1.3035, + "epoch": 23.32, + "learning_rate": 0.0016093334568677096, + "loss": 1.3712, "step": 58000 }, { - "epoch": 11.68, - "learning_rate": 0.0023665705006933645, - "loss": 1.2999, + "epoch": 23.36, + "learning_rate": 0.0016059698964070654, + "loss": 1.3712, "step": 58100 }, { - "epoch": 11.7, - "learning_rate": 0.00236578053231155, - "loss": 1.3027, + "epoch": 23.4, + "learning_rate": 0.0016026035286682032, + "loss": 1.3743, "step": 58200 }, { - "epoch": 11.72, - "learning_rate": 0.0023649883649738954, - "loss": 1.3035, + "epoch": 23.44, + "learning_rate": 0.0015992343801992403, + "loss": 1.3704, "step": 58300 }, { - "epoch": 11.74, - "learning_rate": 0.0023641940002415876, - "loss": 1.3023, + "epoch": 23.48, + "learning_rate": 0.0015958624775702249, + "loss": 1.3716, "step": 58400 }, { - "epoch": 11.76, - "learning_rate": 0.002363397439680142, - "loss": 1.2999, + "epoch": 23.52, + "learning_rate": 0.0015925216070889932, + "loss": 1.3719, "step": 58500 }, { - "epoch": 11.78, - "learning_rate": 0.002362598684859403, - "loss": 1.2994, + "epoch": 23.56, + "learning_rate": 0.00158914430281443, + "loss": 1.3713, "step": 58600 }, { - "epoch": 11.8, - "learning_rate": 0.0023617977373535403, - "loss": 1.3024, + "epoch": 23.61, + "learning_rate": 0.001585764323952988, + "loss": 1.3679, "step": 58700 }, { - "epoch": 11.82, - "learning_rate": 0.0023609945987410432, - "loss": 1.302, + "epoch": 23.65, + "learning_rate": 0.0015823816971601262, + "loss": 1.3728, "step": 58800 }, { - "epoch": 11.84, - "learning_rate": 0.0023601892706047187, - "loss": 1.3044, + "epoch": 23.69, + "learning_rate": 0.0015789964491121867, + "loss": 1.3732, "step": 58900 }, { - "epoch": 11.86, - "learning_rate": 0.002359381754531691, - "loss": 1.3026, + "epoch": 23.73, + "learning_rate": 0.0015756086065061815, + "loss": 1.3729, "step": 59000 }, { - "epoch": 11.88, - "learning_rate": 0.0023585801599547503, - "loss": 1.3061, + "epoch": 23.77, + "learning_rate": 0.001572218196059587, + "loss": 1.3738, "step": 59100 }, { - "epoch": 11.9, - "learning_rate": 0.002357768294626511, - "loss": 1.2997, + "epoch": 23.81, + "learning_rate": 0.0015688252445101281, + "loss": 1.3732, "step": 59200 }, { - "epoch": 11.92, - "learning_rate": 0.0023569542461327744, - "loss": 1.2999, + "epoch": 23.85, + "learning_rate": 0.00156542977861557, + "loss": 1.3693, "step": 59300 }, { - "epoch": 11.94, - "learning_rate": 0.0023561380160778494, - "loss": 1.3, + "epoch": 23.89, + "learning_rate": 0.0015620318251535073, + "loss": 1.3726, "step": 59400 }, { - "epoch": 11.96, - "learning_rate": 0.0023553196060703448, - "loss": 1.3009, + "epoch": 23.93, + "learning_rate": 0.001558631410921152, + "loss": 1.3702, "step": 59500 }, { - "epoch": 11.98, - "learning_rate": 0.0023544990177231644, - "loss": 1.3043, + "epoch": 23.97, + "learning_rate": 0.0015552626031769532, + "loss": 1.3728, "step": 59600 }, { - "epoch": 12.0, - "eval_accuracy": 0.40439273978216544, - "eval_loss": 1.2642629146575928, - "eval_runtime": 19.7374, - "eval_samples_per_second": 4031.338, - "eval_steps_per_second": 15.757, - "step": 59679 + "epoch": 24.0, + "eval_accuracy": 0.41196191830249546, + "eval_loss": 1.3301055431365967, + "eval_runtime": 17.9639, + "eval_samples_per_second": 4429.334, + "eval_steps_per_second": 17.313, + "step": 59682 }, { - "epoch": 12.0, - "learning_rate": 0.0023536762526535065, - "loss": 1.3005, + "epoch": 24.01, + "learning_rate": 0.0015518573718113404, + "loss": 1.3665, "step": 59700 }, { - "epoch": 12.02, - "learning_rate": 0.002352851312482858, - "loss": 1.2972, + "epoch": 24.05, + "learning_rate": 0.001548449759914021, + "loss": 1.3565, "step": 59800 }, { - "epoch": 12.04, - "learning_rate": 0.0023520241988369927, - "loss": 1.2979, + "epoch": 24.09, + "learning_rate": 0.0015450397943583764, + "loss": 1.362, "step": 59900 }, { - "epoch": 12.06, - "learning_rate": 0.0023511949133459688, - "loss": 1.2933, + "epoch": 24.13, + "learning_rate": 0.0015416275020363487, + "loss": 1.3646, "step": 60000 }, { - "epoch": 12.08, - "learning_rate": 0.0023503634576441234, - "loss": 1.2949, + "epoch": 24.17, + "learning_rate": 0.0015382129098582302, + "loss": 1.36, "step": 60100 }, { - "epoch": 12.1, - "learning_rate": 0.0023495298333700712, - "loss": 1.3001, + "epoch": 24.21, + "learning_rate": 0.0015347960447524506, + "loss": 1.364, "step": 60200 }, { - "epoch": 12.12, - "learning_rate": 0.0023486940421667012, - "loss": 1.2958, + "epoch": 24.25, + "learning_rate": 0.0015313769336653643, + "loss": 1.3669, "step": 60300 }, { - "epoch": 12.14, - "learning_rate": 0.0023478560856811715, - "loss": 1.2996, + "epoch": 24.29, + "learning_rate": 0.0015279556035610376, + "loss": 1.3645, "step": 60400 }, { - "epoch": 12.17, - "learning_rate": 0.0023470159655649093, - "loss": 1.2979, + "epoch": 24.33, + "learning_rate": 0.0015245320814210378, + "loss": 1.3645, "step": 60500 }, { - "epoch": 12.19, - "learning_rate": 0.0023461736834736045, - "loss": 1.2997, + "epoch": 24.37, + "learning_rate": 0.0015211063942442184, + "loss": 1.3657, "step": 60600 }, { - "epoch": 12.21, - "learning_rate": 0.0023453292410672085, - "loss": 1.3016, + "epoch": 24.41, + "learning_rate": 0.0015176785690465067, + "loss": 1.3671, "step": 60700 }, { - "epoch": 12.23, - "learning_rate": 0.002344482640009929, - "loss": 1.299, + "epoch": 24.45, + "learning_rate": 0.001514248632860692, + "loss": 1.3664, "step": 60800 }, { - "epoch": 12.25, - "learning_rate": 0.00234363388197023, - "loss": 1.3009, + "epoch": 24.49, + "learning_rate": 0.0015108166127362102, + "loss": 1.3678, "step": 60900 }, { - "epoch": 12.27, - "learning_rate": 0.0023427829686208245, - "loss": 1.3003, + "epoch": 24.53, + "learning_rate": 0.0015073825357389335, + "loss": 1.3682, "step": 61000 }, { - "epoch": 12.29, - "learning_rate": 0.002341938442963461, - "loss": 1.303, + "epoch": 24.57, + "learning_rate": 0.001503946428950953, + "loss": 1.3646, "step": 61100 }, { - "epoch": 12.31, - "learning_rate": 0.0023410832455409507, - "loss": 1.2995, + "epoch": 24.61, + "learning_rate": 0.0015005083194703692, + "loss": 1.3662, "step": 61200 }, { - "epoch": 12.33, - "learning_rate": 0.002340225897835473, - "loss": 1.2984, + "epoch": 24.65, + "learning_rate": 0.0014970682344110752, + "loss": 1.3672, "step": 61300 }, { - "epoch": 12.35, - "learning_rate": 0.0023393664015366696, - "loss": 1.3007, + "epoch": 24.69, + "learning_rate": 0.0014936262009025445, + "loss": 1.3652, "step": 61400 }, { - "epoch": 12.37, - "learning_rate": 0.0023385047583384167, - "loss": 1.3039, + "epoch": 24.73, + "learning_rate": 0.0014901822460896164, + "loss": 1.366, "step": 61500 }, { - "epoch": 12.39, - "learning_rate": 0.0023376582667187836, - "loss": 1.2987, + "epoch": 24.77, + "learning_rate": 0.0014867363971322828, + "loss": 1.3638, "step": 61600 }, { - "epoch": 12.41, - "learning_rate": 0.0023367923776734523, - "loss": 1.295, + "epoch": 24.81, + "learning_rate": 0.0014832886812054728, + "loss": 1.3659, "step": 61700 }, { - "epoch": 12.43, - "learning_rate": 0.0023359243468015017, - "loss": 1.3033, + "epoch": 24.85, + "learning_rate": 0.0014798391254988387, + "loss": 1.3647, "step": 61800 }, { - "epoch": 12.45, - "learning_rate": 0.002335054175813628, - "loss": 1.3007, + "epoch": 24.89, + "learning_rate": 0.0014763877572165419, + "loss": 1.3667, "step": 61900 }, { - "epoch": 12.47, - "learning_rate": 0.002334181866424745, - "loss": 1.3005, + "epoch": 24.93, + "learning_rate": 0.0014729691438615455, + "loss": 1.3653, "step": 62000 }, { - "epoch": 12.49, - "learning_rate": 0.002333307420353981, - "loss": 1.3006, + "epoch": 24.97, + "learning_rate": 0.0014695142495437725, + "loss": 1.3671, "step": 62100 }, { - "epoch": 12.51, - "learning_rate": 0.0023324308393246752, - "loss": 1.2948, + "epoch": 25.0, + "eval_accuracy": 0.4123754188108745, + "eval_loss": 1.3233122825622559, + "eval_runtime": 21.5586, + "eval_samples_per_second": 3690.779, + "eval_steps_per_second": 14.426, + "step": 62168 + }, + { + "epoch": 25.01, + "learning_rate": 0.001466057624075198, + "loss": 1.363, "step": 62200 }, { - "epoch": 12.53, - "learning_rate": 0.002331552125064374, - "loss": 1.2972, + "epoch": 25.05, + "learning_rate": 0.0014625992947157381, + "loss": 1.3519, "step": 62300 }, { - "epoch": 12.55, - "learning_rate": 0.0023306712793048287, - "loss": 1.3009, + "epoch": 25.09, + "learning_rate": 0.0014591392887387455, + "loss": 1.3502, "step": 62400 }, { - "epoch": 12.57, - "learning_rate": 0.0023297883037819906, - "loss": 1.2992, + "epoch": 25.13, + "learning_rate": 0.0014556776334307964, + "loss": 1.3581, "step": 62500 }, { - "epoch": 12.59, - "learning_rate": 0.002328903200236008, - "loss": 1.3011, + "epoch": 25.17, + "learning_rate": 0.0014522143560914728, + "loss": 1.3562, "step": 62600 }, { - "epoch": 12.61, - "learning_rate": 0.0023280159704112255, - "loss": 1.3016, + "epoch": 25.21, + "learning_rate": 0.001448749484033149, + "loss": 1.3593, "step": 62700 }, { - "epoch": 12.63, - "learning_rate": 0.0023271266160561736, - "loss": 1.2949, + "epoch": 25.25, + "learning_rate": 0.0014452830445807752, + "loss": 1.3583, "step": 62800 }, { - "epoch": 12.65, - "learning_rate": 0.0023262351389235743, - "loss": 1.2994, + "epoch": 25.29, + "learning_rate": 0.0014418150650716635, + "loss": 1.3601, "step": 62900 }, { - "epoch": 12.67, - "learning_rate": 0.00232534154077033, - "loss": 1.3009, + "epoch": 25.33, + "learning_rate": 0.0014383455728552708, + "loss": 1.3611, "step": 63000 }, { - "epoch": 12.69, - "learning_rate": 0.002324445823357525, - "loss": 1.2988, + "epoch": 25.37, + "learning_rate": 0.0014348745952929838, + "loss": 1.36, "step": 63100 }, { - "epoch": 12.71, - "learning_rate": 0.0023235479884504196, - "loss": 1.2966, + "epoch": 25.41, + "learning_rate": 0.0014314021597579025, + "loss": 1.3599, "step": 63200 }, { - "epoch": 12.73, - "learning_rate": 0.0023226480378184472, - "loss": 1.2976, + "epoch": 25.45, + "learning_rate": 0.0014279282936346253, + "loss": 1.3618, "step": 63300 }, { - "epoch": 12.75, - "learning_rate": 0.0023217459732352104, - "loss": 1.2981, + "epoch": 25.5, + "learning_rate": 0.0014244530243190323, + "loss": 1.3617, "step": 63400 }, { - "epoch": 12.77, - "learning_rate": 0.002320841796478479, - "loss": 1.2969, + "epoch": 25.54, + "learning_rate": 0.0014209763792180708, + "loss": 1.3595, "step": 63500 }, { - "epoch": 12.79, - "learning_rate": 0.0023199355093301835, - "loss": 1.2999, + "epoch": 25.58, + "learning_rate": 0.001417498385749536, + "loss": 1.361, "step": 63600 }, { - "epoch": 12.81, - "learning_rate": 0.0023190271135764165, - "loss": 1.295, + "epoch": 25.62, + "learning_rate": 0.0014140190713418583, + "loss": 1.355, "step": 63700 }, { - "epoch": 12.83, - "learning_rate": 0.0023181166110074224, - "loss": 1.2908, + "epoch": 25.66, + "learning_rate": 0.0014105384634338846, + "loss": 1.3624, "step": 63800 }, { - "epoch": 12.85, - "learning_rate": 0.002317204003417602, - "loss": 1.2966, + "epoch": 25.7, + "learning_rate": 0.0014070565894746625, + "loss": 1.3633, "step": 63900 }, { - "epoch": 12.87, - "learning_rate": 0.0023162892926055006, - "loss": 1.2965, + "epoch": 25.74, + "learning_rate": 0.001403573476923225, + "loss": 1.3595, "step": 64000 }, { - "epoch": 12.89, - "learning_rate": 0.002315372480373811, - "loss": 1.2946, + "epoch": 25.78, + "learning_rate": 0.0014000891532483723, + "loss": 1.3595, "step": 64100 }, { - "epoch": 12.91, - "learning_rate": 0.0023144535685293665, - "loss": 1.2988, + "epoch": 25.82, + "learning_rate": 0.0013966036459284556, + "loss": 1.3603, "step": 64200 }, { - "epoch": 12.93, - "learning_rate": 0.0023135325588831386, - "loss": 1.3009, + "epoch": 25.86, + "learning_rate": 0.001393116982451161, + "loss": 1.361, "step": 64300 }, { - "epoch": 12.95, - "learning_rate": 0.0023126094532502327, - "loss": 1.2955, + "epoch": 25.9, + "learning_rate": 0.001389629190313293, + "loss": 1.3588, "step": 64400 }, { - "epoch": 12.97, - "learning_rate": 0.0023116842534498857, - "loss": 1.3004, + "epoch": 25.94, + "learning_rate": 0.001386140297020556, + "loss": 1.3593, "step": 64500 }, { - "epoch": 12.99, - "learning_rate": 0.00231075696130546, - "loss": 1.2943, + "epoch": 25.98, + "learning_rate": 0.0013826503300873386, + "loss": 1.3586, "step": 64600 }, { - "epoch": 13.0, - "eval_accuracy": 0.4050755408318846, - "eval_loss": 1.25895094871521, - "eval_runtime": 19.7319, - "eval_samples_per_second": 4032.464, - "eval_steps_per_second": 15.761, - "step": 64652 + "epoch": 26.0, + "eval_accuracy": 0.41281971944273727, + "eval_loss": 1.3182361125946045, + "eval_runtime": 18.1839, + "eval_samples_per_second": 4375.747, + "eval_steps_per_second": 17.103, + "step": 64655 }, { - "epoch": 13.01, - "learning_rate": 0.0023098275786444435, - "loss": 1.2935, + "epoch": 26.02, + "learning_rate": 0.0013791593170364973, + "loss": 1.3551, "step": 64700 }, { - "epoch": 13.03, - "learning_rate": 0.002308896107298443, - "loss": 1.2893, + "epoch": 26.06, + "learning_rate": 0.0013756672853991376, + "loss": 1.3495, "step": 64800 }, { - "epoch": 13.05, - "learning_rate": 0.0023079625491031824, - "loss": 1.2912, + "epoch": 26.1, + "learning_rate": 0.0013721742627143979, + "loss": 1.3501, "step": 64900 }, { - "epoch": 13.07, - "learning_rate": 0.002307026905898497, - "loss": 1.2944, + "epoch": 26.14, + "learning_rate": 0.0013686802765292333, + "loss": 1.3509, "step": 65000 }, { - "epoch": 13.09, - "learning_rate": 0.002306089179528332, - "loss": 1.2949, + "epoch": 26.18, + "learning_rate": 0.0013651853543981964, + "loss": 1.3532, "step": 65100 }, { - "epoch": 13.11, - "learning_rate": 0.002305149371840738, - "loss": 1.2898, + "epoch": 26.22, + "learning_rate": 0.0013616895238832208, + "loss": 1.3516, "step": 65200 }, { - "epoch": 13.13, - "learning_rate": 0.002304207484687868, - "loss": 1.2934, + "epoch": 26.26, + "learning_rate": 0.0013581928125534046, + "loss": 1.3548, "step": 65300 }, { - "epoch": 13.15, - "learning_rate": 0.002303263519925972, - "loss": 1.2922, + "epoch": 26.3, + "learning_rate": 0.0013546952479847913, + "loss": 1.3533, "step": 65400 }, { - "epoch": 13.17, - "learning_rate": 0.002302317479415395, - "loss": 1.2913, + "epoch": 26.34, + "learning_rate": 0.0013511968577601544, + "loss": 1.3521, "step": 65500 }, { - "epoch": 13.19, - "learning_rate": 0.002301369365020573, - "loss": 1.2921, + "epoch": 26.38, + "learning_rate": 0.001347697669468778, + "loss": 1.3535, "step": 65600 }, { - "epoch": 13.21, - "learning_rate": 0.0023004191786100297, - "loss": 1.2935, + "epoch": 26.42, + "learning_rate": 0.0013441977107062405, + "loss": 1.3553, "step": 65700 }, { - "epoch": 13.23, - "learning_rate": 0.002299466922056371, - "loss": 1.298, + "epoch": 26.46, + "learning_rate": 0.001340697009074196, + "loss": 1.3534, "step": 65800 }, { - "epoch": 13.25, - "learning_rate": 0.0022985125972362834, - "loss": 1.2949, + "epoch": 26.5, + "learning_rate": 0.0013371955921801565, + "loss": 1.3545, "step": 65900 }, { - "epoch": 13.27, - "learning_rate": 0.0022975562060305295, - "loss": 1.2926, + "epoch": 26.54, + "learning_rate": 0.0013336934876372766, + "loss": 1.355, "step": 66000 }, { - "epoch": 13.29, - "learning_rate": 0.002296597750323944, - "loss": 1.293, + "epoch": 26.58, + "learning_rate": 0.001330190723064132, + "loss": 1.3519, "step": 66100 }, { - "epoch": 13.31, - "learning_rate": 0.0022956372320054306, - "loss": 1.2942, + "epoch": 26.62, + "learning_rate": 0.0013266873260845047, + "loss": 1.352, "step": 66200 }, { - "epoch": 13.33, - "learning_rate": 0.0022946746529679575, - "loss": 1.2944, + "epoch": 26.66, + "learning_rate": 0.0013231833243271636, + "loss": 1.3541, "step": 66300 }, { - "epoch": 13.35, - "learning_rate": 0.002293710015108555, - "loss": 1.297, + "epoch": 26.7, + "learning_rate": 0.0013196787454256467, + "loss": 1.3526, "step": 66400 }, { - "epoch": 13.37, - "learning_rate": 0.0022927433203283093, - "loss": 1.2982, + "epoch": 26.74, + "learning_rate": 0.0013161736170180446, + "loss": 1.3578, "step": 66500 }, { - "epoch": 13.39, - "learning_rate": 0.0022917745705323617, - "loss": 1.2904, + "epoch": 26.78, + "learning_rate": 0.0013126679667467805, + "loss": 1.3519, "step": 66600 }, { - "epoch": 13.41, - "learning_rate": 0.0022908037676299038, - "loss": 1.2948, + "epoch": 26.82, + "learning_rate": 0.0013093020771800632, + "loss": 1.3564, "step": 66700 }, { - "epoch": 13.43, - "learning_rate": 0.0022898309135341716, - "loss": 1.2965, + "epoch": 26.86, + "learning_rate": 0.0013057954842567222, + "loss": 1.3507, "step": 66800 }, { - "epoch": 13.45, - "learning_rate": 0.0022888560101624452, - "loss": 1.2892, + "epoch": 26.9, + "learning_rate": 0.0013022884513145783, + "loss": 1.3546, "step": 66900 }, { - "epoch": 13.47, - "learning_rate": 0.002287888839071394, - "loss": 1.2942, + "epoch": 26.94, + "learning_rate": 0.0012987810060110744, + "loss": 1.3533, "step": 67000 }, { - "epoch": 13.49, - "learning_rate": 0.0022869098633604185, - "loss": 1.2886, + "epoch": 26.98, + "learning_rate": 0.0012952731760069067, + "loss": 1.354, "step": 67100 }, { - "epoch": 13.51, - "learning_rate": 0.0022859288441301902, - "loss": 1.2901, + "epoch": 27.0, + "eval_accuracy": 0.4132404774514774, + "eval_loss": 1.3120365142822266, + "eval_runtime": 18.1888, + "eval_samples_per_second": 4374.571, + "eval_steps_per_second": 17.098, + "step": 67142 + }, + { + "epoch": 27.02, + "learning_rate": 0.0012917649889658037, + "loss": 1.3444, "step": 67200 }, { - "epoch": 13.53, - "learning_rate": 0.0022849457833140803, - "loss": 1.2941, + "epoch": 27.06, + "learning_rate": 0.0012882564725543097, + "loss": 1.3432, "step": 67300 }, { - "epoch": 13.55, - "learning_rate": 0.0022839606828494842, - "loss": 1.2947, + "epoch": 27.1, + "learning_rate": 0.0012847476544415677, + "loss": 1.3426, "step": 67400 }, { - "epoch": 13.57, - "learning_rate": 0.002282973544677816, - "loss": 1.2952, + "epoch": 27.14, + "learning_rate": 0.0012812385622990989, + "loss": 1.341, "step": 67500 }, { - "epoch": 13.59, - "learning_rate": 0.0022819843707445058, - "loss": 1.2945, + "epoch": 27.18, + "learning_rate": 0.0012777292238005855, + "loss": 1.3417, "step": 67600 }, { - "epoch": 13.61, - "learning_rate": 0.002280993162998996, - "loss": 1.2935, + "epoch": 27.22, + "learning_rate": 0.0012742196666216538, + "loss": 1.3482, "step": 67700 }, { - "epoch": 13.63, - "learning_rate": 0.002279999923394737, - "loss": 1.2948, + "epoch": 27.26, + "learning_rate": 0.0012707099184396534, + "loss": 1.3497, "step": 67800 }, { - "epoch": 13.65, - "learning_rate": 0.0022790046538891844, - "loss": 1.2895, + "epoch": 27.3, + "learning_rate": 0.0012672000069334408, + "loss": 1.348, "step": 67900 }, { - "epoch": 13.67, - "learning_rate": 0.0022780073564437927, - "loss": 1.2952, + "epoch": 27.34, + "learning_rate": 0.0012636899597831602, + "loss": 1.3479, "step": 68000 }, { - "epoch": 13.69, - "learning_rate": 0.002277008033024014, - "loss": 1.2946, + "epoch": 27.39, + "learning_rate": 0.0012601798046700254, + "loss": 1.3492, "step": 68100 }, { - "epoch": 13.71, - "learning_rate": 0.002276006685599293, - "loss": 1.298, + "epoch": 27.43, + "learning_rate": 0.0012566695692761026, + "loss": 1.3451, "step": 68200 }, { - "epoch": 13.73, - "learning_rate": 0.002275003316143064, - "loss": 1.2942, + "epoch": 27.47, + "learning_rate": 0.00125315928128409, + "loss": 1.3485, "step": 68300 }, { - "epoch": 13.75, - "learning_rate": 0.0022739979266327448, - "loss": 1.2936, + "epoch": 27.51, + "learning_rate": 0.001249648968377101, + "loss": 1.3481, "step": 68400 }, { - "epoch": 13.77, - "learning_rate": 0.002272990519049735, - "loss": 1.2937, + "epoch": 27.55, + "learning_rate": 0.0012461386582384459, + "loss": 1.3471, "step": 68500 }, { - "epoch": 13.79, - "learning_rate": 0.0022719810953794116, - "loss": 1.2991, + "epoch": 27.59, + "learning_rate": 0.0012426283785514122, + "loss": 1.347, "step": 68600 }, { - "epoch": 13.81, - "learning_rate": 0.002270969657611125, - "loss": 1.2924, + "epoch": 27.63, + "learning_rate": 0.0012391181569990483, + "loss": 1.3473, "step": 68700 }, { - "epoch": 13.83, - "learning_rate": 0.0022699562077381943, - "loss": 1.2924, + "epoch": 27.67, + "learning_rate": 0.0012356080212639429, + "loss": 1.3478, "step": 68800 }, { - "epoch": 13.85, - "learning_rate": 0.002268940747757904, - "loss": 1.2936, + "epoch": 27.71, + "learning_rate": 0.0012320979990280098, + "loss": 1.3484, "step": 68900 }, { - "epoch": 13.87, - "learning_rate": 0.002267923279671501, - "loss": 1.2963, + "epoch": 27.75, + "learning_rate": 0.0012285881179722661, + "loss": 1.3476, "step": 69000 }, { - "epoch": 13.89, - "learning_rate": 0.0022669038054841887, - "loss": 1.2875, + "epoch": 27.79, + "learning_rate": 0.001225078405776616, + "loss": 1.3486, "step": 69100 }, { - "epoch": 13.91, - "learning_rate": 0.0022658823272051244, - "loss": 1.2912, + "epoch": 27.83, + "learning_rate": 0.0012215688901196322, + "loss": 1.3485, "step": 69200 }, { - "epoch": 13.93, - "learning_rate": 0.0022648588468474158, - "loss": 1.2943, + "epoch": 27.87, + "learning_rate": 0.0012180595986783368, + "loss": 1.3457, "step": 69300 }, { - "epoch": 13.95, - "learning_rate": 0.002263833366428115, - "loss": 1.2974, + "epoch": 27.91, + "learning_rate": 0.0012145505591279848, + "loss": 1.3455, "step": 69400 }, { - "epoch": 13.97, - "learning_rate": 0.002262816172636471, - "loss": 1.2929, + "epoch": 27.95, + "learning_rate": 0.0012110417991418438, + "loss": 1.349, "step": 69500 }, { - "epoch": 13.99, - "learning_rate": 0.002261786718111027, - "loss": 1.2926, + "epoch": 27.99, + "learning_rate": 0.0012075333463909769, + "loss": 1.3472, "step": 69600 }, { - "epoch": 14.0, - "eval_accuracy": 0.40531926198942647, - "eval_loss": 1.2532228231430054, - "eval_runtime": 19.5692, - "eval_samples_per_second": 4065.991, - "eval_steps_per_second": 15.892, - "step": 69625 + "epoch": 28.0, + "eval_accuracy": 0.41376142436764163, + "eval_loss": 1.304537057876587, + "eval_runtime": 17.96, + "eval_samples_per_second": 4430.295, + "eval_steps_per_second": 17.316, + "step": 69629 }, { - "epoch": 14.01, - "learning_rate": 0.002260755269578475, - "loss": 1.2829, + "epoch": 28.03, + "learning_rate": 0.001204025228544024, + "loss": 1.338, "step": 69700 }, { - "epoch": 14.04, - "learning_rate": 0.0022597218290715715, - "loss": 1.2825, + "epoch": 28.07, + "learning_rate": 0.0012005174732669848, + "loss": 1.3342, "step": 69800 }, { - "epoch": 14.06, - "learning_rate": 0.002258686398626998, - "loss": 1.2912, + "epoch": 28.11, + "learning_rate": 0.0011970101082229981, + "loss": 1.3388, "step": 69900 }, { - "epoch": 14.08, - "learning_rate": 0.0022576489802853578, - "loss": 1.2877, + "epoch": 28.15, + "learning_rate": 0.0011935031610721273, + "loss": 1.34, "step": 70000 }, { - "epoch": 14.1, - "learning_rate": 0.002256609576091173, - "loss": 1.2876, + "epoch": 28.19, + "learning_rate": 0.0011899966594711383, + "loss": 1.3389, "step": 70100 }, { - "epoch": 14.12, - "learning_rate": 0.0022555681880928784, - "loss": 1.2921, + "epoch": 28.23, + "learning_rate": 0.0011864906310732845, + "loss": 1.338, "step": 70200 }, { - "epoch": 14.14, - "learning_rate": 0.0022545248183428184, - "loss": 1.2898, + "epoch": 28.27, + "learning_rate": 0.0011829851035280864, + "loss": 1.3411, "step": 70300 }, { - "epoch": 14.16, - "learning_rate": 0.0022534794688972436, - "loss": 1.2881, + "epoch": 28.31, + "learning_rate": 0.0011794801044811157, + "loss": 1.3373, "step": 70400 }, { - "epoch": 14.18, - "learning_rate": 0.0022524321418163056, - "loss": 1.2887, + "epoch": 28.35, + "learning_rate": 0.0011759756615737753, + "loss": 1.3411, "step": 70500 }, { - "epoch": 14.2, - "learning_rate": 0.0022513828391640535, - "loss": 1.2835, + "epoch": 28.39, + "learning_rate": 0.0011724718024430827, + "loss": 1.3412, "step": 70600 }, { - "epoch": 14.22, - "learning_rate": 0.00225033156300843, - "loss": 1.2945, + "epoch": 28.43, + "learning_rate": 0.0011689685547214518, + "loss": 1.3413, "step": 70700 }, { - "epoch": 14.24, - "learning_rate": 0.0022492783154212676, - "loss": 1.2912, + "epoch": 28.47, + "learning_rate": 0.0011654659460364745, + "loss": 1.3395, "step": 70800 }, { - "epoch": 14.26, - "learning_rate": 0.002248223098478284, - "loss": 1.2864, + "epoch": 28.51, + "learning_rate": 0.0011619640040107027, + "loss": 1.3418, "step": 70900 }, { - "epoch": 14.28, - "learning_rate": 0.0022471659142590758, - "loss": 1.2914, + "epoch": 28.55, + "learning_rate": 0.0011584627562614316, + "loss": 1.3402, "step": 71000 }, { - "epoch": 14.3, - "learning_rate": 0.00224610676484712, - "loss": 1.2877, + "epoch": 28.59, + "learning_rate": 0.0011549622304004806, + "loss": 1.3412, "step": 71100 }, { - "epoch": 14.32, - "learning_rate": 0.0022450456523297646, - "loss": 1.288, + "epoch": 28.63, + "learning_rate": 0.0011514624540339764, + "loss": 1.3396, "step": 71200 }, { - "epoch": 14.34, - "learning_rate": 0.0022439825787982275, - "loss": 1.2901, + "epoch": 28.67, + "learning_rate": 0.0011479634547621343, + "loss": 1.339, "step": 71300 }, { - "epoch": 14.36, - "learning_rate": 0.0022429175463475897, - "loss": 1.2885, + "epoch": 28.71, + "learning_rate": 0.0011444652601790425, + "loss": 1.3399, "step": 71400 }, { - "epoch": 14.38, - "learning_rate": 0.002241861236648861, - "loss": 1.2915, + "epoch": 28.75, + "learning_rate": 0.0011409678978724417, + "loss": 1.3419, "step": 71500 }, { - "epoch": 14.4, - "learning_rate": 0.00224079231219746, - "loss": 1.2862, + "epoch": 28.79, + "learning_rate": 0.0011374713954235103, + "loss": 1.3423, "step": 71600 }, { - "epoch": 14.42, - "learning_rate": 0.002239721435114266, - "loss": 1.2887, + "epoch": 28.83, + "learning_rate": 0.0011339757804066446, + "loss": 1.3402, "step": 71700 }, { - "epoch": 14.44, - "learning_rate": 0.0022386486075097406, - "loss": 1.2953, + "epoch": 28.87, + "learning_rate": 0.0011304810803892432, + "loss": 1.3436, "step": 71800 }, { - "epoch": 14.46, - "learning_rate": 0.0022375738314981885, - "loss": 1.2835, + "epoch": 28.91, + "learning_rate": 0.0011269873229314876, + "loss": 1.3402, "step": 71900 }, { - "epoch": 14.48, - "learning_rate": 0.0022364971091977555, - "loss": 1.2931, + "epoch": 28.95, + "learning_rate": 0.0011234945355861282, + "loss": 1.3406, "step": 72000 }, { - "epoch": 14.5, - "learning_rate": 0.002235418442730422, - "loss": 1.2878, + "epoch": 28.99, + "learning_rate": 0.0011200027458982628, + "loss": 1.3393, "step": 72100 }, { - "epoch": 14.52, - "learning_rate": 0.0022343378342220004, - "loss": 1.2877, + "epoch": 29.0, + "eval_accuracy": 0.4142487389650592, + "eval_loss": 1.2993851900100708, + "eval_runtime": 18.3165, + "eval_samples_per_second": 4344.051, + "eval_steps_per_second": 16.979, + "step": 72115 + }, + { + "epoch": 29.03, + "learning_rate": 0.0011165119814051218, + "loss": 1.3309, "step": 72200 }, { - "epoch": 14.54, - "learning_rate": 0.0022332552858021298, - "loss": 1.2856, + "epoch": 29.07, + "learning_rate": 0.0011130222696358512, + "loss": 1.3314, "step": 72300 }, { - "epoch": 14.56, - "learning_rate": 0.002232170799604273, - "loss": 1.2855, + "epoch": 29.11, + "learning_rate": 0.001109533638111294, + "loss": 1.3332, "step": 72400 }, { - "epoch": 14.58, - "learning_rate": 0.002231084377765712, - "loss": 1.2872, + "epoch": 29.15, + "learning_rate": 0.0011060461143437754, + "loss": 1.3335, "step": 72500 }, { - "epoch": 14.6, - "learning_rate": 0.002229996022427543, - "loss": 1.2863, + "epoch": 29.19, + "learning_rate": 0.0011025597258368834, + "loss": 1.3329, "step": 72600 }, { - "epoch": 14.62, - "learning_rate": 0.002228905735734673, - "loss": 1.2894, + "epoch": 29.23, + "learning_rate": 0.0010990745000852534, + "loss": 1.3321, "step": 72700 }, { - "epoch": 14.64, - "learning_rate": 0.002227813519835815, - "loss": 1.2877, + "epoch": 29.28, + "learning_rate": 0.001095590464574351, + "loss": 1.3342, "step": 72800 }, { - "epoch": 14.66, - "learning_rate": 0.0022267193768834843, - "loss": 1.2847, + "epoch": 29.32, + "learning_rate": 0.001092107646780255, + "loss": 1.3315, "step": 72900 }, { - "epoch": 14.68, - "learning_rate": 0.002225623309033993, - "loss": 1.2925, + "epoch": 29.36, + "learning_rate": 0.0010886260741694415, + "loss": 1.3332, "step": 73000 }, { - "epoch": 14.7, - "learning_rate": 0.0022245253184474496, - "loss": 1.2866, + "epoch": 29.4, + "learning_rate": 0.0010851457741985666, + "loss": 1.3323, "step": 73100 }, { - "epoch": 14.72, - "learning_rate": 0.002223425407287748, - "loss": 1.2889, + "epoch": 29.44, + "learning_rate": 0.0010816667743142494, + "loss": 1.3331, "step": 73200 }, { - "epoch": 14.74, - "learning_rate": 0.00222232357772257, - "loss": 1.2886, + "epoch": 29.48, + "learning_rate": 0.0010781891019528568, + "loss": 1.3314, "step": 73300 }, { - "epoch": 14.76, - "learning_rate": 0.0022212198319233765, - "loss": 1.2896, + "epoch": 29.52, + "learning_rate": 0.001074712784540286, + "loss": 1.3359, "step": 73400 }, { - "epoch": 14.78, - "learning_rate": 0.0022201141720654062, - "loss": 1.2915, + "epoch": 29.56, + "learning_rate": 0.0010712378494917494, + "loss": 1.3325, "step": 73500 }, { - "epoch": 14.8, - "learning_rate": 0.002219006600327669, - "loss": 1.287, + "epoch": 29.6, + "learning_rate": 0.0010677643242115566, + "loss": 1.3383, "step": 73600 }, { - "epoch": 14.82, - "learning_rate": 0.002217897118892943, - "loss": 1.2942, + "epoch": 29.64, + "learning_rate": 0.0010642922360929003, + "loss": 1.3327, "step": 73700 }, { - "epoch": 14.84, - "learning_rate": 0.0022167857299477704, - "loss": 1.2878, + "epoch": 29.68, + "learning_rate": 0.001060821612517639, + "loss": 1.3328, "step": 73800 }, { - "epoch": 14.86, - "learning_rate": 0.0022156724356824516, - "loss": 1.285, + "epoch": 29.72, + "learning_rate": 0.0010573524808560812, + "loss": 1.3312, "step": 73900 }, { - "epoch": 14.88, - "learning_rate": 0.0022145683996782177, - "loss": 1.2891, + "epoch": 29.76, + "learning_rate": 0.0010538848684667704, + "loss": 1.3339, "step": 74000 }, { - "epoch": 14.9, - "learning_rate": 0.0022134513203569167, - "loss": 1.29, + "epoch": 29.8, + "learning_rate": 0.0010504188026962684, + "loss": 1.3313, "step": 74100 }, { - "epoch": 14.92, - "learning_rate": 0.0022123323422868512, - "loss": 1.2884, + "epoch": 29.84, + "learning_rate": 0.0010469543108789395, + "loss": 1.3321, "step": 74200 }, { - "epoch": 14.94, - "learning_rate": 0.002211211467673279, - "loss": 1.2901, + "epoch": 29.88, + "learning_rate": 0.0010434914203367358, + "loss": 1.3363, "step": 74300 }, { - "epoch": 14.96, - "learning_rate": 0.0022100886987251943, - "loss": 1.2901, + "epoch": 29.92, + "learning_rate": 0.001040030158378981, + "loss": 1.3312, "step": 74400 }, { - "epoch": 14.98, - "learning_rate": 0.0022089640376553267, - "loss": 1.2917, + "epoch": 29.96, + "learning_rate": 0.0010365705523021557, + "loss": 1.3335, "step": 74500 }, { - "epoch": 15.0, - "eval_accuracy": 0.40564348112720083, - "eval_loss": 1.2492839097976685, - "eval_runtime": 19.7618, - "eval_samples_per_second": 4026.36, - "eval_steps_per_second": 15.737, - "step": 74598 + "epoch": 30.0, + "learning_rate": 0.0010331126293896813, + "loss": 1.3349, + "step": 74600 }, { - "epoch": 15.0, - "learning_rate": 0.0022078374866801326, - "loss": 1.2901, - "step": 74600 + "epoch": 30.0, + "eval_accuracy": 0.4148599621052274, + "eval_loss": 1.2924647331237793, + "eval_runtime": 18.1541, + "eval_samples_per_second": 4382.932, + "eval_steps_per_second": 17.131, + "step": 74602 }, { - "epoch": 15.02, - "learning_rate": 0.002206709048019794, - "loss": 1.2815, + "epoch": 30.04, + "learning_rate": 0.001029656416911706, + "loss": 1.3219, "step": 74700 }, { - "epoch": 15.04, - "learning_rate": 0.0022055787238982145, - "loss": 1.2824, + "epoch": 30.08, + "learning_rate": 0.0010262019421248881, + "loss": 1.3239, "step": 74800 }, { - "epoch": 15.06, - "learning_rate": 0.0022044465165430107, - "loss": 1.2836, + "epoch": 30.12, + "learning_rate": 0.0010227492322721827, + "loss": 1.3254, "step": 74900 }, { - "epoch": 15.08, - "learning_rate": 0.002203312428185512, - "loss": 1.2848, + "epoch": 30.16, + "learning_rate": 0.0010192983145826263, + "loss": 1.3246, "step": 75000 }, { - "epoch": 15.1, - "learning_rate": 0.002202176461060756, - "loss": 1.2856, + "epoch": 30.2, + "learning_rate": 0.0010158492162711211, + "loss": 1.3219, "step": 75100 }, { - "epoch": 15.12, - "learning_rate": 0.0022010386174074813, - "loss": 1.2858, + "epoch": 30.24, + "learning_rate": 0.001012436427825709, + "loss": 1.3249, "step": 75200 }, { - "epoch": 15.14, - "learning_rate": 0.002199898899468125, - "loss": 1.2827, + "epoch": 30.28, + "learning_rate": 0.0010089910309852388, + "loss": 1.3267, "step": 75300 }, { - "epoch": 15.16, - "learning_rate": 0.0021987573094888182, - "loss": 1.2816, + "epoch": 30.32, + "learning_rate": 0.0010055475348089424, + "loss": 1.3274, "step": 75400 }, { - "epoch": 15.18, - "learning_rate": 0.0021976138497193823, - "loss": 1.2845, + "epoch": 30.36, + "learning_rate": 0.001002105966453195, + "loss": 1.3264, "step": 75500 }, { - "epoch": 15.2, - "learning_rate": 0.0021964685224133214, - "loss": 1.2876, + "epoch": 30.4, + "learning_rate": 0.0009986663530591672, + "loss": 1.3272, "step": 75600 }, { - "epoch": 15.22, - "learning_rate": 0.0021953213298278223, - "loss": 1.2815, + "epoch": 30.44, + "learning_rate": 0.0009952287217526129, + "loss": 1.3283, "step": 75700 }, { - "epoch": 15.24, - "learning_rate": 0.0021941837739942976, - "loss": 1.2838, + "epoch": 30.48, + "learning_rate": 0.0009917930996436546, + "loss": 1.3314, "step": 75800 }, { - "epoch": 15.26, - "learning_rate": 0.002193032876232499, - "loss": 1.2826, + "epoch": 30.52, + "learning_rate": 0.0009883595138265695, + "loss": 1.325, "step": 75900 }, { - "epoch": 15.28, - "learning_rate": 0.002191880119962158, - "loss": 1.2794, + "epoch": 30.56, + "learning_rate": 0.0009849279913795768, + "loss": 1.3265, "step": 76000 }, { - "epoch": 15.3, - "learning_rate": 0.0021907255074551024, - "loss": 1.2845, + "epoch": 30.6, + "learning_rate": 0.0009814985593646216, + "loss": 1.3253, "step": 76100 }, { - "epoch": 15.32, - "learning_rate": 0.0021895690409868165, - "loss": 1.2861, + "epoch": 30.64, + "learning_rate": 0.000978071244827166, + "loss": 1.3291, "step": 76200 }, { - "epoch": 15.34, - "learning_rate": 0.0021884107228364387, - "loss": 1.2845, + "epoch": 30.68, + "learning_rate": 0.0009746460747959707, + "loss": 1.3277, "step": 76300 }, { - "epoch": 15.36, - "learning_rate": 0.002187250555286758, - "loss": 1.2845, + "epoch": 30.72, + "learning_rate": 0.0009712230762828853, + "loss": 1.3246, "step": 76400 }, { - "epoch": 15.38, - "learning_rate": 0.0021860885406242065, - "loss": 1.2879, + "epoch": 30.76, + "learning_rate": 0.0009678022762826335, + "loss": 1.3267, "step": 76500 }, { - "epoch": 15.4, - "learning_rate": 0.002184924681138858, - "loss": 1.2798, + "epoch": 30.8, + "learning_rate": 0.0009643837017726021, + "loss": 1.3257, "step": 76600 }, { - "epoch": 15.42, - "learning_rate": 0.0021837589791244205, - "loss": 1.2823, + "epoch": 30.84, + "learning_rate": 0.0009609673797126259, + "loss": 1.3273, "step": 76700 }, { - "epoch": 15.44, - "learning_rate": 0.0021825914368782343, - "loss": 1.282, + "epoch": 30.88, + "learning_rate": 0.0009575533370447768, + "loss": 1.3245, "step": 76800 }, { - "epoch": 15.46, - "learning_rate": 0.002181422056701266, - "loss": 1.2844, + "epoch": 30.92, + "learning_rate": 0.0009541416006931501, + "loss": 1.3252, "step": 76900 }, { - "epoch": 15.48, - "learning_rate": 0.002180250840898105, - "loss": 1.2854, + "epoch": 30.96, + "learning_rate": 0.0009507321975636546, + "loss": 1.3282, "step": 77000 }, { - "epoch": 15.5, - "learning_rate": 0.002179077791776957, - "loss": 1.287, + "epoch": 31.0, + "eval_accuracy": 0.4151056550442814, + "eval_loss": 1.2873921394348145, + "eval_runtime": 17.9971, + "eval_samples_per_second": 4421.147, + "eval_steps_per_second": 17.281, + "step": 77089 + }, + { + "epoch": 31.0, + "learning_rate": 0.0009473251545437967, + "loss": 1.3231, "step": 77100 }, { - "epoch": 15.52, - "learning_rate": 0.0021779029116496423, - "loss": 1.284, + "epoch": 31.04, + "learning_rate": 0.0009439204985024713, + "loss": 1.3123, "step": 77200 }, { - "epoch": 15.54, - "learning_rate": 0.002176726202831588, - "loss": 1.2866, + "epoch": 31.08, + "learning_rate": 0.0009405182562897486, + "loss": 1.3151, "step": 77300 }, { - "epoch": 15.56, - "learning_rate": 0.0021755476676418267, - "loss": 1.29, + "epoch": 31.12, + "learning_rate": 0.000937118454736663, + "loss": 1.3136, "step": 77400 }, { - "epoch": 15.58, - "learning_rate": 0.0021743673084029897, - "loss": 1.2852, + "epoch": 31.17, + "learning_rate": 0.0009337211206550006, + "loss": 1.3155, "step": 77500 }, { - "epoch": 15.6, - "learning_rate": 0.002173185127441303, - "loss": 1.28, + "epoch": 31.21, + "learning_rate": 0.0009303262808370891, + "loss": 1.3187, "step": 77600 }, { - "epoch": 15.62, - "learning_rate": 0.0021720129760884666, - "loss": 1.2861, + "epoch": 31.25, + "learning_rate": 0.0009269339620555848, + "loss": 1.3211, "step": 77700 }, { - "epoch": 15.64, - "learning_rate": 0.0021708271768331494, - "loss": 1.2843, + "epoch": 31.29, + "learning_rate": 0.0009235441910632633, + "loss": 1.323, "step": 77800 }, { - "epoch": 15.66, - "learning_rate": 0.002169639562831796, - "loss": 1.2825, + "epoch": 31.33, + "learning_rate": 0.0009201569945928068, + "loss": 1.3187, "step": 77900 }, { - "epoch": 15.68, - "learning_rate": 0.002168450136424931, - "loss": 1.2862, + "epoch": 31.37, + "learning_rate": 0.0009167723993565953, + "loss": 1.3142, "step": 78000 }, { - "epoch": 15.7, - "learning_rate": 0.0021672588999566487, - "loss": 1.2842, + "epoch": 31.41, + "learning_rate": 0.0009133904320464937, + "loss": 1.3196, "step": 78100 }, { - "epoch": 15.72, - "learning_rate": 0.0021660658557746126, - "loss": 1.2826, + "epoch": 31.45, + "learning_rate": 0.0009100111193336424, + "loss": 1.3236, "step": 78200 }, { - "epoch": 15.74, - "learning_rate": 0.0021648710062300482, - "loss": 1.2851, + "epoch": 31.49, + "learning_rate": 0.0009066344878682472, + "loss": 1.3203, "step": 78300 }, { - "epoch": 15.76, - "learning_rate": 0.002163674353677738, - "loss": 1.28, + "epoch": 31.53, + "learning_rate": 0.000903260564279369, + "loss": 1.3221, "step": 78400 }, { - "epoch": 15.78, - "learning_rate": 0.0021624759004760198, - "loss": 1.2835, + "epoch": 31.57, + "learning_rate": 0.0008998893751747131, + "loss": 1.3191, "step": 78500 }, { - "epoch": 15.8, - "learning_rate": 0.0021612756489867773, - "loss": 1.282, + "epoch": 31.61, + "learning_rate": 0.0008965209471404204, + "loss": 1.3198, "step": 78600 }, { - "epoch": 15.82, - "learning_rate": 0.002160073601575442, - "loss": 1.2815, + "epoch": 31.65, + "learning_rate": 0.0008931553067408565, + "loss": 1.3167, "step": 78700 }, { - "epoch": 15.84, - "learning_rate": 0.0021588697606109808, - "loss": 1.2854, + "epoch": 31.69, + "learning_rate": 0.000889792480518404, + "loss": 1.3185, "step": 78800 }, { - "epoch": 15.86, - "learning_rate": 0.0021576641284658978, - "loss": 1.2859, + "epoch": 31.73, + "learning_rate": 0.0008864324949932512, + "loss": 1.3217, "step": 78900 }, { - "epoch": 15.88, - "learning_rate": 0.0021564567075162263, - "loss": 1.2845, + "epoch": 31.77, + "learning_rate": 0.0008830753766631847, + "loss": 1.3212, "step": 79000 }, { - "epoch": 15.91, - "learning_rate": 0.002155247500141525, - "loss": 1.286, + "epoch": 31.81, + "learning_rate": 0.0008797211520033786, + "loss": 1.3209, "step": 79100 }, { - "epoch": 15.93, - "learning_rate": 0.0021540365087248737, - "loss": 1.2849, + "epoch": 31.85, + "learning_rate": 0.0008764033459701526, + "loss": 1.3207, "step": 79200 }, { - "epoch": 15.95, - "learning_rate": 0.0021528237356528675, - "loss": 1.2842, + "epoch": 31.89, + "learning_rate": 0.0008730549583886335, + "loss": 1.3185, "step": 79300 }, { - "epoch": 15.97, - "learning_rate": 0.002151609183315613, - "loss": 1.283, + "epoch": 31.93, + "learning_rate": 0.0008697095435011976, + "loss": 1.3145, "step": 79400 }, { - "epoch": 15.99, - "learning_rate": 0.0021503928541067234, - "loss": 1.2873, + "epoch": 31.97, + "learning_rate": 0.000866367127690723, + "loss": 1.3183, "step": 79500 }, { - "epoch": 16.0, - "eval_accuracy": 0.40597676821927015, - "eval_loss": 1.2457048892974854, - "eval_runtime": 19.9932, - "eval_samples_per_second": 3979.752, - "eval_steps_per_second": 15.555, - "step": 79572 + "epoch": 32.0, + "eval_accuracy": 0.4158149429454264, + "eval_loss": 1.2801185846328735, + "eval_runtime": 18.1222, + "eval_samples_per_second": 4390.647, + "eval_steps_per_second": 17.161, + "step": 79576 }, { - "epoch": 16.01, - "learning_rate": 0.002149174750423314, - "loss": 1.2817, + "epoch": 32.01, + "learning_rate": 0.0008630277373164358, + "loss": 1.3178, "step": 79600 }, { - "epoch": 16.03, - "learning_rate": 0.0021479548746659964, - "loss": 1.2792, + "epoch": 32.05, + "learning_rate": 0.0008596913987137034, + "loss": 1.3029, "step": 79700 }, { - "epoch": 16.05, - "learning_rate": 0.002146733229238875, - "loss": 1.2774, + "epoch": 32.09, + "learning_rate": 0.0008563581381938254, + "loss": 1.3081, "step": 79800 }, { - "epoch": 16.07, - "learning_rate": 0.002145509816549542, - "loss": 1.2808, + "epoch": 32.13, + "learning_rate": 0.000853161128356408, + "loss": 1.3116, "step": 79900 }, { - "epoch": 16.09, - "learning_rate": 0.0021442846390090724, - "loss": 1.2771, + "epoch": 32.17, + "learning_rate": 0.0008498339771096045, + "loss": 1.3085, "step": 80000 }, { - "epoch": 16.11, - "learning_rate": 0.0021430576990320196, - "loss": 1.2769, + "epoch": 32.21, + "learning_rate": 0.0008465099816840408, + "loss": 1.3123, "step": 80100 }, { - "epoch": 16.13, - "learning_rate": 0.002141828999036409, - "loss": 1.2819, + "epoch": 32.25, + "learning_rate": 0.0008431891682936748, + "loss": 1.3101, "step": 80200 }, { - "epoch": 16.15, - "learning_rate": 0.0021405985414437367, - "loss": 1.2789, + "epoch": 32.29, + "learning_rate": 0.0008398715631273706, + "loss": 1.3127, "step": 80300 }, { - "epoch": 16.17, - "learning_rate": 0.002139366328678961, - "loss": 1.2777, + "epoch": 32.33, + "learning_rate": 0.0008365571923486908, + "loss": 1.3101, "step": 80400 }, { - "epoch": 16.19, - "learning_rate": 0.0021381323631705002, - "loss": 1.2804, + "epoch": 32.37, + "learning_rate": 0.0008332460820956902, + "loss": 1.3141, "step": 80500 }, { - "epoch": 16.21, - "learning_rate": 0.002136896647350226, - "loss": 1.2802, + "epoch": 32.41, + "learning_rate": 0.0008299382584807122, + "loss": 1.314, "step": 80600 }, { - "epoch": 16.23, - "learning_rate": 0.0021356591836534607, - "loss": 1.2759, + "epoch": 32.45, + "learning_rate": 0.0008266337475901788, + "loss": 1.3147, "step": 80700 }, { - "epoch": 16.25, - "learning_rate": 0.00213441997451897, - "loss": 1.2834, + "epoch": 32.49, + "learning_rate": 0.0008233325754843877, + "loss": 1.3147, "step": 80800 }, { - "epoch": 16.27, - "learning_rate": 0.002133179022388961, - "loss": 1.2772, + "epoch": 32.53, + "learning_rate": 0.0008200347681973055, + "loss": 1.3107, "step": 80900 }, { - "epoch": 16.29, - "learning_rate": 0.0021319363297090744, - "loss": 1.2805, + "epoch": 32.57, + "learning_rate": 0.0008167403517363639, + "loss": 1.3118, "step": 81000 }, { - "epoch": 16.31, - "learning_rate": 0.0021306918989283815, - "loss": 1.2752, + "epoch": 32.61, + "learning_rate": 0.0008134493520822528, + "loss": 1.3101, "step": 81100 }, { - "epoch": 16.33, - "learning_rate": 0.0021294457324993795, - "loss": 1.279, + "epoch": 32.65, + "learning_rate": 0.0008101617951887165, + "loss": 1.3089, "step": 81200 }, { - "epoch": 16.35, - "learning_rate": 0.0021281978328779863, - "loss": 1.2825, + "epoch": 32.69, + "learning_rate": 0.0008068777069823485, + "loss": 1.3154, "step": 81300 }, { - "epoch": 16.37, - "learning_rate": 0.0021269482025235344, - "loss": 1.2824, + "epoch": 32.73, + "learning_rate": 0.0008035971133623876, + "loss": 1.3102, "step": 81400 }, { - "epoch": 16.39, - "learning_rate": 0.002125696843898769, - "loss": 1.2801, + "epoch": 32.77, + "learning_rate": 0.0008003200402005126, + "loss": 1.3097, "step": 81500 }, { - "epoch": 16.41, - "learning_rate": 0.0021244437594698383, - "loss": 1.2782, + "epoch": 32.81, + "learning_rate": 0.0007970465133406403, + "loss": 1.3114, "step": 81600 }, { - "epoch": 16.43, - "learning_rate": 0.0021231889517062965, - "loss": 1.2817, + "epoch": 32.85, + "learning_rate": 0.0007937765585987194, + "loss": 1.3105, "step": 81700 }, { - "epoch": 16.45, - "learning_rate": 0.0021219324230810884, - "loss": 1.2788, + "epoch": 32.89, + "learning_rate": 0.0007905102017625279, + "loss": 1.3128, "step": 81800 }, { - "epoch": 16.47, - "learning_rate": 0.002120674176070555, - "loss": 1.2796, + "epoch": 32.93, + "learning_rate": 0.0007872474685914699, + "loss": 1.3114, "step": 81900 }, { - "epoch": 16.49, - "learning_rate": 0.0021194142131544212, - "loss": 1.2799, + "epoch": 32.97, + "learning_rate": 0.0007839883848163723, + "loss": 1.3111, "step": 82000 }, { - "epoch": 16.51, - "learning_rate": 0.002118152536815795, - "loss": 1.2786, + "epoch": 33.0, + "eval_accuracy": 0.4163647428508311, + "eval_loss": 1.2729204893112183, + "eval_runtime": 18.2365, + "eval_samples_per_second": 4363.111, + "eval_steps_per_second": 17.054, + "step": 82062 + }, + { + "epoch": 33.01, + "learning_rate": 0.0007807329761392816, + "loss": 1.3076, "step": 82100 }, { - "epoch": 16.53, - "learning_rate": 0.0021168891495411592, - "loss": 1.2794, + "epoch": 33.06, + "learning_rate": 0.0007774812682332619, + "loss": 1.2998, "step": 82200 }, { - "epoch": 16.55, - "learning_rate": 0.0021156240538203713, - "loss": 1.2814, + "epoch": 33.1, + "learning_rate": 0.0007742332867421911, + "loss": 1.3002, "step": 82300 }, { - "epoch": 16.57, - "learning_rate": 0.0021143572521466533, - "loss": 1.2811, + "epoch": 33.14, + "learning_rate": 0.000770989057280561, + "loss": 1.2987, "step": 82400 }, { - "epoch": 16.59, - "learning_rate": 0.00211308874701659, - "loss": 1.2821, + "epoch": 33.18, + "learning_rate": 0.0007677486054332725, + "loss": 1.3055, "step": 82500 }, { - "epoch": 16.61, - "learning_rate": 0.002111818540930124, - "loss": 1.2811, + "epoch": 33.22, + "learning_rate": 0.0007645119567554358, + "loss": 1.3044, "step": 82600 }, { - "epoch": 16.63, - "learning_rate": 0.00211054663639055, - "loss": 1.2759, + "epoch": 33.26, + "learning_rate": 0.0007612791367721684, + "loss": 1.3029, "step": 82700 }, { - "epoch": 16.65, - "learning_rate": 0.0021092730359045086, - "loss": 1.2797, + "epoch": 33.3, + "learning_rate": 0.0007580501709783928, + "loss": 1.308, "step": 82800 }, { - "epoch": 16.67, - "learning_rate": 0.0021079977419819853, - "loss": 1.2784, + "epoch": 33.34, + "learning_rate": 0.0007548250848386371, + "loss": 1.3032, "step": 82900 }, { - "epoch": 16.69, - "learning_rate": 0.0021067207571362997, - "loss": 1.2816, + "epoch": 33.38, + "learning_rate": 0.0007516039037868335, + "loss": 1.3025, "step": 83000 }, { - "epoch": 16.71, - "learning_rate": 0.0021054420838841066, - "loss": 1.2801, + "epoch": 33.42, + "learning_rate": 0.0007483866532261166, + "loss": 1.3047, "step": 83100 }, { - "epoch": 16.73, - "learning_rate": 0.0021041617247453863, - "loss": 1.284, + "epoch": 33.46, + "learning_rate": 0.0007451733585286252, + "loss": 1.3058, "step": 83200 }, { - "epoch": 16.75, - "learning_rate": 0.0021028796822434442, - "loss": 1.2809, + "epoch": 33.5, + "learning_rate": 0.0007419640450352999, + "loss": 1.3028, "step": 83300 }, { - "epoch": 16.77, - "learning_rate": 0.0021015959589049003, - "loss": 1.2786, + "epoch": 33.54, + "learning_rate": 0.0007387587380556856, + "loss": 1.3025, "step": 83400 }, { - "epoch": 16.79, - "learning_rate": 0.0021003105572596887, - "loss": 1.2824, + "epoch": 33.58, + "learning_rate": 0.0007355574628677295, + "loss": 1.3071, "step": 83500 }, { - "epoch": 16.81, - "learning_rate": 0.0020990234798410508, - "loss": 1.2771, + "epoch": 33.62, + "learning_rate": 0.0007323602447175839, + "loss": 1.3048, "step": 83600 }, { - "epoch": 16.83, - "learning_rate": 0.0020977347291855314, - "loss": 1.2829, + "epoch": 33.66, + "learning_rate": 0.0007291671088194063, + "loss": 1.3062, "step": 83700 }, { - "epoch": 16.85, - "learning_rate": 0.0020964443078329703, - "loss": 1.2805, + "epoch": 33.7, + "learning_rate": 0.0007259780803551594, + "loss": 1.3064, "step": 83800 }, { - "epoch": 16.87, - "learning_rate": 0.0020951522183265034, - "loss": 1.2761, + "epoch": 33.74, + "learning_rate": 0.0007227931844744142, + "loss": 1.301, "step": 83900 }, { - "epoch": 16.89, - "learning_rate": 0.0020938584632125513, - "loss": 1.2776, + "epoch": 33.78, + "learning_rate": 0.0007196124462941508, + "loss": 1.3034, "step": 84000 }, { - "epoch": 16.91, - "learning_rate": 0.002092563045040819, - "loss": 1.2783, + "epoch": 33.82, + "learning_rate": 0.0007164358908985607, + "loss": 1.3039, "step": 84100 }, { - "epoch": 16.93, - "learning_rate": 0.0020912659663642878, - "loss": 1.2802, + "epoch": 33.86, + "learning_rate": 0.0007132635433388489, + "loss": 1.3041, "step": 84200 }, { - "epoch": 16.95, - "learning_rate": 0.002089967229739212, - "loss": 1.2749, + "epoch": 33.9, + "learning_rate": 0.0007100954286330352, + "loss": 1.3004, "step": 84300 }, { - "epoch": 16.97, - "learning_rate": 0.0020886668377251135, - "loss": 1.2798, + "epoch": 33.94, + "learning_rate": 0.0007069315717657598, + "loss": 1.3056, "step": 84400 }, { - "epoch": 16.99, - "learning_rate": 0.002087364792884776, - "loss": 1.2791, + "epoch": 33.98, + "learning_rate": 0.0007037719976880829, + "loss": 1.3033, "step": 84500 }, { - "epoch": 17.0, - "eval_accuracy": 0.40644209745282756, - "eval_loss": 1.2400243282318115, - "eval_runtime": 19.8275, - "eval_samples_per_second": 4013.004, - "eval_steps_per_second": 15.685, - "step": 84545 + "epoch": 34.0, + "eval_accuracy": 0.4169924281259647, + "eval_loss": 1.2650080919265747, + "eval_runtime": 18.2265, + "eval_samples_per_second": 4365.502, + "eval_steps_per_second": 17.063, + "step": 84549 }, { - "epoch": 17.01, - "learning_rate": 0.0020860610977842414, - "loss": 1.2757, + "epoch": 34.02, + "learning_rate": 0.00070061673131729, + "loss": 1.2983, "step": 84600 }, { - "epoch": 17.03, - "learning_rate": 0.0020847557549928037, - "loss": 1.2693, + "epoch": 34.06, + "learning_rate": 0.0006974657975366943, + "loss": 1.2906, "step": 84700 }, { - "epoch": 17.05, - "learning_rate": 0.002083448767083003, - "loss": 1.2748, + "epoch": 34.1, + "learning_rate": 0.0006943192211954416, + "loss": 1.2921, "step": 84800 }, { - "epoch": 17.07, - "learning_rate": 0.002082140136630623, - "loss": 1.2713, + "epoch": 34.14, + "learning_rate": 0.0006911770271083132, + "loss": 1.2937, "step": 84900 }, { - "epoch": 17.09, - "learning_rate": 0.002080829866214684, - "loss": 1.2743, + "epoch": 34.18, + "learning_rate": 0.0006880392400555313, + "loss": 1.2936, "step": 85000 }, { - "epoch": 17.11, - "learning_rate": 0.0020795310855919614, - "loss": 1.2762, + "epoch": 34.22, + "learning_rate": 0.0006849058847825628, + "loss": 1.2942, "step": 85100 }, { - "epoch": 17.13, - "learning_rate": 0.0020782175593340372, - "loss": 1.2779, + "epoch": 34.26, + "learning_rate": 0.0006817769859999242, + "loss": 1.293, "step": 85200 }, { - "epoch": 17.15, - "learning_rate": 0.0020769024008430834, - "loss": 1.272, + "epoch": 34.3, + "learning_rate": 0.0006786525683829867, + "loss": 1.2976, "step": 85300 }, { - "epoch": 17.17, - "learning_rate": 0.0020755856127109857, - "loss": 1.2736, + "epoch": 34.34, + "learning_rate": 0.0006755326565717819, + "loss": 1.2933, "step": 85400 }, { - "epoch": 17.19, - "learning_rate": 0.0020742671975328406, - "loss": 1.2722, + "epoch": 34.38, + "learning_rate": 0.0006724172751708072, + "loss": 1.2956, "step": 85500 }, { - "epoch": 17.21, - "learning_rate": 0.0020729471579069526, - "loss": 1.2711, + "epoch": 34.42, + "learning_rate": 0.0006693064487488328, + "loss": 1.297, "step": 85600 }, { - "epoch": 17.23, - "learning_rate": 0.002071625496434827, - "loss": 1.2761, + "epoch": 34.46, + "learning_rate": 0.0006662002018387062, + "loss": 1.2965, "step": 85700 }, { - "epoch": 17.25, - "learning_rate": 0.0020703022157211644, - "loss": 1.2755, + "epoch": 34.5, + "learning_rate": 0.0006630985589371597, + "loss": 1.296, "step": 85800 }, { - "epoch": 17.27, - "learning_rate": 0.002068977318373858, - "loss": 1.28, + "epoch": 34.54, + "learning_rate": 0.000660001544504617, + "loss": 1.2949, "step": 85900 }, { - "epoch": 17.29, - "learning_rate": 0.002067650807003987, - "loss": 1.2775, + "epoch": 34.58, + "learning_rate": 0.0006569091829650009, + "loss": 1.2962, "step": 86000 }, { - "epoch": 17.31, - "learning_rate": 0.00206632268422581, - "loss": 1.2762, + "epoch": 34.62, + "learning_rate": 0.0006538214987055396, + "loss": 1.2964, "step": 86100 }, { - "epoch": 17.33, - "learning_rate": 0.002064992952656763, - "loss": 1.2791, + "epoch": 34.66, + "learning_rate": 0.0006507385160765756, + "loss": 1.2985, "step": 86200 }, { - "epoch": 17.35, - "learning_rate": 0.0020636616149174508, - "loss": 1.2801, + "epoch": 34.7, + "learning_rate": 0.0006476602593913723, + "loss": 1.2986, "step": 86300 }, { - "epoch": 17.37, - "learning_rate": 0.002062328673631646, - "loss": 1.2765, + "epoch": 34.74, + "learning_rate": 0.0006445867529259235, + "loss": 1.295, "step": 86400 }, { - "epoch": 17.39, - "learning_rate": 0.002060994131426279, - "loss": 1.271, + "epoch": 34.78, + "learning_rate": 0.0006415180209187613, + "loss": 1.2955, "step": 86500 }, { - "epoch": 17.41, - "learning_rate": 0.0020596713602392754, - "loss": 1.2774, + "epoch": 34.82, + "learning_rate": 0.0006384540875707645, + "loss": 1.2943, "step": 86600 }, { - "epoch": 17.43, - "learning_rate": 0.002058333640031713, - "loss": 1.2742, + "epoch": 34.86, + "learning_rate": 0.00063539497704497, + "loss": 1.2999, "step": 86700 }, { - "epoch": 17.45, - "learning_rate": 0.002056994326777913, - "loss": 1.2761, + "epoch": 34.9, + "learning_rate": 0.0006323407134663786, + "loss": 1.2954, "step": 86800 }, { - "epoch": 17.47, - "learning_rate": 0.0020556534231173655, - "loss": 1.2749, + "epoch": 34.95, + "learning_rate": 0.0006292913209217682, + "loss": 1.2961, "step": 86900 }, { - "epoch": 17.49, - "learning_rate": 0.002054310931692694, - "loss": 1.2757, + "epoch": 34.99, + "learning_rate": 0.0006262468234595013, + "loss": 1.2966, "step": 87000 }, { - "epoch": 17.51, - "learning_rate": 0.0020529668551496506, - "loss": 1.2755, + "epoch": 35.0, + "eval_accuracy": 0.4174700070521662, + "eval_loss": 1.2581887245178223, + "eval_runtime": 18.1816, + "eval_samples_per_second": 4376.288, + "eval_steps_per_second": 17.105, + "step": 87036 + }, + { + "epoch": 35.03, + "learning_rate": 0.0006232072450893371, + "loss": 1.2863, "step": 87100 }, { - "epoch": 17.53, - "learning_rate": 0.0020516211961371126, - "loss": 1.2766, + "epoch": 35.07, + "learning_rate": 0.0006201726097822411, + "loss": 1.2824, "step": 87200 }, { - "epoch": 17.55, - "learning_rate": 0.0020502739573070757, - "loss": 1.2719, + "epoch": 35.11, + "learning_rate": 0.0006171429414701966, + "loss": 1.2847, "step": 87300 }, { - "epoch": 17.57, - "learning_rate": 0.002048925141314647, - "loss": 1.2771, + "epoch": 35.15, + "learning_rate": 0.0006141182640460158, + "loss": 1.2874, "step": 87400 }, { - "epoch": 17.59, - "learning_rate": 0.0020475747508180457, - "loss": 1.2732, + "epoch": 35.19, + "learning_rate": 0.0006110986013631511, + "loss": 1.2868, "step": 87500 }, { - "epoch": 17.61, - "learning_rate": 0.00204622278847859, - "loss": 1.2741, + "epoch": 35.23, + "learning_rate": 0.0006080839772355072, + "loss": 1.288, "step": 87600 }, { - "epoch": 17.63, - "learning_rate": 0.0020448692569606988, - "loss": 1.275, + "epoch": 35.27, + "learning_rate": 0.0006050744154372545, + "loss": 1.2858, "step": 87700 }, { - "epoch": 17.65, - "learning_rate": 0.0020435141589318817, - "loss": 1.2755, + "epoch": 35.31, + "learning_rate": 0.0006020699397026388, + "loss": 1.2898, "step": 87800 }, { - "epoch": 17.67, - "learning_rate": 0.0020421574970627366, - "loss": 1.2727, + "epoch": 35.35, + "learning_rate": 0.000599070573725797, + "loss": 1.2871, "step": 87900 }, { - "epoch": 17.69, - "learning_rate": 0.002040799274026943, - "loss": 1.2753, + "epoch": 35.39, + "learning_rate": 0.0005960763411605681, + "loss": 1.2868, "step": 88000 }, { - "epoch": 17.71, - "learning_rate": 0.0020394394925012565, - "loss": 1.2749, + "epoch": 35.43, + "learning_rate": 0.0005930872656203081, + "loss": 1.2918, "step": 88100 }, { - "epoch": 17.73, - "learning_rate": 0.002038078155165506, - "loss": 1.2776, + "epoch": 35.47, + "learning_rate": 0.0005901033706777035, + "loss": 1.2867, "step": 88200 }, { - "epoch": 17.75, - "learning_rate": 0.002036715264702584, - "loss": 1.2752, + "epoch": 35.51, + "learning_rate": 0.0005871246798645846, + "loss": 1.288, "step": 88300 }, { - "epoch": 17.78, - "learning_rate": 0.0020353508237984466, - "loss": 1.2771, + "epoch": 35.55, + "learning_rate": 0.0005841512166717406, + "loss": 1.2896, "step": 88400 }, { - "epoch": 17.8, - "learning_rate": 0.002033984835142102, - "loss": 1.2761, + "epoch": 35.59, + "learning_rate": 0.0005811830045487345, + "loss": 1.2879, "step": 88500 }, { - "epoch": 17.82, - "learning_rate": 0.002032617301425613, - "loss": 1.2707, + "epoch": 35.63, + "learning_rate": 0.0005782200669037174, + "loss": 1.2887, "step": 88600 }, { - "epoch": 17.84, - "learning_rate": 0.0020312482253440835, - "loss": 1.2749, + "epoch": 35.67, + "learning_rate": 0.0005752624271032452, + "loss": 1.2855, "step": 88700 }, { - "epoch": 17.86, - "learning_rate": 0.0020298776095956594, - "loss": 1.2735, + "epoch": 35.71, + "learning_rate": 0.0005723101084720932, + "loss": 1.2858, "step": 88800 }, { - "epoch": 17.88, - "learning_rate": 0.00202850545688152, - "loss": 1.2736, + "epoch": 35.75, + "learning_rate": 0.0005693631342930718, + "loss": 1.2873, "step": 88900 }, { - "epoch": 17.9, - "learning_rate": 0.002027131769905874, - "loss": 1.2763, + "epoch": 35.79, + "learning_rate": 0.0005664215278068442, + "loss": 1.2889, "step": 89000 }, { - "epoch": 17.92, - "learning_rate": 0.002025756551375953, - "loss": 1.2728, + "epoch": 35.83, + "learning_rate": 0.0005634853122117424, + "loss": 1.2868, "step": 89100 }, { - "epoch": 17.94, - "learning_rate": 0.0020243798040020084, - "loss": 1.2773, + "epoch": 35.87, + "learning_rate": 0.0005605545106635842, + "loss": 1.2876, "step": 89200 }, { - "epoch": 17.96, - "learning_rate": 0.002023001530497303, - "loss": 1.2736, + "epoch": 35.91, + "learning_rate": 0.0005576291462754908, + "loss": 1.2869, "step": 89300 }, { - "epoch": 17.98, - "learning_rate": 0.0020216217335781088, - "loss": 1.2755, + "epoch": 35.95, + "learning_rate": 0.0005547092421177046, + "loss": 1.2883, "step": 89400 }, { - "epoch": 18.0, - "learning_rate": 0.002020240415963699, - "loss": 1.2724, + "epoch": 35.99, + "learning_rate": 0.0005517948212174065, + "loss": 1.2858, "step": 89500 }, { - "epoch": 18.0, - "eval_accuracy": 0.40673402194635844, - "eval_loss": 1.2363520860671997, - "eval_runtime": 19.8757, - "eval_samples_per_second": 4003.284, - "eval_steps_per_second": 15.647, - "step": 89518 + "epoch": 36.0, + "eval_accuracy": 0.41807255659434184, + "eval_loss": 1.250754952430725, + "eval_runtime": 18.2293, + "eval_samples_per_second": 4364.838, + "eval_steps_per_second": 17.06, + "step": 89523 }, { - "epoch": 18.02, - "learning_rate": 0.0020188575803763435, - "loss": 1.2653, + "epoch": 36.03, + "learning_rate": 0.0005488859065585351, + "loss": 1.275, "step": 89600 }, { - "epoch": 18.04, - "learning_rate": 0.0020174732295413058, - "loss": 1.265, + "epoch": 36.07, + "learning_rate": 0.0005459825210816062, + "loss": 1.2742, "step": 89700 }, { - "epoch": 18.06, - "learning_rate": 0.0020160873661868328, - "loss": 1.2724, + "epoch": 36.11, + "learning_rate": 0.0005430846876835299, + "loss": 1.2769, "step": 89800 }, { - "epoch": 18.08, - "learning_rate": 0.0020146999930441547, - "loss": 1.2692, + "epoch": 36.15, + "learning_rate": 0.0005401924292174312, + "loss": 1.2783, "step": 89900 }, { - "epoch": 18.1, - "learning_rate": 0.002013311112847475, - "loss": 1.2678, + "epoch": 36.19, + "learning_rate": 0.0005373346073161149, + "loss": 1.2779, "step": 90000 }, { - "epoch": 18.12, - "learning_rate": 0.0020119207283339695, - "loss": 1.2709, + "epoch": 36.23, + "learning_rate": 0.0005344535107797021, + "loss": 1.279, "step": 90100 }, { - "epoch": 18.14, - "learning_rate": 0.0020105288422437775, - "loss": 1.2674, + "epoch": 36.27, + "learning_rate": 0.0005315780572431434, + "loss": 1.2809, "step": 90200 }, { - "epoch": 18.16, - "learning_rate": 0.0020091354573199965, - "loss": 1.2694, + "epoch": 36.31, + "learning_rate": 0.0005287082693830698, + "loss": 1.2805, "step": 90300 }, { - "epoch": 18.18, - "learning_rate": 0.00200774057630868, - "loss": 1.2691, + "epoch": 36.35, + "learning_rate": 0.0005258441698314319, + "loss": 1.28, "step": 90400 }, { - "epoch": 18.2, - "learning_rate": 0.0020063442019588283, - "loss": 1.2665, + "epoch": 36.39, + "learning_rate": 0.0005229857811753194, + "loss": 1.2804, "step": 90500 }, { - "epoch": 18.22, - "learning_rate": 0.002004946337022386, - "loss": 1.2669, + "epoch": 36.43, + "learning_rate": 0.0005201331259567862, + "loss": 1.2817, "step": 90600 }, { - "epoch": 18.24, - "learning_rate": 0.0020035469842542347, - "loss": 1.272, + "epoch": 36.47, + "learning_rate": 0.0005172862266726693, + "loss": 1.2787, "step": 90700 }, { - "epoch": 18.26, - "learning_rate": 0.002002146146412188, - "loss": 1.2658, + "epoch": 36.51, + "learning_rate": 0.000514445105774413, + "loss": 1.2773, "step": 90800 }, { - "epoch": 18.28, - "learning_rate": 0.002000743826256986, - "loss": 1.2719, + "epoch": 36.55, + "learning_rate": 0.0005116097856678919, + "loss": 1.2788, "step": 90900 }, { - "epoch": 18.3, - "learning_rate": 0.0019993400265522917, - "loss": 1.2716, + "epoch": 36.59, + "learning_rate": 0.0005087802887132336, + "loss": 1.2787, "step": 91000 }, { - "epoch": 18.32, - "learning_rate": 0.001997934750064681, - "loss": 1.2676, + "epoch": 36.63, + "learning_rate": 0.000505956637224643, + "loss": 1.2777, "step": 91100 }, { - "epoch": 18.34, - "learning_rate": 0.0019965279995636438, - "loss": 1.2705, + "epoch": 36.67, + "learning_rate": 0.0005031388534702259, + "loss": 1.2796, "step": 91200 }, { - "epoch": 18.36, - "learning_rate": 0.001995119777821572, - "loss": 1.2741, + "epoch": 36.71, + "learning_rate": 0.0005003269596718136, + "loss": 1.2787, "step": 91300 }, { - "epoch": 18.38, - "learning_rate": 0.0019937100876137592, - "loss": 1.2688, + "epoch": 36.75, + "learning_rate": 0.0004975209780047875, + "loss": 1.2829, "step": 91400 }, { - "epoch": 18.4, - "learning_rate": 0.001992298931718391, - "loss": 1.2709, + "epoch": 36.8, + "learning_rate": 0.0004947209305979038, + "loss": 1.2794, "step": 91500 }, { - "epoch": 18.42, - "learning_rate": 0.0019908863129165432, - "loss": 1.2719, + "epoch": 36.84, + "learning_rate": 0.0004919268395331206, + "loss": 1.2799, "step": 91600 }, { - "epoch": 18.44, - "learning_rate": 0.0019894722339921737, - "loss": 1.2726, + "epoch": 36.88, + "learning_rate": 0.0004891387268454217, + "loss": 1.2785, "step": 91700 }, { - "epoch": 18.46, - "learning_rate": 0.0019880566977321184, - "loss": 1.2734, + "epoch": 36.92, + "learning_rate": 0.0004863566145226439, + "loss": 1.2797, "step": 91800 }, { - "epoch": 18.48, - "learning_rate": 0.001986639706926085, - "loss": 1.2703, + "epoch": 36.96, + "learning_rate": 0.0004835805245053036, + "loss": 1.2783, "step": 91900 }, { - "epoch": 18.5, - "learning_rate": 0.0019852354559692456, - "loss": 1.2692, + "epoch": 37.0, + "learning_rate": 0.00048081047868642353, + "loss": 1.2794, "step": 92000 }, { - "epoch": 18.52, - "learning_rate": 0.0019838155789275737, - "loss": 1.2722, + "epoch": 37.0, + "eval_accuracy": 0.41857916552198776, + "eval_loss": 1.2428914308547974, + "eval_runtime": 19.1983, + "eval_samples_per_second": 4144.524, + "eval_steps_per_second": 16.199, + "step": 92009 + }, + { + "epoch": 37.04, + "learning_rate": 0.0004780741086105822, + "loss": 1.266, "step": 92100 }, { - "epoch": 18.54, - "learning_rate": 0.0019823942556982275, - "loss": 1.2754, + "epoch": 37.08, + "learning_rate": 0.0004753161556907, + "loss": 1.2675, "step": 92200 }, { - "epoch": 18.56, - "learning_rate": 0.001980971489082321, - "loss": 1.2689, + "epoch": 37.12, + "learning_rate": 0.0004725643121444032, + "loss": 1.2649, "step": 92300 }, { - "epoch": 18.58, - "learning_rate": 0.00197954728188381, - "loss": 1.2756, + "epoch": 37.16, + "learning_rate": 0.0004698185996734999, + "loss": 1.2662, "step": 92400 }, { - "epoch": 18.6, - "learning_rate": 0.0019781216369094915, - "loss": 1.2725, + "epoch": 37.2, + "learning_rate": 0.000467079039931447, + "loss": 1.2685, "step": 92500 }, { - "epoch": 18.62, - "learning_rate": 0.001976694556968995, - "loss": 1.2689, + "epoch": 37.24, + "learning_rate": 0.0004643456545231797, + "loss": 1.2715, "step": 92600 }, { - "epoch": 18.64, - "learning_rate": 0.0019752660448747795, - "loss": 1.2701, + "epoch": 37.28, + "learning_rate": 0.00046161846500494027, + "loss": 1.2721, "step": 92700 }, { - "epoch": 18.66, - "learning_rate": 0.001973836103442124, - "loss": 1.2694, + "epoch": 37.32, + "learning_rate": 0.00045889749288410803, + "loss": 1.2727, "step": 92800 }, { - "epoch": 18.68, - "learning_rate": 0.0019724047354891263, - "loss": 1.2722, + "epoch": 37.36, + "learning_rate": 0.0004561827596190307, + "loss": 1.2707, "step": 92900 }, { - "epoch": 18.7, - "learning_rate": 0.001970971943836695, - "loss": 1.2724, + "epoch": 37.4, + "learning_rate": 0.0004534742866188546, + "loss": 1.2698, "step": 93000 }, { - "epoch": 18.72, - "learning_rate": 0.0019695377313085453, - "loss": 1.2683, + "epoch": 37.44, + "learning_rate": 0.00045079908599305484, + "loss": 1.2708, "step": 93100 }, { - "epoch": 18.74, - "learning_rate": 0.0019681021007311905, - "loss": 1.2739, + "epoch": 37.48, + "learning_rate": 0.00044810313441779866, + "loss": 1.275, "step": 93200 }, { - "epoch": 18.76, - "learning_rate": 0.001966665054933941, - "loss": 1.2722, + "epoch": 37.52, + "learning_rate": 0.000445413506825629, + "loss": 1.2721, "step": 93300 }, { - "epoch": 18.78, - "learning_rate": 0.0019652265967488943, - "loss": 1.2705, + "epoch": 37.56, + "learning_rate": 0.00044273022442770137, + "loss": 1.2709, "step": 93400 }, { - "epoch": 18.8, - "learning_rate": 0.0019637867290109327, - "loss": 1.2687, + "epoch": 37.6, + "learning_rate": 0.00044005330838513225, + "loss": 1.2693, "step": 93500 }, { - "epoch": 18.82, - "learning_rate": 0.001962345454557716, - "loss": 1.2698, + "epoch": 37.64, + "learning_rate": 0.0004373827798088306, + "loss": 1.2706, "step": 93600 }, { - "epoch": 18.84, - "learning_rate": 0.0019609027762296765, - "loss": 1.2722, + "epoch": 37.68, + "learning_rate": 0.0004347186597593318, + "loss": 1.27, "step": 93700 }, { - "epoch": 18.86, - "learning_rate": 0.001959458696870013, - "loss": 1.274, + "epoch": 37.72, + "learning_rate": 0.00043206096924663223, + "loss": 1.2732, "step": 93800 }, { - "epoch": 18.88, - "learning_rate": 0.0019580132193246854, - "loss": 1.2683, + "epoch": 37.76, + "learning_rate": 0.00042940972923002295, + "loss": 1.2717, "step": 93900 }, { - "epoch": 18.9, - "learning_rate": 0.001956566346442409, - "loss": 1.268, + "epoch": 37.8, + "learning_rate": 0.00042676496061792467, + "loss": 1.2724, "step": 94000 }, { - "epoch": 18.92, - "learning_rate": 0.001955118081074649, - "loss": 1.2674, + "epoch": 37.84, + "learning_rate": 0.00042412668426772287, + "loss": 1.2709, "step": 94100 }, { - "epoch": 18.94, - "learning_rate": 0.001953668426075616, - "loss": 1.267, + "epoch": 37.88, + "learning_rate": 0.0004214949209856031, + "loss": 1.2743, "step": 94200 }, { - "epoch": 18.96, - "learning_rate": 0.0019522173843022578, - "loss": 1.2677, + "epoch": 37.92, + "learning_rate": 0.00041886969152638703, + "loss": 1.2685, "step": 94300 }, { - "epoch": 18.98, - "learning_rate": 0.0019507649586142553, - "loss": 1.2669, + "epoch": 37.96, + "learning_rate": 0.00041625101659336836, + "loss": 1.2687, "step": 94400 }, { - "epoch": 19.0, - "eval_accuracy": 0.4069155401191752, - "eval_loss": 1.2321081161499023, - "eval_runtime": 19.6805, - "eval_samples_per_second": 4042.992, - "eval_steps_per_second": 15.802, - "step": 94491 + "epoch": 38.0, + "eval_accuracy": 0.4191567563434093, + "eval_loss": 1.2355095148086548, + "eval_runtime": 18.0561, + "eval_samples_per_second": 4406.712, + "eval_steps_per_second": 17.224, + "step": 94496 }, { - "epoch": 19.0, - "learning_rate": 0.0019493111518740181, - "loss": 1.2724, + "epoch": 38.0, + "learning_rate": 0.00041363891683815143, + "loss": 1.2674, "step": 94500 }, { - "epoch": 19.02, - "learning_rate": 0.001947855966946676, - "loss": 1.2629, + "epoch": 38.04, + "learning_rate": 0.0004110334128604853, + "loss": 1.2529, "step": 94600 }, { - "epoch": 19.04, - "learning_rate": 0.0019463994067000763, - "loss": 1.2614, + "epoch": 38.08, + "learning_rate": 0.0004084345252081037, + "loss": 1.2597, "step": 94700 }, { - "epoch": 19.06, - "learning_rate": 0.001944941474004775, - "loss": 1.2603, + "epoch": 38.12, + "learning_rate": 0.0004058422743765618, + "loss": 1.255, "step": 94800 }, { - "epoch": 19.08, - "learning_rate": 0.0019434821717340346, - "loss": 1.2643, + "epoch": 38.16, + "learning_rate": 0.00040325668080907486, + "loss": 1.2609, "step": 94900 }, { - "epoch": 19.1, - "learning_rate": 0.001942021502763816, - "loss": 1.2619, + "epoch": 38.2, + "learning_rate": 0.0004006777648963576, + "loss": 1.2601, "step": 95000 }, { - "epoch": 19.12, - "learning_rate": 0.001940559469972774, - "loss": 1.2676, + "epoch": 38.24, + "learning_rate": 0.0003981055469764626, + "loss": 1.2598, "step": 95100 }, { - "epoch": 19.14, - "learning_rate": 0.0019390960762422499, - "loss": 1.2606, + "epoch": 38.28, + "learning_rate": 0.00039554004733462005, + "loss": 1.2605, "step": 95200 }, { - "epoch": 19.16, - "learning_rate": 0.0019376313244562687, - "loss": 1.2628, + "epoch": 38.32, + "learning_rate": 0.0003929812862030781, + "loss": 1.2655, "step": 95300 }, { - "epoch": 19.18, - "learning_rate": 0.0019361798852696786, - "loss": 1.2691, + "epoch": 38.36, + "learning_rate": 0.0003904292837609427, + "loss": 1.2597, "step": 95400 }, { - "epoch": 19.2, - "learning_rate": 0.0019347124395440384, - "loss": 1.2652, + "epoch": 38.4, + "learning_rate": 0.00038788406013402, + "loss": 1.2629, "step": 95500 }, { - "epoch": 19.22, - "learning_rate": 0.0019332436444021162, - "loss": 1.2672, + "epoch": 38.44, + "learning_rate": 0.0003853456353946555, + "loss": 1.2638, "step": 95600 }, { - "epoch": 19.24, - "learning_rate": 0.0019317735027385814, - "loss": 1.2668, + "epoch": 38.48, + "learning_rate": 0.00038283931180072185, + "loss": 1.2636, "step": 95700 }, { - "epoch": 19.26, - "learning_rate": 0.0019303020174507568, - "loss": 1.2604, + "epoch": 38.52, + "learning_rate": 0.00038031447635151957, + "loss": 1.2616, "step": 95800 }, { - "epoch": 19.28, - "learning_rate": 0.001928829191438613, - "loss": 1.2699, + "epoch": 38.56, + "learning_rate": 0.0003777964994857314, + "loss": 1.2616, "step": 95900 }, { - "epoch": 19.3, - "learning_rate": 0.0019273550276047641, - "loss": 1.2617, + "epoch": 38.6, + "learning_rate": 0.0003752854010608274, + "loss": 1.2649, "step": 96000 }, { - "epoch": 19.32, - "learning_rate": 0.0019258795288544595, - "loss": 1.2651, + "epoch": 38.64, + "learning_rate": 0.0003727812008800327, + "loss": 1.265, "step": 96100 }, { - "epoch": 19.34, - "learning_rate": 0.0019244026980955796, - "loss": 1.2651, + "epoch": 38.69, + "learning_rate": 0.0003702839186921707, + "loss": 1.261, "step": 96200 }, { - "epoch": 19.36, - "learning_rate": 0.0019229245382386302, - "loss": 1.2654, + "epoch": 38.73, + "learning_rate": 0.00036779357419150716, + "loss": 1.2632, "step": 96300 }, { - "epoch": 19.38, - "learning_rate": 0.0019214450521967369, - "loss": 1.2645, + "epoch": 38.77, + "learning_rate": 0.0003653101870175964, + "loss": 1.2623, "step": 96400 }, { - "epoch": 19.4, - "learning_rate": 0.0019199642428856373, - "loss": 1.2674, + "epoch": 38.81, + "learning_rate": 0.00036283377675512416, + "loss": 1.2614, "step": 96500 }, { - "epoch": 19.42, - "learning_rate": 0.0019184821132236796, - "loss": 1.2707, + "epoch": 38.85, + "learning_rate": 0.0003603643629337551, + "loss": 1.2626, "step": 96600 }, { - "epoch": 19.44, - "learning_rate": 0.0019169986661318106, - "loss": 1.2653, + "epoch": 38.89, + "learning_rate": 0.00035790196502797744, + "loss": 1.2615, "step": 96700 }, { - "epoch": 19.46, - "learning_rate": 0.0019155139045335771, - "loss": 1.2664, + "epoch": 38.93, + "learning_rate": 0.00035544660245695014, + "loss": 1.2628, "step": 96800 }, { - "epoch": 19.48, - "learning_rate": 0.0019140278313551134, - "loss": 1.2657, + "epoch": 38.97, + "learning_rate": 0.00035299829458434963, + "loss": 1.2632, "step": 96900 }, { - "epoch": 19.5, - "learning_rate": 0.0019125404495251408, - "loss": 1.2665, + "epoch": 39.0, + "eval_accuracy": 0.4198839224771513, + "eval_loss": 1.2276296615600586, + "eval_runtime": 17.963, + "eval_samples_per_second": 4429.538, + "eval_steps_per_second": 17.313, + "step": 96983 + }, + { + "epoch": 39.01, + "learning_rate": 0.00035055706071821695, + "loss": 1.2591, "step": 97000 }, { - "epoch": 19.52, - "learning_rate": 0.001911066655304144, - "loss": 1.2674, + "epoch": 39.05, + "learning_rate": 0.00034812292011080534, + "loss": 1.2502, "step": 97100 }, { - "epoch": 19.54, - "learning_rate": 0.0019095766779809568, - "loss": 1.2644, + "epoch": 39.09, + "learning_rate": 0.0003456958919584291, + "loss": 1.249, "step": 97200 }, { - "epoch": 19.56, - "learning_rate": 0.0019080854007784964, - "loss": 1.2648, + "epoch": 39.13, + "learning_rate": 0.0003432759954013112, + "loss": 1.2494, "step": 97300 }, { - "epoch": 19.58, - "learning_rate": 0.0019065928266357385, - "loss": 1.2669, + "epoch": 39.17, + "learning_rate": 0.00034086324952343274, + "loss": 1.2543, "step": 97400 }, { - "epoch": 19.6, - "learning_rate": 0.001905098958494216, - "loss": 1.2676, + "epoch": 39.21, + "learning_rate": 0.00033845767335238363, + "loss": 1.2507, "step": 97500 }, { - "epoch": 19.62, - "learning_rate": 0.001903618757271021, - "loss": 1.2662, + "epoch": 39.25, + "learning_rate": 0.0003360592858592104, + "loss": 1.2529, "step": 97600 }, { - "epoch": 19.65, - "learning_rate": 0.001902122322833248, - "loss": 1.2677, + "epoch": 39.29, + "learning_rate": 0.00033366810595826816, + "loss": 1.2519, "step": 97700 }, { - "epoch": 19.67, - "learning_rate": 0.0019006246032070807, - "loss": 1.2659, + "epoch": 39.33, + "learning_rate": 0.0003312841525070705, + "loss": 1.2511, "step": 97800 }, { - "epoch": 19.69, - "learning_rate": 0.0018991256013441932, - "loss": 1.2669, + "epoch": 39.37, + "learning_rate": 0.00032890744430614155, + "loss": 1.254, "step": 97900 }, { - "epoch": 19.71, - "learning_rate": 0.0018976253201987848, - "loss": 1.2649, + "epoch": 39.41, + "learning_rate": 0.0003265380000988674, + "loss": 1.2547, "step": 98000 }, { - "epoch": 19.73, - "learning_rate": 0.0018961237627275773, - "loss": 1.2679, + "epoch": 39.45, + "learning_rate": 0.00032417583857134795, + "loss": 1.253, "step": 98100 }, { - "epoch": 19.75, - "learning_rate": 0.001894620931889807, - "loss": 1.2692, + "epoch": 39.49, + "learning_rate": 0.00032182097835225024, + "loss": 1.2491, "step": 98200 }, { - "epoch": 19.77, - "learning_rate": 0.0018931168306472199, - "loss": 1.2655, + "epoch": 39.53, + "learning_rate": 0.0003194734380126607, + "loss": 1.2557, "step": 98300 }, { - "epoch": 19.79, - "learning_rate": 0.0018916114619640656, - "loss": 1.2646, + "epoch": 39.57, + "learning_rate": 0.00031713323606593914, + "loss": 1.2547, "step": 98400 }, { - "epoch": 19.81, - "learning_rate": 0.0018901048288070927, - "loss": 1.2647, + "epoch": 39.61, + "learning_rate": 0.00031480039096757397, + "loss": 1.2565, "step": 98500 }, { - "epoch": 19.83, - "learning_rate": 0.0018885969341455395, - "loss": 1.2651, + "epoch": 39.65, + "learning_rate": 0.00031247492111503367, + "loss": 1.253, "step": 98600 }, { - "epoch": 19.85, - "learning_rate": 0.0018870877809511327, - "loss": 1.2642, + "epoch": 39.69, + "learning_rate": 0.00031015684484762455, + "loss": 1.2535, "step": 98700 }, { - "epoch": 19.87, - "learning_rate": 0.001885577372198078, - "loss": 1.2656, + "epoch": 39.73, + "learning_rate": 0.00030784618044634463, + "loss": 1.253, "step": 98800 }, { - "epoch": 19.89, - "learning_rate": 0.001884065710863056, - "loss": 1.2627, + "epoch": 39.77, + "learning_rate": 0.00030554294613374016, + "loss": 1.2542, "step": 98900 }, { - "epoch": 19.91, - "learning_rate": 0.0018825527999252157, - "loss": 1.266, + "epoch": 39.81, + "learning_rate": 0.0003032471600737613, + "loss": 1.2526, "step": 99000 }, { - "epoch": 19.93, - "learning_rate": 0.0018810386423661694, - "loss": 1.2654, + "epoch": 39.85, + "learning_rate": 0.0003009588403716193, + "loss": 1.2552, "step": 99100 }, { - "epoch": 19.95, - "learning_rate": 0.0018795232411699847, - "loss": 1.264, + "epoch": 39.89, + "learning_rate": 0.000298678005073644, + "loss": 1.2552, "step": 99200 }, { - "epoch": 19.97, - "learning_rate": 0.0018780065993231816, - "loss": 1.2633, + "epoch": 39.93, + "learning_rate": 0.00029640467216714053, + "loss": 1.2537, "step": 99300 }, { - "epoch": 19.99, - "learning_rate": 0.0018764887198147245, - "loss": 1.2683, + "epoch": 39.97, + "learning_rate": 0.0002941388595802483, + "loss": 1.2517, "step": 99400 }, { - "epoch": 20.0, - "eval_accuracy": 0.4075025708445861, - "eval_loss": 1.2268297672271729, - "eval_runtime": 19.801, - "eval_samples_per_second": 4018.383, - "eval_steps_per_second": 15.706, - "step": 99465 + "epoch": 40.0, + "eval_accuracy": 0.42036451061081953, + "eval_loss": 1.2210617065429688, + "eval_runtime": 18.2161, + "eval_samples_per_second": 4367.993, + "eval_steps_per_second": 17.073, + "step": 99470 }, { - "epoch": 20.01, - "learning_rate": 0.0018749696056360177, - "loss": 1.2603, + "epoch": 40.01, + "learning_rate": 0.0002918805851817999, + "loss": 1.2484, "step": 99500 }, { - "epoch": 20.03, - "learning_rate": 0.0018734492597808972, - "loss": 1.2609, + "epoch": 40.05, + "learning_rate": 0.0002896298667811789, + "loss": 1.2386, "step": 99600 }, { - "epoch": 20.05, - "learning_rate": 0.001871927685245628, - "loss": 1.2587, + "epoch": 40.09, + "learning_rate": 0.00028738672212818065, + "loss": 1.2427, "step": 99700 }, { - "epoch": 20.07, - "learning_rate": 0.0018704048850288952, - "loss": 1.2575, + "epoch": 40.13, + "learning_rate": 0.0002851734868094691, + "loss": 1.2423, "step": 99800 }, { - "epoch": 20.09, - "learning_rate": 0.001868880862131801, - "loss": 1.2582, + "epoch": 40.17, + "learning_rate": 0.00028294546648429563, + "loss": 1.2407, "step": 99900 }, { - "epoch": 20.11, - "learning_rate": 0.0018673556195578558, - "loss": 1.2595, + "epoch": 40.21, + "learning_rate": 0.00028072507262179606, + "loss": 1.2437, "step": 100000 }, { - "epoch": 20.13, - "learning_rate": 0.0018658291603129745, - "loss": 1.2615, + "epoch": 40.25, + "learning_rate": 0.00027851232273261794, + "loss": 1.2416, "step": 100100 }, { - "epoch": 20.15, - "learning_rate": 0.0018643014874054691, - "loss": 1.2603, + "epoch": 40.29, + "learning_rate": 0.00027630723426712566, + "loss": 1.2436, "step": 100200 }, { - "epoch": 20.17, - "learning_rate": 0.0018627726038460447, - "loss": 1.2634, + "epoch": 40.33, + "learning_rate": 0.000274109824615264, + "loss": 1.2457, "step": 100300 }, { - "epoch": 20.19, - "learning_rate": 0.001861242512647791, - "loss": 1.2634, + "epoch": 40.37, + "learning_rate": 0.000271920111106421, + "loss": 1.2445, "step": 100400 }, { - "epoch": 20.21, - "learning_rate": 0.0018597112168261781, - "loss": 1.2646, + "epoch": 40.41, + "learning_rate": 0.0002697381110092896, + "loss": 1.2466, "step": 100500 }, { - "epoch": 20.23, - "learning_rate": 0.0018581787193990508, - "loss": 1.2591, + "epoch": 40.45, + "learning_rate": 0.0002675638415317336, + "loss": 1.2449, "step": 100600 }, { - "epoch": 20.25, - "learning_rate": 0.0018566450233866208, - "loss": 1.2609, + "epoch": 40.49, + "learning_rate": 0.0002653973198206501, + "loss": 1.2427, "step": 100700 }, { - "epoch": 20.27, - "learning_rate": 0.0018551101318114629, - "loss": 1.2647, + "epoch": 40.53, + "learning_rate": 0.00026323856296183584, + "loss": 1.2446, "step": 100800 }, { - "epoch": 20.29, - "learning_rate": 0.0018535740476985083, - "loss": 1.2634, + "epoch": 40.58, + "learning_rate": 0.0002610875879798516, + "loss": 1.2442, "step": 100900 }, { - "epoch": 20.31, - "learning_rate": 0.0018520367740750374, - "loss": 1.2589, + "epoch": 40.62, + "learning_rate": 0.0002589444118378878, + "loss": 1.2457, "step": 101000 }, { - "epoch": 20.33, - "learning_rate": 0.0018504983139706755, - "loss": 1.2615, + "epoch": 40.66, + "learning_rate": 0.0002568090514376313, + "loss": 1.2462, "step": 101100 }, { - "epoch": 20.35, - "learning_rate": 0.001848958670417386, - "loss": 1.2561, + "epoch": 40.7, + "learning_rate": 0.00025468152361913203, + "loss": 1.245, "step": 101200 }, { - "epoch": 20.37, - "learning_rate": 0.0018474178464494648, - "loss": 1.2627, + "epoch": 40.74, + "learning_rate": 0.000252561845160669, + "loss": 1.2446, "step": 101300 }, { - "epoch": 20.39, - "learning_rate": 0.001845875845103534, - "loss": 1.2583, + "epoch": 40.78, + "learning_rate": 0.0002504500327786208, + "loss": 1.244, "step": 101400 }, { - "epoch": 20.41, - "learning_rate": 0.0018443326694185364, - "loss": 1.2633, + "epoch": 40.82, + "learning_rate": 0.0002483461031273301, + "loss": 1.2472, "step": 101500 }, { - "epoch": 20.43, - "learning_rate": 0.0018427883224357284, - "loss": 1.2611, + "epoch": 40.86, + "learning_rate": 0.0002462500727989753, + "loss": 1.247, "step": 101600 }, { - "epoch": 20.45, - "learning_rate": 0.0018412428071986754, - "loss": 1.2625, + "epoch": 40.9, + "learning_rate": 0.00024416195832343765, + "loss": 1.2435, "step": 101700 }, { - "epoch": 20.47, - "learning_rate": 0.001839711599315474, - "loss": 1.2607, + "epoch": 40.94, + "learning_rate": 0.00024208177616817276, + "loss": 1.2433, "step": 101800 }, { - "epoch": 20.49, - "learning_rate": 0.001838163768316336, - "loss": 1.2623, + "epoch": 40.98, + "learning_rate": 0.00024003022567247874, + "loss": 1.244, "step": 101900 }, { - "epoch": 20.51, - "learning_rate": 0.0018366147781769232, - "loss": 1.2569, + "epoch": 41.0, + "eval_accuracy": 0.4209702463726659, + "eval_loss": 1.2142891883850098, + "eval_runtime": 18.2831, + "eval_samples_per_second": 4351.989, + "eval_steps_per_second": 17.01, + "step": 101956 + }, + { + "epoch": 41.02, + "learning_rate": 0.00023796587757840703, + "loss": 1.2387, "step": 102000 }, { - "epoch": 20.53, - "learning_rate": 0.001835064631949951, - "loss": 1.2585, + "epoch": 41.06, + "learning_rate": 0.0002359095106686346, + "loss": 1.2334, "step": 102100 }, { - "epoch": 20.55, - "learning_rate": 0.0018335133326904144, - "loss": 1.2631, + "epoch": 41.1, + "learning_rate": 0.00023386114116024593, + "loss": 1.233, "step": 102200 }, { - "epoch": 20.57, - "learning_rate": 0.001831960883455579, - "loss": 1.2592, + "epoch": 41.14, + "learning_rate": 0.00023182078520725695, + "loss": 1.2353, "step": 102300 }, { - "epoch": 20.59, - "learning_rate": 0.0018304072873049785, - "loss": 1.2613, + "epoch": 41.18, + "learning_rate": 0.0002297884589004852, + "loss": 1.2358, "step": 102400 }, { - "epoch": 20.61, - "learning_rate": 0.0018288525473004055, - "loss": 1.2601, + "epoch": 41.22, + "learning_rate": 0.0002277641782674246, + "loss": 1.2354, "step": 102500 }, { - "epoch": 20.63, - "learning_rate": 0.0018272966665059086, - "loss": 1.2608, + "epoch": 41.26, + "learning_rate": 0.000225747959272119, + "loss": 1.2359, "step": 102600 }, { - "epoch": 20.65, - "learning_rate": 0.0018257396479877822, - "loss": 1.2608, + "epoch": 41.3, + "learning_rate": 0.00022373981781503545, + "loss": 1.2341, "step": 102700 }, { - "epoch": 20.67, - "learning_rate": 0.0018241814948145656, - "loss": 1.2619, + "epoch": 41.34, + "learning_rate": 0.00022173976973293943, + "loss": 1.2361, "step": 102800 }, { - "epoch": 20.69, - "learning_rate": 0.0018226222100570322, - "loss": 1.2599, + "epoch": 41.38, + "learning_rate": 0.0002197478307987698, + "loss": 1.2394, "step": 102900 }, { - "epoch": 20.71, - "learning_rate": 0.0018210617967881865, - "loss": 1.2589, + "epoch": 41.42, + "learning_rate": 0.00021776401672151432, + "loss": 1.2384, "step": 103000 }, { - "epoch": 20.73, - "learning_rate": 0.0018195002580832567, - "loss": 1.2573, + "epoch": 41.46, + "learning_rate": 0.00021578834314608638, + "loss": 1.2345, "step": 103100 }, { - "epoch": 20.75, - "learning_rate": 0.0018179375970196893, - "loss": 1.2582, + "epoch": 41.5, + "learning_rate": 0.0002138208256532001, + "loss": 1.2345, "step": 103200 }, { - "epoch": 20.77, - "learning_rate": 0.0018163738166771422, - "loss": 1.2581, + "epoch": 41.54, + "learning_rate": 0.00021186147975924913, + "loss": 1.2368, "step": 103300 }, { - "epoch": 20.79, - "learning_rate": 0.0018148089201374795, - "loss": 1.2628, + "epoch": 41.58, + "learning_rate": 0.000209910320916184, + "loss": 1.237, "step": 103400 }, { - "epoch": 20.81, - "learning_rate": 0.0018132429104847653, - "loss": 1.2598, + "epoch": 41.62, + "learning_rate": 0.00020796736451138902, + "loss": 1.2365, "step": 103500 }, { - "epoch": 20.83, - "learning_rate": 0.0018116757908052572, - "loss": 1.2618, + "epoch": 41.66, + "learning_rate": 0.0002060326258675621, + "loss": 1.237, "step": 103600 }, { - "epoch": 20.85, - "learning_rate": 0.0018101075641874, - "loss": 1.266, + "epoch": 41.7, + "learning_rate": 0.00020410612024259367, + "loss": 1.2337, "step": 103700 }, { - "epoch": 20.87, - "learning_rate": 0.0018085382337218203, - "loss": 1.2608, + "epoch": 41.74, + "learning_rate": 0.00020218786282944629, + "loss": 1.2346, "step": 103800 }, { - "epoch": 20.89, - "learning_rate": 0.0018069678025013202, - "loss": 1.2583, + "epoch": 41.78, + "learning_rate": 0.00020029692774372416, + "loss": 1.2401, "step": 103900 }, { - "epoch": 20.91, - "learning_rate": 0.0018053962736208717, - "loss": 1.2578, + "epoch": 41.82, + "learning_rate": 0.00019839512921442467, + "loss": 1.2351, "step": 104000 }, { - "epoch": 20.93, - "learning_rate": 0.0018038236501776092, - "loss": 1.2621, + "epoch": 41.86, + "learning_rate": 0.0001965016239354195, + "loss": 1.2362, "step": 104100 }, { - "epoch": 20.95, - "learning_rate": 0.0018022499352708247, - "loss": 1.2574, + "epoch": 41.9, + "learning_rate": 0.00019461642683942098, + "loss": 1.2342, "step": 104200 }, { - "epoch": 20.97, - "learning_rate": 0.0018006751320019604, - "loss": 1.2589, + "epoch": 41.94, + "learning_rate": 0.00019273955279362155, + "loss": 1.2378, "step": 104300 }, { - "epoch": 20.99, - "learning_rate": 0.001799099243474605, - "loss": 1.2632, + "epoch": 41.98, + "learning_rate": 0.00019087101659957472, + "loss": 1.2342, "step": 104400 }, { - "epoch": 21.0, - "eval_accuracy": 0.40797378629408937, - "eval_loss": 1.2228056192398071, - "eval_runtime": 19.5365, - "eval_samples_per_second": 4072.794, - "eval_steps_per_second": 15.919, - "step": 104438 + "epoch": 42.0, + "eval_accuracy": 0.4214678276779114, + "eval_loss": 1.206972599029541, + "eval_runtime": 23.1456, + "eval_samples_per_second": 3437.717, + "eval_steps_per_second": 13.437, + "step": 104443 }, { - "epoch": 21.01, - "learning_rate": 0.0017975222727944844, - "loss": 1.2558, + "epoch": 42.02, + "learning_rate": 0.0001890108329930798, + "loss": 1.2246, "step": 104500 }, { - "epoch": 21.03, - "learning_rate": 0.0017959442230694584, - "loss": 1.2493, + "epoch": 42.06, + "learning_rate": 0.00018715901664406552, + "loss": 1.2236, "step": 104600 }, { - "epoch": 21.05, - "learning_rate": 0.0017943650974095123, - "loss": 1.2564, + "epoch": 42.1, + "learning_rate": 0.00018533397496334577, + "loss": 1.2253, "step": 104700 }, { - "epoch": 21.07, - "learning_rate": 0.001792784898926753, - "loss": 1.2552, + "epoch": 42.14, + "learning_rate": 0.00018349885283927892, + "loss": 1.2263, "step": 104800 }, { - "epoch": 21.09, - "learning_rate": 0.0017912036307354006, - "loss": 1.2495, + "epoch": 42.18, + "learning_rate": 0.00018167214144171039, + "loss": 1.226, "step": 104900 }, { - "epoch": 21.11, - "learning_rate": 0.0017896212959517843, - "loss": 1.2554, + "epoch": 42.22, + "learning_rate": 0.0001798538551765977, + "loss": 1.2303, "step": 105000 }, { - "epoch": 21.13, - "learning_rate": 0.0017880378976943344, - "loss": 1.2587, + "epoch": 42.26, + "learning_rate": 0.00017804400838345501, + "loss": 1.2247, "step": 105100 }, { - "epoch": 21.15, - "learning_rate": 0.0017864534390835776, - "loss": 1.2574, + "epoch": 42.3, + "learning_rate": 0.0001762426153352409, + "loss": 1.225, "step": 105200 }, { - "epoch": 21.17, - "learning_rate": 0.00178486792324213, - "loss": 1.2543, + "epoch": 42.34, + "learning_rate": 0.00017444969023824462, + "loss": 1.2298, "step": 105300 }, { - "epoch": 21.19, - "learning_rate": 0.0017832813532946924, - "loss": 1.2566, + "epoch": 42.38, + "learning_rate": 0.0001726652472319755, + "loss": 1.2271, "step": 105400 }, { - "epoch": 21.21, - "learning_rate": 0.001781693732368041, - "loss": 1.2563, + "epoch": 42.42, + "learning_rate": 0.00017088930038905006, + "loss": 1.2284, "step": 105500 }, { - "epoch": 21.23, - "learning_rate": 0.001780105063591025, - "loss": 1.259, + "epoch": 42.47, + "learning_rate": 0.00016912186371508266, + "loss": 1.2296, "step": 105600 }, { - "epoch": 21.25, - "learning_rate": 0.0017785153500945576, - "loss": 1.2568, + "epoch": 42.51, + "learning_rate": 0.00016736295114857318, + "loss": 1.2285, "step": 105700 }, { - "epoch": 21.27, - "learning_rate": 0.001776924595011612, - "loss": 1.2594, + "epoch": 42.55, + "learning_rate": 0.00016561257656079852, + "loss": 1.2285, "step": 105800 }, { - "epoch": 21.29, - "learning_rate": 0.0017753328014772126, - "loss": 1.2547, + "epoch": 42.59, + "learning_rate": 0.0001638707537557023, + "loss": 1.2271, "step": 105900 }, { - "epoch": 21.31, - "learning_rate": 0.0017737399726284325, - "loss": 1.2585, + "epoch": 42.63, + "learning_rate": 0.00016213749646978664, + "loss": 1.2288, "step": 106000 }, { - "epoch": 21.33, - "learning_rate": 0.0017721461116043825, - "loss": 1.2554, + "epoch": 42.67, + "learning_rate": 0.0001604128183720037, + "loss": 1.229, "step": 106100 }, { - "epoch": 21.35, - "learning_rate": 0.00177055122154621, - "loss": 1.2586, + "epoch": 42.71, + "learning_rate": 0.00015869673306364714, + "loss": 1.2282, "step": 106200 }, { - "epoch": 21.37, - "learning_rate": 0.001768955305597089, - "loss": 1.2551, + "epoch": 42.75, + "learning_rate": 0.00015698925407824665, + "loss": 1.2256, "step": 106300 }, { - "epoch": 21.39, - "learning_rate": 0.0017673583669022158, - "loss": 1.256, + "epoch": 42.79, + "learning_rate": 0.0001552903948814588, + "loss": 1.2279, "step": 106400 }, { - "epoch": 21.41, - "learning_rate": 0.0017657604086088023, - "loss": 1.2537, + "epoch": 42.83, + "learning_rate": 0.0001536001688709629, + "loss": 1.2291, "step": 106500 }, { - "epoch": 21.43, - "learning_rate": 0.0017641614338660694, - "loss": 1.2552, + "epoch": 42.87, + "learning_rate": 0.00015191858937635403, + "loss": 1.2299, "step": 106600 }, { - "epoch": 21.45, - "learning_rate": 0.0017625614458252417, - "loss": 1.2555, + "epoch": 42.91, + "learning_rate": 0.00015024566965903888, + "loss": 1.229, "step": 106700 }, { - "epoch": 21.47, - "learning_rate": 0.0017609604476395407, - "loss": 1.2555, + "epoch": 42.95, + "learning_rate": 0.00014858142291213032, + "loss": 1.2265, "step": 106800 }, { - "epoch": 21.49, - "learning_rate": 0.0017593584424641785, - "loss": 1.253, + "epoch": 42.99, + "learning_rate": 0.00014692586226034408, + "loss": 1.2274, "step": 106900 }, { - "epoch": 21.52, - "learning_rate": 0.0017577714685050292, - "loss": 1.2582, + "epoch": 43.0, + "eval_accuracy": 0.42207267837873813, + "eval_loss": 1.200390100479126, + "eval_runtime": 18.2476, + "eval_samples_per_second": 4360.456, + "eval_steps_per_second": 17.043, + "step": 106930 + }, + { + "epoch": 43.03, + "learning_rate": 0.00014527900075989462, + "loss": 1.2189, "step": 107000 }, { - "epoch": 21.54, - "learning_rate": 0.0017561674688150015, - "loss": 1.2545, + "epoch": 43.07, + "learning_rate": 0.00014364085139839235, + "loss": 1.215, "step": 107100 }, { - "epoch": 21.56, - "learning_rate": 0.0017545624715812104, - "loss": 1.2584, + "epoch": 43.11, + "learning_rate": 0.0001420114270947423, + "loss": 1.2176, "step": 107200 }, { - "epoch": 21.58, - "learning_rate": 0.0017529564799667488, - "loss": 1.2594, + "epoch": 43.15, + "learning_rate": 0.00014039074069903984, + "loss": 1.2178, "step": 107300 }, { - "epoch": 21.6, - "learning_rate": 0.001751365571861091, - "loss": 1.2576, + "epoch": 43.19, + "learning_rate": 0.00013877880499247182, + "loss": 1.2195, "step": 107400 }, { - "epoch": 21.62, - "learning_rate": 0.0017497576108472055, - "loss": 1.256, + "epoch": 43.23, + "learning_rate": 0.00013717563268721394, + "loss": 1.2172, "step": 107500 }, { - "epoch": 21.64, - "learning_rate": 0.0017481486649219638, - "loss": 1.2531, + "epoch": 43.27, + "learning_rate": 0.00013558123642633203, + "loss": 1.2201, "step": 107600 }, { - "epoch": 21.66, - "learning_rate": 0.001746538737256242, - "loss": 1.255, + "epoch": 43.31, + "learning_rate": 0.00013399562878368092, + "loss": 1.2208, "step": 107700 }, { - "epoch": 21.68, - "learning_rate": 0.0017449278310228496, - "loss": 1.258, + "epoch": 43.35, + "learning_rate": 0.0001324188222638062, + "loss": 1.2206, "step": 107800 }, { - "epoch": 21.7, - "learning_rate": 0.0017433159493965259, - "loss": 1.2543, + "epoch": 43.39, + "learning_rate": 0.00013085082930184538, + "loss": 1.2216, "step": 107900 }, { - "epoch": 21.72, - "learning_rate": 0.0017417030955539316, - "loss": 1.2588, + "epoch": 43.43, + "learning_rate": 0.00012929166226342993, + "loss": 1.2193, "step": 108000 }, { - "epoch": 21.74, - "learning_rate": 0.0017400892726736443, - "loss": 1.2568, + "epoch": 43.47, + "learning_rate": 0.00012774133344458705, + "loss": 1.2204, "step": 108100 }, { - "epoch": 21.76, - "learning_rate": 0.0017384744839361499, - "loss": 1.254, + "epoch": 43.51, + "learning_rate": 0.0001261998550716438, + "loss": 1.2178, "step": 108200 }, { - "epoch": 21.78, - "learning_rate": 0.0017368587325238393, - "loss": 1.2562, + "epoch": 43.55, + "learning_rate": 0.00012466723930113028, + "loss": 1.22, "step": 108300 }, { - "epoch": 21.8, - "learning_rate": 0.0017352420216209996, - "loss": 1.2515, + "epoch": 43.59, + "learning_rate": 0.0001231434982196833, + "loss": 1.2202, "step": 108400 }, { - "epoch": 21.82, - "learning_rate": 0.0017336243544138097, - "loss": 1.2532, + "epoch": 43.63, + "learning_rate": 0.00012162864384395109, + "loss": 1.2178, "step": 108500 }, { - "epoch": 21.84, - "learning_rate": 0.0017320057340903326, - "loss": 1.2579, + "epoch": 43.67, + "learning_rate": 0.00012012268812049948, + "loss": 1.2205, "step": 108600 }, { - "epoch": 21.86, - "learning_rate": 0.0017303861638405097, - "loss": 1.2541, + "epoch": 43.71, + "learning_rate": 0.00011862564292571654, + "loss": 1.2218, "step": 108700 }, { - "epoch": 21.88, - "learning_rate": 0.001728765646856154, - "loss": 1.2553, + "epoch": 43.75, + "learning_rate": 0.00011715235709016989, + "loss": 1.2213, "step": 108800 }, { - "epoch": 21.9, - "learning_rate": 0.0017271441863309462, - "loss": 1.2541, + "epoch": 43.79, + "learning_rate": 0.00011567307890214637, + "loss": 1.2187, "step": 108900 }, { - "epoch": 21.92, - "learning_rate": 0.0017255217854604242, - "loss": 1.2558, + "epoch": 43.83, + "learning_rate": 0.00011420274633365508, + "loss": 1.2207, "step": 109000 }, { - "epoch": 21.94, - "learning_rate": 0.0017238984474419804, - "loss": 1.2548, + "epoch": 43.87, + "learning_rate": 0.00011274137098015003, + "loss": 1.2219, "step": 109100 }, { - "epoch": 21.96, - "learning_rate": 0.0017222741754748536, - "loss": 1.2568, + "epoch": 43.91, + "learning_rate": 0.00011128896436644637, + "loss": 1.2187, "step": 109200 }, { - "epoch": 21.98, - "learning_rate": 0.0017206489727601237, - "loss": 1.2524, + "epoch": 43.95, + "learning_rate": 0.00010984553794662913, + "loss": 1.2246, "step": 109300 }, { - "epoch": 22.0, - "learning_rate": 0.0017190228425007044, - "loss": 1.2563, + "epoch": 43.99, + "learning_rate": 0.00010841110310396294, + "loss": 1.2195, "step": 109400 }, { - "epoch": 22.0, - "eval_accuracy": 0.40835639033056986, - "eval_loss": 1.2162704467773438, - "eval_runtime": 19.8119, - "eval_samples_per_second": 4016.173, - "eval_steps_per_second": 15.698, - "step": 109411 + "epoch": 44.0, + "eval_accuracy": 0.4225120226688909, + "eval_loss": 1.1945446729660034, + "eval_runtime": 18.2468, + "eval_samples_per_second": 4360.644, + "eval_steps_per_second": 17.044, + "step": 109417 }, { - "epoch": 22.02, - "learning_rate": 0.001717395787901338, - "loss": 1.2459, + "epoch": 44.03, + "learning_rate": 0.00010698567115080215, + "loss": 1.2138, "step": 109500 }, { - "epoch": 22.04, - "learning_rate": 0.0017157678121685874, - "loss": 1.2485, + "epoch": 44.07, + "learning_rate": 0.00010556925332850243, + "loss": 1.2119, "step": 109600 }, { - "epoch": 22.06, - "learning_rate": 0.0017141389185108325, - "loss": 1.25, + "epoch": 44.11, + "learning_rate": 0.00010416186080733053, + "loss": 1.211, "step": 109700 }, { - "epoch": 22.08, - "learning_rate": 0.0017125091101382601, - "loss": 1.2473, + "epoch": 44.15, + "learning_rate": 0.0001027635046863784, + "loss": 1.2115, "step": 109800 }, { - "epoch": 22.1, - "learning_rate": 0.0017108783902628616, - "loss": 1.2489, + "epoch": 44.19, + "learning_rate": 0.00010137419599347344, + "loss": 1.2124, "step": 109900 }, { - "epoch": 22.12, - "learning_rate": 0.0017092467620984239, - "loss": 1.2498, + "epoch": 44.23, + "learning_rate": 9.999394568509276e-05, + "loss": 1.2094, "step": 110000 }, { - "epoch": 22.14, - "learning_rate": 0.0017076142288605242, - "loss": 1.2485, + "epoch": 44.27, + "learning_rate": 9.862276464627659e-05, + "loss": 1.2145, "step": 110100 }, { - "epoch": 22.16, - "learning_rate": 0.0017059807937665227, - "loss": 1.2542, + "epoch": 44.31, + "learning_rate": 9.72606636905421e-05, + "loss": 1.2153, "step": 110200 }, { - "epoch": 22.18, - "learning_rate": 0.0017043464600355585, - "loss": 1.2515, + "epoch": 44.36, + "learning_rate": 9.590765355979872e-05, + "loss": 1.2129, "step": 110300 }, { - "epoch": 22.2, - "learning_rate": 0.0017027112308885396, - "loss": 1.2534, + "epoch": 44.4, + "learning_rate": 9.456374492426326e-05, + "loss": 1.2112, "step": 110400 }, { - "epoch": 22.22, - "learning_rate": 0.0017010751095481403, - "loss": 1.2504, + "epoch": 44.44, + "learning_rate": 9.322894838237492e-05, + "loss": 1.2115, "step": 110500 }, { - "epoch": 22.24, - "learning_rate": 0.001699438099238793, - "loss": 1.25, + "epoch": 44.48, + "learning_rate": 9.190327446071303e-05, + "loss": 1.2111, "step": 110600 }, { - "epoch": 22.26, - "learning_rate": 0.0016978002031866812, - "loss": 1.2516, + "epoch": 44.52, + "learning_rate": 9.059985377950942e-05, + "loss": 1.2122, "step": 110700 }, { - "epoch": 22.28, - "learning_rate": 0.0016961614246197348, - "loss": 1.2541, + "epoch": 44.56, + "learning_rate": 8.929236490444886e-05, + "loss": 1.2102, "step": 110800 }, { - "epoch": 22.3, - "learning_rate": 0.001694521766767623, - "loss": 1.2501, + "epoch": 44.6, + "learning_rate": 8.799402969461356e-05, + "loss": 1.2157, "step": 110900 }, { - "epoch": 22.32, - "learning_rate": 0.001692881232861747, - "loss": 1.2477, + "epoch": 44.64, + "learning_rate": 8.670485838903813e-05, + "loss": 1.2117, "step": 111000 }, { - "epoch": 22.34, - "learning_rate": 0.001691239826135236, - "loss": 1.2528, + "epoch": 44.68, + "learning_rate": 8.542486115448847e-05, + "loss": 1.2159, "step": 111100 }, { - "epoch": 22.36, - "learning_rate": 0.0016895975498229378, - "loss": 1.2535, + "epoch": 44.72, + "learning_rate": 8.415404808538113e-05, + "loss": 1.2135, "step": 111200 }, { - "epoch": 22.38, - "learning_rate": 0.0016879544071614144, - "loss": 1.2523, + "epoch": 44.76, + "learning_rate": 8.289242920370337e-05, + "loss": 1.2141, "step": 111300 }, { - "epoch": 22.4, - "learning_rate": 0.0016863104013889359, - "loss": 1.2528, + "epoch": 44.8, + "learning_rate": 8.164001445893466e-05, + "loss": 1.2145, "step": 111400 }, { - "epoch": 22.42, - "learning_rate": 0.001684681988647626, - "loss": 1.248, + "epoch": 44.84, + "learning_rate": 8.039681372796842e-05, + "loss": 1.2121, "step": 111500 }, { - "epoch": 22.44, - "learning_rate": 0.0016830362749250844, - "loss": 1.2526, + "epoch": 44.88, + "learning_rate": 7.916283681503323e-05, + "loss": 1.2131, "step": 111600 }, { - "epoch": 22.46, - "learning_rate": 0.0016813897077841348, - "loss": 1.2497, + "epoch": 44.92, + "learning_rate": 7.793809345161661e-05, + "loss": 1.2091, "step": 111700 }, { - "epoch": 22.48, - "learning_rate": 0.0016797422904697957, - "loss": 1.2518, + "epoch": 44.96, + "learning_rate": 7.67225932963872e-05, + "loss": 1.2121, "step": 111800 }, { - "epoch": 22.5, - "learning_rate": 0.001678094026228761, - "loss": 1.2476, + "epoch": 45.0, + "learning_rate": 7.551634593511963e-05, + "loss": 1.2127, "step": 111900 }, { - "epoch": 22.52, - "learning_rate": 0.0016764614135541252, - "loss": 1.2516, + "epoch": 45.0, + "eval_accuracy": 0.4228867575046094, + "eval_loss": 1.1900668144226074, + "eval_runtime": 18.1455, + "eval_samples_per_second": 4385.002, + "eval_steps_per_second": 17.139, + "step": 111903 + }, + { + "epoch": 45.04, + "learning_rate": 7.43193608806178e-05, + "loss": 1.2036, "step": 112000 }, { - "epoch": 22.54, - "learning_rate": 0.001674811473594641, - "loss": 1.2452, + "epoch": 45.08, + "learning_rate": 7.313164757264124e-05, + "loss": 1.2056, "step": 112100 }, { - "epoch": 22.56, - "learning_rate": 0.0016731606964260073, - "loss": 1.2497, + "epoch": 45.12, + "learning_rate": 7.19532153778292e-05, + "loss": 1.2025, "step": 112200 }, { - "epoch": 22.58, - "learning_rate": 0.0016715090853015398, - "loss": 1.2523, + "epoch": 45.16, + "learning_rate": 7.078407358962785e-05, + "loss": 1.2057, "step": 112300 }, { - "epoch": 22.6, - "learning_rate": 0.0016698566434761963, - "loss": 1.2479, + "epoch": 45.2, + "learning_rate": 6.962423142821653e-05, + "loss": 1.2062, "step": 112400 }, { - "epoch": 22.62, - "learning_rate": 0.0016682033742065746, - "loss": 1.2541, + "epoch": 45.24, + "learning_rate": 6.847369804043491e-05, + "loss": 1.2055, "step": 112500 }, { - "epoch": 22.64, - "learning_rate": 0.0016665492807509006, - "loss": 1.2487, + "epoch": 45.28, + "learning_rate": 6.733248249971153e-05, + "loss": 1.203, "step": 112600 }, { - "epoch": 22.66, - "learning_rate": 0.0016648943663690257, - "loss": 1.2482, + "epoch": 45.32, + "learning_rate": 6.620059380599119e-05, + "loss": 1.2062, "step": 112700 }, { - "epoch": 22.68, - "learning_rate": 0.0016632386343224186, - "loss": 1.2489, + "epoch": 45.36, + "learning_rate": 6.507804088566522e-05, + "loss": 1.2078, "step": 112800 }, { - "epoch": 22.7, - "learning_rate": 0.0016615820878741606, - "loss": 1.2516, + "epoch": 45.4, + "learning_rate": 6.396483259149974e-05, + "loss": 1.2036, "step": 112900 }, { - "epoch": 22.72, - "learning_rate": 0.0016599247302889358, - "loss": 1.2487, + "epoch": 45.44, + "learning_rate": 6.286097770256697e-05, + "loss": 1.2054, "step": 113000 }, { - "epoch": 22.74, - "learning_rate": 0.0016582665648330298, - "loss": 1.2538, + "epoch": 45.48, + "learning_rate": 6.176648492417572e-05, + "loss": 1.2043, "step": 113100 }, { - "epoch": 22.76, - "learning_rate": 0.0016566075947743175, - "loss": 1.2529, + "epoch": 45.52, + "learning_rate": 6.0681362887802236e-05, + "loss": 1.2082, "step": 113200 }, { - "epoch": 22.78, - "learning_rate": 0.0016549478233822618, - "loss": 1.2498, + "epoch": 45.56, + "learning_rate": 5.9605620151022416e-05, + "loss": 1.2087, "step": 113300 }, { - "epoch": 22.8, - "learning_rate": 0.0016532872539279028, - "loss": 1.2516, + "epoch": 45.6, + "learning_rate": 5.853926519744451e-05, + "loss": 1.2083, "step": 113400 }, { - "epoch": 22.82, - "learning_rate": 0.0016516258896838553, - "loss": 1.2495, + "epoch": 45.64, + "learning_rate": 5.748230643664226e-05, + "loss": 1.2055, "step": 113500 }, { - "epoch": 22.84, - "learning_rate": 0.0016499637339242989, - "loss": 1.2488, + "epoch": 45.68, + "learning_rate": 5.6434752204088115e-05, + "loss": 1.2066, "step": 113600 }, { - "epoch": 22.86, - "learning_rate": 0.0016483007899249745, - "loss": 1.2488, + "epoch": 45.72, + "learning_rate": 5.539661076108787e-05, + "loss": 1.2076, "step": 113700 }, { - "epoch": 22.88, - "learning_rate": 0.0016466370609631749, - "loss": 1.2482, + "epoch": 45.76, + "learning_rate": 5.436789029471562e-05, + "loss": 1.2076, "step": 113800 }, { - "epoch": 22.9, - "learning_rate": 0.0016449725503177412, - "loss": 1.2518, + "epoch": 45.8, + "learning_rate": 5.3348598917748456e-05, + "loss": 1.2115, "step": 113900 }, { - "epoch": 22.92, - "learning_rate": 0.0016433072612690542, - "loss": 1.2484, + "epoch": 45.84, + "learning_rate": 5.2348796471101136e-05, + "loss": 1.2063, "step": 114000 }, { - "epoch": 22.94, - "learning_rate": 0.0016416411970990297, - "loss": 1.2538, + "epoch": 45.88, + "learning_rate": 5.134829282367678e-05, + "loss": 1.2078, "step": 114100 }, { - "epoch": 22.96, - "learning_rate": 0.0016399743610911097, - "loss": 1.2527, + "epoch": 45.92, + "learning_rate": 5.035724207904865e-05, + "loss": 1.2081, "step": 114200 }, { - "epoch": 22.98, - "learning_rate": 0.0016383067565302588, - "loss": 1.2523, + "epoch": 45.96, + "learning_rate": 4.937565205291983e-05, + "loss": 1.2077, "step": 114300 }, { - "epoch": 23.0, - "eval_accuracy": 0.4088067653938865, - "eval_loss": 1.212856650352478, - "eval_runtime": 19.7647, - "eval_samples_per_second": 4025.761, - "eval_steps_per_second": 15.735, - "step": 114384 + "epoch": 46.0, + "eval_accuracy": 0.42319670587368974, + "eval_loss": 1.1863625049591064, + "eval_runtime": 18.1292, + "eval_samples_per_second": 4388.94, + "eval_steps_per_second": 17.155, + "step": 114390 }, { - "epoch": 23.0, - "learning_rate": 0.001636638386702955, - "loss": 1.2473, + "epoch": 46.0, + "learning_rate": 4.840353048638432e-05, + "loss": 1.2057, "step": 114400 }, { - "epoch": 23.02, - "learning_rate": 0.0016349692548971854, - "loss": 1.2428, + "epoch": 46.04, + "learning_rate": 4.7440885045864775e-05, + "loss": 1.1987, "step": 114500 }, { - "epoch": 23.04, - "learning_rate": 0.001633299364402438, - "loss": 1.2422, + "epoch": 46.08, + "learning_rate": 4.6487723323052344e-05, + "loss": 1.2002, "step": 114600 }, { - "epoch": 23.06, - "learning_rate": 0.0016316287185096973, - "loss": 1.2432, + "epoch": 46.12, + "learning_rate": 4.554405283484733e-05, + "loss": 1.1993, "step": 114700 }, { - "epoch": 23.08, - "learning_rate": 0.0016299573205114343, - "loss": 1.2439, + "epoch": 46.16, + "learning_rate": 4.4609881023299216e-05, + "loss": 1.1977, "step": 114800 }, { - "epoch": 23.1, - "learning_rate": 0.001628301898865501, - "loss": 1.2479, + "epoch": 46.2, + "learning_rate": 4.3685215255548086e-05, + "loss": 1.2031, "step": 114900 }, { - "epoch": 23.12, - "learning_rate": 0.0016266290139783787, - "loss": 1.2494, + "epoch": 46.25, + "learning_rate": 4.277006282376719e-05, + "loss": 1.1998, "step": 115000 }, { - "epoch": 23.14, - "learning_rate": 0.0016249553868390434, - "loss": 1.2441, + "epoch": 46.29, + "learning_rate": 4.186443094510467e-05, + "loss": 1.2023, "step": 115100 }, { - "epoch": 23.16, - "learning_rate": 0.0016232810207458424, - "loss": 1.245, + "epoch": 46.33, + "learning_rate": 4.096832676162704e-05, + "loss": 1.2014, "step": 115200 }, { - "epoch": 23.18, - "learning_rate": 0.0016216059189985796, - "loss": 1.2475, + "epoch": 46.37, + "learning_rate": 4.008175734026259e-05, + "loss": 1.2005, "step": 115300 }, { - "epoch": 23.2, - "learning_rate": 0.0016199300848985091, - "loss": 1.2451, + "epoch": 46.41, + "learning_rate": 3.920472967274588e-05, + "loss": 1.2036, "step": 115400 }, { - "epoch": 23.22, - "learning_rate": 0.0016182535217483282, - "loss": 1.247, + "epoch": 46.45, + "learning_rate": 3.833725067556235e-05, + "loss": 1.2017, "step": 115500 }, { - "epoch": 23.24, - "learning_rate": 0.0016165762328521703, - "loss": 1.2434, + "epoch": 46.49, + "learning_rate": 3.747932718989433e-05, + "loss": 1.1997, "step": 115600 }, { - "epoch": 23.26, - "learning_rate": 0.0016148982215156002, - "loss": 1.2469, + "epoch": 46.53, + "learning_rate": 3.663096598156654e-05, + "loss": 1.2002, "step": 115700 }, { - "epoch": 23.28, - "learning_rate": 0.0016132194910456056, - "loss": 1.245, + "epoch": 46.57, + "learning_rate": 3.579217374099275e-05, + "loss": 1.2012, "step": 115800 }, { - "epoch": 23.3, - "learning_rate": 0.0016115400447505918, - "loss": 1.2507, + "epoch": 46.61, + "learning_rate": 3.496295708312336e-05, + "loss": 1.2016, "step": 115900 }, { - "epoch": 23.32, - "learning_rate": 0.0016098598859403746, - "loss": 1.2434, + "epoch": 46.65, + "learning_rate": 3.4143322547392775e-05, + "loss": 1.2027, "step": 116000 }, { - "epoch": 23.34, - "learning_rate": 0.0016081790179261746, - "loss": 1.2418, + "epoch": 46.69, + "learning_rate": 3.3333276597668774e-05, + "loss": 1.1987, "step": 116100 }, { - "epoch": 23.37, - "learning_rate": 0.0016064974440206093, - "loss": 1.2476, + "epoch": 46.73, + "learning_rate": 3.253282562220003e-05, + "loss": 1.2016, "step": 116200 }, { - "epoch": 23.39, - "learning_rate": 0.0016048151675376878, - "loss": 1.2492, + "epoch": 46.77, + "learning_rate": 3.1741975933567006e-05, + "loss": 1.2037, "step": 116300 }, { - "epoch": 23.41, - "learning_rate": 0.001603132191792804, - "loss": 1.2383, + "epoch": 46.81, + "learning_rate": 3.0960733768631707e-05, + "loss": 1.2031, "step": 116400 }, { - "epoch": 23.43, - "learning_rate": 0.0016014485201027297, - "loss": 1.2473, + "epoch": 46.85, + "learning_rate": 3.0189105288488535e-05, + "loss": 1.199, "step": 116500 }, { - "epoch": 23.45, - "learning_rate": 0.0015997641557856073, - "loss": 1.2489, + "epoch": 46.89, + "learning_rate": 2.942709657841547e-05, + "loss": 1.2021, "step": 116600 }, { - "epoch": 23.47, - "learning_rate": 0.0015980791021609464, - "loss": 1.2463, + "epoch": 46.93, + "learning_rate": 2.8674713647826452e-05, + "loss": 1.2008, "step": 116700 }, { - "epoch": 23.49, - "learning_rate": 0.001596393362549613, - "loss": 1.2469, + "epoch": 46.97, + "learning_rate": 2.7931962430223916e-05, + "loss": 1.2008, "step": 116800 }, { - "epoch": 23.51, - "learning_rate": 0.0015947069402738262, - "loss": 1.2443, + "epoch": 47.0, + "eval_accuracy": 0.4234523114961638, + "eval_loss": 1.1835007667541504, + "eval_runtime": 18.0486, + "eval_samples_per_second": 4408.549, + "eval_steps_per_second": 17.231, + "step": 116877 + }, + { + "epoch": 47.01, + "learning_rate": 2.7206132194601552e-05, + "loss": 1.2004, "step": 116900 }, { - "epoch": 23.53, - "learning_rate": 0.0015930367130251378, - "loss": 1.2448, + "epoch": 47.05, + "learning_rate": 2.6482565437709605e-05, + "loss": 1.1965, "step": 117000 }, { - "epoch": 23.55, - "learning_rate": 0.0015913489421361716, - "loss": 1.2469, + "epoch": 47.09, + "learning_rate": 2.5768647681698314e-05, + "loss": 1.1938, "step": 117100 }, { - "epoch": 23.57, - "learning_rate": 0.0015896604985241852, - "loss": 1.2453, + "epoch": 47.13, + "learning_rate": 2.5064384556722914e-05, + "loss": 1.1968, "step": 117200 }, { - "epoch": 23.59, - "learning_rate": 0.0015879713855167263, - "loss": 1.2467, + "epoch": 47.17, + "learning_rate": 2.4369781616799536e-05, + "loss": 1.1974, "step": 117300 }, { - "epoch": 23.61, - "learning_rate": 0.0015862816064426619, - "loss": 1.2437, + "epoch": 47.21, + "learning_rate": 2.368484433976123e-05, + "loss": 1.1944, "step": 117400 }, { - "epoch": 23.63, - "learning_rate": 0.0015845911646321712, - "loss": 1.2453, + "epoch": 47.25, + "learning_rate": 2.3009578127215076e-05, + "loss": 1.1977, "step": 117500 }, { - "epoch": 23.65, - "learning_rate": 0.00158290006341674, - "loss": 1.2459, + "epoch": 47.29, + "learning_rate": 2.2343988304499436e-05, + "loss": 1.1989, "step": 117600 }, { - "epoch": 23.67, - "learning_rate": 0.0015812083061291539, - "loss": 1.2456, + "epoch": 47.33, + "learning_rate": 2.168808012064205e-05, + "loss": 1.1966, "step": 117700 }, { - "epoch": 23.69, - "learning_rate": 0.0015795158961034905, - "loss": 1.2425, + "epoch": 47.37, + "learning_rate": 2.1041858748318126e-05, + "loss": 1.1972, "step": 117800 }, { - "epoch": 23.71, - "learning_rate": 0.0015778228366751152, - "loss": 1.2451, + "epoch": 47.41, + "learning_rate": 2.0405329283810226e-05, + "loss": 1.1981, "step": 117900 }, { - "epoch": 23.73, - "learning_rate": 0.001576129131180672, - "loss": 1.246, + "epoch": 47.45, + "learning_rate": 1.977849674696816e-05, + "loss": 1.1981, "step": 118000 }, { - "epoch": 23.75, - "learning_rate": 0.001574434782958078, - "loss": 1.2433, + "epoch": 47.49, + "learning_rate": 1.9161366081168612e-05, + "loss": 1.1979, "step": 118100 }, { - "epoch": 23.77, - "learning_rate": 0.001572739795346519, - "loss": 1.2463, + "epoch": 47.53, + "learning_rate": 1.8553942153276548e-05, + "loss": 1.1966, "step": 118200 }, { - "epoch": 23.79, - "learning_rate": 0.001571044171686438, - "loss": 1.2468, + "epoch": 47.57, + "learning_rate": 1.7956229753607333e-05, + "loss": 1.2016, "step": 118300 }, { - "epoch": 23.81, - "learning_rate": 0.001569347915319534, - "loss": 1.2445, + "epoch": 47.61, + "learning_rate": 1.736823359588788e-05, + "loss": 1.1971, "step": 118400 }, { - "epoch": 23.83, - "learning_rate": 0.0015676510295887514, - "loss": 1.246, + "epoch": 47.65, + "learning_rate": 1.678995831722055e-05, + "loss": 1.1989, "step": 118500 }, { - "epoch": 23.85, - "learning_rate": 0.0015659535178382759, - "loss": 1.2459, + "epoch": 47.69, + "learning_rate": 1.6221408478046123e-05, + "loss": 1.1986, "step": 118600 }, { - "epoch": 23.87, - "learning_rate": 0.0015642553834135256, - "loss": 1.2422, + "epoch": 47.73, + "learning_rate": 1.5662588562107698e-05, + "loss": 1.1963, "step": 118700 }, { - "epoch": 23.89, - "learning_rate": 0.0015625566296611475, - "loss": 1.2426, + "epoch": 47.77, + "learning_rate": 1.511350297641545e-05, + "loss": 1.1946, "step": 118800 }, { - "epoch": 23.91, - "learning_rate": 0.0015608572599290084, - "loss": 1.2443, + "epoch": 47.81, + "learning_rate": 1.4574156051211929e-05, + "loss": 1.1993, "step": 118900 }, { - "epoch": 23.93, - "learning_rate": 0.001559157277566188, - "loss": 1.245, + "epoch": 47.85, + "learning_rate": 1.4044552039938069e-05, + "loss": 1.1962, "step": 119000 }, { - "epoch": 23.95, - "learning_rate": 0.001557456685922975, - "loss": 1.2447, + "epoch": 47.89, + "learning_rate": 1.3524695119199182e-05, + "loss": 1.1942, "step": 119100 }, { - "epoch": 23.97, - "learning_rate": 0.0015557554883508585, - "loss": 1.245, + "epoch": 47.93, + "learning_rate": 1.3014589388732484e-05, + "loss": 1.1956, "step": 119200 }, { - "epoch": 23.99, - "learning_rate": 0.0015540536882025203, - "loss": 1.2394, + "epoch": 47.97, + "learning_rate": 1.2514238871374623e-05, + "loss": 1.1957, "step": 119300 }, { - "epoch": 24.0, - "eval_accuracy": 0.408813924305172, - "eval_loss": 1.208786129951477, - "eval_runtime": 19.9606, - "eval_samples_per_second": 3986.246, - "eval_steps_per_second": 15.581, - "step": 119358 + "epoch": 48.0, + "eval_accuracy": 0.42356701540431, + "eval_loss": 1.1818615198135376, + "eval_runtime": 18.2384, + "eval_samples_per_second": 4362.669, + "eval_steps_per_second": 17.052, + "step": 119364 }, { - "epoch": 24.01, - "learning_rate": 0.001552351288831832, - "loss": 1.239, + "epoch": 48.01, + "learning_rate": 1.2023647513029479e-05, + "loss": 1.1975, "step": 119400 }, { - "epoch": 24.03, - "learning_rate": 0.0015506482935938443, - "loss": 1.2356, + "epoch": 48.05, + "learning_rate": 1.1542819182638053e-05, + "loss": 1.1943, "step": 119500 }, { - "epoch": 24.05, - "learning_rate": 0.0015489447058447835, - "loss": 1.2393, + "epoch": 48.09, + "learning_rate": 1.1071757672147104e-05, + "loss": 1.1947, "step": 119600 }, { - "epoch": 24.07, - "learning_rate": 0.0015472405289420421, - "loss": 1.2373, + "epoch": 48.14, + "learning_rate": 1.0610466696479442e-05, + "loss": 1.1944, "step": 119700 }, { - "epoch": 24.09, - "learning_rate": 0.0015455357662441758, - "loss": 1.2384, + "epoch": 48.18, + "learning_rate": 1.01589498935048e-05, + "loss": 1.1939, "step": 119800 }, { - "epoch": 24.11, - "learning_rate": 0.0015438304211108924, - "loss": 1.2399, + "epoch": 48.22, + "learning_rate": 9.717210824011086e-06, + "loss": 1.1924, "step": 119900 }, { - "epoch": 24.13, - "learning_rate": 0.00154212449690305, - "loss": 1.238, + "epoch": 48.26, + "learning_rate": 9.285252971676233e-06, + "loss": 1.1939, "step": 120000 }, { - "epoch": 24.15, - "learning_rate": 0.0015404179969826454, - "loss": 1.2407, + "epoch": 48.3, + "learning_rate": 8.86307974304057e-06, + "loss": 1.1958, "step": 120100 }, { - "epoch": 24.17, - "learning_rate": 0.0015387109247128126, - "loss": 1.2427, + "epoch": 48.34, + "learning_rate": 8.450694467480591e-06, + "loss": 1.1931, "step": 120200 }, { - "epoch": 24.19, - "learning_rate": 0.001537003283457811, - "loss": 1.2432, + "epoch": 48.38, + "learning_rate": 8.048100397181762e-06, + "loss": 1.1943, "step": 120300 }, { - "epoch": 24.21, - "learning_rate": 0.0015352950765830234, - "loss": 1.2396, + "epoch": 48.42, + "learning_rate": 7.655300707113676e-06, + "loss": 1.1952, "step": 120400 }, { - "epoch": 24.23, - "learning_rate": 0.001533586307454946, - "loss": 1.2404, + "epoch": 48.46, + "learning_rate": 7.272298495004659e-06, + "loss": 1.1925, "step": 120500 }, { - "epoch": 24.25, - "learning_rate": 0.0015318769794411841, - "loss": 1.2383, + "epoch": 48.5, + "learning_rate": 6.899096781317199e-06, + "loss": 1.1928, "step": 120600 }, { - "epoch": 24.27, - "learning_rate": 0.0015301670959104435, - "loss": 1.2408, + "epoch": 48.54, + "learning_rate": 6.535698509224641e-06, + "loss": 1.1952, "step": 120700 }, { - "epoch": 24.29, - "learning_rate": 0.0015284566602325254, - "loss": 1.2389, + "epoch": 48.58, + "learning_rate": 6.182106544587313e-06, + "loss": 1.1951, "step": 120800 }, { - "epoch": 24.31, - "learning_rate": 0.0015267456757783189, - "loss": 1.2397, + "epoch": 48.62, + "learning_rate": 5.838323675930318e-06, + "loss": 1.1945, "step": 120900 }, { - "epoch": 24.33, - "learning_rate": 0.0015250341459197947, - "loss": 1.2415, + "epoch": 48.66, + "learning_rate": 5.507643747879182e-06, + "loss": 1.194, "step": 121000 }, { - "epoch": 24.35, - "learning_rate": 0.0015233563207578788, - "loss": 1.2356, + "epoch": 48.7, + "learning_rate": 5.1833889701156935e-06, + "loss": 1.1933, "step": 121100 }, { - "epoch": 24.37, - "learning_rate": 0.0015216437209509943, - "loss": 1.2364, + "epoch": 48.74, + "learning_rate": 4.868951164498925e-06, + "loss": 1.1949, "step": 121200 }, { - "epoch": 24.39, - "learning_rate": 0.001519930585794613, - "loss": 1.2443, + "epoch": 48.78, + "learning_rate": 4.564332810773342e-06, + "loss": 1.1965, "step": 121300 }, { - "epoch": 24.41, - "learning_rate": 0.0015182169186649438, - "loss": 1.2386, + "epoch": 48.82, + "learning_rate": 4.269536311244659e-06, + "loss": 1.1954, "step": 121400 }, { - "epoch": 24.43, - "learning_rate": 0.001516502722939245, - "loss": 1.2404, + "epoch": 48.86, + "learning_rate": 3.9845639907604145e-06, + "loss": 1.1923, "step": 121500 }, { - "epoch": 24.45, - "learning_rate": 0.0015147880019958154, - "loss": 1.2416, + "epoch": 48.9, + "learning_rate": 3.709418096692202e-06, + "loss": 1.1934, "step": 121600 }, { - "epoch": 24.47, - "learning_rate": 0.0015130727592139904, - "loss": 1.2418, + "epoch": 48.94, + "learning_rate": 3.4441007989174933e-06, + "loss": 1.1964, "step": 121700 }, { - "epoch": 24.49, - "learning_rate": 0.0015113741581417957, - "loss": 1.2432, + "epoch": 48.98, + "learning_rate": 3.1886141898027077e-06, + "loss": 1.1931, "step": 121800 }, { - "epoch": 24.51, - "learning_rate": 0.001509657886959314, - "loss": 1.2408, + "epoch": 49.0, + "eval_accuracy": 0.4236181719312456, + "eval_loss": 1.1811745166778564, + "eval_runtime": 18.2028, + "eval_samples_per_second": 4371.185, + "eval_steps_per_second": 17.085, + "step": 121850 + }, + { + "epoch": 49.02, + "learning_rate": 2.942960284186835e-06, + "loss": 1.1948, "step": 121900 }, { - "epoch": 24.53, - "learning_rate": 0.0015079411040487545, - "loss": 1.242, + "epoch": 49.06, + "learning_rate": 2.7071410193652e-06, + "loss": 1.1911, "step": 122000 }, { - "epoch": 24.55, - "learning_rate": 0.0015062238127935158, - "loss": 1.2398, + "epoch": 49.1, + "learning_rate": 2.4811582550746113e-06, + "loss": 1.1927, "step": 122100 }, { - "epoch": 24.57, - "learning_rate": 0.0015045060165779975, - "loss": 1.2379, + "epoch": 49.14, + "learning_rate": 2.2671265131336828e-06, + "loss": 1.193, "step": 122200 }, { - "epoch": 24.59, - "learning_rate": 0.001502787718787595, - "loss": 1.2364, + "epoch": 49.18, + "learning_rate": 2.06072361075213e-06, + "loss": 1.1949, "step": 122300 }, { - "epoch": 24.61, - "learning_rate": 0.0015010689228086916, - "loss": 1.2375, + "epoch": 49.22, + "learning_rate": 1.8641623067300329e-06, + "loss": 1.1902, "step": 122400 }, { - "epoch": 24.63, - "learning_rate": 0.0014993496320286532, - "loss": 1.2353, + "epoch": 49.26, + "learning_rate": 1.6774441512050487e-06, + "loss": 1.1927, "step": 122500 }, { - "epoch": 24.65, - "learning_rate": 0.00149762984983582, - "loss": 1.2391, + "epoch": 49.3, + "learning_rate": 1.5005706166887346e-06, + "loss": 1.1923, "step": 122600 }, { - "epoch": 24.67, - "learning_rate": 0.0014959095796195018, - "loss": 1.2424, + "epoch": 49.34, + "learning_rate": 1.3335430980553065e-06, + "loss": 1.1914, "step": 122700 }, { - "epoch": 24.69, - "learning_rate": 0.0014941888247699687, - "loss": 1.2423, + "epoch": 49.38, + "learning_rate": 1.1763629125303987e-06, + "loss": 1.1914, "step": 122800 }, { - "epoch": 24.71, - "learning_rate": 0.0014924675886784473, - "loss": 1.2402, + "epoch": 49.42, + "learning_rate": 1.0290312996810714e-06, + "loss": 1.1934, "step": 122900 }, { - "epoch": 24.73, - "learning_rate": 0.001490745874737111, - "loss": 1.2396, + "epoch": 49.46, + "learning_rate": 8.915494214056796e-07, + "loss": 1.192, "step": 123000 }, { - "epoch": 24.75, - "learning_rate": 0.001489023686339077, - "loss": 1.2386, + "epoch": 49.5, + "learning_rate": 7.639183619247148e-07, + "loss": 1.1915, "step": 123100 }, { - "epoch": 24.77, - "learning_rate": 0.001487301026878396, - "loss": 1.2402, + "epoch": 49.54, + "learning_rate": 6.461391277723383e-07, + "loss": 1.1949, "step": 123200 }, { - "epoch": 24.79, - "learning_rate": 0.001485577899750048, - "loss": 1.2379, + "epoch": 49.58, + "learning_rate": 5.382126477887495e-07, + "loss": 1.1945, "step": 123300 }, { - "epoch": 24.81, - "learning_rate": 0.0014838543083499334, - "loss": 1.2432, + "epoch": 49.62, + "learning_rate": 4.4013977311213593e-07, + "loss": 1.1929, "step": 123400 }, { - "epoch": 24.83, - "learning_rate": 0.001482130256074869, - "loss": 1.24, + "epoch": 49.66, + "learning_rate": 3.519212771728453e-07, + "loss": 1.1936, "step": 123500 }, { - "epoch": 24.85, - "learning_rate": 0.001480405746322579, - "loss": 1.2389, + "epoch": 49.7, + "learning_rate": 2.7355785568658476e-07, + "loss": 1.1941, "step": 123600 }, { - "epoch": 24.87, - "learning_rate": 0.0014786807824916897, - "loss": 1.2393, + "epoch": 49.74, + "learning_rate": 2.050501266491478e-07, + "loss": 1.1922, "step": 123700 }, { - "epoch": 24.89, - "learning_rate": 0.0014769553679817215, - "loss": 1.237, + "epoch": 49.78, + "learning_rate": 1.4639863033169553e-07, + "loss": 1.1914, "step": 123800 }, { - "epoch": 24.91, - "learning_rate": 0.0014752295061930846, - "loss": 1.2358, + "epoch": 49.82, + "learning_rate": 9.760382927631594e-08, + "loss": 1.1921, "step": 123900 }, { - "epoch": 24.93, - "learning_rate": 0.0014735032005270684, - "loss": 1.2392, + "epoch": 49.86, + "learning_rate": 5.8666108292554366e-08, + "loss": 1.1939, "step": 124000 }, { - "epoch": 24.95, - "learning_rate": 0.0014717764543858392, - "loss": 1.2389, + "epoch": 49.9, + "learning_rate": 2.958577445394406e-08, + "loss": 1.193, "step": 124100 }, { - "epoch": 24.97, - "learning_rate": 0.00147004927117243, - "loss": 1.2392, + "epoch": 49.94, + "learning_rate": 1.0363057096479666e-08, + "loss": 1.1924, "step": 124200 }, { - "epoch": 24.99, - "learning_rate": 0.001468321654290736, - "loss": 1.2387, + "epoch": 49.98, + "learning_rate": 9.981078157028645e-10, + "loss": 1.1951, "step": 124300 }, { - "epoch": 25.0, - "eval_accuracy": 0.40966806196499067, - "eval_loss": 1.2022136449813843, - "eval_runtime": 19.7693, - "eval_samples_per_second": 4024.827, - "eval_steps_per_second": 15.731, - "step": 124331 - }, - { - "epoch": 25.01, - "learning_rate": 0.0014665936071455062, - "loss": 1.2317, - "step": 124400 - }, - { - "epoch": 25.03, - "learning_rate": 0.0014648651331423384, - "loss": 1.23, - "step": 124500 - }, - { - "epoch": 25.05, - "learning_rate": 0.0014631362356876715, - "loss": 1.2321, - "step": 124600 - }, - { - "epoch": 25.07, - "learning_rate": 0.0014614069181887784, - "loss": 1.2331, - "step": 124700 - }, - { - "epoch": 25.09, - "learning_rate": 0.0014596771840537605, - "loss": 1.2315, - "step": 124800 - }, - { - "epoch": 25.11, - "learning_rate": 0.0014579470366915396, - "loss": 1.2357, - "step": 124900 - }, - { - "epoch": 25.13, - "learning_rate": 0.0014562164795118523, - "loss": 1.2365, - "step": 125000 - }, - { - "epoch": 25.15, - "learning_rate": 0.0014544855159252432, - "loss": 1.2316, - "step": 125100 - }, - { - "epoch": 25.17, - "learning_rate": 0.001452754149343057, - "loss": 1.2337, - "step": 125200 - }, - { - "epoch": 25.19, - "learning_rate": 0.001451022383177433, - "loss": 1.235, - "step": 125300 - }, - { - "epoch": 25.21, - "learning_rate": 0.0014492902208412984, - "loss": 1.2352, - "step": 125400 - }, - { - "epoch": 25.24, - "learning_rate": 0.0014475576657483604, - "loss": 1.231, - "step": 125500 - }, - { - "epoch": 25.26, - "learning_rate": 0.001445824721313101, - "loss": 1.2361, - "step": 125600 - }, - { - "epoch": 25.28, - "learning_rate": 0.0014441087261535155, - "loss": 1.2323, - "step": 125700 - }, - { - "epoch": 25.3, - "learning_rate": 0.0014423750170883193, - "loss": 1.2378, - "step": 125800 - }, - { - "epoch": 25.32, - "learning_rate": 0.001440640928894652, - "loss": 1.2323, - "step": 125900 - }, - { - "epoch": 25.34, - "learning_rate": 0.0014389064649900165, - "loss": 1.2387, - "step": 126000 - }, - { - "epoch": 25.36, - "learning_rate": 0.001437171628792657, - "loss": 1.2401, - "step": 126100 - }, - { - "epoch": 25.38, - "learning_rate": 0.0014354364237215494, - "loss": 1.2334, - "step": 126200 - }, - { - "epoch": 25.4, - "learning_rate": 0.0014337008531963994, - "loss": 1.2348, - "step": 126300 - }, - { - "epoch": 25.42, - "learning_rate": 0.0014319649206376301, - "loss": 1.236, - "step": 126400 - }, - { - "epoch": 25.44, - "learning_rate": 0.00143022862946638, - "loss": 1.2343, - "step": 126500 - }, - { - "epoch": 25.46, - "learning_rate": 0.0014284919831044935, - "loss": 1.2348, - "step": 126600 - }, - { - "epoch": 25.48, - "learning_rate": 0.0014267549849745155, - "loss": 1.2337, - "step": 126700 - }, - { - "epoch": 25.5, - "learning_rate": 0.0014250176384996832, - "loss": 1.2328, - "step": 126800 - }, - { - "epoch": 25.52, - "learning_rate": 0.0014232799471039221, - "loss": 1.2345, - "step": 126900 - }, - { - "epoch": 25.54, - "learning_rate": 0.0014215419142118353, - "loss": 1.2391, - "step": 127000 - }, - { - "epoch": 25.56, - "learning_rate": 0.0014198035432487004, - "loss": 1.2323, - "step": 127100 - }, - { - "epoch": 25.58, - "learning_rate": 0.0014180648376404608, - "loss": 1.2352, - "step": 127200 - }, - { - "epoch": 25.6, - "learning_rate": 0.0014163258008137198, - "loss": 1.2319, - "step": 127300 - }, - { - "epoch": 25.62, - "learning_rate": 0.0014145864361957325, - "loss": 1.2329, - "step": 127400 - }, - { - "epoch": 25.64, - "learning_rate": 0.001412846747214401, - "loss": 1.2351, - "step": 127500 - }, - { - "epoch": 25.66, - "learning_rate": 0.001411106737298266, - "loss": 1.2341, - "step": 127600 - }, - { - "epoch": 25.68, - "learning_rate": 0.001409366409876501, - "loss": 1.2328, - "step": 127700 - }, - { - "epoch": 25.7, - "learning_rate": 0.001407625768378905, - "loss": 1.235, - "step": 127800 - }, - { - "epoch": 25.72, - "learning_rate": 0.0014058848162358966, - "loss": 1.2304, - "step": 127900 - }, - { - "epoch": 25.74, - "learning_rate": 0.001404160970981524, - "loss": 1.2324, - "step": 128000 - }, - { - "epoch": 25.76, - "learning_rate": 0.0014024194108622253, - "loss": 1.2356, - "step": 128100 - }, - { - "epoch": 25.78, - "learning_rate": 0.0014006775503580902, - "loss": 1.2296, - "step": 128200 - }, - { - "epoch": 25.8, - "learning_rate": 0.0013989353929019378, - "loss": 1.2354, - "step": 128300 - }, - { - "epoch": 25.82, - "learning_rate": 0.0013971929419271745, - "loss": 1.2314, - "step": 128400 - }, - { - "epoch": 25.84, - "learning_rate": 0.0013954502008677843, - "loss": 1.2336, - "step": 128500 - }, - { - "epoch": 25.86, - "learning_rate": 0.0013937071731583237, - "loss": 1.2356, - "step": 128600 - }, - { - "epoch": 25.88, - "learning_rate": 0.001391963862233913, - "loss": 1.2314, - "step": 128700 - }, - { - "epoch": 25.9, - "learning_rate": 0.0013902202715302314, - "loss": 1.2336, - "step": 128800 - }, - { - "epoch": 25.92, - "learning_rate": 0.0013884764044835088, - "loss": 1.234, - "step": 128900 - }, - { - "epoch": 25.94, - "learning_rate": 0.001386732264530521, - "loss": 1.2355, - "step": 129000 - }, - { - "epoch": 25.96, - "learning_rate": 0.0013849878551085804, - "loss": 1.2334, - "step": 129100 - }, - { - "epoch": 25.98, - "learning_rate": 0.0013832431796555308, - "loss": 1.2339, - "step": 129200 - }, - { - "epoch": 26.0, - "learning_rate": 0.0013814982416097406, - "loss": 1.234, - "step": 129300 - }, - { - "epoch": 26.0, - "eval_accuracy": 0.4099652363267976, - "eval_loss": 1.1980103254318237, - "eval_runtime": 19.8186, - "eval_samples_per_second": 4014.806, - "eval_steps_per_second": 15.692, - "step": 129304 - }, - { - "epoch": 26.02, - "learning_rate": 0.0013797530444100952, - "loss": 1.2258, - "step": 129400 - }, - { - "epoch": 26.04, - "learning_rate": 0.0013780075914959912, - "loss": 1.2242, - "step": 129500 - }, - { - "epoch": 26.06, - "learning_rate": 0.001376261886307329, - "loss": 1.2273, - "step": 129600 - }, - { - "epoch": 26.08, - "learning_rate": 0.001374515932284506, - "loss": 1.2251, - "step": 129700 - }, - { - "epoch": 26.1, - "learning_rate": 0.001372787196065969, - "loss": 1.23, - "step": 129800 - }, - { - "epoch": 26.12, - "learning_rate": 0.0013710407571004548, - "loss": 1.2325, - "step": 129900 - }, - { - "epoch": 26.14, - "learning_rate": 0.0013692940795904673, - "loss": 1.2274, - "step": 130000 - }, - { - "epoch": 26.16, - "learning_rate": 0.0013675471669783198, - "loss": 1.2278, - "step": 130100 - }, - { - "epoch": 26.18, - "learning_rate": 0.0013658000227067901, - "loss": 1.2245, - "step": 130200 - }, - { - "epoch": 26.2, - "learning_rate": 0.0013640526502191111, - "loss": 1.2266, - "step": 130300 - }, - { - "epoch": 26.22, - "learning_rate": 0.0013623050529589667, - "loss": 1.2315, - "step": 130400 - }, - { - "epoch": 26.24, - "learning_rate": 0.001360557234370483, - "loss": 1.2302, - "step": 130500 - }, - { - "epoch": 26.26, - "learning_rate": 0.001358809197898223, - "loss": 1.2324, - "step": 130600 - }, - { - "epoch": 26.28, - "learning_rate": 0.0013570609469871781, - "loss": 1.2305, - "step": 130700 - }, - { - "epoch": 26.3, - "learning_rate": 0.001355312485082764, - "loss": 1.2294, - "step": 130800 - }, - { - "epoch": 26.32, - "learning_rate": 0.0013535638156308098, - "loss": 1.2278, - "step": 130900 - }, - { - "epoch": 26.34, - "learning_rate": 0.0013518149420775557, - "loss": 1.224, - "step": 131000 - }, - { - "epoch": 26.36, - "learning_rate": 0.0013500658678696432, - "loss": 1.2296, - "step": 131100 - }, - { - "epoch": 26.38, - "learning_rate": 0.00134831659645411, - "loss": 1.228, - "step": 131200 - }, - { - "epoch": 26.4, - "learning_rate": 0.0013465671312783809, - "loss": 1.2245, - "step": 131300 - }, - { - "epoch": 26.42, - "learning_rate": 0.0013448174757902645, - "loss": 1.2305, - "step": 131400 - }, - { - "epoch": 26.44, - "learning_rate": 0.0013430676334379426, - "loss": 1.2305, - "step": 131500 - }, - { - "epoch": 26.46, - "learning_rate": 0.0013413176076699674, - "loss": 1.2294, - "step": 131600 - }, - { - "epoch": 26.48, - "learning_rate": 0.0013395674019352503, - "loss": 1.2312, - "step": 131700 - }, - { - "epoch": 26.5, - "learning_rate": 0.0013378170196830588, - "loss": 1.2278, - "step": 131800 - }, - { - "epoch": 26.52, - "learning_rate": 0.001336066464363008, - "loss": 1.2289, - "step": 131900 - }, - { - "epoch": 26.54, - "learning_rate": 0.0013343157394250537, - "loss": 1.2242, - "step": 132000 - }, - { - "epoch": 26.56, - "learning_rate": 0.0013325648483194865, - "loss": 1.2275, - "step": 132100 - }, - { - "epoch": 26.58, - "learning_rate": 0.0013308137944969248, - "loss": 1.2262, - "step": 132200 - }, - { - "epoch": 26.6, - "learning_rate": 0.001329062581408306, - "loss": 1.2289, - "step": 132300 - }, - { - "epoch": 26.62, - "learning_rate": 0.0013273112125048833, - "loss": 1.2341, - "step": 132400 - }, - { - "epoch": 26.64, - "learning_rate": 0.0013255596912382156, - "loss": 1.2288, - "step": 132500 - }, - { - "epoch": 26.66, - "learning_rate": 0.0013238080210601635, - "loss": 1.2267, - "step": 132600 - }, - { - "epoch": 26.68, - "learning_rate": 0.0013220562054228793, - "loss": 1.2267, - "step": 132700 - }, - { - "epoch": 26.7, - "learning_rate": 0.0013203042477788038, - "loss": 1.2287, - "step": 132800 - }, - { - "epoch": 26.72, - "learning_rate": 0.001318552151580656, - "loss": 1.226, - "step": 132900 - }, - { - "epoch": 26.74, - "learning_rate": 0.001316799920281429, - "loss": 1.2294, - "step": 133000 - }, - { - "epoch": 26.76, - "learning_rate": 0.0013150475573343817, - "loss": 1.2272, - "step": 133100 - }, - { - "epoch": 26.78, - "learning_rate": 0.0013132950661930332, - "loss": 1.23, - "step": 133200 - }, - { - "epoch": 26.8, - "learning_rate": 0.001311542450311154, - "loss": 1.2282, - "step": 133300 - }, - { - "epoch": 26.82, - "learning_rate": 0.0013097897131427616, - "loss": 1.2297, - "step": 133400 - }, - { - "epoch": 26.84, - "learning_rate": 0.0013080368581421117, - "loss": 1.2269, - "step": 133500 - }, - { - "epoch": 26.86, - "learning_rate": 0.0013062838887636927, - "loss": 1.2283, - "step": 133600 - }, - { - "epoch": 26.88, - "learning_rate": 0.001304530808462218, - "loss": 1.2292, - "step": 133700 - }, - { - "epoch": 26.9, - "learning_rate": 0.0013027776206926205, - "loss": 1.2306, - "step": 133800 - }, - { - "epoch": 26.92, - "learning_rate": 0.0013010243289100437, - "loss": 1.2264, - "step": 133900 - }, - { - "epoch": 26.94, - "learning_rate": 0.0012992709365698368, - "loss": 1.2249, - "step": 134000 - }, - { - "epoch": 26.96, - "learning_rate": 0.001297517447127547, - "loss": 1.2279, - "step": 134100 - }, - { - "epoch": 26.98, - "learning_rate": 0.001295763864038913, - "loss": 1.2272, - "step": 134200 - }, - { - "epoch": 27.0, - "eval_accuracy": 0.41072249005388595, - "eval_loss": 1.1898874044418335, - "eval_runtime": 19.6468, - "eval_samples_per_second": 4049.93, - "eval_steps_per_second": 15.83, - "step": 134277 - }, - { - "epoch": 27.0, - "learning_rate": 0.0012940101907598575, - "loss": 1.2222, - "step": 134300 - }, - { - "epoch": 27.02, - "learning_rate": 0.0012922564307464824, - "loss": 1.2168, - "step": 134400 - }, - { - "epoch": 27.04, - "learning_rate": 0.0012905025874550586, - "loss": 1.2195, - "step": 134500 - }, - { - "epoch": 27.06, - "learning_rate": 0.0012887486643420223, - "loss": 1.2188, - "step": 134600 - }, - { - "epoch": 27.08, - "learning_rate": 0.001286994664863967, - "loss": 1.2162, - "step": 134700 - }, - { - "epoch": 27.11, - "learning_rate": 0.0012852405924776362, - "loss": 1.2236, - "step": 134800 - }, - { - "epoch": 27.13, - "learning_rate": 0.0012834864506399174, - "loss": 1.2245, - "step": 134900 - }, - { - "epoch": 27.15, - "learning_rate": 0.001281732242807835, - "loss": 1.2193, - "step": 135000 - }, - { - "epoch": 27.17, - "learning_rate": 0.0012799779724385432, - "loss": 1.2235, - "step": 135100 - }, - { - "epoch": 27.19, - "learning_rate": 0.0012782236429893202, - "loss": 1.2252, - "step": 135200 - }, - { - "epoch": 27.21, - "learning_rate": 0.0012764692579175594, - "loss": 1.2253, - "step": 135300 - }, - { - "epoch": 27.23, - "learning_rate": 0.0012747148206807646, - "loss": 1.2226, - "step": 135400 - }, - { - "epoch": 27.25, - "learning_rate": 0.0012729603347365424, - "loss": 1.2217, - "step": 135500 - }, - { - "epoch": 27.27, - "learning_rate": 0.0012712058035425956, - "loss": 1.2222, - "step": 135600 - }, - { - "epoch": 27.29, - "learning_rate": 0.0012694512305567152, - "loss": 1.2209, - "step": 135700 - }, - { - "epoch": 27.31, - "learning_rate": 0.0012677141655283745, - "loss": 1.2256, - "step": 135800 - }, - { - "epoch": 27.33, - "learning_rate": 0.0012659595196639695, - "loss": 1.2215, - "step": 135900 - }, - { - "epoch": 27.35, - "learning_rate": 0.0012642048423468924, - "loss": 1.2238, - "step": 136000 - }, - { - "epoch": 27.37, - "learning_rate": 0.0012624501370352233, - "loss": 1.222, - "step": 136100 - }, - { - "epoch": 27.39, - "learning_rate": 0.0012606954071870964, - "loss": 1.2196, - "step": 136200 - }, - { - "epoch": 27.41, - "learning_rate": 0.0012589406562606954, - "loss": 1.2271, - "step": 136300 - }, - { - "epoch": 27.43, - "learning_rate": 0.0012571858877142449, - "loss": 1.2228, - "step": 136400 - }, - { - "epoch": 27.45, - "learning_rate": 0.001255431105006004, - "loss": 1.2225, - "step": 136500 - }, - { - "epoch": 27.47, - "learning_rate": 0.0012536763115942604, - "loss": 1.2253, - "step": 136600 - }, - { - "epoch": 27.49, - "learning_rate": 0.0012519215109373229, - "loss": 1.222, - "step": 136700 - }, - { - "epoch": 27.51, - "learning_rate": 0.001250166706493513, - "loss": 1.2234, - "step": 136800 - }, - { - "epoch": 27.53, - "learning_rate": 0.001248411901721162, - "loss": 1.2235, - "step": 136900 - }, - { - "epoch": 27.55, - "learning_rate": 0.0012466571000786, - "loss": 1.2218, - "step": 137000 - }, - { - "epoch": 27.57, - "learning_rate": 0.001244902305024152, - "loss": 1.2255, - "step": 137100 - }, - { - "epoch": 27.59, - "learning_rate": 0.0012431475200161302, - "loss": 1.2253, - "step": 137200 - }, - { - "epoch": 27.61, - "learning_rate": 0.0012413927485128253, - "loss": 1.2223, - "step": 137300 - }, - { - "epoch": 27.63, - "learning_rate": 0.001239637993972503, - "loss": 1.2251, - "step": 137400 - }, - { - "epoch": 27.65, - "learning_rate": 0.0012378832598533957, - "loss": 1.2225, - "step": 137500 - }, - { - "epoch": 27.67, - "learning_rate": 0.0012361285496136948, - "loss": 1.2206, - "step": 137600 - }, - { - "epoch": 27.69, - "learning_rate": 0.001234373866711544, - "loss": 1.2238, - "step": 137700 - }, - { - "epoch": 27.71, - "learning_rate": 0.0012326192146050346, - "loss": 1.2259, - "step": 137800 - }, - { - "epoch": 27.73, - "learning_rate": 0.0012308645967521966, - "loss": 1.2215, - "step": 137900 - }, - { - "epoch": 27.75, - "learning_rate": 0.0012291100166109926, - "loss": 1.2229, - "step": 138000 - }, - { - "epoch": 27.77, - "learning_rate": 0.0012273554776393101, - "loss": 1.2194, - "step": 138100 - }, - { - "epoch": 27.79, - "learning_rate": 0.0012256009832949562, - "loss": 1.2188, - "step": 138200 - }, - { - "epoch": 27.81, - "learning_rate": 0.0012238465370356501, - "loss": 1.2199, - "step": 138300 - }, - { - "epoch": 27.83, - "learning_rate": 0.0012220921423190167, - "loss": 1.2211, - "step": 138400 - }, - { - "epoch": 27.85, - "learning_rate": 0.0012203378026025777, - "loss": 1.2184, - "step": 138500 - }, - { - "epoch": 27.87, - "learning_rate": 0.0012185835213437478, - "loss": 1.2187, - "step": 138600 - }, - { - "epoch": 27.89, - "learning_rate": 0.0012168293019998258, - "loss": 1.2232, - "step": 138700 - }, - { - "epoch": 27.91, - "learning_rate": 0.0012150751480279897, - "loss": 1.2241, - "step": 138800 - }, - { - "epoch": 27.93, - "learning_rate": 0.0012133210628852867, - "loss": 1.2215, - "step": 138900 - }, - { - "epoch": 27.95, - "learning_rate": 0.0012115670500286294, - "loss": 1.2231, - "step": 139000 - }, - { - "epoch": 27.97, - "learning_rate": 0.0012098131129147888, - "loss": 1.2214, - "step": 139100 - }, - { - "epoch": 27.99, - "learning_rate": 0.0012080767931761424, - "loss": 1.2187, - "step": 139200 - }, - { - "epoch": 28.0, - "eval_accuracy": 0.4111801831154055, - "eval_loss": 1.1840488910675049, - "eval_runtime": 19.7999, - "eval_samples_per_second": 4018.599, - "eval_steps_per_second": 15.707, - "step": 139251 - }, - { - "epoch": 28.01, - "learning_rate": 0.0012063230170739731, - "loss": 1.2143, - "step": 139300 - }, - { - "epoch": 28.03, - "learning_rate": 0.0012045693270494448, - "loss": 1.211, - "step": 139400 - }, - { - "epoch": 28.05, - "learning_rate": 0.0012028157265586918, - "loss": 1.2165, - "step": 139500 - }, - { - "epoch": 28.07, - "learning_rate": 0.0012010622190576717, - "loss": 1.217, - "step": 139600 - }, - { - "epoch": 28.09, - "learning_rate": 0.001199308808002159, - "loss": 1.2124, - "step": 139700 - }, - { - "epoch": 28.11, - "learning_rate": 0.001197555496847737, - "loss": 1.2182, - "step": 139800 - }, - { - "epoch": 28.13, - "learning_rate": 0.0011958022890497934, - "loss": 1.2156, - "step": 139900 - }, - { - "epoch": 28.15, - "learning_rate": 0.0011940491880635118, - "loss": 1.2185, - "step": 140000 - }, - { - "epoch": 28.17, - "learning_rate": 0.0011922961973438657, - "loss": 1.2143, - "step": 140100 - }, - { - "epoch": 28.19, - "learning_rate": 0.0011905433203456097, - "loss": 1.2148, - "step": 140200 - }, - { - "epoch": 28.21, - "learning_rate": 0.001188790560523276, - "loss": 1.2149, - "step": 140300 - }, - { - "epoch": 28.23, - "learning_rate": 0.0011870379213311652, - "loss": 1.2188, - "step": 140400 - }, - { - "epoch": 28.25, - "learning_rate": 0.0011852854062233409, - "loss": 1.2153, - "step": 140500 - }, - { - "epoch": 28.27, - "learning_rate": 0.0011835330186536204, - "loss": 1.2163, - "step": 140600 - }, - { - "epoch": 28.29, - "learning_rate": 0.0011817807620755712, - "loss": 1.2149, - "step": 140700 - }, - { - "epoch": 28.31, - "learning_rate": 0.001180028639942502, - "loss": 1.2188, - "step": 140800 - }, - { - "epoch": 28.33, - "learning_rate": 0.0011782766557074578, - "loss": 1.2193, - "step": 140900 - }, - { - "epoch": 28.35, - "learning_rate": 0.0011765248128232095, - "loss": 1.2181, - "step": 141000 - }, - { - "epoch": 28.37, - "learning_rate": 0.001174773114742251, - "loss": 1.2133, - "step": 141100 - }, - { - "epoch": 28.39, - "learning_rate": 0.0011730215649167904, - "loss": 1.2198, - "step": 141200 - }, - { - "epoch": 28.41, - "learning_rate": 0.001171270166798745, - "loss": 1.2163, - "step": 141300 - }, - { - "epoch": 28.43, - "learning_rate": 0.00116951892383973, - "loss": 1.219, - "step": 141400 - }, - { - "epoch": 28.45, - "learning_rate": 0.0011677678394910577, - "loss": 1.2176, - "step": 141500 - }, - { - "epoch": 28.47, - "learning_rate": 0.0011660169172037266, - "loss": 1.2186, - "step": 141600 - }, - { - "epoch": 28.49, - "learning_rate": 0.0011642661604284164, - "loss": 1.2152, - "step": 141700 - }, - { - "epoch": 28.51, - "learning_rate": 0.0011625155726154794, - "loss": 1.2113, - "step": 141800 - }, - { - "epoch": 28.53, - "learning_rate": 0.0011607651572149362, - "loss": 1.2138, - "step": 141900 - }, - { - "epoch": 28.55, - "learning_rate": 0.001159014917676467, - "loss": 1.2151, - "step": 142000 - }, - { - "epoch": 28.57, - "learning_rate": 0.0011572648574494063, - "loss": 1.217, - "step": 142100 - }, - { - "epoch": 28.59, - "learning_rate": 0.001155514979982733, - "loss": 1.2148, - "step": 142200 - }, - { - "epoch": 28.61, - "learning_rate": 0.0011537652887250683, - "loss": 1.2149, - "step": 142300 - }, - { - "epoch": 28.63, - "learning_rate": 0.0011520157871246655, - "loss": 1.2161, - "step": 142400 - }, - { - "epoch": 28.65, - "learning_rate": 0.001150266478629404, - "loss": 1.2111, - "step": 142500 - }, - { - "epoch": 28.67, - "learning_rate": 0.001148517366686782, - "loss": 1.2201, - "step": 142600 - }, - { - "epoch": 28.69, - "learning_rate": 0.0011467684547439116, - "loss": 1.2148, - "step": 142700 - }, - { - "epoch": 28.71, - "learning_rate": 0.00114501974624751, - "loss": 1.2185, - "step": 142800 - }, - { - "epoch": 28.73, - "learning_rate": 0.0011432887286244955, - "loss": 1.2136, - "step": 142900 - }, - { - "epoch": 28.75, - "learning_rate": 0.0011415404352391302, - "loss": 1.2153, - "step": 143000 - }, - { - "epoch": 28.77, - "learning_rate": 0.0011397923556035006, - "loss": 1.2159, - "step": 143100 - }, - { - "epoch": 28.79, - "learning_rate": 0.0011380444931626827, - "loss": 1.2155, - "step": 143200 - }, - { - "epoch": 28.81, - "learning_rate": 0.0011362968513613262, - "loss": 1.2182, - "step": 143300 - }, - { - "epoch": 28.83, - "learning_rate": 0.001134549433643645, - "loss": 1.2113, - "step": 143400 - }, - { - "epoch": 28.85, - "learning_rate": 0.0011328022434534126, - "loss": 1.2165, - "step": 143500 - }, - { - "epoch": 28.87, - "learning_rate": 0.0011310552842339516, - "loss": 1.2119, - "step": 143600 - }, - { - "epoch": 28.89, - "learning_rate": 0.001129308559428132, - "loss": 1.2145, - "step": 143700 - }, - { - "epoch": 28.91, - "learning_rate": 0.0011275620724783605, - "loss": 1.2153, - "step": 143800 - }, - { - "epoch": 28.93, - "learning_rate": 0.001125815826826576, - "loss": 1.2151, - "step": 143900 - }, - { - "epoch": 28.95, - "learning_rate": 0.0011240698259142399, - "loss": 1.219, - "step": 144000 - }, - { - "epoch": 28.98, - "learning_rate": 0.0011223240731823335, - "loss": 1.2149, - "step": 144100 - }, - { - "epoch": 29.0, - "learning_rate": 0.0011205785720713479, - "loss": 1.2162, - "step": 144200 - }, - { - "epoch": 29.0, - "eval_accuracy": 0.41123713623185454, - "eval_loss": 1.1828089952468872, - "eval_runtime": 19.8923, - "eval_samples_per_second": 3999.934, - "eval_steps_per_second": 15.634, - "step": 144224 - }, - { - "epoch": 29.02, - "learning_rate": 0.0011188333260212788, - "loss": 1.2079, - "step": 144300 - }, - { - "epoch": 29.04, - "learning_rate": 0.001117088338471618, - "loss": 1.2047, - "step": 144400 - }, - { - "epoch": 29.06, - "learning_rate": 0.001115343612861349, - "loss": 1.2053, - "step": 144500 - }, - { - "epoch": 29.08, - "learning_rate": 0.0011135991526289393, - "loss": 1.2075, - "step": 144600 - }, - { - "epoch": 29.1, - "learning_rate": 0.0011118549612123333, - "loss": 1.2061, - "step": 144700 - }, - { - "epoch": 29.12, - "learning_rate": 0.0011101110420489442, - "loss": 1.2101, - "step": 144800 - }, - { - "epoch": 29.14, - "learning_rate": 0.0011083673985756498, - "loss": 1.2117, - "step": 144900 - }, - { - "epoch": 29.16, - "learning_rate": 0.001106624034228785, - "loss": 1.2104, - "step": 145000 - }, - { - "epoch": 29.18, - "learning_rate": 0.0011048809524441346, - "loss": 1.2105, - "step": 145100 - }, - { - "epoch": 29.2, - "learning_rate": 0.0011031381566569247, - "loss": 1.2052, - "step": 145200 - }, - { - "epoch": 29.22, - "learning_rate": 0.0011013956503018196, - "loss": 1.2104, - "step": 145300 - }, - { - "epoch": 29.24, - "learning_rate": 0.0010996534368129128, - "loss": 1.2103, - "step": 145400 - }, - { - "epoch": 29.26, - "learning_rate": 0.0010979115196237208, - "loss": 1.2142, - "step": 145500 - }, - { - "epoch": 29.28, - "learning_rate": 0.001096169902167175, - "loss": 1.2038, - "step": 145600 - }, - { - "epoch": 29.3, - "learning_rate": 0.0010944285878756177, - "loss": 1.2115, - "step": 145700 - }, - { - "epoch": 29.32, - "learning_rate": 0.0010926875801807927, - "loss": 1.2122, - "step": 145800 - }, - { - "epoch": 29.34, - "learning_rate": 0.0010909468825138404, - "loss": 1.2085, - "step": 145900 - }, - { - "epoch": 29.36, - "learning_rate": 0.0010892064983052884, - "loss": 1.2095, - "step": 146000 - }, - { - "epoch": 29.38, - "learning_rate": 0.0010874664309850487, - "loss": 1.2102, - "step": 146100 - }, - { - "epoch": 29.4, - "learning_rate": 0.0010857266839824074, - "loss": 1.2116, - "step": 146200 - }, - { - "epoch": 29.42, - "learning_rate": 0.0010839872607260209, - "loss": 1.2121, - "step": 146300 - }, - { - "epoch": 29.44, - "learning_rate": 0.0010822481646439047, - "loss": 1.2101, - "step": 146400 - }, - { - "epoch": 29.46, - "learning_rate": 0.0010805093991634325, - "loss": 1.2077, - "step": 146500 - }, - { - "epoch": 29.48, - "learning_rate": 0.001078770967711325, - "loss": 1.2103, - "step": 146600 - }, - { - "epoch": 29.5, - "learning_rate": 0.001077032873713645, - "loss": 1.2079, - "step": 146700 - }, - { - "epoch": 29.52, - "learning_rate": 0.0010752951205957896, - "loss": 1.2105, - "step": 146800 - }, - { - "epoch": 29.54, - "learning_rate": 0.0010735577117824847, - "loss": 1.2118, - "step": 146900 - }, - { - "epoch": 29.56, - "learning_rate": 0.0010718206506977778, - "loss": 1.2105, - "step": 147000 - }, - { - "epoch": 29.58, - "learning_rate": 0.0010700839407650313, - "loss": 1.2045, - "step": 147100 - }, - { - "epoch": 29.6, - "learning_rate": 0.001068347585406914, - "loss": 1.2123, - "step": 147200 - }, - { - "epoch": 29.62, - "learning_rate": 0.0010666115880453974, - "loss": 1.2078, - "step": 147300 - }, - { - "epoch": 29.64, - "learning_rate": 0.0010648759521017476, - "loss": 1.2099, - "step": 147400 - }, - { - "epoch": 29.66, - "learning_rate": 0.0010631406809965178, - "loss": 1.2085, - "step": 147500 - }, - { - "epoch": 29.68, - "learning_rate": 0.0010614057781495414, - "loss": 1.2119, - "step": 147600 - }, - { - "epoch": 29.7, - "learning_rate": 0.001059671246979928, - "loss": 1.2093, - "step": 147700 - }, - { - "epoch": 29.72, - "learning_rate": 0.001057937090906053, - "loss": 1.2063, - "step": 147800 - }, - { - "epoch": 29.74, - "learning_rate": 0.001056203313345554, - "loss": 1.2055, - "step": 147900 - }, - { - "epoch": 29.76, - "learning_rate": 0.0010544699177153208, - "loss": 1.2086, - "step": 148000 - }, - { - "epoch": 29.78, - "learning_rate": 0.0010527369074314922, - "loss": 1.2115, - "step": 148100 - }, - { - "epoch": 29.8, - "learning_rate": 0.0010510042859094464, - "loss": 1.2102, - "step": 148200 - }, - { - "epoch": 29.82, - "learning_rate": 0.0010492720565637972, - "loss": 1.2079, - "step": 148300 - }, - { - "epoch": 29.84, - "learning_rate": 0.001047540222808383, - "loss": 1.2114, - "step": 148400 - }, - { - "epoch": 29.86, - "learning_rate": 0.001045808788056264, - "loss": 1.2092, - "step": 148500 - }, - { - "epoch": 29.88, - "learning_rate": 0.001044077755719714, - "loss": 1.2106, - "step": 148600 - }, - { - "epoch": 29.9, - "learning_rate": 0.0010423471292102147, - "loss": 1.2099, - "step": 148700 - }, - { - "epoch": 29.92, - "learning_rate": 0.0010406169119384452, - "loss": 1.2035, - "step": 148800 - }, - { - "epoch": 29.94, - "learning_rate": 0.0010388871073142806, - "loss": 1.2079, - "step": 148900 - }, - { - "epoch": 29.96, - "learning_rate": 0.0010371577187467818, - "loss": 1.2114, - "step": 149000 - }, - { - "epoch": 29.98, - "learning_rate": 0.001035428749644191, - "loss": 1.2087, - "step": 149100 - }, - { - "epoch": 30.0, - "eval_accuracy": 0.41176053219028325, - "eval_loss": 1.1731864213943481, - "eval_runtime": 19.7738, - "eval_samples_per_second": 4023.916, - "eval_steps_per_second": 15.728, - "step": 149197 - }, - { - "epoch": 30.0, - "learning_rate": 0.001033700203413921, - "loss": 1.2107, - "step": 149200 - }, - { - "epoch": 30.02, - "learning_rate": 0.0010319893625408032, - "loss": 1.204, - "step": 149300 - }, - { - "epoch": 30.04, - "learning_rate": 0.0010302616679603773, - "loss": 1.2, - "step": 149400 - }, - { - "epoch": 30.06, - "learning_rate": 0.0010285344064354445, - "loss": 1.2002, - "step": 149500 - }, - { - "epoch": 30.08, - "learning_rate": 0.0010268075813700541, - "loss": 1.1993, - "step": 149600 - }, - { - "epoch": 30.1, - "learning_rate": 0.0010250811961673946, - "loss": 1.2016, - "step": 149700 - }, - { - "epoch": 30.12, - "learning_rate": 0.0010233552542297884, - "loss": 1.2029, - "step": 149800 - }, - { - "epoch": 30.14, - "learning_rate": 0.001021629758958684, - "loss": 1.2027, - "step": 149900 - }, - { - "epoch": 30.16, - "learning_rate": 0.0010199047137546503, - "loss": 1.2043, - "step": 150000 - }, - { - "epoch": 30.18, - "learning_rate": 0.0010181801220173676, - "loss": 1.2028, - "step": 150100 - }, - { - "epoch": 30.2, - "learning_rate": 0.0010164559871456242, - "loss": 1.2031, - "step": 150200 - }, - { - "epoch": 30.22, - "learning_rate": 0.0010147323125373072, - "loss": 1.2036, - "step": 150300 - }, - { - "epoch": 30.24, - "learning_rate": 0.0010130263313926052, - "loss": 1.2056, - "step": 150400 - }, - { - "epoch": 30.26, - "learning_rate": 0.001011303582813796, - "loss": 1.2015, - "step": 150500 - }, - { - "epoch": 30.28, - "learning_rate": 0.0010095813046526582, - "loss": 1.2047, - "step": 150600 - }, - { - "epoch": 30.3, - "learning_rate": 0.0010078595003034205, - "loss": 1.2056, - "step": 150700 - }, - { - "epoch": 30.32, - "learning_rate": 0.0010061381731593774, - "loss": 1.2017, - "step": 150800 - }, - { - "epoch": 30.34, - "learning_rate": 0.0010044345326882533, - "loss": 1.2007, - "step": 150900 - }, - { - "epoch": 30.36, - "learning_rate": 0.0010027141652740393, - "loss": 1.1998, - "step": 151000 - }, - { - "epoch": 30.38, - "learning_rate": 0.0010009942852053342, - "loss": 1.2035, - "step": 151100 - }, - { - "epoch": 30.4, - "learning_rate": 0.0009992748958716382, - "loss": 1.1996, - "step": 151200 - }, - { - "epoch": 30.42, - "learning_rate": 0.0009975560006614873, - "loss": 1.2052, - "step": 151300 - }, - { - "epoch": 30.44, - "learning_rate": 0.0009958376029624422, - "loss": 1.2017, - "step": 151400 - }, - { - "epoch": 30.46, - "learning_rate": 0.0009941197061610842, - "loss": 1.2044, - "step": 151500 - }, - { - "epoch": 30.48, - "learning_rate": 0.0009924023136430055, - "loss": 1.2078, - "step": 151600 - }, - { - "epoch": 30.5, - "learning_rate": 0.000990685428792806, - "loss": 1.2025, - "step": 151700 - }, - { - "epoch": 30.52, - "learning_rate": 0.0009889690549940852, - "loss": 1.2001, - "step": 151800 - }, - { - "epoch": 30.54, - "learning_rate": 0.0009872531956294354, - "loss": 1.1996, - "step": 151900 - }, - { - "epoch": 30.56, - "learning_rate": 0.0009855378540804332, - "loss": 1.2012, - "step": 152000 - }, - { - "epoch": 30.58, - "learning_rate": 0.0009838230337276372, - "loss": 1.2039, - "step": 152100 - }, - { - "epoch": 30.6, - "learning_rate": 0.0009821087379505776, - "loss": 1.2026, - "step": 152200 - }, - { - "epoch": 30.62, - "learning_rate": 0.0009803949701277515, - "loss": 1.2004, - "step": 152300 - }, - { - "epoch": 30.64, - "learning_rate": 0.0009786817336366138, - "loss": 1.2015, - "step": 152400 - }, - { - "epoch": 30.66, - "learning_rate": 0.0009769690318535743, - "loss": 1.2026, - "step": 152500 - }, - { - "epoch": 30.68, - "learning_rate": 0.0009752739871163907, - "loss": 1.2037, - "step": 152600 - }, - { - "epoch": 30.7, - "learning_rate": 0.0009735623594432755, - "loss": 1.2016, - "step": 152700 - }, - { - "epoch": 30.72, - "learning_rate": 0.0009718512765674095, - "loss": 1.2024, - "step": 152800 - }, - { - "epoch": 30.74, - "learning_rate": 0.0009701407418609562, - "loss": 1.202, - "step": 152900 - }, - { - "epoch": 30.76, - "learning_rate": 0.0009684307586950005, - "loss": 1.206, - "step": 153000 - }, - { - "epoch": 30.78, - "learning_rate": 0.0009667213304395399, - "loss": 1.1989, - "step": 153100 - }, - { - "epoch": 30.8, - "learning_rate": 0.0009650124604634786, - "loss": 1.2081, - "step": 153200 - }, - { - "epoch": 30.82, - "learning_rate": 0.0009633041521346189, - "loss": 1.2015, - "step": 153300 - }, - { - "epoch": 30.85, - "learning_rate": 0.0009615964088196581, - "loss": 1.1989, - "step": 153400 - }, - { - "epoch": 30.87, - "learning_rate": 0.0009598892338841794, - "loss": 1.2006, - "step": 153500 - }, - { - "epoch": 30.89, - "learning_rate": 0.0009581826306926464, - "loss": 1.1991, - "step": 153600 - }, - { - "epoch": 30.91, - "learning_rate": 0.000956493660031415, - "loss": 1.2033, - "step": 153700 - }, - { - "epoch": 30.93, - "learning_rate": 0.0009547882046153125, - "loss": 1.2024, - "step": 153800 - }, - { - "epoch": 30.95, - "learning_rate": 0.000953083330996152, - "loss": 1.2042, - "step": 153900 - }, - { - "epoch": 30.97, - "learning_rate": 0.0009513790425338609, - "loss": 1.2005, - "step": 154000 - }, - { - "epoch": 30.99, - "learning_rate": 0.000949675342587214, - "loss": 1.2005, - "step": 154100 - }, - { - "epoch": 31.0, - "eval_accuracy": 0.41267003209737646, - "eval_loss": 1.1657705307006836, - "eval_runtime": 19.6203, - "eval_samples_per_second": 4055.399, - "eval_steps_per_second": 15.851, - "step": 154170 - }, - { - "epoch": 31.01, - "learning_rate": 0.0009479722345138251, - "loss": 1.1976, - "step": 154200 - }, - { - "epoch": 31.03, - "learning_rate": 0.0009462697216701424, - "loss": 1.1919, - "step": 154300 - }, - { - "epoch": 31.05, - "learning_rate": 0.0009445678074114414, - "loss": 1.1973, - "step": 154400 - }, - { - "epoch": 31.07, - "learning_rate": 0.0009428664950918177, - "loss": 1.1941, - "step": 154500 - }, - { - "epoch": 31.09, - "learning_rate": 0.0009411657880641792, - "loss": 1.1909, - "step": 154600 - }, - { - "epoch": 31.11, - "learning_rate": 0.0009394656896802428, - "loss": 1.1942, - "step": 154700 - }, - { - "epoch": 31.13, - "learning_rate": 0.0009377662032905253, - "loss": 1.1925, - "step": 154800 - }, - { - "epoch": 31.15, - "learning_rate": 0.0009360673322443375, - "loss": 1.1955, - "step": 154900 - }, - { - "epoch": 31.17, - "learning_rate": 0.0009343690798897762, - "loss": 1.1939, - "step": 155000 - }, - { - "epoch": 31.19, - "learning_rate": 0.0009326714495737206, - "loss": 1.1936, - "step": 155100 - }, - { - "epoch": 31.21, - "learning_rate": 0.0009309744446418236, - "loss": 1.195, - "step": 155200 - }, - { - "epoch": 31.23, - "learning_rate": 0.0009292780684385055, - "loss": 1.1938, - "step": 155300 - }, - { - "epoch": 31.25, - "learning_rate": 0.0009275823243069464, - "loss": 1.1969, - "step": 155400 - }, - { - "epoch": 31.27, - "learning_rate": 0.0009258872155890821, - "loss": 1.1971, - "step": 155500 - }, - { - "epoch": 31.29, - "learning_rate": 0.0009241927456255962, - "loss": 1.1924, - "step": 155600 - }, - { - "epoch": 31.31, - "learning_rate": 0.0009224989177559132, - "loss": 1.197, - "step": 155700 - }, - { - "epoch": 31.33, - "learning_rate": 0.0009208057353181909, - "loss": 1.1955, - "step": 155800 - }, - { - "epoch": 31.35, - "learning_rate": 0.0009191132016493168, - "loss": 1.1927, - "step": 155900 - }, - { - "epoch": 31.37, - "learning_rate": 0.0009174213200848991, - "loss": 1.1954, - "step": 156000 - }, - { - "epoch": 31.39, - "learning_rate": 0.0009157300939592614, - "loss": 1.1945, - "step": 156100 - }, - { - "epoch": 31.41, - "learning_rate": 0.0009140395266054343, - "loss": 1.1989, - "step": 156200 - }, - { - "epoch": 31.43, - "learning_rate": 0.0009123496213551513, - "loss": 1.1914, - "step": 156300 - }, - { - "epoch": 31.45, - "learning_rate": 0.0009106603815388409, - "loss": 1.1953, - "step": 156400 - }, - { - "epoch": 31.47, - "learning_rate": 0.0009089718104856201, - "loss": 1.1952, - "step": 156500 - }, - { - "epoch": 31.49, - "learning_rate": 0.0009072839115232867, - "loss": 1.1936, - "step": 156600 - }, - { - "epoch": 31.51, - "learning_rate": 0.0009055966879783159, - "loss": 1.197, - "step": 156700 - }, - { - "epoch": 31.53, - "learning_rate": 0.0009039101431758506, - "loss": 1.1957, - "step": 156800 - }, - { - "epoch": 31.55, - "learning_rate": 0.0009022242804396972, - "loss": 1.1971, - "step": 156900 - }, - { - "epoch": 31.57, - "learning_rate": 0.0009005391030923156, - "loss": 1.1941, - "step": 157000 - }, - { - "epoch": 31.59, - "learning_rate": 0.0008988546144548173, - "loss": 1.1935, - "step": 157100 - }, - { - "epoch": 31.61, - "learning_rate": 0.0008971708178469554, - "loss": 1.1977, - "step": 157200 - }, - { - "epoch": 31.63, - "learning_rate": 0.00089548771658712, - "loss": 1.1978, - "step": 157300 - }, - { - "epoch": 31.65, - "learning_rate": 0.0008938053139923291, - "loss": 1.1932, - "step": 157400 - }, - { - "epoch": 31.67, - "learning_rate": 0.0008921236133782254, - "loss": 1.1928, - "step": 157500 - }, - { - "epoch": 31.69, - "learning_rate": 0.0008904426180590678, - "loss": 1.1976, - "step": 157600 - }, - { - "epoch": 31.71, - "learning_rate": 0.0008887623313477256, - "loss": 1.1946, - "step": 157700 - }, - { - "epoch": 31.73, - "learning_rate": 0.0008870827565556696, - "loss": 1.1975, - "step": 157800 - }, - { - "epoch": 31.75, - "learning_rate": 0.0008854038969929701, - "loss": 1.195, - "step": 157900 - }, - { - "epoch": 31.77, - "learning_rate": 0.0008837257559682865, - "loss": 1.1947, - "step": 158000 - }, - { - "epoch": 31.79, - "learning_rate": 0.0008820483367888628, - "loss": 1.1963, - "step": 158100 - }, - { - "epoch": 31.81, - "learning_rate": 0.0008803716427605191, - "loss": 1.1963, - "step": 158200 - }, - { - "epoch": 31.83, - "learning_rate": 0.0008786956771876478, - "loss": 1.1923, - "step": 158300 - }, - { - "epoch": 31.85, - "learning_rate": 0.000877020443373205, - "loss": 1.1971, - "step": 158400 - }, - { - "epoch": 31.87, - "learning_rate": 0.0008753459446187053, - "loss": 1.1951, - "step": 158500 - }, - { - "epoch": 31.89, - "learning_rate": 0.0008736721842242136, - "loss": 1.1981, - "step": 158600 - }, - { - "epoch": 31.91, - "learning_rate": 0.0008719991654883402, - "loss": 1.1991, - "step": 158700 - }, - { - "epoch": 31.93, - "learning_rate": 0.0008703268917082342, - "loss": 1.1949, - "step": 158800 - }, - { - "epoch": 31.95, - "learning_rate": 0.0008686553661795765, - "loss": 1.1921, - "step": 158900 - }, - { - "epoch": 31.97, - "learning_rate": 0.0008669845921965718, - "loss": 1.1974, - "step": 159000 - }, - { - "epoch": 31.99, - "learning_rate": 0.0008653145730519456, - "loss": 1.1944, - "step": 159100 - }, - { - "epoch": 32.0, - "eval_accuracy": 0.4131173845092614, - "eval_loss": 1.1602274179458618, - "eval_runtime": 19.7773, - "eval_samples_per_second": 4023.208, - "eval_steps_per_second": 15.725, - "step": 159144 - }, - { - "epoch": 32.01, - "learning_rate": 0.000863645312036935, - "loss": 1.1882, - "step": 159200 - }, - { - "epoch": 32.03, - "learning_rate": 0.0008619768124412836, - "loss": 1.1821, - "step": 159300 - }, - { - "epoch": 32.05, - "learning_rate": 0.000860309077553233, - "loss": 1.1848, - "step": 159400 - }, - { - "epoch": 32.07, - "learning_rate": 0.0008586421106595186, - "loss": 1.1857, - "step": 159500 - }, - { - "epoch": 32.09, - "learning_rate": 0.0008569759150453628, - "loss": 1.1859, - "step": 159600 - }, - { - "epoch": 32.11, - "learning_rate": 0.0008553104939944677, - "loss": 1.189, - "step": 159700 - }, - { - "epoch": 32.13, - "learning_rate": 0.0008536458507890077, - "loss": 1.189, - "step": 159800 - }, - { - "epoch": 32.15, - "learning_rate": 0.0008519819887096256, - "loss": 1.1908, - "step": 159900 - }, - { - "epoch": 32.17, - "learning_rate": 0.0008503189110354243, - "loss": 1.1872, - "step": 160000 - }, - { - "epoch": 32.19, - "learning_rate": 0.0008486566210439614, - "loss": 1.1861, - "step": 160100 - }, - { - "epoch": 32.21, - "learning_rate": 0.0008470117330755695, - "loss": 1.1838, - "step": 160200 - }, - { - "epoch": 32.23, - "learning_rate": 0.0008453510203175021, - "loss": 1.1876, - "step": 160300 - }, - { - "epoch": 32.25, - "learning_rate": 0.0008436911050327827, - "loss": 1.1843, - "step": 160400 - }, - { - "epoch": 32.27, - "learning_rate": 0.0008420319904927365, - "loss": 1.1884, - "step": 160500 - }, - { - "epoch": 32.29, - "learning_rate": 0.0008403736799671097, - "loss": 1.191, - "step": 160600 - }, - { - "epoch": 32.31, - "learning_rate": 0.000838716176724065, - "loss": 1.1915, - "step": 160700 - }, - { - "epoch": 32.33, - "learning_rate": 0.0008370594840301723, - "loss": 1.188, - "step": 160800 - }, - { - "epoch": 32.35, - "learning_rate": 0.000835403605150406, - "loss": 1.1913, - "step": 160900 - }, - { - "epoch": 32.37, - "learning_rate": 0.000833748543348136, - "loss": 1.1905, - "step": 161000 - }, - { - "epoch": 32.39, - "learning_rate": 0.0008320943018851221, - "loss": 1.1876, - "step": 161100 - }, - { - "epoch": 32.41, - "learning_rate": 0.0008304408840215062, - "loss": 1.1882, - "step": 161200 - }, - { - "epoch": 32.43, - "learning_rate": 0.0008287882930158088, - "loss": 1.1874, - "step": 161300 - }, - { - "epoch": 32.45, - "learning_rate": 0.0008271365321249197, - "loss": 1.1908, - "step": 161400 - }, - { - "epoch": 32.47, - "learning_rate": 0.0008254856046040937, - "loss": 1.1903, - "step": 161500 - }, - { - "epoch": 32.49, - "learning_rate": 0.0008238355137069418, - "loss": 1.192, - "step": 161600 - }, - { - "epoch": 32.51, - "learning_rate": 0.0008221862626854274, - "loss": 1.1883, - "step": 161700 - }, - { - "epoch": 32.53, - "learning_rate": 0.0008205378547898581, - "loss": 1.1918, - "step": 161800 - }, - { - "epoch": 32.55, - "learning_rate": 0.0008188902932688807, - "loss": 1.1885, - "step": 161900 - }, - { - "epoch": 32.57, - "learning_rate": 0.0008172435813694726, - "loss": 1.1887, - "step": 162000 - }, - { - "epoch": 32.59, - "learning_rate": 0.0008155977223369379, - "loss": 1.1873, - "step": 162100 - }, - { - "epoch": 32.61, - "learning_rate": 0.0008139527194148993, - "loss": 1.1863, - "step": 162200 - }, - { - "epoch": 32.63, - "learning_rate": 0.0008123085758452935, - "loss": 1.1897, - "step": 162300 - }, - { - "epoch": 32.65, - "learning_rate": 0.0008106652948683613, - "loss": 1.1856, - "step": 162400 - }, - { - "epoch": 32.67, - "learning_rate": 0.0008090392995776086, - "loss": 1.1922, - "step": 162500 - }, - { - "epoch": 32.69, - "learning_rate": 0.0008073977447932476, - "loss": 1.1861, - "step": 162600 - }, - { - "epoch": 32.72, - "learning_rate": 0.0008057570622797192, - "loss": 1.1881, - "step": 162700 - }, - { - "epoch": 32.74, - "learning_rate": 0.0008041336489961784, - "loss": 1.1852, - "step": 162800 - }, - { - "epoch": 32.76, - "learning_rate": 0.0008024947119195025, - "loss": 1.1851, - "step": 162900 - }, - { - "epoch": 32.78, - "learning_rate": 0.000800856656776449, - "loss": 1.1819, - "step": 163000 - }, - { - "epoch": 32.8, - "learning_rate": 0.0007992194867952607, - "loss": 1.1882, - "step": 163100 - }, - { - "epoch": 32.82, - "learning_rate": 0.0007975832052024367, - "loss": 1.1864, - "step": 163200 - }, - { - "epoch": 32.84, - "learning_rate": 0.0007959478152227251, - "loss": 1.1914, - "step": 163300 - }, - { - "epoch": 32.86, - "learning_rate": 0.0007943133200791164, - "loss": 1.1888, - "step": 163400 - }, - { - "epoch": 32.88, - "learning_rate": 0.0007926797229928376, - "loss": 1.1831, - "step": 163500 - }, - { - "epoch": 32.9, - "learning_rate": 0.0007910470271833464, - "loss": 1.1878, - "step": 163600 - }, - { - "epoch": 32.92, - "learning_rate": 0.0007894152358683243, - "loss": 1.1889, - "step": 163700 - }, - { - "epoch": 32.94, - "learning_rate": 0.0007877843522636694, - "loss": 1.1872, - "step": 163800 - }, - { - "epoch": 32.96, - "learning_rate": 0.0007861543795834913, - "loss": 1.1851, - "step": 163900 - }, - { - "epoch": 32.98, - "learning_rate": 0.0007845253210401045, - "loss": 1.1905, - "step": 164000 - }, - { - "epoch": 33.0, - "learning_rate": 0.0007828971798440226, - "loss": 1.1887, - "step": 164100 - }, - { - "epoch": 33.0, - "eval_accuracy": 0.41392475061534817, - "eval_loss": 1.1511569023132324, - "eval_runtime": 19.7992, - "eval_samples_per_second": 4018.746, - "eval_steps_per_second": 15.708, - "step": 164117 - }, - { - "epoch": 33.02, - "learning_rate": 0.0007812699592039499, - "loss": 1.1783, - "step": 164200 - }, - { - "epoch": 33.04, - "learning_rate": 0.0007796436623267771, - "loss": 1.1784, - "step": 164300 - }, - { - "epoch": 33.06, - "learning_rate": 0.0007780182924175748, - "loss": 1.178, - "step": 164400 - }, - { - "epoch": 33.08, - "learning_rate": 0.0007763938526795867, - "loss": 1.177, - "step": 164500 - }, - { - "epoch": 33.1, - "learning_rate": 0.0007747703463142225, - "loss": 1.1826, - "step": 164600 - }, - { - "epoch": 33.12, - "learning_rate": 0.0007731477765210531, - "loss": 1.1774, - "step": 164700 - }, - { - "epoch": 33.14, - "learning_rate": 0.0007715261464978038, - "loss": 1.1816, - "step": 164800 - }, - { - "epoch": 33.16, - "learning_rate": 0.0007699054594403476, - "loss": 1.1792, - "step": 164900 - }, - { - "epoch": 33.18, - "learning_rate": 0.0007682857185426989, - "loss": 1.1801, - "step": 165000 - }, - { - "epoch": 33.2, - "learning_rate": 0.0007666669269970071, - "loss": 1.1801, - "step": 165100 - }, - { - "epoch": 33.22, - "learning_rate": 0.0007650490879935517, - "loss": 1.1833, - "step": 165200 - }, - { - "epoch": 33.24, - "learning_rate": 0.0007634322047207346, - "loss": 1.1818, - "step": 165300 - }, - { - "epoch": 33.26, - "learning_rate": 0.0007618162803650734, - "loss": 1.1805, - "step": 165400 - }, - { - "epoch": 33.28, - "learning_rate": 0.0007602013181111966, - "loss": 1.179, - "step": 165500 - }, - { - "epoch": 33.3, - "learning_rate": 0.0007585873211418363, - "loss": 1.1792, - "step": 165600 - }, - { - "epoch": 33.32, - "learning_rate": 0.000756974292637823, - "loss": 1.1775, - "step": 165700 - }, - { - "epoch": 33.34, - "learning_rate": 0.0007553622357780774, - "loss": 1.183, - "step": 165800 - }, - { - "epoch": 33.36, - "learning_rate": 0.0007537511537396058, - "loss": 1.1838, - "step": 165900 - }, - { - "epoch": 33.38, - "learning_rate": 0.0007521410496974935, - "loss": 1.182, - "step": 166000 - }, - { - "epoch": 33.4, - "learning_rate": 0.0007505319268248988, - "loss": 1.1809, - "step": 166100 - }, - { - "epoch": 33.42, - "learning_rate": 0.0007489237882930453, - "loss": 1.1785, - "step": 166200 - }, - { - "epoch": 33.44, - "learning_rate": 0.0007473166372712171, - "loss": 1.184, - "step": 166300 - }, - { - "epoch": 33.46, - "learning_rate": 0.0007457104769267522, - "loss": 1.1809, - "step": 166400 - }, - { - "epoch": 33.48, - "learning_rate": 0.000744105310425037, - "loss": 1.1809, - "step": 166500 - }, - { - "epoch": 33.5, - "learning_rate": 0.0007425011409294979, - "loss": 1.1788, - "step": 166600 - }, - { - "epoch": 33.52, - "learning_rate": 0.0007408979716015968, - "loss": 1.1821, - "step": 166700 - }, - { - "epoch": 33.54, - "learning_rate": 0.000739295805600825, - "loss": 1.1827, - "step": 166800 - }, - { - "epoch": 33.56, - "learning_rate": 0.0007376946460846965, - "loss": 1.1819, - "step": 166900 - }, - { - "epoch": 33.58, - "learning_rate": 0.0007360944962087409, - "loss": 1.1793, - "step": 167000 - }, - { - "epoch": 33.6, - "learning_rate": 0.0007344953591264986, - "loss": 1.1827, - "step": 167100 - }, - { - "epoch": 33.62, - "learning_rate": 0.000732897237989514, - "loss": 1.1813, - "step": 167200 - }, - { - "epoch": 33.64, - "learning_rate": 0.0007313001359473295, - "loss": 1.1835, - "step": 167300 - }, - { - "epoch": 33.66, - "learning_rate": 0.0007297040561474782, - "loss": 1.1772, - "step": 167400 - }, - { - "epoch": 33.68, - "learning_rate": 0.0007281090017354799, - "loss": 1.1828, - "step": 167500 - }, - { - "epoch": 33.7, - "learning_rate": 0.0007265149758548325, - "loss": 1.182, - "step": 167600 - }, - { - "epoch": 33.72, - "learning_rate": 0.0007249219816470082, - "loss": 1.1799, - "step": 167700 - }, - { - "epoch": 33.74, - "learning_rate": 0.0007233300222514435, - "loss": 1.1794, - "step": 167800 - }, - { - "epoch": 33.76, - "learning_rate": 0.0007217391008055382, - "loss": 1.1759, - "step": 167900 - }, - { - "epoch": 33.78, - "learning_rate": 0.0007201492204446453, - "loss": 1.1799, - "step": 168000 - }, - { - "epoch": 33.8, - "learning_rate": 0.0007185603843020663, - "loss": 1.1757, - "step": 168100 - }, - { - "epoch": 33.82, - "learning_rate": 0.0007169725955090442, - "loss": 1.1791, - "step": 168200 - }, - { - "epoch": 33.84, - "learning_rate": 0.0007153858571947587, - "loss": 1.1777, - "step": 168300 - }, - { - "epoch": 33.86, - "learning_rate": 0.0007138160241077916, - "loss": 1.1769, - "step": 168400 - }, - { - "epoch": 33.88, - "learning_rate": 0.0007122313855474593, - "loss": 1.1821, - "step": 168500 - }, - { - "epoch": 33.9, - "learning_rate": 0.0007106478068097369, - "loss": 1.1786, - "step": 168600 - }, - { - "epoch": 33.92, - "learning_rate": 0.0007090652910155055, - "loss": 1.1816, - "step": 168700 - }, - { - "epoch": 33.94, - "learning_rate": 0.0007074838412835532, - "loss": 1.1778, - "step": 168800 - }, - { - "epoch": 33.96, - "learning_rate": 0.0007059034607305667, - "loss": 1.1819, - "step": 168900 - }, - { - "epoch": 33.98, - "learning_rate": 0.0007043241524711256, - "loss": 1.1795, - "step": 169000 - }, - { - "epoch": 34.0, - "eval_accuracy": 0.4142251067155042, - "eval_loss": 1.1453089714050293, - "eval_runtime": 19.8806, - "eval_samples_per_second": 4002.288, - "eval_steps_per_second": 15.643, - "step": 169090 - }, - { - "epoch": 34.0, - "learning_rate": 0.000702745919617695, - "loss": 1.1752, - "step": 169100 - }, - { - "epoch": 34.02, - "learning_rate": 0.0007011687652806225, - "loss": 1.1677, - "step": 169200 - }, - { - "epoch": 34.04, - "learning_rate": 0.000699592692568129, - "loss": 1.17, - "step": 169300 - }, - { - "epoch": 34.06, - "learning_rate": 0.0006980177045863047, - "loss": 1.1725, - "step": 169400 - }, - { - "epoch": 34.08, - "learning_rate": 0.0006964438044391006, - "loss": 1.1729, - "step": 169500 - }, - { - "epoch": 34.1, - "learning_rate": 0.0006948709952283247, - "loss": 1.1716, - "step": 169600 - }, - { - "epoch": 34.12, - "learning_rate": 0.0006932992800536353, - "loss": 1.174, - "step": 169700 - }, - { - "epoch": 34.14, - "learning_rate": 0.0006917286620125349, - "loss": 1.1747, - "step": 169800 - }, - { - "epoch": 34.16, - "learning_rate": 0.0006901591442003616, - "loss": 1.1702, - "step": 169900 - }, - { - "epoch": 34.18, - "learning_rate": 0.0006885907297102878, - "loss": 1.1726, - "step": 170000 - }, - { - "epoch": 34.2, - "learning_rate": 0.0006870234216333101, - "loss": 1.1726, - "step": 170100 - }, - { - "epoch": 34.22, - "learning_rate": 0.0006854572230582455, - "loss": 1.1732, - "step": 170200 - }, - { - "epoch": 34.24, - "learning_rate": 0.000683892137071723, - "loss": 1.1736, - "step": 170300 - }, - { - "epoch": 34.26, - "learning_rate": 0.00068232816675818, - "loss": 1.1748, - "step": 170400 - }, - { - "epoch": 34.28, - "learning_rate": 0.0006807653151998552, - "loss": 1.1725, - "step": 170500 - }, - { - "epoch": 34.3, - "learning_rate": 0.0006792035854767827, - "loss": 1.1689, - "step": 170600 - }, - { - "epoch": 34.32, - "learning_rate": 0.0006776429806667841, - "loss": 1.1719, - "step": 170700 - }, - { - "epoch": 34.34, - "learning_rate": 0.0006760835038454657, - "loss": 1.1711, - "step": 170800 - }, - { - "epoch": 34.36, - "learning_rate": 0.0006745407359349601, - "loss": 1.1753, - "step": 170900 - }, - { - "epoch": 34.38, - "learning_rate": 0.0006729835129523944, - "loss": 1.1734, - "step": 171000 - }, - { - "epoch": 34.4, - "learning_rate": 0.0006714274271412859, - "loss": 1.174, - "step": 171100 - }, - { - "epoch": 34.42, - "learning_rate": 0.0006698724815683352, - "loss": 1.1727, - "step": 171200 - }, - { - "epoch": 34.44, - "learning_rate": 0.0006683186792979937, - "loss": 1.1714, - "step": 171300 - }, - { - "epoch": 34.46, - "learning_rate": 0.0006667660233924612, - "loss": 1.1702, - "step": 171400 - }, - { - "epoch": 34.48, - "learning_rate": 0.0006652145169116783, - "loss": 1.1687, - "step": 171500 - }, - { - "epoch": 34.5, - "learning_rate": 0.0006636641629133204, - "loss": 1.1715, - "step": 171600 - }, - { - "epoch": 34.52, - "learning_rate": 0.0006621149644527902, - "loss": 1.1716, - "step": 171700 - }, - { - "epoch": 34.54, - "learning_rate": 0.0006605669245832145, - "loss": 1.1719, - "step": 171800 - }, - { - "epoch": 34.56, - "learning_rate": 0.0006590200463554366, - "loss": 1.1726, - "step": 171900 - }, - { - "epoch": 34.59, - "learning_rate": 0.0006574743328180105, - "loss": 1.1751, - "step": 172000 - }, - { - "epoch": 34.61, - "learning_rate": 0.0006559297870171938, - "loss": 1.1714, - "step": 172100 - }, - { - "epoch": 34.63, - "learning_rate": 0.0006543864119969438, - "loss": 1.1752, - "step": 172200 - }, - { - "epoch": 34.65, - "learning_rate": 0.0006528442107989105, - "loss": 1.1756, - "step": 172300 - }, - { - "epoch": 34.67, - "learning_rate": 0.0006513031864624303, - "loss": 1.1723, - "step": 172400 - }, - { - "epoch": 34.69, - "learning_rate": 0.0006497633420245197, - "loss": 1.1723, - "step": 172500 - }, - { - "epoch": 34.71, - "learning_rate": 0.0006482246805198708, - "loss": 1.1735, - "step": 172600 - }, - { - "epoch": 34.73, - "learning_rate": 0.0006466872049808438, - "loss": 1.1732, - "step": 172700 - }, - { - "epoch": 34.75, - "learning_rate": 0.0006451509184374624, - "loss": 1.1732, - "step": 172800 - }, - { - "epoch": 34.77, - "learning_rate": 0.0006436158239174055, - "loss": 1.1751, - "step": 172900 - }, - { - "epoch": 34.79, - "learning_rate": 0.0006420819244460042, - "loss": 1.1721, - "step": 173000 - }, - { - "epoch": 34.81, - "learning_rate": 0.0006405492230462343, - "loss": 1.1709, - "step": 173100 - }, - { - "epoch": 34.83, - "learning_rate": 0.0006390177227387101, - "loss": 1.1696, - "step": 173200 - }, - { - "epoch": 34.85, - "learning_rate": 0.0006374874265416783, - "loss": 1.17, - "step": 173300 - }, - { - "epoch": 34.87, - "learning_rate": 0.0006359583374710134, - "loss": 1.1774, - "step": 173400 - }, - { - "epoch": 34.89, - "learning_rate": 0.0006344304585402111, - "loss": 1.1666, - "step": 173500 - }, - { - "epoch": 34.91, - "learning_rate": 0.0006329037927603816, - "loss": 1.1701, - "step": 173600 - }, - { - "epoch": 34.93, - "learning_rate": 0.0006313783431402438, - "loss": 1.1708, - "step": 173700 - }, - { - "epoch": 34.95, - "learning_rate": 0.0006298541126861209, - "loss": 1.1724, - "step": 173800 - }, - { - "epoch": 34.97, - "learning_rate": 0.0006283311044019327, - "loss": 1.1727, - "step": 173900 - }, - { - "epoch": 34.99, - "learning_rate": 0.0006268093212891912, - "loss": 1.1685, - "step": 174000 - }, - { - "epoch": 35.0, - "eval_accuracy": 0.41499333743989697, - "eval_loss": 1.1372462511062622, - "eval_runtime": 19.9974, - "eval_samples_per_second": 3978.919, - "eval_steps_per_second": 15.552, - "step": 174063 - }, - { - "epoch": 35.01, - "learning_rate": 0.0006253039658071285, - "loss": 1.1616, - "step": 174100 - }, - { - "epoch": 35.03, - "learning_rate": 0.0006237846297056513, - "loss": 1.1594, - "step": 174200 - }, - { - "epoch": 35.05, - "learning_rate": 0.0006222665277357129, - "loss": 1.1619, - "step": 174300 - }, - { - "epoch": 35.07, - "learning_rate": 0.0006207496628891555, - "loss": 1.1655, - "step": 174400 - }, - { - "epoch": 35.09, - "learning_rate": 0.0006192340381553838, - "loss": 1.1588, - "step": 174500 - }, - { - "epoch": 35.11, - "learning_rate": 0.0006177196565213567, - "loss": 1.1639, - "step": 174600 - }, - { - "epoch": 35.13, - "learning_rate": 0.0006162065209715849, - "loss": 1.1594, - "step": 174700 - }, - { - "epoch": 35.15, - "learning_rate": 0.0006146946344881228, - "loss": 1.164, - "step": 174800 - }, - { - "epoch": 35.17, - "learning_rate": 0.0006131840000505637, - "loss": 1.1602, - "step": 174900 - }, - { - "epoch": 35.19, - "learning_rate": 0.0006116746206360317, - "loss": 1.1629, - "step": 175000 - }, - { - "epoch": 35.21, - "learning_rate": 0.0006101664992191795, - "loss": 1.1656, - "step": 175100 - }, - { - "epoch": 35.23, - "learning_rate": 0.0006086596387721796, - "loss": 1.166, - "step": 175200 - }, - { - "epoch": 35.25, - "learning_rate": 0.0006071540422647201, - "loss": 1.1632, - "step": 175300 - }, - { - "epoch": 35.27, - "learning_rate": 0.0006056497126639966, - "loss": 1.1649, - "step": 175400 - }, - { - "epoch": 35.29, - "learning_rate": 0.0006041466529347094, - "loss": 1.1621, - "step": 175500 - }, - { - "epoch": 35.31, - "learning_rate": 0.0006026448660390557, - "loss": 1.1657, - "step": 175600 - }, - { - "epoch": 35.33, - "learning_rate": 0.0006011443549367248, - "loss": 1.1663, - "step": 175700 - }, - { - "epoch": 35.35, - "learning_rate": 0.0005996451225848903, - "loss": 1.1682, - "step": 175800 - }, - { - "epoch": 35.37, - "learning_rate": 0.0005981471719382066, - "loss": 1.1618, - "step": 175900 - }, - { - "epoch": 35.39, - "learning_rate": 0.0005966505059488022, - "loss": 1.1687, - "step": 176000 - }, - { - "epoch": 35.41, - "learning_rate": 0.0005951551275662743, - "loss": 1.162, - "step": 176100 - }, - { - "epoch": 35.43, - "learning_rate": 0.0005936610397376806, - "loss": 1.1656, - "step": 176200 - }, - { - "epoch": 35.45, - "learning_rate": 0.0005921682454075374, - "loss": 1.1663, - "step": 176300 - }, - { - "epoch": 35.47, - "learning_rate": 0.0005906767475178108, - "loss": 1.163, - "step": 176400 - }, - { - "epoch": 35.49, - "learning_rate": 0.0005891865490079131, - "loss": 1.1663, - "step": 176500 - }, - { - "epoch": 35.51, - "learning_rate": 0.0005876976528146937, - "loss": 1.1646, - "step": 176600 - }, - { - "epoch": 35.53, - "learning_rate": 0.000586210061872437, - "loss": 1.1609, - "step": 176700 - }, - { - "epoch": 35.55, - "learning_rate": 0.0005847237791128547, - "loss": 1.1629, - "step": 176800 - }, - { - "epoch": 35.57, - "learning_rate": 0.0005832388074650808, - "loss": 1.1615, - "step": 176900 - }, - { - "epoch": 35.59, - "learning_rate": 0.0005817551498556642, - "loss": 1.1688, - "step": 177000 - }, - { - "epoch": 35.61, - "learning_rate": 0.0005802728092085649, - "loss": 1.1621, - "step": 177100 - }, - { - "epoch": 35.63, - "learning_rate": 0.0005787917884451475, - "loss": 1.1638, - "step": 177200 - }, - { - "epoch": 35.65, - "learning_rate": 0.000577312090484176, - "loss": 1.1656, - "step": 177300 - }, - { - "epoch": 35.67, - "learning_rate": 0.0005758337182418055, - "loss": 1.1625, - "step": 177400 - }, - { - "epoch": 35.69, - "learning_rate": 0.0005743566746315804, - "loss": 1.1613, - "step": 177500 - }, - { - "epoch": 35.71, - "learning_rate": 0.0005728809625644257, - "loss": 1.167, - "step": 177600 - }, - { - "epoch": 35.73, - "learning_rate": 0.0005714065849486429, - "loss": 1.1632, - "step": 177700 - }, - { - "epoch": 35.75, - "learning_rate": 0.0005699335446899022, - "loss": 1.1662, - "step": 177800 - }, - { - "epoch": 35.77, - "learning_rate": 0.0005684618446912396, - "loss": 1.1628, - "step": 177900 - }, - { - "epoch": 35.79, - "learning_rate": 0.0005669914878530493, - "loss": 1.1689, - "step": 178000 - }, - { - "epoch": 35.81, - "learning_rate": 0.0005655224770730786, - "loss": 1.1652, - "step": 178100 - }, - { - "epoch": 35.83, - "learning_rate": 0.0005640694851778694, - "loss": 1.1631, - "step": 178200 - }, - { - "epoch": 35.85, - "learning_rate": 0.0005626031616641948, - "loss": 1.1611, - "step": 178300 - }, - { - "epoch": 35.87, - "learning_rate": 0.0005611381928571567, - "loss": 1.1598, - "step": 178400 - }, - { - "epoch": 35.89, - "learning_rate": 0.0005596745816438834, - "loss": 1.1639, - "step": 178500 - }, - { - "epoch": 35.91, - "learning_rate": 0.0005582123309088284, - "loss": 1.1606, - "step": 178600 - }, - { - "epoch": 35.93, - "learning_rate": 0.0005567514435337624, - "loss": 1.1622, - "step": 178700 - }, - { - "epoch": 35.95, - "learning_rate": 0.000555291922397771, - "loss": 1.162, - "step": 178800 - }, - { - "epoch": 35.97, - "learning_rate": 0.000553833770377246, - "loss": 1.164, - "step": 178900 - }, - { - "epoch": 35.99, - "learning_rate": 0.0005523769903458824, - "loss": 1.1658, - "step": 179000 - }, - { - "epoch": 36.0, - "eval_accuracy": 0.4156597525371182, - "eval_loss": 1.1301237344741821, - "eval_runtime": 19.5705, - "eval_samples_per_second": 4065.719, - "eval_steps_per_second": 15.891, - "step": 179037 - }, - { - "epoch": 36.01, - "learning_rate": 0.0005509215851746693, - "loss": 1.1566, - "step": 179100 - }, - { - "epoch": 36.03, - "learning_rate": 0.0005494675577318875, - "loss": 1.1495, - "step": 179200 - }, - { - "epoch": 36.05, - "learning_rate": 0.0005480149108831029, - "loss": 1.1467, - "step": 179300 - }, - { - "epoch": 36.07, - "learning_rate": 0.0005465636474911603, - "loss": 1.1519, - "step": 179400 - }, - { - "epoch": 36.09, - "learning_rate": 0.000545113770416177, - "loss": 1.1524, - "step": 179500 - }, - { - "epoch": 36.11, - "learning_rate": 0.0005436652825155394, - "loss": 1.1548, - "step": 179600 - }, - { - "epoch": 36.13, - "learning_rate": 0.0005422181866438958, - "loss": 1.1576, - "step": 179700 - }, - { - "epoch": 36.15, - "learning_rate": 0.0005407724856531514, - "loss": 1.1558, - "step": 179800 - }, - { - "epoch": 36.17, - "learning_rate": 0.0005393281823924612, - "loss": 1.1539, - "step": 179900 - }, - { - "epoch": 36.19, - "learning_rate": 0.000537885279708227, - "loss": 1.1562, - "step": 180000 - }, - { - "epoch": 36.21, - "learning_rate": 0.0005364437804440896, - "loss": 1.1551, - "step": 180100 - }, - { - "epoch": 36.23, - "learning_rate": 0.0005350036874409248, - "loss": 1.1554, - "step": 180200 - }, - { - "epoch": 36.25, - "learning_rate": 0.0005335650035368354, - "loss": 1.1547, - "step": 180300 - }, - { - "epoch": 36.27, - "learning_rate": 0.0005321277315671485, - "loss": 1.159, - "step": 180400 - }, - { - "epoch": 36.29, - "learning_rate": 0.0005306918743644085, - "loss": 1.1551, - "step": 180500 - }, - { - "epoch": 36.31, - "learning_rate": 0.0005292574347583714, - "loss": 1.1557, - "step": 180600 - }, - { - "epoch": 36.33, - "learning_rate": 0.0005278244155759988, - "loss": 1.1572, - "step": 180700 - }, - { - "epoch": 36.35, - "learning_rate": 0.0005263928196414538, - "loss": 1.1536, - "step": 180800 - }, - { - "epoch": 36.37, - "learning_rate": 0.0005249626497760943, - "loss": 1.1544, - "step": 180900 - }, - { - "epoch": 36.39, - "learning_rate": 0.0005235339087984682, - "loss": 1.1571, - "step": 181000 - }, - { - "epoch": 36.41, - "learning_rate": 0.0005221065995243063, - "loss": 1.1575, - "step": 181100 - }, - { - "epoch": 36.43, - "learning_rate": 0.0005206807247665185, - "loss": 1.1507, - "step": 181200 - }, - { - "epoch": 36.46, - "learning_rate": 0.0005192562873351877, - "loss": 1.1566, - "step": 181300 - }, - { - "epoch": 36.48, - "learning_rate": 0.0005178332900375647, - "loss": 1.1561, - "step": 181400 - }, - { - "epoch": 36.5, - "learning_rate": 0.0005164117356780603, - "loss": 1.1568, - "step": 181500 - }, - { - "epoch": 36.52, - "learning_rate": 0.000514991627058243, - "loss": 1.1599, - "step": 181600 - }, - { - "epoch": 36.54, - "learning_rate": 0.0005135729669768321, - "loss": 1.1601, - "step": 181700 - }, - { - "epoch": 36.56, - "learning_rate": 0.0005121557582296915, - "loss": 1.1561, - "step": 181800 - }, - { - "epoch": 36.58, - "learning_rate": 0.0005107400036098259, - "loss": 1.1579, - "step": 181900 - }, - { - "epoch": 36.6, - "learning_rate": 0.0005093257059073723, - "loss": 1.1532, - "step": 182000 - }, - { - "epoch": 36.62, - "learning_rate": 0.0005079128679095983, - "loss": 1.159, - "step": 182100 - }, - { - "epoch": 36.64, - "learning_rate": 0.0005065014924008942, - "loss": 1.1549, - "step": 182200 - }, - { - "epoch": 36.66, - "learning_rate": 0.0005051056740029337, - "loss": 1.1537, - "step": 182300 - }, - { - "epoch": 36.68, - "learning_rate": 0.000503697217119769, - "loss": 1.1526, - "step": 182400 - }, - { - "epoch": 36.7, - "learning_rate": 0.0005022902310337858, - "loss": 1.1616, - "step": 182500 - }, - { - "epoch": 36.72, - "learning_rate": 0.000500884718517842, - "loss": 1.1551, - "step": 182600 - }, - { - "epoch": 36.74, - "learning_rate": 0.0004994806823418908, - "loss": 1.1537, - "step": 182700 - }, - { - "epoch": 36.76, - "learning_rate": 0.0004980781252729766, - "loss": 1.1562, - "step": 182800 - }, - { - "epoch": 36.78, - "learning_rate": 0.000496677050075227, - "loss": 1.1569, - "step": 182900 - }, - { - "epoch": 36.8, - "learning_rate": 0.000495277459509851, - "loss": 1.1549, - "step": 183000 - }, - { - "epoch": 36.82, - "learning_rate": 0.0004938793563351308, - "loss": 1.1525, - "step": 183100 - }, - { - "epoch": 36.84, - "learning_rate": 0.0004924827433064183, - "loss": 1.1588, - "step": 183200 - }, - { - "epoch": 36.86, - "learning_rate": 0.0004910876231761266, - "loss": 1.1492, - "step": 183300 - }, - { - "epoch": 36.88, - "learning_rate": 0.0004896939986937287, - "loss": 1.1494, - "step": 183400 - }, - { - "epoch": 36.9, - "learning_rate": 0.000488301872605749, - "loss": 1.1526, - "step": 183500 - }, - { - "epoch": 36.92, - "learning_rate": 0.00048691124765575953, - "loss": 1.1514, - "step": 183600 - }, - { - "epoch": 36.94, - "learning_rate": 0.0004855221265843726, - "loss": 1.1562, - "step": 183700 - }, - { - "epoch": 36.96, - "learning_rate": 0.00048413451212923826, - "loss": 1.1559, - "step": 183800 - }, - { - "epoch": 36.98, - "learning_rate": 0.0004827622605958196, - "loss": 1.1564, - "step": 183900 - }, - { - "epoch": 37.0, - "learning_rate": 0.0004813776524399174, - "loss": 1.1529, - "step": 184000 - }, - { - "epoch": 37.0, - "eval_accuracy": 0.416447232778523, - "eval_loss": 1.121274709701538, - "eval_runtime": 19.5197, - "eval_samples_per_second": 4076.295, - "eval_steps_per_second": 15.933, - "step": 184010 - }, - { - "epoch": 37.02, - "learning_rate": 0.0004799945590681072, - "loss": 1.1429, - "step": 184100 - }, - { - "epoch": 37.04, - "learning_rate": 0.0004786129832061578, - "loss": 1.143, - "step": 184200 - }, - { - "epoch": 37.06, - "learning_rate": 0.00047723292757684944, - "loss": 1.1453, - "step": 184300 - }, - { - "epoch": 37.08, - "learning_rate": 0.00047585439489996554, - "loss": 1.1486, - "step": 184400 - }, - { - "epoch": 37.1, - "learning_rate": 0.0004744773878922883, - "loss": 1.1424, - "step": 184500 - }, - { - "epoch": 37.12, - "learning_rate": 0.0004731019092675921, - "loss": 1.1469, - "step": 184600 - }, - { - "epoch": 37.14, - "learning_rate": 0.00047172796173664076, - "loss": 1.1446, - "step": 184700 - }, - { - "epoch": 37.16, - "learning_rate": 0.0004703555480071799, - "loss": 1.1482, - "step": 184800 - }, - { - "epoch": 37.18, - "learning_rate": 0.00046898467078393294, - "loss": 1.1476, - "step": 184900 - }, - { - "epoch": 37.2, - "learning_rate": 0.00046761533276859366, - "loss": 1.1463, - "step": 185000 - }, - { - "epoch": 37.22, - "learning_rate": 0.0004662475366598239, - "loss": 1.1455, - "step": 185100 - }, - { - "epoch": 37.24, - "learning_rate": 0.00046488128515324634, - "loss": 1.1469, - "step": 185200 - }, - { - "epoch": 37.26, - "learning_rate": 0.00046351658094144005, - "loss": 1.1439, - "step": 185300 - }, - { - "epoch": 37.28, - "learning_rate": 0.0004621534267139332, - "loss": 1.1458, - "step": 185400 - }, - { - "epoch": 37.3, - "learning_rate": 0.00046079182515720076, - "loss": 1.1454, - "step": 185500 - }, - { - "epoch": 37.32, - "learning_rate": 0.00045943177895465734, - "loss": 1.1487, - "step": 185600 - }, - { - "epoch": 37.34, - "learning_rate": 0.0004580732907866525, - "loss": 1.1434, - "step": 185700 - }, - { - "epoch": 37.36, - "learning_rate": 0.00045671636333046426, - "loss": 1.152, - "step": 185800 - }, - { - "epoch": 37.38, - "learning_rate": 0.00045536099926029585, - "loss": 1.1452, - "step": 185900 - }, - { - "epoch": 37.4, - "learning_rate": 0.00045400720124726915, - "loss": 1.1455, - "step": 186000 - }, - { - "epoch": 37.42, - "learning_rate": 0.00045265497195942, - "loss": 1.146, - "step": 186100 - }, - { - "epoch": 37.44, - "learning_rate": 0.00045130431406169156, - "loss": 1.1436, - "step": 186200 - }, - { - "epoch": 37.46, - "learning_rate": 0.00044995523021593126, - "loss": 1.1471, - "step": 186300 - }, - { - "epoch": 37.48, - "learning_rate": 0.0004486077230808838, - "loss": 1.1461, - "step": 186400 - }, - { - "epoch": 37.5, - "learning_rate": 0.00044726179531218727, - "loss": 1.1437, - "step": 186500 - }, - { - "epoch": 37.52, - "learning_rate": 0.00044591744956236595, - "loss": 1.1465, - "step": 186600 - }, - { - "epoch": 37.54, - "learning_rate": 0.00044457468848082713, - "loss": 1.1502, - "step": 186700 - }, - { - "epoch": 37.56, - "learning_rate": 0.00044323351471385527, - "loss": 1.1474, - "step": 186800 - }, - { - "epoch": 37.58, - "learning_rate": 0.0004418939309046065, - "loss": 1.1465, - "step": 186900 - }, - { - "epoch": 37.6, - "learning_rate": 0.00044056931171319787, - "loss": 1.1458, - "step": 187000 - }, - { - "epoch": 37.62, - "learning_rate": 0.0004392328997709341, - "loss": 1.1453, - "step": 187100 - }, - { - "epoch": 37.64, - "learning_rate": 0.00043789808567071725, - "loss": 1.1459, - "step": 187200 - }, - { - "epoch": 37.66, - "learning_rate": 0.0004365648720431699, - "loss": 1.144, - "step": 187300 - }, - { - "epoch": 37.68, - "learning_rate": 0.0004352332615157606, - "loss": 1.1478, - "step": 187400 - }, - { - "epoch": 37.7, - "learning_rate": 0.00043390325671279736, - "loss": 1.1437, - "step": 187500 - }, - { - "epoch": 37.72, - "learning_rate": 0.00043257486025542497, - "loss": 1.1467, - "step": 187600 - }, - { - "epoch": 37.74, - "learning_rate": 0.0004312480747616181, - "loss": 1.1486, - "step": 187700 - }, - { - "epoch": 37.76, - "learning_rate": 0.00042992290284617695, - "loss": 1.1459, - "step": 187800 - }, - { - "epoch": 37.78, - "learning_rate": 0.00042859934712072045, - "loss": 1.1466, - "step": 187900 - }, - { - "epoch": 37.8, - "learning_rate": 0.00042727741019368354, - "loss": 1.147, - "step": 188000 - }, - { - "epoch": 37.82, - "learning_rate": 0.0004259570946703109, - "loss": 1.1458, - "step": 188100 - }, - { - "epoch": 37.84, - "learning_rate": 0.00042463840315265153, - "loss": 1.1443, - "step": 188200 - }, - { - "epoch": 37.86, - "learning_rate": 0.00042332133823955317, - "loss": 1.1469, - "step": 188300 - }, - { - "epoch": 37.88, - "learning_rate": 0.00042200590252665886, - "loss": 1.1453, - "step": 188400 - }, - { - "epoch": 37.9, - "learning_rate": 0.0004206920986064004, - "loss": 1.1479, - "step": 188500 - }, - { - "epoch": 37.92, - "learning_rate": 0.00041937992906799436, - "loss": 1.1438, - "step": 188600 - }, - { - "epoch": 37.94, - "learning_rate": 0.00041806939649743484, - "loss": 1.1443, - "step": 188700 - }, - { - "epoch": 37.96, - "learning_rate": 0.00041676050347749116, - "loss": 1.1442, - "step": 188800 - }, - { - "epoch": 37.98, - "learning_rate": 0.00041545325258770095, - "loss": 1.1463, - "step": 188900 - }, - { - "epoch": 38.0, - "eval_accuracy": 0.41712748843756287, - "eval_loss": 1.113813042640686, - "eval_runtime": 19.8226, - "eval_samples_per_second": 4013.998, - "eval_steps_per_second": 15.689, - "step": 188983 - }, - { - "epoch": 38.0, - "learning_rate": 0.00041414764640436614, - "loss": 1.1435, - "step": 189000 - }, - { - "epoch": 38.02, - "learning_rate": 0.0004128436875005459, - "loss": 1.1341, - "step": 189100 - }, - { - "epoch": 38.04, - "learning_rate": 0.0004115413784460545, - "loss": 1.1316, - "step": 189200 - }, - { - "epoch": 38.06, - "learning_rate": 0.00041024072180745373, - "loss": 1.1341, - "step": 189300 - }, - { - "epoch": 38.08, - "learning_rate": 0.00040894172014805, - "loss": 1.1296, - "step": 189400 - }, - { - "epoch": 38.1, - "learning_rate": 0.00040764437602788627, - "loss": 1.1366, - "step": 189500 - }, - { - "epoch": 38.12, - "learning_rate": 0.00040634869200374056, - "loss": 1.1403, - "step": 189600 - }, - { - "epoch": 38.14, - "learning_rate": 0.00040505467062911833, - "loss": 1.1389, - "step": 189700 - }, - { - "epoch": 38.16, - "learning_rate": 0.00040376231445424904, - "loss": 1.1316, - "step": 189800 - }, - { - "epoch": 38.18, - "learning_rate": 0.00040247162602607923, - "loss": 1.1382, - "step": 189900 - }, - { - "epoch": 38.2, - "learning_rate": 0.00040118260788826965, - "loss": 1.1355, - "step": 190000 - }, - { - "epoch": 38.22, - "learning_rate": 0.00039989526258118915, - "loss": 1.1365, - "step": 190100 - }, - { - "epoch": 38.24, - "learning_rate": 0.00039860959264191007, - "loss": 1.136, - "step": 190200 - }, - { - "epoch": 38.26, - "learning_rate": 0.0003973256006042017, - "loss": 1.1405, - "step": 190300 - }, - { - "epoch": 38.28, - "learning_rate": 0.00039604328899852786, - "loss": 1.1402, - "step": 190400 - }, - { - "epoch": 38.3, - "learning_rate": 0.00039476266035204003, - "loss": 1.1375, - "step": 190500 - }, - { - "epoch": 38.33, - "learning_rate": 0.00039348371718857386, - "loss": 1.1352, - "step": 190600 - }, - { - "epoch": 38.35, - "learning_rate": 0.0003922064620286414, - "loss": 1.1363, - "step": 190700 - }, - { - "epoch": 38.37, - "learning_rate": 0.00039093089738942975, - "loss": 1.1377, - "step": 190800 - }, - { - "epoch": 38.39, - "learning_rate": 0.0003896570257847937, - "loss": 1.1385, - "step": 190900 - }, - { - "epoch": 38.41, - "learning_rate": 0.00038839756308466366, - "loss": 1.1418, - "step": 191000 - }, - { - "epoch": 38.43, - "learning_rate": 0.0003871270680844698, - "loss": 1.1369, - "step": 191100 - }, - { - "epoch": 38.45, - "learning_rate": 0.0003858582736153553, - "loss": 1.1408, - "step": 191200 - }, - { - "epoch": 38.47, - "learning_rate": 0.00038459118217783184, - "loss": 1.1376, - "step": 191300 - }, - { - "epoch": 38.49, - "learning_rate": 0.000383325796269056, - "loss": 1.1401, - "step": 191400 - }, - { - "epoch": 38.51, - "learning_rate": 0.0003820621183828224, - "loss": 1.1371, - "step": 191500 - }, - { - "epoch": 38.53, - "learning_rate": 0.0003808001510095603, - "loss": 1.1346, - "step": 191600 - }, - { - "epoch": 38.55, - "learning_rate": 0.0003795398966363266, - "loss": 1.1439, - "step": 191700 - }, - { - "epoch": 38.57, - "learning_rate": 0.0003782813577468032, - "loss": 1.1426, - "step": 191800 - }, - { - "epoch": 38.59, - "learning_rate": 0.00037702453682129114, - "loss": 1.1383, - "step": 191900 - }, - { - "epoch": 38.61, - "learning_rate": 0.00037576943633670605, - "loss": 1.1362, - "step": 192000 - }, - { - "epoch": 38.63, - "learning_rate": 0.0003745160587665715, - "loss": 1.1356, - "step": 192100 - }, - { - "epoch": 38.65, - "learning_rate": 0.000373264406581017, - "loss": 1.1333, - "step": 192200 - }, - { - "epoch": 38.67, - "learning_rate": 0.00037201448224677153, - "loss": 1.14, - "step": 192300 - }, - { - "epoch": 38.69, - "learning_rate": 0.0003707662882271585, - "loss": 1.1349, - "step": 192400 - }, - { - "epoch": 38.71, - "learning_rate": 0.00036951982698209094, - "loss": 1.1371, - "step": 192500 - }, - { - "epoch": 38.73, - "learning_rate": 0.0003682751009680678, - "loss": 1.1391, - "step": 192600 - }, - { - "epoch": 38.75, - "learning_rate": 0.0003670321126381676, - "loss": 1.134, - "step": 192700 - }, - { - "epoch": 38.77, - "learning_rate": 0.000365790864442045, - "loss": 1.1358, - "step": 192800 - }, - { - "epoch": 38.79, - "learning_rate": 0.0003645513588259242, - "loss": 1.1331, - "step": 192900 - }, - { - "epoch": 38.81, - "learning_rate": 0.0003633135982325958, - "loss": 1.1376, - "step": 193000 - }, - { - "epoch": 38.83, - "learning_rate": 0.0003620775851014114, - "loss": 1.1353, - "step": 193100 - }, - { - "epoch": 38.85, - "learning_rate": 0.00036084332186827895, - "loss": 1.1376, - "step": 193200 - }, - { - "epoch": 38.87, - "learning_rate": 0.0003596231273926687, - "loss": 1.1346, - "step": 193300 - }, - { - "epoch": 38.89, - "learning_rate": 0.0003583923536899551, - "loss": 1.1375, - "step": 193400 - }, - { - "epoch": 38.91, - "learning_rate": 0.0003571633371480666, - "loss": 1.1387, - "step": 193500 - }, - { - "epoch": 38.93, - "learning_rate": 0.0003559360801891219, - "loss": 1.1375, - "step": 193600 - }, - { - "epoch": 38.95, - "learning_rate": 0.00035471058523177265, - "loss": 1.1357, - "step": 193700 - }, - { - "epoch": 38.97, - "learning_rate": 0.00035348685469119664, - "loss": 1.136, - "step": 193800 - }, - { - "epoch": 38.99, - "learning_rate": 0.0003522648909790957, - "loss": 1.1352, - "step": 193900 - }, - { - "epoch": 39.0, - "eval_accuracy": 0.41790908246302183, - "eval_loss": 1.1047524213790894, - "eval_runtime": 19.8895, - "eval_samples_per_second": 4000.506, - "eval_steps_per_second": 15.636, - "step": 193956 - }, - { - "epoch": 39.01, - "learning_rate": 0.000351044696503689, - "loss": 1.1315, - "step": 194000 - }, - { - "epoch": 39.03, - "learning_rate": 0.0003498262736697096, - "loss": 1.1213, - "step": 194100 - }, - { - "epoch": 39.05, - "learning_rate": 0.00034860962487839767, - "loss": 1.126, - "step": 194200 - }, - { - "epoch": 39.07, - "learning_rate": 0.00034739475252749854, - "loss": 1.125, - "step": 194300 - }, - { - "epoch": 39.09, - "learning_rate": 0.0003461816590112558, - "loss": 1.1259, - "step": 194400 - }, - { - "epoch": 39.11, - "learning_rate": 0.00034497034672040824, - "loss": 1.1266, - "step": 194500 - }, - { - "epoch": 39.13, - "learning_rate": 0.00034376081804218263, - "loss": 1.1286, - "step": 194600 - }, - { - "epoch": 39.15, - "learning_rate": 0.00034255307536029206, - "loss": 1.1286, - "step": 194700 - }, - { - "epoch": 39.17, - "learning_rate": 0.00034134712105492946, - "loss": 1.1294, - "step": 194800 - }, - { - "epoch": 39.19, - "learning_rate": 0.00034014295750276326, - "loss": 1.1256, - "step": 194900 - }, - { - "epoch": 39.21, - "learning_rate": 0.0003389405870769323, - "loss": 1.1315, - "step": 195000 - }, - { - "epoch": 39.23, - "learning_rate": 0.00033774001214704184, - "loss": 1.1328, - "step": 195100 - }, - { - "epoch": 39.25, - "learning_rate": 0.00033654123507915887, - "loss": 1.1278, - "step": 195200 - }, - { - "epoch": 39.27, - "learning_rate": 0.0003353562190853805, - "loss": 1.1284, - "step": 195300 - }, - { - "epoch": 39.29, - "learning_rate": 0.00033416102678803547, - "loss": 1.1292, - "step": 195400 - }, - { - "epoch": 39.31, - "learning_rate": 0.00033296763940608335, - "loss": 1.1335, - "step": 195500 - }, - { - "epoch": 39.33, - "learning_rate": 0.0003317760592914265, - "loss": 1.1264, - "step": 195600 - }, - { - "epoch": 39.35, - "learning_rate": 0.00033058628879240477, - "loss": 1.1293, - "step": 195700 - }, - { - "epoch": 39.37, - "learning_rate": 0.0003293983302537923, - "loss": 1.1275, - "step": 195800 - }, - { - "epoch": 39.39, - "learning_rate": 0.00032821218601679066, - "loss": 1.127, - "step": 195900 - }, - { - "epoch": 39.41, - "learning_rate": 0.00032702785841902783, - "loss": 1.1287, - "step": 196000 - }, - { - "epoch": 39.43, - "learning_rate": 0.0003258453497945503, - "loss": 1.1278, - "step": 196100 - }, - { - "epoch": 39.45, - "learning_rate": 0.0003246646624738207, - "loss": 1.1296, - "step": 196200 - }, - { - "epoch": 39.47, - "learning_rate": 0.00032348579878371114, - "loss": 1.1309, - "step": 196300 - }, - { - "epoch": 39.49, - "learning_rate": 0.00032230876104750046, - "loss": 1.1274, - "step": 196400 - }, - { - "epoch": 39.51, - "learning_rate": 0.000321133551584869, - "loss": 1.1295, - "step": 196500 - }, - { - "epoch": 39.53, - "learning_rate": 0.00031996017271189437, - "loss": 1.1271, - "step": 196600 - }, - { - "epoch": 39.55, - "learning_rate": 0.0003187886267410448, - "loss": 1.1254, - "step": 196700 - }, - { - "epoch": 39.57, - "learning_rate": 0.0003176189159811782, - "loss": 1.1303, - "step": 196800 - }, - { - "epoch": 39.59, - "learning_rate": 0.00031645104273753474, - "loss": 1.1273, - "step": 196900 - }, - { - "epoch": 39.61, - "learning_rate": 0.0003152850093117338, - "loss": 1.13, - "step": 197000 - }, - { - "epoch": 39.63, - "learning_rate": 0.0003141208180017678, - "loss": 1.1253, - "step": 197100 - }, - { - "epoch": 39.65, - "learning_rate": 0.00031295847110199976, - "loss": 1.1286, - "step": 197200 - }, - { - "epoch": 39.67, - "learning_rate": 0.0003117979709031578, - "loss": 1.1295, - "step": 197300 - }, - { - "epoch": 39.69, - "learning_rate": 0.0003106393196923304, - "loss": 1.1279, - "step": 197400 - }, - { - "epoch": 39.71, - "learning_rate": 0.00030949407858107237, - "loss": 1.1279, - "step": 197500 - }, - { - "epoch": 39.73, - "learning_rate": 0.00030833911364617274, - "loss": 1.1268, - "step": 197600 - }, - { - "epoch": 39.75, - "learning_rate": 0.0003071860045159272, - "loss": 1.1229, - "step": 197700 - }, - { - "epoch": 39.77, - "learning_rate": 0.00030603475346285824, - "loss": 1.1263, - "step": 197800 - }, - { - "epoch": 39.79, - "learning_rate": 0.0003048853627558264, - "loss": 1.1254, - "step": 197900 - }, - { - "epoch": 39.81, - "learning_rate": 0.0003037378346600259, - "loss": 1.1294, - "step": 198000 - }, - { - "epoch": 39.83, - "learning_rate": 0.0003025921714369792, - "loss": 1.1278, - "step": 198100 - }, - { - "epoch": 39.85, - "learning_rate": 0.0003014483753445349, - "loss": 1.1253, - "step": 198200 - }, - { - "epoch": 39.87, - "learning_rate": 0.000300306448636861, - "loss": 1.1272, - "step": 198300 - }, - { - "epoch": 39.89, - "learning_rate": 0.0002991663935644423, - "loss": 1.1225, - "step": 198400 - }, - { - "epoch": 39.91, - "learning_rate": 0.0002980282123740735, - "loss": 1.1233, - "step": 198500 - }, - { - "epoch": 39.93, - "learning_rate": 0.00029689190730885747, - "loss": 1.1229, - "step": 198600 - }, - { - "epoch": 39.95, - "learning_rate": 0.00029575748060819946, - "loss": 1.1273, - "step": 198700 - }, - { - "epoch": 39.97, - "learning_rate": 0.00029462493450780307, - "loss": 1.1271, - "step": 198800 - }, - { - "epoch": 39.99, - "learning_rate": 0.0002934942712396647, - "loss": 1.1259, - "step": 198900 - }, - { - "epoch": 40.0, - "eval_accuracy": 0.41850995375025135, - "eval_loss": 1.096311330795288, - "eval_runtime": 19.5694, - "eval_samples_per_second": 4065.946, - "eval_steps_per_second": 15.892, - "step": 198930 - }, - { - "epoch": 40.01, - "learning_rate": 0.0002923654930320711, - "loss": 1.116, - "step": 199000 - }, - { - "epoch": 40.03, - "learning_rate": 0.0002912386021095936, - "loss": 1.1166, - "step": 199100 - }, - { - "epoch": 40.05, - "learning_rate": 0.00029011360069308446, - "loss": 1.1156, - "step": 199200 - }, - { - "epoch": 40.07, - "learning_rate": 0.0002889904909996713, - "loss": 1.1193, - "step": 199300 - }, - { - "epoch": 40.09, - "learning_rate": 0.0002878692752427543, - "loss": 1.1196, - "step": 199400 - }, - { - "epoch": 40.11, - "learning_rate": 0.0002867611394349388, - "loss": 1.123, - "step": 199500 - }, - { - "epoch": 40.13, - "learning_rate": 0.0002856436991818517, - "loss": 1.117, - "step": 199600 - }, - { - "epoch": 40.15, - "learning_rate": 0.00028452815946104537, - "loss": 1.1189, - "step": 199700 - }, - { - "epoch": 40.17, - "learning_rate": 0.00028341452247099996, - "loss": 1.1162, - "step": 199800 - }, - { - "epoch": 40.2, - "learning_rate": 0.0002823027904064475, - "loss": 1.1149, - "step": 199900 - }, - { - "epoch": 40.22, - "learning_rate": 0.00028119296545836515, - "loss": 1.1151, - "step": 200000 - }, - { - "epoch": 40.24, - "learning_rate": 0.000280085049813972, - "loss": 1.1168, - "step": 200100 - }, - { - "epoch": 40.26, - "learning_rate": 0.00027897904565672314, - "loss": 1.1182, - "step": 200200 - }, - { - "epoch": 40.28, - "learning_rate": 0.00027787495516630776, - "loss": 1.1149, - "step": 200300 - }, - { - "epoch": 40.3, - "learning_rate": 0.0002767727805186432, - "loss": 1.1155, - "step": 200400 - }, - { - "epoch": 40.32, - "learning_rate": 0.0002756725238858715, - "loss": 1.1182, - "step": 200500 - }, - { - "epoch": 40.34, - "learning_rate": 0.00027457418743635374, - "loss": 1.1201, - "step": 200600 - }, - { - "epoch": 40.36, - "learning_rate": 0.00027347777333466746, - "loss": 1.1188, - "step": 200700 - }, - { - "epoch": 40.38, - "learning_rate": 0.0002723832837416017, - "loss": 1.1222, - "step": 200800 - }, - { - "epoch": 40.4, - "learning_rate": 0.0002712907208141528, - "loss": 1.1186, - "step": 200900 - }, - { - "epoch": 40.42, - "learning_rate": 0.00027020008670551935, - "loss": 1.121, - "step": 201000 - }, - { - "epoch": 40.44, - "learning_rate": 0.0002691113835650995, - "loss": 1.1172, - "step": 201100 - }, - { - "epoch": 40.46, - "learning_rate": 0.0002680246135384853, - "loss": 1.1223, - "step": 201200 - }, - { - "epoch": 40.48, - "learning_rate": 0.00026693977876746, - "loss": 1.118, - "step": 201300 - }, - { - "epoch": 40.5, - "learning_rate": 0.0002658568813899914, - "loss": 1.1155, - "step": 201400 - }, - { - "epoch": 40.52, - "learning_rate": 0.00026478672351106777, - "loss": 1.1191, - "step": 201500 - }, - { - "epoch": 40.54, - "learning_rate": 0.00026370768789222914, - "loss": 1.1167, - "step": 201600 - }, - { - "epoch": 40.56, - "learning_rate": 0.0002626305960366815, - "loss": 1.1194, - "step": 201700 - }, - { - "epoch": 40.58, - "learning_rate": 0.00026155545006713424, - "loss": 1.1171, - "step": 201800 - }, - { - "epoch": 40.6, - "learning_rate": 0.00026048225210246063, - "loss": 1.1199, - "step": 201900 - }, - { - "epoch": 40.62, - "learning_rate": 0.0002594110042576962, - "loss": 1.1161, - "step": 202000 - }, - { - "epoch": 40.64, - "learning_rate": 0.00025834170864403287, - "loss": 1.1146, - "step": 202100 - }, - { - "epoch": 40.66, - "learning_rate": 0.00025727436736881505, - "loss": 1.117, - "step": 202200 - }, - { - "epoch": 40.68, - "learning_rate": 0.00025620898253553515, - "loss": 1.1194, - "step": 202300 - }, - { - "epoch": 40.7, - "learning_rate": 0.00025514555624383053, - "loss": 1.1188, - "step": 202400 - }, - { - "epoch": 40.72, - "learning_rate": 0.0002540840905894784, - "loss": 1.1166, - "step": 202500 - }, - { - "epoch": 40.74, - "learning_rate": 0.0002530245876643923, - "loss": 1.1188, - "step": 202600 - }, - { - "epoch": 40.76, - "learning_rate": 0.0002519670495566169, - "loss": 1.1191, - "step": 202700 - }, - { - "epoch": 40.78, - "learning_rate": 0.00025091147835032526, - "loss": 1.1151, - "step": 202800 - }, - { - "epoch": 40.8, - "learning_rate": 0.00024985787612581423, - "loss": 1.1194, - "step": 202900 - }, - { - "epoch": 40.82, - "learning_rate": 0.00024880624495950024, - "loss": 1.1167, - "step": 203000 - }, - { - "epoch": 40.84, - "learning_rate": 0.00024775658692391416, - "loss": 1.1208, - "step": 203100 - }, - { - "epoch": 40.86, - "learning_rate": 0.0002467089040876995, - "loss": 1.1195, - "step": 203200 - }, - { - "epoch": 40.88, - "learning_rate": 0.00024566319851560675, - "loss": 1.1142, - "step": 203300 - }, - { - "epoch": 40.9, - "learning_rate": 0.00024461947226848984, - "loss": 1.1159, - "step": 203400 - }, - { - "epoch": 40.92, - "learning_rate": 0.00024357772740330076, - "loss": 1.1104, - "step": 203500 - }, - { - "epoch": 40.94, - "learning_rate": 0.00024254835376265563, - "loss": 1.1169, - "step": 203600 - }, - { - "epoch": 40.96, - "learning_rate": 0.00024151055795158666, - "loss": 1.1174, - "step": 203700 - }, - { - "epoch": 40.98, - "learning_rate": 0.00024047474964942626, - "loss": 1.1196, - "step": 203800 - }, - { - "epoch": 41.0, - "learning_rate": 0.00023944093089752302, - "loss": 1.1194, - "step": 203900 - }, - { - "epoch": 41.0, - "eval_accuracy": 0.41941595374516055, - "eval_loss": 1.0883480310440063, - "eval_runtime": 19.8865, - "eval_samples_per_second": 4001.112, - "eval_steps_per_second": 15.639, - "step": 203903 - }, - { - "epoch": 41.02, - "learning_rate": 0.00023840910373330374, - "loss": 1.1042, - "step": 204000 - }, - { - "epoch": 41.04, - "learning_rate": 0.00023737927019027105, - "loss": 1.1048, - "step": 204100 - }, - { - "epoch": 41.06, - "learning_rate": 0.00023635143229799844, - "loss": 1.1014, - "step": 204200 - }, - { - "epoch": 41.08, - "learning_rate": 0.0002353255920821265, - "loss": 1.1069, - "step": 204300 - }, - { - "epoch": 41.1, - "learning_rate": 0.0002343017515643582, - "loss": 1.107, - "step": 204400 - }, - { - "epoch": 41.12, - "learning_rate": 0.00023327991276245617, - "loss": 1.1049, - "step": 204500 - }, - { - "epoch": 41.14, - "learning_rate": 0.00023226007769023795, - "loss": 1.106, - "step": 204600 - }, - { - "epoch": 41.16, - "learning_rate": 0.00023124224835757254, - "loss": 1.109, - "step": 204700 - }, - { - "epoch": 41.18, - "learning_rate": 0.00023022642677037505, - "loss": 1.1107, - "step": 204800 - }, - { - "epoch": 41.2, - "learning_rate": 0.0002292126149306048, - "loss": 1.1064, - "step": 204900 - }, - { - "epoch": 41.22, - "learning_rate": 0.00022820081483625993, - "loss": 1.1077, - "step": 205000 - }, - { - "epoch": 41.24, - "learning_rate": 0.00022719102848137426, - "loss": 1.1066, - "step": 205100 - }, - { - "epoch": 41.26, - "learning_rate": 0.0002261832578560119, - "loss": 1.1104, - "step": 205200 - }, - { - "epoch": 41.28, - "learning_rate": 0.00022517750494626537, - "loss": 1.1109, - "step": 205300 - }, - { - "epoch": 41.3, - "learning_rate": 0.00022417377173425068, - "loss": 1.1095, - "step": 205400 - }, - { - "epoch": 41.32, - "learning_rate": 0.00022317206019810355, - "loss": 1.1118, - "step": 205500 - }, - { - "epoch": 41.34, - "learning_rate": 0.00022218235916729556, - "loss": 1.1085, - "step": 205600 - }, - { - "epoch": 41.36, - "learning_rate": 0.00022118467663540713, - "loss": 1.1091, - "step": 205700 - }, - { - "epoch": 41.38, - "learning_rate": 0.00022018902167022874, - "loss": 1.1113, - "step": 205800 - }, - { - "epoch": 41.4, - "learning_rate": 0.00021919539623397572, - "loss": 1.1041, - "step": 205900 - }, - { - "epoch": 41.42, - "learning_rate": 0.00021820380228486328, - "loss": 1.1076, - "step": 206000 - }, - { - "epoch": 41.44, - "learning_rate": 0.00021721424177710335, - "loss": 1.1081, - "step": 206100 - }, - { - "epoch": 41.46, - "learning_rate": 0.00021622671666089957, - "loss": 1.1087, - "step": 206200 - }, - { - "epoch": 41.48, - "learning_rate": 0.00021524122888244514, - "loss": 1.1065, - "step": 206300 - }, - { - "epoch": 41.5, - "learning_rate": 0.00021425778038391768, - "loss": 1.1096, - "step": 206400 - }, - { - "epoch": 41.52, - "learning_rate": 0.00021327637310347648, - "loss": 1.1105, - "step": 206500 - }, - { - "epoch": 41.54, - "learning_rate": 0.0002122970089752567, - "loss": 1.109, - "step": 206600 - }, - { - "epoch": 41.56, - "learning_rate": 0.00021131968992936804, - "loss": 1.1114, - "step": 206700 - }, - { - "epoch": 41.58, - "learning_rate": 0.00021034441789188953, - "loss": 1.1096, - "step": 206800 - }, - { - "epoch": 41.6, - "learning_rate": 0.00020937119478486616, - "loss": 1.1161, - "step": 206900 - }, - { - "epoch": 41.62, - "learning_rate": 0.00020840002252630428, - "loss": 1.1108, - "step": 207000 - }, - { - "epoch": 41.64, - "learning_rate": 0.00020743090303016923, - "loss": 1.1092, - "step": 207100 - }, - { - "epoch": 41.66, - "learning_rate": 0.00020646383820638022, - "loss": 1.1069, - "step": 207200 - }, - { - "epoch": 41.68, - "learning_rate": 0.00020549882996080793, - "loss": 1.108, - "step": 207300 - }, - { - "epoch": 41.7, - "learning_rate": 0.0002045358801952689, - "loss": 1.1072, - "step": 207400 - }, - { - "epoch": 41.72, - "learning_rate": 0.00020357499080752366, - "loss": 1.108, - "step": 207500 - }, - { - "epoch": 41.74, - "learning_rate": 0.00020261616369127183, - "loss": 1.1074, - "step": 207600 - }, - { - "epoch": 41.76, - "learning_rate": 0.0002016689581419083, - "loss": 1.1075, - "step": 207700 - }, - { - "epoch": 41.78, - "learning_rate": 0.00020071424056369538, - "loss": 1.1085, - "step": 207800 - }, - { - "epoch": 41.8, - "learning_rate": 0.00019976159089487967, - "loss": 1.1082, - "step": 207900 - }, - { - "epoch": 41.82, - "learning_rate": 0.0001988110110129225, - "loss": 1.1102, - "step": 208000 - }, - { - "epoch": 41.84, - "learning_rate": 0.00019786250279120606, - "loss": 1.1108, - "step": 208100 - }, - { - "epoch": 41.86, - "learning_rate": 0.00019691606809902964, - "loss": 1.108, - "step": 208200 - }, - { - "epoch": 41.88, - "learning_rate": 0.00019597170880160568, - "loss": 1.1053, - "step": 208300 - }, - { - "epoch": 41.9, - "learning_rate": 0.00019502942676005705, - "loss": 1.1093, - "step": 208400 - }, - { - "epoch": 41.92, - "learning_rate": 0.00019408922383141235, - "loss": 1.109, - "step": 208500 - }, - { - "epoch": 41.94, - "learning_rate": 0.0001931511018686033, - "loss": 1.1064, - "step": 208600 - }, - { - "epoch": 41.96, - "learning_rate": 0.0001922150627204597, - "loss": 1.1058, - "step": 208700 - }, - { - "epoch": 41.98, - "learning_rate": 0.00019128110823170717, - "loss": 1.1035, - "step": 208800 - }, - { - "epoch": 42.0, - "eval_accuracy": 0.42008968684058473, - "eval_loss": 1.080112099647522, - "eval_runtime": 19.7792, - "eval_samples_per_second": 4022.819, - "eval_steps_per_second": 15.724, - "step": 208876 - }, - { - "epoch": 42.0, - "learning_rate": 0.00019034924024296282, - "loss": 1.1021, - "step": 208900 - }, - { - "epoch": 42.02, - "learning_rate": 0.00018941946059073192, - "loss": 1.097, - "step": 209000 - }, - { - "epoch": 42.04, - "learning_rate": 0.0001884917711074033, - "loss": 1.0949, - "step": 209100 - }, - { - "epoch": 42.07, - "learning_rate": 0.00018756617362124722, - "loss": 1.094, - "step": 209200 - }, - { - "epoch": 42.09, - "learning_rate": 0.00018664266995641125, - "loss": 1.0974, - "step": 209300 - }, - { - "epoch": 42.11, - "learning_rate": 0.00018572126193291613, - "loss": 1.1003, - "step": 209400 - }, - { - "epoch": 42.13, - "learning_rate": 0.00018480195136665227, - "loss": 1.0985, - "step": 209500 - }, - { - "epoch": 42.15, - "learning_rate": 0.00018388474006937703, - "loss": 1.0984, - "step": 209600 - }, - { - "epoch": 42.17, - "learning_rate": 0.0001829787705446627, - "loss": 1.0988, - "step": 209700 - }, - { - "epoch": 42.19, - "learning_rate": 0.0001820657421663692, - "loss": 1.0996, - "step": 209800 - }, - { - "epoch": 42.21, - "learning_rate": 0.00018115481844952584, - "loss": 1.099, - "step": 209900 - }, - { - "epoch": 42.23, - "learning_rate": 0.00018024600118936058, - "loss": 1.0987, - "step": 210000 - }, - { - "epoch": 42.25, - "learning_rate": 0.0001793392921769506, - "loss": 1.1014, - "step": 210100 - }, - { - "epoch": 42.27, - "learning_rate": 0.0001784346931992184, - "loss": 1.1018, - "step": 210200 - }, - { - "epoch": 42.29, - "learning_rate": 0.0001775322060389281, - "loss": 1.0979, - "step": 210300 - }, - { - "epoch": 42.31, - "learning_rate": 0.000176631832474681, - "loss": 1.1027, - "step": 210400 - }, - { - "epoch": 42.33, - "learning_rate": 0.00017573357428091394, - "loss": 1.0946, - "step": 210500 - }, - { - "epoch": 42.35, - "learning_rate": 0.0001748374332278946, - "loss": 1.0991, - "step": 210600 - }, - { - "epoch": 42.37, - "learning_rate": 0.00017394341108171812, - "loss": 1.1001, - "step": 210700 - }, - { - "epoch": 42.39, - "learning_rate": 0.00017305150960430338, - "loss": 1.0999, - "step": 210800 - }, - { - "epoch": 42.41, - "learning_rate": 0.00017216173055339067, - "loss": 1.1005, - "step": 210900 - }, - { - "epoch": 42.43, - "learning_rate": 0.0001712740756825369, - "loss": 1.0971, - "step": 211000 - }, - { - "epoch": 42.45, - "learning_rate": 0.0001703885467411133, - "loss": 1.1002, - "step": 211100 - }, - { - "epoch": 42.47, - "learning_rate": 0.00016950514547430003, - "loss": 1.1011, - "step": 211200 - }, - { - "epoch": 42.49, - "learning_rate": 0.00016862387362308565, - "loss": 1.1001, - "step": 211300 - }, - { - "epoch": 42.51, - "learning_rate": 0.00016774473292426124, - "loss": 1.0965, - "step": 211400 - }, - { - "epoch": 42.53, - "learning_rate": 0.00016686772511041823, - "loss": 1.0978, - "step": 211500 - }, - { - "epoch": 42.55, - "learning_rate": 0.00016599285190994393, - "loss": 1.098, - "step": 211600 - }, - { - "epoch": 42.57, - "learning_rate": 0.00016512011504701954, - "loss": 1.0967, - "step": 211700 - }, - { - "epoch": 42.59, - "learning_rate": 0.00016425821164064812, - "loss": 1.0975, - "step": 211800 - }, - { - "epoch": 42.61, - "learning_rate": 0.00016338973120231013, - "loss": 1.1008, - "step": 211900 - }, - { - "epoch": 42.63, - "learning_rate": 0.00016252339223169542, - "loss": 1.1004, - "step": 212000 - }, - { - "epoch": 42.65, - "learning_rate": 0.0001616591964361662, - "loss": 1.0986, - "step": 212100 - }, - { - "epoch": 42.67, - "learning_rate": 0.00016079714551885991, - "loss": 1.0978, - "step": 212200 - }, - { - "epoch": 42.69, - "learning_rate": 0.00015993724117868786, - "loss": 1.1004, - "step": 212300 - }, - { - "epoch": 42.71, - "learning_rate": 0.00015907948511033082, - "loss": 1.0987, - "step": 212400 - }, - { - "epoch": 42.73, - "learning_rate": 0.00015822387900423591, - "loss": 1.1042, - "step": 212500 - }, - { - "epoch": 42.75, - "learning_rate": 0.00015737042454661232, - "loss": 1.0976, - "step": 212600 - }, - { - "epoch": 42.77, - "learning_rate": 0.00015652762576620364, - "loss": 1.1006, - "step": 212700 - }, - { - "epoch": 42.79, - "learning_rate": 0.00015567845808881514, - "loss": 1.1024, - "step": 212800 - }, - { - "epoch": 42.81, - "learning_rate": 0.00015483144707635803, - "loss": 1.0976, - "step": 212900 - }, - { - "epoch": 42.83, - "learning_rate": 0.0001539865943981035, - "loss": 1.0966, - "step": 213000 - }, - { - "epoch": 42.85, - "learning_rate": 0.0001531439017190679, - "loss": 1.1029, - "step": 213100 - }, - { - "epoch": 42.87, - "learning_rate": 0.0001523033707000121, - "loss": 1.0952, - "step": 213200 - }, - { - "epoch": 42.89, - "learning_rate": 0.00015146500299743584, - "loss": 1.0991, - "step": 213300 - }, - { - "epoch": 42.91, - "learning_rate": 0.0001506288002635764, - "loss": 1.0977, - "step": 213400 - }, - { - "epoch": 42.93, - "learning_rate": 0.00014979476414640313, - "loss": 1.095, - "step": 213500 - }, - { - "epoch": 42.95, - "learning_rate": 0.00014896289628961654, - "loss": 1.0932, - "step": 213600 - }, - { - "epoch": 42.97, - "learning_rate": 0.00014813319833264339, - "loss": 1.0971, - "step": 213700 - }, - { - "epoch": 42.99, - "learning_rate": 0.00014730567191063472, - "loss": 1.0962, - "step": 213800 - }, - { - "epoch": 43.0, - "eval_accuracy": 0.4207685107173674, - "eval_loss": 1.071601152420044, - "eval_runtime": 19.6492, - "eval_samples_per_second": 4049.421, - "eval_steps_per_second": 15.828, - "step": 213849 - }, - { - "epoch": 43.01, - "learning_rate": 0.00014648031865446083, - "loss": 1.0912, - "step": 213900 - }, - { - "epoch": 43.03, - "learning_rate": 0.00014565714019071016, - "loss": 1.0872, - "step": 214000 - }, - { - "epoch": 43.05, - "learning_rate": 0.00014483613814168474, - "loss": 1.0874, - "step": 214100 - }, - { - "epoch": 43.07, - "learning_rate": 0.0001440173141253978, - "loss": 1.0936, - "step": 214200 - }, - { - "epoch": 43.09, - "learning_rate": 0.00014320066975556919, - "loss": 1.0912, - "step": 214300 - }, - { - "epoch": 43.11, - "learning_rate": 0.000142386206641624, - "loss": 1.0885, - "step": 214400 - }, - { - "epoch": 43.13, - "learning_rate": 0.00014157392638868823, - "loss": 1.0879, - "step": 214500 - }, - { - "epoch": 43.15, - "learning_rate": 0.00014076383059758642, - "loss": 1.0882, - "step": 214600 - }, - { - "epoch": 43.17, - "learning_rate": 0.0001399559208648371, - "loss": 1.0894, - "step": 214700 - }, - { - "epoch": 43.19, - "learning_rate": 0.00013915019878265114, - "loss": 1.0883, - "step": 214800 - }, - { - "epoch": 43.21, - "learning_rate": 0.0001383466659389282, - "loss": 1.0865, - "step": 214900 - }, - { - "epoch": 43.23, - "learning_rate": 0.00013754532391725333, - "loss": 1.0899, - "step": 215000 - }, - { - "epoch": 43.25, - "learning_rate": 0.0001367461742968934, - "loss": 1.09, - "step": 215100 - }, - { - "epoch": 43.27, - "learning_rate": 0.0001359492186527951, - "loss": 1.0892, - "step": 215200 - }, - { - "epoch": 43.29, - "learning_rate": 0.0001351544585555814, - "loss": 1.0893, - "step": 215300 - }, - { - "epoch": 43.31, - "learning_rate": 0.00013436189557154823, - "loss": 1.0895, - "step": 215400 - }, - { - "epoch": 43.33, - "learning_rate": 0.0001335715312626608, - "loss": 1.0932, - "step": 215500 - }, - { - "epoch": 43.35, - "learning_rate": 0.00013278336718655206, - "loss": 1.0897, - "step": 215600 - }, - { - "epoch": 43.37, - "learning_rate": 0.00013199740489651862, - "loss": 1.0899, - "step": 215700 - }, - { - "epoch": 43.39, - "learning_rate": 0.0001312136459415178, - "loss": 1.0911, - "step": 215800 - }, - { - "epoch": 43.41, - "learning_rate": 0.00013043209186616432, - "loss": 1.0859, - "step": 215900 - }, - { - "epoch": 43.43, - "learning_rate": 0.000129652744210728, - "loss": 1.0889, - "step": 216000 - }, - { - "epoch": 43.45, - "learning_rate": 0.00012887560451113, - "loss": 1.0887, - "step": 216100 - }, - { - "epoch": 43.47, - "learning_rate": 0.0001281006742989406, - "loss": 1.0937, - "step": 216200 - }, - { - "epoch": 43.49, - "learning_rate": 0.0001273279551013748, - "loss": 1.0875, - "step": 216300 - }, - { - "epoch": 43.51, - "learning_rate": 0.00012655744844129082, - "loss": 1.0894, - "step": 216400 - }, - { - "epoch": 43.53, - "learning_rate": 0.0001257891558371864, - "loss": 1.088, - "step": 216500 - }, - { - "epoch": 43.55, - "learning_rate": 0.00012502307880319613, - "loss": 1.0907, - "step": 216600 - }, - { - "epoch": 43.57, - "learning_rate": 0.00012426684646913629, - "loss": 1.0884, - "step": 216700 - }, - { - "epoch": 43.59, - "learning_rate": 0.0001235051829070172, - "loss": 1.0873, - "step": 216800 - }, - { - "epoch": 43.61, - "learning_rate": 0.00012274573941621584, - "loss": 1.0884, - "step": 216900 - }, - { - "epoch": 43.63, - "learning_rate": 0.00012198851749342654, - "loss": 1.0917, - "step": 217000 - }, - { - "epoch": 43.65, - "learning_rate": 0.00012123351863096627, - "loss": 1.0934, - "step": 217100 - }, - { - "epoch": 43.67, - "learning_rate": 0.0001204807443167695, - "loss": 1.0904, - "step": 217200 - }, - { - "epoch": 43.69, - "learning_rate": 0.00011973019603438773, - "loss": 1.0906, - "step": 217300 - }, - { - "epoch": 43.71, - "learning_rate": 0.00011898187526298495, - "loss": 1.0889, - "step": 217400 - }, - { - "epoch": 43.73, - "learning_rate": 0.00011823578347733579, - "loss": 1.09, - "step": 217500 - }, - { - "epoch": 43.75, - "learning_rate": 0.00011749192214782101, - "loss": 1.0912, - "step": 217600 - }, - { - "epoch": 43.77, - "learning_rate": 0.00011675029274042654, - "loss": 1.0905, - "step": 217700 - }, - { - "epoch": 43.79, - "learning_rate": 0.0001160108967167392, - "loss": 1.0874, - "step": 217800 - }, - { - "epoch": 43.81, - "learning_rate": 0.0001152737355339449, - "loss": 1.0902, - "step": 217900 - }, - { - "epoch": 43.83, - "learning_rate": 0.00011453881064482418, - "loss": 1.088, - "step": 218000 - }, - { - "epoch": 43.85, - "learning_rate": 0.0001138061234977511, - "loss": 1.0878, - "step": 218100 - }, - { - "epoch": 43.87, - "learning_rate": 0.00011307567553668963, - "loss": 1.0927, - "step": 218200 - }, - { - "epoch": 43.89, - "learning_rate": 0.00011234746820119087, - "loss": 1.0882, - "step": 218300 - }, - { - "epoch": 43.91, - "learning_rate": 0.00011162150292638924, - "loss": 1.0927, - "step": 218400 - }, - { - "epoch": 43.94, - "learning_rate": 0.00011089778114300134, - "loss": 1.0885, - "step": 218500 - }, - { - "epoch": 43.96, - "learning_rate": 0.00011017630427732253, - "loss": 1.0884, - "step": 218600 - }, - { - "epoch": 43.98, - "learning_rate": 0.00010945707375122376, - "loss": 1.0878, - "step": 218700 - }, - { - "epoch": 44.0, - "learning_rate": 0.0001087472496787982, - "loss": 1.0855, - "step": 218800 - }, - { - "epoch": 44.0, - "eval_accuracy": 0.4214934698002123, - "eval_loss": 1.0645169019699097, - "eval_runtime": 19.7931, - "eval_samples_per_second": 4019.996, - "eval_steps_per_second": 15.713, - "step": 218823 - }, - { - "epoch": 44.02, - "learning_rate": 0.00010803249358108017, - "loss": 1.0767, - "step": 218900 - }, - { - "epoch": 44.04, - "learning_rate": 0.00010731998804791671, - "loss": 1.0805, - "step": 219000 - }, - { - "epoch": 44.06, - "learning_rate": 0.00010660973448349847, - "loss": 1.0767, - "step": 219100 - }, - { - "epoch": 44.08, - "learning_rate": 0.00010590173428757774, - "loss": 1.0796, - "step": 219200 - }, - { - "epoch": 44.1, - "learning_rate": 0.00010519598885546585, - "loss": 1.0782, - "step": 219300 - }, - { - "epoch": 44.12, - "learning_rate": 0.00010449249957803011, - "loss": 1.0765, - "step": 219400 - }, - { - "epoch": 44.14, - "learning_rate": 0.00010379126784169191, - "loss": 1.0828, - "step": 219500 - }, - { - "epoch": 44.16, - "learning_rate": 0.00010309229502842355, - "loss": 1.077, - "step": 219600 - }, - { - "epoch": 44.18, - "learning_rate": 0.00010239558251574535, - "loss": 1.0754, - "step": 219700 - }, - { - "epoch": 44.2, - "learning_rate": 0.00010170113167672274, - "loss": 1.0823, - "step": 219800 - }, - { - "epoch": 44.22, - "learning_rate": 0.00010100894387996454, - "loss": 1.082, - "step": 219900 - }, - { - "epoch": 44.24, - "learning_rate": 0.00010031902048961913, - "loss": 1.0795, - "step": 220000 - }, - { - "epoch": 44.26, - "learning_rate": 9.963136286537278e-05, - "loss": 1.0816, - "step": 220100 - }, - { - "epoch": 44.28, - "learning_rate": 9.894597236244558e-05, - "loss": 1.079, - "step": 220200 - }, - { - "epoch": 44.3, - "learning_rate": 9.826285033159035e-05, - "loss": 1.0816, - "step": 220300 - }, - { - "epoch": 44.32, - "learning_rate": 9.758199811908924e-05, - "loss": 1.0845, - "step": 220400 - }, - { - "epoch": 44.34, - "learning_rate": 9.690341706675043e-05, - "loss": 1.0838, - "step": 220500 - }, - { - "epoch": 44.36, - "learning_rate": 9.622710851190694e-05, - "loss": 1.0781, - "step": 220600 - }, - { - "epoch": 44.38, - "learning_rate": 9.555307378741259e-05, - "loss": 1.0841, - "step": 220700 - }, - { - "epoch": 44.4, - "learning_rate": 9.488802055091186e-05, - "loss": 1.0813, - "step": 220800 - }, - { - "epoch": 44.42, - "learning_rate": 9.421851469638642e-05, - "loss": 1.0834, - "step": 220900 - }, - { - "epoch": 44.44, - "learning_rate": 9.355128663070311e-05, - "loss": 1.0805, - "step": 221000 - }, - { - "epoch": 44.46, - "learning_rate": 9.288633766882021e-05, - "loss": 1.0837, - "step": 221100 - }, - { - "epoch": 44.48, - "learning_rate": 9.222366912120445e-05, - "loss": 1.079, - "step": 221200 - }, - { - "epoch": 44.5, - "learning_rate": 9.156328229382879e-05, - "loss": 1.082, - "step": 221300 - }, - { - "epoch": 44.52, - "learning_rate": 9.090517848816912e-05, - "loss": 1.0806, - "step": 221400 - }, - { - "epoch": 44.54, - "learning_rate": 9.024935900120185e-05, - "loss": 1.082, - "step": 221500 - }, - { - "epoch": 44.56, - "learning_rate": 8.95958251254017e-05, - "loss": 1.0818, - "step": 221600 - }, - { - "epoch": 44.58, - "learning_rate": 8.894457814873885e-05, - "loss": 1.0804, - "step": 221700 - }, - { - "epoch": 44.6, - "learning_rate": 8.829561935467689e-05, - "loss": 1.0799, - "step": 221800 - }, - { - "epoch": 44.62, - "learning_rate": 8.76489500221693e-05, - "loss": 1.0803, - "step": 221900 - }, - { - "epoch": 44.64, - "learning_rate": 8.700457142565774e-05, - "loss": 1.0811, - "step": 222000 - }, - { - "epoch": 44.66, - "learning_rate": 8.63624848350697e-05, - "loss": 1.0835, - "step": 222100 - }, - { - "epoch": 44.68, - "learning_rate": 8.57226915158156e-05, - "loss": 1.0782, - "step": 222200 - }, - { - "epoch": 44.7, - "learning_rate": 8.508519272878545e-05, - "loss": 1.0845, - "step": 222300 - }, - { - "epoch": 44.72, - "learning_rate": 8.444998973034829e-05, - "loss": 1.0814, - "step": 222400 - }, - { - "epoch": 44.74, - "learning_rate": 8.381708377234828e-05, - "loss": 1.0828, - "step": 222500 - }, - { - "epoch": 44.76, - "learning_rate": 8.318647610210284e-05, - "loss": 1.0845, - "step": 222600 - }, - { - "epoch": 44.78, - "learning_rate": 8.25581679623992e-05, - "loss": 1.0805, - "step": 222700 - }, - { - "epoch": 44.8, - "learning_rate": 8.193216059149355e-05, - "loss": 1.0768, - "step": 222800 - }, - { - "epoch": 44.82, - "learning_rate": 8.131468087783922e-05, - "loss": 1.0814, - "step": 222900 - }, - { - "epoch": 44.84, - "learning_rate": 8.06932557027712e-05, - "loss": 1.0778, - "step": 223000 - }, - { - "epoch": 44.86, - "learning_rate": 8.007413497182939e-05, - "loss": 1.083, - "step": 223100 - }, - { - "epoch": 44.88, - "learning_rate": 7.94573199051636e-05, - "loss": 1.075, - "step": 223200 - }, - { - "epoch": 44.9, - "learning_rate": 7.884281171837912e-05, - "loss": 1.0767, - "step": 223300 - }, - { - "epoch": 44.92, - "learning_rate": 7.82306116225355e-05, - "loss": 1.0833, - "step": 223400 - }, - { - "epoch": 44.94, - "learning_rate": 7.762072082414307e-05, - "loss": 1.0793, - "step": 223500 - }, - { - "epoch": 44.96, - "learning_rate": 7.701314052516146e-05, - "loss": 1.0785, - "step": 223600 - }, - { - "epoch": 44.98, - "learning_rate": 7.640787192299645e-05, - "loss": 1.0801, - "step": 223700 - }, - { - "epoch": 45.0, - "eval_accuracy": 0.4221078634754255, - "eval_loss": 1.0577867031097412, - "eval_runtime": 19.8376, - "eval_samples_per_second": 4010.968, - "eval_steps_per_second": 15.677, - "step": 223796 - }, - { - "epoch": 45.0, - "learning_rate": 7.580491621049817e-05, - "loss": 1.0812, - "step": 223800 - }, - { - "epoch": 45.02, - "learning_rate": 7.52042745759586e-05, - "loss": 1.0668, - "step": 223900 - }, - { - "epoch": 45.04, - "learning_rate": 7.460594820310931e-05, - "loss": 1.0678, - "step": 224000 - }, - { - "epoch": 45.06, - "learning_rate": 7.400993827111833e-05, - "loss": 1.0686, - "step": 224100 - }, - { - "epoch": 45.08, - "learning_rate": 7.341624595458923e-05, - "loss": 1.0737, - "step": 224200 - }, - { - "epoch": 45.1, - "learning_rate": 7.282487242355806e-05, - "loss": 1.0732, - "step": 224300 - }, - { - "epoch": 45.12, - "learning_rate": 7.223581884349059e-05, - "loss": 1.0704, - "step": 224400 - }, - { - "epoch": 45.14, - "learning_rate": 7.16490863752807e-05, - "loss": 1.0695, - "step": 224500 - }, - { - "epoch": 45.16, - "learning_rate": 7.106467617524787e-05, - "loss": 1.0738, - "step": 224600 - }, - { - "epoch": 45.18, - "learning_rate": 7.048258939513497e-05, - "loss": 1.07, - "step": 224700 - }, - { - "epoch": 45.2, - "learning_rate": 6.990282718210616e-05, - "loss": 1.0724, - "step": 224800 - }, - { - "epoch": 45.22, - "learning_rate": 6.93311535277738e-05, - "loss": 1.071, - "step": 224900 - }, - { - "epoch": 45.24, - "learning_rate": 6.875602059798258e-05, - "loss": 1.0762, - "step": 225000 - }, - { - "epoch": 45.26, - "learning_rate": 6.818321563795887e-05, - "loss": 1.074, - "step": 225100 - }, - { - "epoch": 45.28, - "learning_rate": 6.761273977657439e-05, - "loss": 1.069, - "step": 225200 - }, - { - "epoch": 45.3, - "learning_rate": 6.704459413811065e-05, - "loss": 1.0751, - "step": 225300 - }, - { - "epoch": 45.32, - "learning_rate": 6.647877984225656e-05, - "loss": 1.0755, - "step": 225400 - }, - { - "epoch": 45.34, - "learning_rate": 6.59152980041064e-05, - "loss": 1.0733, - "step": 225500 - }, - { - "epoch": 45.36, - "learning_rate": 6.535414973415796e-05, - "loss": 1.0741, - "step": 225600 - }, - { - "epoch": 45.38, - "learning_rate": 6.479533613830982e-05, - "loss": 1.0741, - "step": 225700 - }, - { - "epoch": 45.4, - "learning_rate": 6.423885831785992e-05, - "loss": 1.076, - "step": 225800 - }, - { - "epoch": 45.42, - "learning_rate": 6.368471736950224e-05, - "loss": 1.073, - "step": 225900 - }, - { - "epoch": 45.44, - "learning_rate": 6.313291438532556e-05, - "loss": 1.075, - "step": 226000 - }, - { - "epoch": 45.46, - "learning_rate": 6.258345045281138e-05, - "loss": 1.0719, - "step": 226100 - }, - { - "epoch": 45.48, - "learning_rate": 6.203632665483128e-05, - "loss": 1.0774, - "step": 226200 - }, - { - "epoch": 45.5, - "learning_rate": 6.149154406964438e-05, - "loss": 1.0707, - "step": 226300 - }, - { - "epoch": 45.52, - "learning_rate": 6.09491037708966e-05, - "loss": 1.0762, - "step": 226400 - }, - { - "epoch": 45.54, - "learning_rate": 6.040900682761738e-05, - "loss": 1.07, - "step": 226500 - }, - { - "epoch": 45.56, - "learning_rate": 5.987125430421819e-05, - "loss": 1.0754, - "step": 226600 - }, - { - "epoch": 45.58, - "learning_rate": 5.9335847260489354e-05, - "loss": 1.0736, - "step": 226700 - }, - { - "epoch": 45.6, - "learning_rate": 5.8802786751599724e-05, - "loss": 1.0737, - "step": 226800 - }, - { - "epoch": 45.62, - "learning_rate": 5.827207382809313e-05, - "loss": 1.0699, - "step": 226900 - }, - { - "epoch": 45.64, - "learning_rate": 5.774370953588723e-05, - "loss": 1.0726, - "step": 227000 - }, - { - "epoch": 45.66, - "learning_rate": 5.72176949162706e-05, - "loss": 1.0727, - "step": 227100 - }, - { - "epoch": 45.68, - "learning_rate": 5.669403100590123e-05, - "loss": 1.0772, - "step": 227200 - }, - { - "epoch": 45.7, - "learning_rate": 5.617792031399907e-05, - "loss": 1.0748, - "step": 227300 - }, - { - "epoch": 45.72, - "learning_rate": 5.565893738080891e-05, - "loss": 1.0718, - "step": 227400 - }, - { - "epoch": 45.74, - "learning_rate": 5.5142308228831315e-05, - "loss": 1.0739, - "step": 227500 - }, - { - "epoch": 45.76, - "learning_rate": 5.4628033876227974e-05, - "loss": 1.075, - "step": 227600 - }, - { - "epoch": 45.78, - "learning_rate": 5.411611533651911e-05, - "loss": 1.0738, - "step": 227700 - }, - { - "epoch": 45.81, - "learning_rate": 5.3606553618582714e-05, - "loss": 1.0752, - "step": 227800 - }, - { - "epoch": 45.83, - "learning_rate": 5.309934972665201e-05, - "loss": 1.0727, - "step": 227900 - }, - { - "epoch": 45.85, - "learning_rate": 5.259450466031324e-05, - "loss": 1.071, - "step": 228000 - }, - { - "epoch": 45.87, - "learning_rate": 5.2092019414504e-05, - "loss": 1.074, - "step": 228100 - }, - { - "epoch": 45.89, - "learning_rate": 5.1591894979511055e-05, - "loss": 1.0699, - "step": 228200 - }, - { - "epoch": 45.91, - "learning_rate": 5.109413234096888e-05, - "loss": 1.0727, - "step": 228300 - }, - { - "epoch": 45.93, - "learning_rate": 5.059873247985722e-05, - "loss": 1.0721, - "step": 228400 - }, - { - "epoch": 45.95, - "learning_rate": 5.010569637249912e-05, - "loss": 1.0708, - "step": 228500 - }, - { - "epoch": 45.97, - "learning_rate": 4.961502499055928e-05, - "loss": 1.0701, - "step": 228600 - }, - { - "epoch": 45.99, - "learning_rate": 4.912671930104237e-05, - "loss": 1.072, - "step": 228700 - }, - { - "epoch": 46.0, - "eval_accuracy": 0.42260501009247403, - "eval_loss": 1.052234172821045, - "eval_runtime": 19.8604, - "eval_samples_per_second": 4006.368, - "eval_steps_per_second": 15.659, - "step": 228769 - }, - { - "epoch": 46.01, - "learning_rate": 4.864078026629054e-05, - "loss": 1.0673, - "step": 228800 - }, - { - "epoch": 46.03, - "learning_rate": 4.8157208843981476e-05, - "loss": 1.0645, - "step": 228900 - }, - { - "epoch": 46.05, - "learning_rate": 4.767600598712743e-05, - "loss": 1.0635, - "step": 229000 - }, - { - "epoch": 46.07, - "learning_rate": 4.719717264407245e-05, - "loss": 1.0651, - "step": 229100 - }, - { - "epoch": 46.09, - "learning_rate": 4.672070975849069e-05, - "loss": 1.0623, - "step": 229200 - }, - { - "epoch": 46.11, - "learning_rate": 4.625134744279142e-05, - "loss": 1.0643, - "step": 229300 - }, - { - "epoch": 46.13, - "learning_rate": 4.5779604556573094e-05, - "loss": 1.0661, - "step": 229400 - }, - { - "epoch": 46.15, - "learning_rate": 4.5310234921539935e-05, - "loss": 1.0668, - "step": 229500 - }, - { - "epoch": 46.17, - "learning_rate": 4.4843239462715455e-05, - "loss": 1.0682, - "step": 229600 - }, - { - "epoch": 46.19, - "learning_rate": 4.437861910044444e-05, - "loss": 1.0681, - "step": 229700 - }, - { - "epoch": 46.21, - "learning_rate": 4.3916374750390256e-05, - "loss": 1.0666, - "step": 229800 - }, - { - "epoch": 46.23, - "learning_rate": 4.345650732353393e-05, - "loss": 1.0656, - "step": 229900 - }, - { - "epoch": 46.25, - "learning_rate": 4.299901772617215e-05, - "loss": 1.0653, - "step": 230000 - }, - { - "epoch": 46.27, - "learning_rate": 4.2543906859915384e-05, - "loss": 1.0678, - "step": 230100 - }, - { - "epoch": 46.29, - "learning_rate": 4.209117562168643e-05, - "loss": 1.064, - "step": 230200 - }, - { - "epoch": 46.31, - "learning_rate": 4.1640824903717566e-05, - "loss": 1.071, - "step": 230300 - }, - { - "epoch": 46.33, - "learning_rate": 4.119285559355049e-05, - "loss": 1.0661, - "step": 230400 - }, - { - "epoch": 46.35, - "learning_rate": 4.0747268574033294e-05, - "loss": 1.0667, - "step": 230500 - }, - { - "epoch": 46.37, - "learning_rate": 4.0304064723319104e-05, - "loss": 1.0685, - "step": 230600 - }, - { - "epoch": 46.39, - "learning_rate": 3.986324491486421e-05, - "loss": 1.0662, - "step": 230700 - }, - { - "epoch": 46.41, - "learning_rate": 3.942481001742673e-05, - "loss": 1.0661, - "step": 230800 - }, - { - "epoch": 46.43, - "learning_rate": 3.8988760895064675e-05, - "loss": 1.0645, - "step": 230900 - }, - { - "epoch": 46.45, - "learning_rate": 3.8555098407134085e-05, - "loss": 1.0628, - "step": 231000 - }, - { - "epoch": 46.47, - "learning_rate": 3.8123823408287294e-05, - "loss": 1.0712, - "step": 231100 - }, - { - "epoch": 46.49, - "learning_rate": 3.7694936748471633e-05, - "loss": 1.0657, - "step": 231200 - }, - { - "epoch": 46.51, - "learning_rate": 3.726843927292778e-05, - "loss": 1.0666, - "step": 231300 - }, - { - "epoch": 46.53, - "learning_rate": 3.6848561063323876e-05, - "loss": 1.0681, - "step": 231400 - }, - { - "epoch": 46.55, - "learning_rate": 3.6426820560480634e-05, - "loss": 1.0666, - "step": 231500 - }, - { - "epoch": 46.57, - "learning_rate": 3.600747174108493e-05, - "loss": 1.0647, - "step": 231600 - }, - { - "epoch": 46.59, - "learning_rate": 3.5590515431579846e-05, - "loss": 1.0647, - "step": 231700 - }, - { - "epoch": 46.61, - "learning_rate": 3.51759524536939e-05, - "loss": 1.0643, - "step": 231800 - }, - { - "epoch": 46.63, - "learning_rate": 3.476378362443869e-05, - "loss": 1.0623, - "step": 231900 - }, - { - "epoch": 46.65, - "learning_rate": 3.435400975610778e-05, - "loss": 1.0654, - "step": 232000 - }, - { - "epoch": 46.67, - "learning_rate": 3.394663165627407e-05, - "loss": 1.0645, - "step": 232100 - }, - { - "epoch": 46.69, - "learning_rate": 3.3541650127789646e-05, - "loss": 1.0678, - "step": 232200 - }, - { - "epoch": 46.71, - "learning_rate": 3.31390659687833e-05, - "loss": 1.0657, - "step": 232300 - }, - { - "epoch": 46.73, - "learning_rate": 3.2738879972659116e-05, - "loss": 1.068, - "step": 232400 - }, - { - "epoch": 46.75, - "learning_rate": 3.234109292809456e-05, - "loss": 1.0686, - "step": 232500 - }, - { - "epoch": 46.77, - "learning_rate": 3.194570561904003e-05, - "loss": 1.0659, - "step": 232600 - }, - { - "epoch": 46.79, - "learning_rate": 3.1552718824715834e-05, - "loss": 1.0674, - "step": 232700 - }, - { - "epoch": 46.81, - "learning_rate": 3.116213331961215e-05, - "loss": 1.0668, - "step": 232800 - }, - { - "epoch": 46.83, - "learning_rate": 3.077394987348589e-05, - "loss": 1.0674, - "step": 232900 - }, - { - "epoch": 46.85, - "learning_rate": 3.0388169251360788e-05, - "loss": 1.0628, - "step": 233000 - }, - { - "epoch": 46.87, - "learning_rate": 3.0004792213524645e-05, - "loss": 1.0639, - "step": 233100 - }, - { - "epoch": 46.89, - "learning_rate": 2.9623819515528783e-05, - "loss": 1.0649, - "step": 233200 - }, - { - "epoch": 46.91, - "learning_rate": 2.9245251908185526e-05, - "loss": 1.0651, - "step": 233300 - }, - { - "epoch": 46.93, - "learning_rate": 2.8872839843944315e-05, - "loss": 1.067, - "step": 233400 - }, - { - "epoch": 46.95, - "learning_rate": 2.8499060581948928e-05, - "loss": 1.0659, - "step": 233500 - }, - { - "epoch": 46.97, - "learning_rate": 2.812768862725676e-05, - "loss": 1.0669, - "step": 233600 - }, - { - "epoch": 46.99, - "learning_rate": 2.7758724711759303e-05, - "loss": 1.0625, - "step": 233700 - }, - { - "epoch": 47.0, - "eval_accuracy": 0.42298172791300875, - "eval_loss": 1.0480923652648926, - "eval_runtime": 19.887, - "eval_samples_per_second": 4001.005, - "eval_steps_per_second": 15.638, - "step": 233742 - }, - { - "epoch": 47.01, - "learning_rate": 2.7392169562602682e-05, - "loss": 1.0617, - "step": 233800 - }, - { - "epoch": 47.03, - "learning_rate": 2.7028023902185562e-05, - "loss": 1.0595, - "step": 233900 - }, - { - "epoch": 47.05, - "learning_rate": 2.6666288448158464e-05, - "loss": 1.0587, - "step": 234000 - }, - { - "epoch": 47.07, - "learning_rate": 2.6306963913421827e-05, - "loss": 1.0602, - "step": 234100 - }, - { - "epoch": 47.09, - "learning_rate": 2.5950051006124048e-05, - "loss": 1.0601, - "step": 234200 - }, - { - "epoch": 47.11, - "learning_rate": 2.5595550429661775e-05, - "loss": 1.0611, - "step": 234300 - }, - { - "epoch": 47.13, - "learning_rate": 2.5243462882676703e-05, - "loss": 1.0591, - "step": 234400 - }, - { - "epoch": 47.15, - "learning_rate": 2.4893789059055454e-05, - "loss": 1.0635, - "step": 234500 - }, - { - "epoch": 47.17, - "learning_rate": 2.4546529647927335e-05, - "loss": 1.059, - "step": 234600 - }, - { - "epoch": 47.19, - "learning_rate": 2.4201685333663654e-05, - "loss": 1.0587, - "step": 234700 - }, - { - "epoch": 47.21, - "learning_rate": 2.3859256795876057e-05, - "loss": 1.0579, - "step": 234800 - }, - { - "epoch": 47.23, - "learning_rate": 2.351924470941541e-05, - "loss": 1.0601, - "step": 234900 - }, - { - "epoch": 47.25, - "learning_rate": 2.318164974436987e-05, - "loss": 1.06, - "step": 235000 - }, - { - "epoch": 47.27, - "learning_rate": 2.2846472566064037e-05, - "loss": 1.0604, - "step": 235100 - }, - { - "epoch": 47.29, - "learning_rate": 2.2513713835058124e-05, - "loss": 1.058, - "step": 235200 - }, - { - "epoch": 47.31, - "learning_rate": 2.2183374207145472e-05, - "loss": 1.0605, - "step": 235300 - }, - { - "epoch": 47.33, - "learning_rate": 2.1858721552181353e-05, - "loss": 1.0602, - "step": 235400 - }, - { - "epoch": 47.35, - "learning_rate": 2.153319787157798e-05, - "loss": 1.0587, - "step": 235500 - }, - { - "epoch": 47.37, - "learning_rate": 2.1210095226447403e-05, - "loss": 1.0582, - "step": 235600 - }, - { - "epoch": 47.39, - "learning_rate": 2.0889414253553036e-05, - "loss": 1.0639, - "step": 235700 - }, - { - "epoch": 47.41, - "learning_rate": 2.057115558488601e-05, - "loss": 1.0615, - "step": 235800 - }, - { - "epoch": 47.43, - "learning_rate": 2.0255319847663906e-05, - "loss": 1.0598, - "step": 235900 - }, - { - "epoch": 47.45, - "learning_rate": 1.9941907664328407e-05, - "loss": 1.0658, - "step": 236000 - }, - { - "epoch": 47.47, - "learning_rate": 1.9630919652545432e-05, - "loss": 1.0611, - "step": 236100 - }, - { - "epoch": 47.49, - "learning_rate": 1.9322356425203607e-05, - "loss": 1.0624, - "step": 236200 - }, - { - "epoch": 47.51, - "learning_rate": 1.901621859041247e-05, - "loss": 1.0621, - "step": 236300 - }, - { - "epoch": 47.53, - "learning_rate": 1.8712506751501767e-05, - "loss": 1.0599, - "step": 236400 - }, - { - "epoch": 47.55, - "learning_rate": 1.841122150702007e-05, - "loss": 1.0564, - "step": 236500 - }, - { - "epoch": 47.57, - "learning_rate": 1.8112363450733667e-05, - "loss": 1.0621, - "step": 236600 - }, - { - "epoch": 47.59, - "learning_rate": 1.781593317162572e-05, - "loss": 1.0629, - "step": 236700 - }, - { - "epoch": 47.61, - "learning_rate": 1.7521931253894342e-05, - "loss": 1.0607, - "step": 236800 - }, - { - "epoch": 47.63, - "learning_rate": 1.7230358276952156e-05, - "loss": 1.0589, - "step": 236900 - }, - { - "epoch": 47.65, - "learning_rate": 1.6941214815424793e-05, - "loss": 1.0609, - "step": 237000 - }, - { - "epoch": 47.68, - "learning_rate": 1.66545014391499e-05, - "loss": 1.0602, - "step": 237100 - }, - { - "epoch": 47.7, - "learning_rate": 1.6370218713175762e-05, - "loss": 1.066, - "step": 237200 - }, - { - "epoch": 47.72, - "learning_rate": 1.6088367197760607e-05, - "loss": 1.0584, - "step": 237300 - }, - { - "epoch": 47.74, - "learning_rate": 1.5808947448371218e-05, - "loss": 1.0585, - "step": 237400 - }, - { - "epoch": 47.76, - "learning_rate": 1.553471784824498e-05, - "loss": 1.0647, - "step": 237500 - }, - { - "epoch": 47.78, - "learning_rate": 1.5260138946824596e-05, - "loss": 1.0597, - "step": 237600 - }, - { - "epoch": 47.8, - "learning_rate": 1.4987993443684039e-05, - "loss": 1.0618, - "step": 237700 - }, - { - "epoch": 47.82, - "learning_rate": 1.4718281875161916e-05, - "loss": 1.0587, - "step": 237800 - }, - { - "epoch": 47.84, - "learning_rate": 1.4451004772799565e-05, - "loss": 1.0562, - "step": 237900 - }, - { - "epoch": 47.86, - "learning_rate": 1.418616266334133e-05, - "loss": 1.0588, - "step": 238000 - }, - { - "epoch": 47.88, - "learning_rate": 1.392375606873178e-05, - "loss": 1.0633, - "step": 238100 - }, - { - "epoch": 47.9, - "learning_rate": 1.3663785506116133e-05, - "loss": 1.0609, - "step": 238200 - }, - { - "epoch": 47.92, - "learning_rate": 1.3406251487839e-05, - "loss": 1.0614, - "step": 238300 - }, - { - "epoch": 47.94, - "learning_rate": 1.3151154521442582e-05, - "loss": 1.0631, - "step": 238400 - }, - { - "epoch": 47.96, - "learning_rate": 1.2898495109666397e-05, - "loss": 1.06, - "step": 238500 - }, - { - "epoch": 47.98, - "learning_rate": 1.2648273750446026e-05, - "loss": 1.0606, - "step": 238600 - }, - { - "epoch": 48.0, - "learning_rate": 1.2400490936912284e-05, - "loss": 1.0639, - "step": 238700 - }, - { - "epoch": 48.0, - "eval_accuracy": 0.4231766093868917, - "eval_loss": 1.045819640159607, - "eval_runtime": 19.861, - "eval_samples_per_second": 4006.252, - "eval_steps_per_second": 15.659, - "step": 238716 - }, - { - "epoch": 48.02, - "learning_rate": 1.2155147157390245e-05, - "loss": 1.0555, - "step": 238800 - }, - { - "epoch": 48.04, - "learning_rate": 1.1912242895397857e-05, - "loss": 1.057, - "step": 238900 - }, - { - "epoch": 48.06, - "learning_rate": 1.1671778629645525e-05, - "loss": 1.0555, - "step": 239000 - }, - { - "epoch": 48.08, - "learning_rate": 1.1433754834035137e-05, - "loss": 1.0566, - "step": 239100 - }, - { - "epoch": 48.1, - "learning_rate": 1.1198171977658822e-05, - "loss": 1.0548, - "step": 239200 - }, - { - "epoch": 48.12, - "learning_rate": 1.096503052479783e-05, - "loss": 1.0572, - "step": 239300 - }, - { - "epoch": 48.14, - "learning_rate": 1.0734330934922404e-05, - "loss": 1.0535, - "step": 239400 - }, - { - "epoch": 48.16, - "learning_rate": 1.0508344144459226e-05, - "loss": 1.0572, - "step": 239500 - }, - { - "epoch": 48.18, - "learning_rate": 1.0282505209828901e-05, - "loss": 1.0567, - "step": 239600 - }, - { - "epoch": 48.2, - "learning_rate": 1.0059109483290113e-05, - "loss": 1.061, - "step": 239700 - }, - { - "epoch": 48.22, - "learning_rate": 9.838157405106102e-06, - "loss": 1.0535, - "step": 239800 - }, - { - "epoch": 48.24, - "learning_rate": 9.619649410724658e-06, - "loss": 1.0575, - "step": 239900 - }, - { - "epoch": 48.26, - "learning_rate": 9.40358593077631e-06, - "loss": 1.0578, - "step": 240000 - }, - { - "epoch": 48.28, - "learning_rate": 9.189967391074332e-06, - "loss": 1.0559, - "step": 240100 - }, - { - "epoch": 48.3, - "learning_rate": 8.978794212613355e-06, - "loss": 1.0572, - "step": 240200 - }, - { - "epoch": 48.32, - "learning_rate": 8.770066811569083e-06, - "loss": 1.0544, - "step": 240300 - }, - { - "epoch": 48.34, - "learning_rate": 8.563785599296769e-06, - "loss": 1.0586, - "step": 240400 - }, - { - "epoch": 48.36, - "learning_rate": 8.359950982330805e-06, - "loss": 1.0592, - "step": 240500 - }, - { - "epoch": 48.38, - "learning_rate": 8.158563362384158e-06, - "loss": 1.0586, - "step": 240600 - }, - { - "epoch": 48.4, - "learning_rate": 7.959623136347128e-06, - "loss": 1.0547, - "step": 240700 - }, - { - "epoch": 48.42, - "learning_rate": 7.763130696286647e-06, - "loss": 1.0525, - "step": 240800 - }, - { - "epoch": 48.44, - "learning_rate": 7.56908642944587e-06, - "loss": 1.0569, - "step": 240900 - }, - { - "epoch": 48.46, - "learning_rate": 7.3774907182427856e-06, - "loss": 1.055, - "step": 241000 - }, - { - "epoch": 48.48, - "learning_rate": 7.18834394027007e-06, - "loss": 1.0579, - "step": 241100 - }, - { - "epoch": 48.5, - "learning_rate": 7.001646468294265e-06, - "loss": 1.0554, - "step": 241200 - }, - { - "epoch": 48.52, - "learning_rate": 6.817398670254382e-06, - "loss": 1.0551, - "step": 241300 - }, - { - "epoch": 48.54, - "learning_rate": 6.635600909262185e-06, - "loss": 1.0599, - "step": 241400 - }, - { - "epoch": 48.56, - "learning_rate": 6.456253543600521e-06, - "loss": 1.0554, - "step": 241500 - }, - { - "epoch": 48.58, - "learning_rate": 6.281113760537027e-06, - "loss": 1.0548, - "step": 241600 - }, - { - "epoch": 48.6, - "learning_rate": 6.106643728384375e-06, - "loss": 1.0615, - "step": 241700 - }, - { - "epoch": 48.62, - "learning_rate": 5.934625134019766e-06, - "loss": 1.0538, - "step": 241800 - }, - { - "epoch": 48.64, - "learning_rate": 5.76505831645327e-06, - "loss": 1.0599, - "step": 241900 - }, - { - "epoch": 48.66, - "learning_rate": 5.597943609863821e-06, - "loss": 1.0539, - "step": 242000 - }, - { - "epoch": 48.68, - "learning_rate": 5.433281343597135e-06, - "loss": 1.0565, - "step": 242100 - }, - { - "epoch": 48.7, - "learning_rate": 5.2710718421662696e-06, - "loss": 1.0596, - "step": 242200 - }, - { - "epoch": 48.72, - "learning_rate": 5.111315425249952e-06, - "loss": 1.0548, - "step": 242300 - }, - { - "epoch": 48.74, - "learning_rate": 4.954012407692721e-06, - "loss": 1.0547, - "step": 242400 - }, - { - "epoch": 48.76, - "learning_rate": 4.799163099503956e-06, - "loss": 1.0514, - "step": 242500 - }, - { - "epoch": 48.78, - "learning_rate": 4.646767805857183e-06, - "loss": 1.0587, - "step": 242600 - }, - { - "epoch": 48.8, - "learning_rate": 4.496826827089795e-06, - "loss": 1.0602, - "step": 242700 - }, - { - "epoch": 48.82, - "learning_rate": 4.349340458702084e-06, - "loss": 1.0576, - "step": 242800 - }, - { - "epoch": 48.84, - "learning_rate": 4.204308991357098e-06, - "loss": 1.0576, - "step": 242900 - }, - { - "epoch": 48.86, - "learning_rate": 4.061732710879673e-06, - "loss": 1.0592, - "step": 243000 - }, - { - "epoch": 48.88, - "learning_rate": 3.921611898255878e-06, - "loss": 1.0601, - "step": 243100 - }, - { - "epoch": 48.9, - "learning_rate": 3.783946829632734e-06, - "loss": 1.0588, - "step": 243200 - }, - { - "epoch": 48.92, - "learning_rate": 3.648737776317801e-06, - "loss": 1.0558, - "step": 243300 - }, - { - "epoch": 48.94, - "learning_rate": 3.5159850047777885e-06, - "loss": 1.057, - "step": 243400 - }, - { - "epoch": 48.96, - "learning_rate": 3.3856887766392507e-06, - "loss": 1.0593, - "step": 243500 - }, - { - "epoch": 48.98, - "learning_rate": 3.259115580974137e-06, - "loss": 1.0585, - "step": 243600 - }, - { - "epoch": 49.0, - "eval_accuracy": 0.4232847884907615, - "eval_loss": 1.044702172279358, - "eval_runtime": 19.8772, - "eval_samples_per_second": 4002.975, - "eval_steps_per_second": 15.646, - "step": 243689 - }, - { - "epoch": 49.0, - "learning_rate": 3.1337086333987908e-06, - "loss": 1.0534, - "step": 243700 - }, - { - "epoch": 49.02, - "learning_rate": 3.0107589826065816e-06, - "loss": 1.0535, - "step": 243800 - }, - { - "epoch": 49.04, - "learning_rate": 2.8902668709041013e-06, - "loss": 1.0535, - "step": 243900 - }, - { - "epoch": 49.06, - "learning_rate": 2.772232535754593e-06, - "loss": 1.0536, - "step": 244000 - }, - { - "epoch": 49.08, - "learning_rate": 2.6566562097773973e-06, - "loss": 1.0529, - "step": 244100 - }, - { - "epoch": 49.1, - "learning_rate": 2.5435381207479514e-06, - "loss": 1.057, - "step": 244200 - }, - { - "epoch": 49.12, - "learning_rate": 2.432878491596957e-06, - "loss": 1.0544, - "step": 244300 - }, - { - "epoch": 49.14, - "learning_rate": 2.3246775404098252e-06, - "loss": 1.0568, - "step": 244400 - }, - { - "epoch": 49.16, - "learning_rate": 2.218935480426676e-06, - "loss": 1.0521, - "step": 244500 - }, - { - "epoch": 49.18, - "learning_rate": 2.1156525200416444e-06, - "loss": 1.0557, - "step": 244600 - }, - { - "epoch": 49.2, - "learning_rate": 2.0148288628026036e-06, - "loss": 1.0553, - "step": 244700 - }, - { - "epoch": 49.22, - "learning_rate": 1.9164647074104702e-06, - "loss": 1.0539, - "step": 244800 - }, - { - "epoch": 49.24, - "learning_rate": 1.8205602477193439e-06, - "loss": 1.0517, - "step": 244900 - }, - { - "epoch": 49.26, - "learning_rate": 1.7271156727355353e-06, - "loss": 1.0535, - "step": 245000 - }, - { - "epoch": 49.28, - "learning_rate": 1.6361311666174272e-06, - "loss": 1.0565, - "step": 245100 - }, - { - "epoch": 49.3, - "learning_rate": 1.5476069086756139e-06, - "loss": 1.0513, - "step": 245200 - }, - { - "epoch": 49.32, - "learning_rate": 1.4615430733713742e-06, - "loss": 1.0539, - "step": 245300 - }, - { - "epoch": 49.34, - "learning_rate": 1.3779398303177814e-06, - "loss": 1.0565, - "step": 245400 - }, - { - "epoch": 49.36, - "learning_rate": 1.296797344278039e-06, - "loss": 1.0572, - "step": 245500 - }, - { - "epoch": 49.38, - "learning_rate": 1.218115775166173e-06, - "loss": 1.0538, - "step": 245600 - }, - { - "epoch": 49.4, - "learning_rate": 1.1426453002136294e-06, - "loss": 1.0577, - "step": 245700 - }, - { - "epoch": 49.42, - "learning_rate": 1.0688614123491314e-06, - "loss": 1.0547, - "step": 245800 - }, - { - "epoch": 49.44, - "learning_rate": 9.97538890623456e-07, - "loss": 1.0539, - "step": 245900 - }, - { - "epoch": 49.46, - "learning_rate": 9.286778755976388e-07, - "loss": 1.0524, - "step": 246000 - }, - { - "epoch": 49.48, - "learning_rate": 8.622785029814562e-07, - "loss": 1.0569, - "step": 246100 - }, - { - "epoch": 49.5, - "learning_rate": 7.983409036331491e-07, - "loss": 1.0525, - "step": 246200 - }, - { - "epoch": 49.52, - "learning_rate": 7.368652035597001e-07, - "loss": 1.0557, - "step": 246300 - }, - { - "epoch": 49.55, - "learning_rate": 6.778515239161398e-07, - "loss": 1.0568, - "step": 246400 - }, - { - "epoch": 49.57, - "learning_rate": 6.212999810051301e-07, - "loss": 1.058, - "step": 246500 - }, - { - "epoch": 49.59, - "learning_rate": 5.672106862772419e-07, - "loss": 1.0585, - "step": 246600 - }, - { - "epoch": 49.61, - "learning_rate": 5.155837463306778e-07, - "loss": 1.0561, - "step": 246700 - }, - { - "epoch": 49.63, - "learning_rate": 4.664192629104391e-07, - "loss": 1.057, - "step": 246800 - }, - { - "epoch": 49.65, - "learning_rate": 4.201721622633381e-07, - "loss": 1.0523, - "step": 246900 - }, - { - "epoch": 49.67, - "learning_rate": 3.7590825082547965e-07, - "loss": 1.0533, - "step": 247000 - }, - { - "epoch": 49.69, - "learning_rate": 3.3410707118347595e-07, - "loss": 1.0538, - "step": 247100 - }, - { - "epoch": 49.71, - "learning_rate": 2.951498981840217e-07, - "loss": 1.0602, - "step": 247200 - }, - { - "epoch": 49.73, - "learning_rate": 2.5824979513769386e-07, - "loss": 1.0534, - "step": 247300 - }, - { - "epoch": 49.75, - "learning_rate": 2.238126557659037e-07, - "loss": 1.0558, - "step": 247400 - }, - { - "epoch": 49.77, - "learning_rate": 1.9183854793672352e-07, - "loss": 1.0555, - "step": 247500 - }, - { - "epoch": 49.79, - "learning_rate": 1.6232753466377536e-07, - "loss": 1.0578, - "step": 247600 - }, - { - "epoch": 49.81, - "learning_rate": 1.352796741069251e-07, - "loss": 1.0576, - "step": 247700 - }, - { - "epoch": 49.83, - "learning_rate": 1.1069501957144956e-07, - "loss": 1.0551, - "step": 247800 - }, - { - "epoch": 49.85, - "learning_rate": 8.857361950831422e-08, - "loss": 1.0557, - "step": 247900 - }, - { - "epoch": 49.87, - "learning_rate": 6.89155175137568e-08, - "loss": 1.0538, - "step": 248000 - }, - { - "epoch": 49.89, - "learning_rate": 5.172075232956486e-08, - "loss": 1.0543, - "step": 248100 - }, - { - "epoch": 49.91, - "learning_rate": 3.698935784279822e-08, - "loss": 1.056, - "step": 248200 - }, - { - "epoch": 49.93, - "learning_rate": 2.472136308592776e-08, - "loss": 1.055, - "step": 248300 - }, - { - "epoch": 49.95, - "learning_rate": 1.4916792236141507e-08, - "loss": 1.0544, - "step": 248400 - }, - { - "epoch": 49.97, - "learning_rate": 7.575664616454869e-09, - "loss": 1.0545, - "step": 248500 - }, - { - "epoch": 49.99, - "learning_rate": 2.6979946943228584e-09, - "loss": 1.0521, - "step": 248600 - }, - { - "epoch": 50.0, - "eval_accuracy": 0.42330467435544344, - "eval_loss": 1.0445035696029663, - "eval_runtime": 19.4961, - "eval_samples_per_second": 4081.225, - "eval_steps_per_second": 15.952, - "step": 248650 + "epoch": 49.98, + "eval_accuracy": 0.4236413605299604, + "eval_loss": 1.1810568571090698, + "eval_runtime": 18.1953, + "eval_samples_per_second": 4373.003, + "eval_steps_per_second": 17.092, + "step": 124300 }, { - "epoch": 50.0, - "step": 248650, - "total_flos": 6.912086038408397e+16, - "train_loss": 1.2392261905236766, - "train_runtime": 25350.8633, - "train_samples_per_second": 2510.968, - "train_steps_per_second": 9.808 + "epoch": 49.98, + "step": 124300, + "total_flos": 6.182289190910362e+16, + "train_loss": 1.379773639240096, + "train_runtime": 17824.9007, + "train_samples_per_second": 3571.139, + "train_steps_per_second": 6.973 } ], - "max_steps": 248650, + "logging_steps": 100, + "max_steps": 124300, "num_train_epochs": 50, - "total_flos": 6.912086038408397e+16, + "save_steps": 500, + "total_flos": 6.182289190910362e+16, "trial_name": null, "trial_params": null }