{ "best_metric": 0.6530519723892212, "best_model_checkpoint": "./test_ast\\checkpoint-1320", "epoch": 15.0, "eval_steps": 5, "global_step": 3915, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.02, "learning_rate": 0.00019984674329501915, "loss": 3.0547, "step": 5 }, { "epoch": 0.02, "eval_accuracy": 0.12643678160919541, "eval_loss": 3.064624309539795, "eval_runtime": 132.8013, "eval_samples_per_second": 1.31, "eval_steps_per_second": 0.166, "step": 5 }, { "epoch": 0.04, "learning_rate": 0.00019964240102171137, "loss": 2.8437, "step": 10 }, { "epoch": 0.04, "eval_accuracy": 0.15517241379310345, "eval_loss": 2.6111764907836914, "eval_runtime": 135.5688, "eval_samples_per_second": 1.283, "eval_steps_per_second": 0.162, "step": 10 }, { "epoch": 0.06, "learning_rate": 0.00019938697318007664, "loss": 2.7144, "step": 15 }, { "epoch": 0.06, "eval_accuracy": 0.22988505747126436, "eval_loss": 2.3521151542663574, "eval_runtime": 131.8277, "eval_samples_per_second": 1.32, "eval_steps_per_second": 0.167, "step": 15 }, { "epoch": 0.08, "learning_rate": 0.0001991315453384419, "loss": 2.3026, "step": 20 }, { "epoch": 0.08, "eval_accuracy": 0.25862068965517243, "eval_loss": 2.28005313873291, "eval_runtime": 131.5426, "eval_samples_per_second": 1.323, "eval_steps_per_second": 0.167, "step": 20 }, { "epoch": 0.1, "learning_rate": 0.00019887611749680716, "loss": 2.4449, "step": 25 }, { "epoch": 0.1, "eval_accuracy": 0.20689655172413793, "eval_loss": 2.2755722999572754, "eval_runtime": 133.8549, "eval_samples_per_second": 1.3, "eval_steps_per_second": 0.164, "step": 25 }, { "epoch": 0.11, "learning_rate": 0.00019862068965517243, "loss": 2.2115, "step": 30 }, { "epoch": 0.11, "eval_accuracy": 0.3218390804597701, "eval_loss": 1.9326324462890625, "eval_runtime": 133.4669, "eval_samples_per_second": 1.304, "eval_steps_per_second": 0.165, "step": 30 }, { "epoch": 0.13, "learning_rate": 0.0001983652618135377, "loss": 1.9883, "step": 35 }, { "epoch": 0.13, "eval_accuracy": 0.13793103448275862, "eval_loss": 2.199158191680908, "eval_runtime": 133.3902, "eval_samples_per_second": 1.304, "eval_steps_per_second": 0.165, "step": 35 }, { "epoch": 0.15, "learning_rate": 0.00019810983397190295, "loss": 1.829, "step": 40 }, { "epoch": 0.15, "eval_accuracy": 0.28735632183908044, "eval_loss": 2.290984630584717, "eval_runtime": 132.8831, "eval_samples_per_second": 1.309, "eval_steps_per_second": 0.166, "step": 40 }, { "epoch": 0.17, "learning_rate": 0.0001978544061302682, "loss": 1.9662, "step": 45 }, { "epoch": 0.17, "eval_accuracy": 0.3103448275862069, "eval_loss": 1.9885331392288208, "eval_runtime": 132.7666, "eval_samples_per_second": 1.311, "eval_steps_per_second": 0.166, "step": 45 }, { "epoch": 0.19, "learning_rate": 0.00019759897828863348, "loss": 2.2471, "step": 50 }, { "epoch": 0.19, "eval_accuracy": 0.19540229885057472, "eval_loss": 2.141151189804077, "eval_runtime": 132.4565, "eval_samples_per_second": 1.314, "eval_steps_per_second": 0.166, "step": 50 }, { "epoch": 0.21, "learning_rate": 0.00019734355044699872, "loss": 1.8606, "step": 55 }, { "epoch": 0.21, "eval_accuracy": 0.3735632183908046, "eval_loss": 1.9202948808670044, "eval_runtime": 133.0296, "eval_samples_per_second": 1.308, "eval_steps_per_second": 0.165, "step": 55 }, { "epoch": 0.23, "learning_rate": 0.00019713920817369094, "loss": 1.8904, "step": 60 }, { "epoch": 0.23, "eval_accuracy": 0.3735632183908046, "eval_loss": 1.8267676830291748, "eval_runtime": 140.614, "eval_samples_per_second": 1.237, "eval_steps_per_second": 0.156, "step": 60 }, { "epoch": 0.25, "learning_rate": 0.0001968837803320562, "loss": 1.9574, "step": 65 }, { "epoch": 0.25, "eval_accuracy": 0.3563218390804598, "eval_loss": 1.690549612045288, "eval_runtime": 140.4999, "eval_samples_per_second": 1.238, "eval_steps_per_second": 0.157, "step": 65 }, { "epoch": 0.27, "learning_rate": 0.00019662835249042147, "loss": 1.7038, "step": 70 }, { "epoch": 0.27, "eval_accuracy": 0.3505747126436782, "eval_loss": 1.8225674629211426, "eval_runtime": 141.0186, "eval_samples_per_second": 1.234, "eval_steps_per_second": 0.156, "step": 70 }, { "epoch": 0.29, "learning_rate": 0.00019637292464878673, "loss": 1.9195, "step": 75 }, { "epoch": 0.29, "eval_accuracy": 0.3793103448275862, "eval_loss": 1.6538840532302856, "eval_runtime": 141.7043, "eval_samples_per_second": 1.228, "eval_steps_per_second": 0.155, "step": 75 }, { "epoch": 0.31, "learning_rate": 0.000196117496807152, "loss": 1.8161, "step": 80 }, { "epoch": 0.31, "eval_accuracy": 0.3275862068965517, "eval_loss": 1.7468079328536987, "eval_runtime": 141.1574, "eval_samples_per_second": 1.233, "eval_steps_per_second": 0.156, "step": 80 }, { "epoch": 0.33, "learning_rate": 0.00019586206896551723, "loss": 1.5665, "step": 85 }, { "epoch": 0.33, "eval_accuracy": 0.3218390804597701, "eval_loss": 1.797639012336731, "eval_runtime": 141.5066, "eval_samples_per_second": 1.23, "eval_steps_per_second": 0.155, "step": 85 }, { "epoch": 0.34, "learning_rate": 0.00019560664112388252, "loss": 1.5756, "step": 90 }, { "epoch": 0.34, "eval_accuracy": 0.4367816091954023, "eval_loss": 1.6475659608840942, "eval_runtime": 141.7711, "eval_samples_per_second": 1.227, "eval_steps_per_second": 0.155, "step": 90 }, { "epoch": 0.36, "learning_rate": 0.00019535121328224776, "loss": 1.5938, "step": 95 }, { "epoch": 0.36, "eval_accuracy": 0.3333333333333333, "eval_loss": 1.6348626613616943, "eval_runtime": 141.8628, "eval_samples_per_second": 1.227, "eval_steps_per_second": 0.155, "step": 95 }, { "epoch": 0.38, "learning_rate": 0.00019509578544061305, "loss": 1.8887, "step": 100 }, { "epoch": 0.38, "eval_accuracy": 0.3103448275862069, "eval_loss": 1.6541311740875244, "eval_runtime": 141.8903, "eval_samples_per_second": 1.226, "eval_steps_per_second": 0.155, "step": 100 }, { "epoch": 0.4, "learning_rate": 0.00019484035759897829, "loss": 1.7326, "step": 105 }, { "epoch": 0.4, "eval_accuracy": 0.3275862068965517, "eval_loss": 1.8677045106887817, "eval_runtime": 141.7931, "eval_samples_per_second": 1.227, "eval_steps_per_second": 0.155, "step": 105 }, { "epoch": 0.42, "learning_rate": 0.00019458492975734358, "loss": 2.0043, "step": 110 }, { "epoch": 0.42, "eval_accuracy": 0.2988505747126437, "eval_loss": 2.1178438663482666, "eval_runtime": 141.8724, "eval_samples_per_second": 1.226, "eval_steps_per_second": 0.155, "step": 110 }, { "epoch": 0.44, "learning_rate": 0.0001943295019157088, "loss": 1.4501, "step": 115 }, { "epoch": 0.44, "eval_accuracy": 0.43103448275862066, "eval_loss": 1.6648517847061157, "eval_runtime": 144.1195, "eval_samples_per_second": 1.207, "eval_steps_per_second": 0.153, "step": 115 }, { "epoch": 0.46, "learning_rate": 0.00019407407407407408, "loss": 1.2894, "step": 120 }, { "epoch": 0.46, "eval_accuracy": 0.4425287356321839, "eval_loss": 1.594812273979187, "eval_runtime": 137.6698, "eval_samples_per_second": 1.264, "eval_steps_per_second": 0.16, "step": 120 }, { "epoch": 0.48, "learning_rate": 0.00019381864623243934, "loss": 1.4052, "step": 125 }, { "epoch": 0.48, "eval_accuracy": 0.39655172413793105, "eval_loss": 1.7239457368850708, "eval_runtime": 143.1925, "eval_samples_per_second": 1.215, "eval_steps_per_second": 0.154, "step": 125 }, { "epoch": 0.5, "learning_rate": 0.0001935632183908046, "loss": 1.8974, "step": 130 }, { "epoch": 0.5, "eval_accuracy": 0.3218390804597701, "eval_loss": 1.5980931520462036, "eval_runtime": 140.1647, "eval_samples_per_second": 1.241, "eval_steps_per_second": 0.157, "step": 130 }, { "epoch": 0.52, "learning_rate": 0.00019330779054916987, "loss": 1.7137, "step": 135 }, { "epoch": 0.52, "eval_accuracy": 0.3793103448275862, "eval_loss": 1.4834296703338623, "eval_runtime": 140.4065, "eval_samples_per_second": 1.239, "eval_steps_per_second": 0.157, "step": 135 }, { "epoch": 0.54, "learning_rate": 0.00019305236270753513, "loss": 1.3159, "step": 140 }, { "epoch": 0.54, "eval_accuracy": 0.5114942528735632, "eval_loss": 1.342922329902649, "eval_runtime": 140.6082, "eval_samples_per_second": 1.237, "eval_steps_per_second": 0.156, "step": 140 }, { "epoch": 0.56, "learning_rate": 0.0001927969348659004, "loss": 1.4547, "step": 145 }, { "epoch": 0.56, "eval_accuracy": 0.43103448275862066, "eval_loss": 1.4868438243865967, "eval_runtime": 142.2009, "eval_samples_per_second": 1.224, "eval_steps_per_second": 0.155, "step": 145 }, { "epoch": 0.57, "learning_rate": 0.00019254150702426566, "loss": 1.103, "step": 150 }, { "epoch": 0.57, "eval_accuracy": 0.40229885057471265, "eval_loss": 1.5722764730453491, "eval_runtime": 140.4303, "eval_samples_per_second": 1.239, "eval_steps_per_second": 0.157, "step": 150 }, { "epoch": 0.59, "learning_rate": 0.00019228607918263092, "loss": 1.8516, "step": 155 }, { "epoch": 0.59, "eval_accuracy": 0.47126436781609193, "eval_loss": 1.3277612924575806, "eval_runtime": 140.4968, "eval_samples_per_second": 1.238, "eval_steps_per_second": 0.157, "step": 155 }, { "epoch": 0.61, "learning_rate": 0.00019203065134099618, "loss": 1.3584, "step": 160 }, { "epoch": 0.61, "eval_accuracy": 0.4425287356321839, "eval_loss": 1.4962925910949707, "eval_runtime": 140.6072, "eval_samples_per_second": 1.237, "eval_steps_per_second": 0.156, "step": 160 }, { "epoch": 0.63, "learning_rate": 0.00019177522349936145, "loss": 1.7123, "step": 165 }, { "epoch": 0.63, "eval_accuracy": 0.42528735632183906, "eval_loss": 1.4932773113250732, "eval_runtime": 137.6936, "eval_samples_per_second": 1.264, "eval_steps_per_second": 0.16, "step": 165 }, { "epoch": 0.65, "learning_rate": 0.0001915197956577267, "loss": 1.123, "step": 170 }, { "epoch": 0.65, "eval_accuracy": 0.45977011494252873, "eval_loss": 1.396934986114502, "eval_runtime": 137.8732, "eval_samples_per_second": 1.262, "eval_steps_per_second": 0.16, "step": 170 }, { "epoch": 0.67, "learning_rate": 0.00019126436781609197, "loss": 1.3548, "step": 175 }, { "epoch": 0.67, "eval_accuracy": 0.46551724137931033, "eval_loss": 1.5320322513580322, "eval_runtime": 133.0083, "eval_samples_per_second": 1.308, "eval_steps_per_second": 0.165, "step": 175 }, { "epoch": 0.69, "learning_rate": 0.00019100893997445724, "loss": 1.7216, "step": 180 }, { "epoch": 0.69, "eval_accuracy": 0.39655172413793105, "eval_loss": 1.4706974029541016, "eval_runtime": 134.1308, "eval_samples_per_second": 1.297, "eval_steps_per_second": 0.164, "step": 180 }, { "epoch": 0.71, "learning_rate": 0.0001907535121328225, "loss": 1.3303, "step": 185 }, { "epoch": 0.71, "eval_accuracy": 0.39655172413793105, "eval_loss": 1.4342669248580933, "eval_runtime": 133.9911, "eval_samples_per_second": 1.299, "eval_steps_per_second": 0.164, "step": 185 }, { "epoch": 0.73, "learning_rate": 0.00019049808429118774, "loss": 1.4668, "step": 190 }, { "epoch": 0.73, "eval_accuracy": 0.4425287356321839, "eval_loss": 1.375001311302185, "eval_runtime": 133.25, "eval_samples_per_second": 1.306, "eval_steps_per_second": 0.165, "step": 190 }, { "epoch": 0.75, "learning_rate": 0.00019024265644955303, "loss": 1.5037, "step": 195 }, { "epoch": 0.75, "eval_accuracy": 0.4367816091954023, "eval_loss": 1.3593645095825195, "eval_runtime": 133.4768, "eval_samples_per_second": 1.304, "eval_steps_per_second": 0.165, "step": 195 }, { "epoch": 0.77, "learning_rate": 0.00018998722860791826, "loss": 1.2836, "step": 200 }, { "epoch": 0.77, "eval_accuracy": 0.4827586206896552, "eval_loss": 1.3348760604858398, "eval_runtime": 133.1452, "eval_samples_per_second": 1.307, "eval_steps_per_second": 0.165, "step": 200 }, { "epoch": 0.79, "learning_rate": 0.00018973180076628355, "loss": 1.5183, "step": 205 }, { "epoch": 0.79, "eval_accuracy": 0.3275862068965517, "eval_loss": 1.6030648946762085, "eval_runtime": 133.0355, "eval_samples_per_second": 1.308, "eval_steps_per_second": 0.165, "step": 205 }, { "epoch": 0.8, "learning_rate": 0.0001894763729246488, "loss": 1.4127, "step": 210 }, { "epoch": 0.8, "eval_accuracy": 0.5172413793103449, "eval_loss": 1.4263323545455933, "eval_runtime": 133.148, "eval_samples_per_second": 1.307, "eval_steps_per_second": 0.165, "step": 210 }, { "epoch": 0.82, "learning_rate": 0.00018922094508301408, "loss": 1.1152, "step": 215 }, { "epoch": 0.82, "eval_accuracy": 0.4540229885057471, "eval_loss": 1.3836369514465332, "eval_runtime": 136.7056, "eval_samples_per_second": 1.273, "eval_steps_per_second": 0.161, "step": 215 }, { "epoch": 0.84, "learning_rate": 0.00018896551724137932, "loss": 1.5974, "step": 220 }, { "epoch": 0.84, "eval_accuracy": 0.47701149425287354, "eval_loss": 1.433830738067627, "eval_runtime": 132.8306, "eval_samples_per_second": 1.31, "eval_steps_per_second": 0.166, "step": 220 }, { "epoch": 0.86, "learning_rate": 0.00018871008939974458, "loss": 1.6355, "step": 225 }, { "epoch": 0.86, "eval_accuracy": 0.5632183908045977, "eval_loss": 1.3150310516357422, "eval_runtime": 133.4297, "eval_samples_per_second": 1.304, "eval_steps_per_second": 0.165, "step": 225 }, { "epoch": 0.88, "learning_rate": 0.00018845466155810984, "loss": 1.3566, "step": 230 }, { "epoch": 0.88, "eval_accuracy": 0.4482758620689655, "eval_loss": 1.5567545890808105, "eval_runtime": 132.1689, "eval_samples_per_second": 1.316, "eval_steps_per_second": 0.166, "step": 230 }, { "epoch": 0.9, "learning_rate": 0.0001881992337164751, "loss": 1.9474, "step": 235 }, { "epoch": 0.9, "eval_accuracy": 0.47701149425287354, "eval_loss": 1.5523122549057007, "eval_runtime": 132.1649, "eval_samples_per_second": 1.317, "eval_steps_per_second": 0.166, "step": 235 }, { "epoch": 0.92, "learning_rate": 0.00018794380587484037, "loss": 1.0851, "step": 240 }, { "epoch": 0.92, "eval_accuracy": 0.47701149425287354, "eval_loss": 1.5121805667877197, "eval_runtime": 131.6317, "eval_samples_per_second": 1.322, "eval_steps_per_second": 0.167, "step": 240 }, { "epoch": 0.94, "learning_rate": 0.00018768837803320563, "loss": 1.1129, "step": 245 }, { "epoch": 0.94, "eval_accuracy": 0.41954022988505746, "eval_loss": 1.6791616678237915, "eval_runtime": 131.7928, "eval_samples_per_second": 1.32, "eval_steps_per_second": 0.167, "step": 245 }, { "epoch": 0.96, "learning_rate": 0.0001874329501915709, "loss": 0.9682, "step": 250 }, { "epoch": 0.96, "eval_accuracy": 0.4827586206896552, "eval_loss": 1.5817574262619019, "eval_runtime": 131.5171, "eval_samples_per_second": 1.323, "eval_steps_per_second": 0.167, "step": 250 }, { "epoch": 0.98, "learning_rate": 0.00018717752234993616, "loss": 1.9887, "step": 255 }, { "epoch": 0.98, "eval_accuracy": 0.5402298850574713, "eval_loss": 1.4138919115066528, "eval_runtime": 131.7638, "eval_samples_per_second": 1.321, "eval_steps_per_second": 0.167, "step": 255 }, { "epoch": 1.0, "learning_rate": 0.0001869220945083014, "loss": 1.3416, "step": 260 }, { "epoch": 1.0, "eval_accuracy": 0.46551724137931033, "eval_loss": 1.4067350625991821, "eval_runtime": 131.7587, "eval_samples_per_second": 1.321, "eval_steps_per_second": 0.167, "step": 260 }, { "epoch": 1.02, "learning_rate": 0.0001866666666666667, "loss": 1.4848, "step": 265 }, { "epoch": 1.02, "eval_accuracy": 0.4425287356321839, "eval_loss": 1.7022287845611572, "eval_runtime": 131.6479, "eval_samples_per_second": 1.322, "eval_steps_per_second": 0.167, "step": 265 }, { "epoch": 1.03, "learning_rate": 0.00018641123882503192, "loss": 1.1562, "step": 270 }, { "epoch": 1.03, "eval_accuracy": 0.4540229885057471, "eval_loss": 1.6273655891418457, "eval_runtime": 134.6063, "eval_samples_per_second": 1.293, "eval_steps_per_second": 0.163, "step": 270 }, { "epoch": 1.05, "learning_rate": 0.0001861558109833972, "loss": 1.3024, "step": 275 }, { "epoch": 1.05, "eval_accuracy": 0.41954022988505746, "eval_loss": 1.60133695602417, "eval_runtime": 131.5705, "eval_samples_per_second": 1.322, "eval_steps_per_second": 0.167, "step": 275 }, { "epoch": 1.07, "learning_rate": 0.00018590038314176245, "loss": 1.2188, "step": 280 }, { "epoch": 1.07, "eval_accuracy": 0.5172413793103449, "eval_loss": 1.4648607969284058, "eval_runtime": 131.6668, "eval_samples_per_second": 1.322, "eval_steps_per_second": 0.167, "step": 280 }, { "epoch": 1.09, "learning_rate": 0.00018564495530012774, "loss": 1.5389, "step": 285 }, { "epoch": 1.09, "eval_accuracy": 0.4540229885057471, "eval_loss": 1.4482660293579102, "eval_runtime": 131.5739, "eval_samples_per_second": 1.322, "eval_steps_per_second": 0.167, "step": 285 }, { "epoch": 1.11, "learning_rate": 0.00018538952745849298, "loss": 1.4023, "step": 290 }, { "epoch": 1.11, "eval_accuracy": 0.5114942528735632, "eval_loss": 1.3087146282196045, "eval_runtime": 134.5049, "eval_samples_per_second": 1.294, "eval_steps_per_second": 0.164, "step": 290 }, { "epoch": 1.13, "learning_rate": 0.00018513409961685824, "loss": 1.3401, "step": 295 }, { "epoch": 1.13, "eval_accuracy": 0.4942528735632184, "eval_loss": 1.3514604568481445, "eval_runtime": 133.6657, "eval_samples_per_second": 1.302, "eval_steps_per_second": 0.165, "step": 295 }, { "epoch": 1.15, "learning_rate": 0.0001848786717752235, "loss": 1.6219, "step": 300 }, { "epoch": 1.15, "eval_accuracy": 0.45977011494252873, "eval_loss": 1.301418423652649, "eval_runtime": 137.1049, "eval_samples_per_second": 1.269, "eval_steps_per_second": 0.16, "step": 300 }, { "epoch": 1.17, "learning_rate": 0.00018462324393358877, "loss": 1.2755, "step": 305 }, { "epoch": 1.17, "eval_accuracy": 0.4942528735632184, "eval_loss": 1.3619904518127441, "eval_runtime": 133.0134, "eval_samples_per_second": 1.308, "eval_steps_per_second": 0.165, "step": 305 }, { "epoch": 1.19, "learning_rate": 0.00018436781609195403, "loss": 1.2092, "step": 310 }, { "epoch": 1.19, "eval_accuracy": 0.5689655172413793, "eval_loss": 1.1019761562347412, "eval_runtime": 136.2481, "eval_samples_per_second": 1.277, "eval_steps_per_second": 0.161, "step": 310 }, { "epoch": 1.21, "learning_rate": 0.0001841123882503193, "loss": 1.0885, "step": 315 }, { "epoch": 1.21, "eval_accuracy": 0.5229885057471264, "eval_loss": 1.226359248161316, "eval_runtime": 132.4179, "eval_samples_per_second": 1.314, "eval_steps_per_second": 0.166, "step": 315 }, { "epoch": 1.23, "learning_rate": 0.00018385696040868456, "loss": 1.7791, "step": 320 }, { "epoch": 1.23, "eval_accuracy": 0.4885057471264368, "eval_loss": 1.3002170324325562, "eval_runtime": 132.5532, "eval_samples_per_second": 1.313, "eval_steps_per_second": 0.166, "step": 320 }, { "epoch": 1.25, "learning_rate": 0.00018360153256704982, "loss": 1.1917, "step": 325 }, { "epoch": 1.25, "eval_accuracy": 0.46551724137931033, "eval_loss": 1.3063517808914185, "eval_runtime": 136.0253, "eval_samples_per_second": 1.279, "eval_steps_per_second": 0.162, "step": 325 }, { "epoch": 1.26, "learning_rate": 0.00018334610472541506, "loss": 1.2198, "step": 330 }, { "epoch": 1.26, "eval_accuracy": 0.5, "eval_loss": 1.2917166948318481, "eval_runtime": 132.7461, "eval_samples_per_second": 1.311, "eval_steps_per_second": 0.166, "step": 330 }, { "epoch": 1.28, "learning_rate": 0.00018309067688378035, "loss": 1.1334, "step": 335 }, { "epoch": 1.28, "eval_accuracy": 0.4827586206896552, "eval_loss": 1.4044328927993774, "eval_runtime": 134.1548, "eval_samples_per_second": 1.297, "eval_steps_per_second": 0.164, "step": 335 }, { "epoch": 1.3, "learning_rate": 0.00018283524904214558, "loss": 0.7824, "step": 340 }, { "epoch": 1.3, "eval_accuracy": 0.5344827586206896, "eval_loss": 1.2563246488571167, "eval_runtime": 134.0894, "eval_samples_per_second": 1.298, "eval_steps_per_second": 0.164, "step": 340 }, { "epoch": 1.32, "learning_rate": 0.00018257982120051087, "loss": 1.5005, "step": 345 }, { "epoch": 1.32, "eval_accuracy": 0.5402298850574713, "eval_loss": 1.3932344913482666, "eval_runtime": 132.8468, "eval_samples_per_second": 1.31, "eval_steps_per_second": 0.166, "step": 345 }, { "epoch": 1.34, "learning_rate": 0.0001823243933588761, "loss": 1.3661, "step": 350 }, { "epoch": 1.34, "eval_accuracy": 0.5057471264367817, "eval_loss": 1.5287795066833496, "eval_runtime": 133.3154, "eval_samples_per_second": 1.305, "eval_steps_per_second": 0.165, "step": 350 }, { "epoch": 1.36, "learning_rate": 0.0001820689655172414, "loss": 1.0608, "step": 355 }, { "epoch": 1.36, "eval_accuracy": 0.4827586206896552, "eval_loss": 1.6409446001052856, "eval_runtime": 136.3939, "eval_samples_per_second": 1.276, "eval_steps_per_second": 0.161, "step": 355 }, { "epoch": 1.38, "learning_rate": 0.00018181353767560664, "loss": 1.3781, "step": 360 }, { "epoch": 1.38, "eval_accuracy": 0.41379310344827586, "eval_loss": 1.4301259517669678, "eval_runtime": 132.5893, "eval_samples_per_second": 1.312, "eval_steps_per_second": 0.166, "step": 360 }, { "epoch": 1.4, "learning_rate": 0.0001815581098339719, "loss": 1.3579, "step": 365 }, { "epoch": 1.4, "eval_accuracy": 0.47126436781609193, "eval_loss": 1.3473687171936035, "eval_runtime": 135.8729, "eval_samples_per_second": 1.281, "eval_steps_per_second": 0.162, "step": 365 }, { "epoch": 1.42, "learning_rate": 0.00018130268199233716, "loss": 1.536, "step": 370 }, { "epoch": 1.42, "eval_accuracy": 0.43103448275862066, "eval_loss": 1.419403076171875, "eval_runtime": 133.0684, "eval_samples_per_second": 1.308, "eval_steps_per_second": 0.165, "step": 370 }, { "epoch": 1.44, "learning_rate": 0.00018104725415070243, "loss": 1.3282, "step": 375 }, { "epoch": 1.44, "eval_accuracy": 0.5057471264367817, "eval_loss": 1.365115761756897, "eval_runtime": 132.7207, "eval_samples_per_second": 1.311, "eval_steps_per_second": 0.166, "step": 375 }, { "epoch": 1.46, "learning_rate": 0.0001807918263090677, "loss": 1.4472, "step": 380 }, { "epoch": 1.46, "eval_accuracy": 0.5402298850574713, "eval_loss": 1.1580694913864136, "eval_runtime": 133.2179, "eval_samples_per_second": 1.306, "eval_steps_per_second": 0.165, "step": 380 }, { "epoch": 1.48, "learning_rate": 0.00018053639846743295, "loss": 1.3804, "step": 385 }, { "epoch": 1.48, "eval_accuracy": 0.43103448275862066, "eval_loss": 1.3844115734100342, "eval_runtime": 131.9177, "eval_samples_per_second": 1.319, "eval_steps_per_second": 0.167, "step": 385 }, { "epoch": 1.49, "learning_rate": 0.00018028097062579822, "loss": 1.3419, "step": 390 }, { "epoch": 1.49, "eval_accuracy": 0.5114942528735632, "eval_loss": 1.1873112916946411, "eval_runtime": 131.4991, "eval_samples_per_second": 1.323, "eval_steps_per_second": 0.167, "step": 390 }, { "epoch": 1.51, "learning_rate": 0.00018002554278416348, "loss": 0.9519, "step": 395 }, { "epoch": 1.51, "eval_accuracy": 0.5402298850574713, "eval_loss": 1.1380841732025146, "eval_runtime": 131.7687, "eval_samples_per_second": 1.32, "eval_steps_per_second": 0.167, "step": 395 }, { "epoch": 1.53, "learning_rate": 0.00017977011494252874, "loss": 0.9128, "step": 400 }, { "epoch": 1.53, "eval_accuracy": 0.47126436781609193, "eval_loss": 1.3454612493515015, "eval_runtime": 132.2394, "eval_samples_per_second": 1.316, "eval_steps_per_second": 0.166, "step": 400 }, { "epoch": 1.55, "learning_rate": 0.000179514687100894, "loss": 1.4765, "step": 405 }, { "epoch": 1.55, "eval_accuracy": 0.4885057471264368, "eval_loss": 1.3226633071899414, "eval_runtime": 131.5246, "eval_samples_per_second": 1.323, "eval_steps_per_second": 0.167, "step": 405 }, { "epoch": 1.57, "learning_rate": 0.00017925925925925927, "loss": 1.3164, "step": 410 }, { "epoch": 1.57, "eval_accuracy": 0.5344827586206896, "eval_loss": 1.489040732383728, "eval_runtime": 131.6528, "eval_samples_per_second": 1.322, "eval_steps_per_second": 0.167, "step": 410 }, { "epoch": 1.59, "learning_rate": 0.00017900383141762453, "loss": 1.1459, "step": 415 }, { "epoch": 1.59, "eval_accuracy": 0.603448275862069, "eval_loss": 0.9928140044212341, "eval_runtime": 132.4729, "eval_samples_per_second": 1.313, "eval_steps_per_second": 0.166, "step": 415 }, { "epoch": 1.61, "learning_rate": 0.0001787484035759898, "loss": 0.8739, "step": 420 }, { "epoch": 1.61, "eval_accuracy": 0.5459770114942529, "eval_loss": 1.1961568593978882, "eval_runtime": 131.6996, "eval_samples_per_second": 1.321, "eval_steps_per_second": 0.167, "step": 420 }, { "epoch": 1.63, "learning_rate": 0.00017849297573435506, "loss": 1.0313, "step": 425 }, { "epoch": 1.63, "eval_accuracy": 0.5229885057471264, "eval_loss": 1.2308257818222046, "eval_runtime": 131.6254, "eval_samples_per_second": 1.322, "eval_steps_per_second": 0.167, "step": 425 }, { "epoch": 1.65, "learning_rate": 0.00017823754789272032, "loss": 1.1317, "step": 430 }, { "epoch": 1.65, "eval_accuracy": 0.5459770114942529, "eval_loss": 1.2264448404312134, "eval_runtime": 131.6761, "eval_samples_per_second": 1.321, "eval_steps_per_second": 0.167, "step": 430 }, { "epoch": 1.67, "learning_rate": 0.0001779821200510856, "loss": 1.022, "step": 435 }, { "epoch": 1.67, "eval_accuracy": 0.5919540229885057, "eval_loss": 1.1023842096328735, "eval_runtime": 131.5135, "eval_samples_per_second": 1.323, "eval_steps_per_second": 0.167, "step": 435 }, { "epoch": 1.69, "learning_rate": 0.00017772669220945085, "loss": 0.9706, "step": 440 }, { "epoch": 1.69, "eval_accuracy": 0.5517241379310345, "eval_loss": 1.372861623764038, "eval_runtime": 131.6429, "eval_samples_per_second": 1.322, "eval_steps_per_second": 0.167, "step": 440 }, { "epoch": 1.7, "learning_rate": 0.00017747126436781609, "loss": 1.7438, "step": 445 }, { "epoch": 1.7, "eval_accuracy": 0.5, "eval_loss": 1.346054196357727, "eval_runtime": 131.6993, "eval_samples_per_second": 1.321, "eval_steps_per_second": 0.167, "step": 445 }, { "epoch": 1.72, "learning_rate": 0.00017721583652618138, "loss": 1.6391, "step": 450 }, { "epoch": 1.72, "eval_accuracy": 0.603448275862069, "eval_loss": 1.139776587486267, "eval_runtime": 131.5962, "eval_samples_per_second": 1.322, "eval_steps_per_second": 0.167, "step": 450 }, { "epoch": 1.74, "learning_rate": 0.0001769604086845466, "loss": 0.824, "step": 455 }, { "epoch": 1.74, "eval_accuracy": 0.45977011494252873, "eval_loss": 1.4202344417572021, "eval_runtime": 131.5281, "eval_samples_per_second": 1.323, "eval_steps_per_second": 0.167, "step": 455 }, { "epoch": 1.76, "learning_rate": 0.0001767049808429119, "loss": 1.6423, "step": 460 }, { "epoch": 1.76, "eval_accuracy": 0.5574712643678161, "eval_loss": 1.1726796627044678, "eval_runtime": 131.5031, "eval_samples_per_second": 1.323, "eval_steps_per_second": 0.167, "step": 460 }, { "epoch": 1.78, "learning_rate": 0.00017644955300127714, "loss": 1.0754, "step": 465 }, { "epoch": 1.78, "eval_accuracy": 0.5287356321839081, "eval_loss": 1.1190135478973389, "eval_runtime": 131.6364, "eval_samples_per_second": 1.322, "eval_steps_per_second": 0.167, "step": 465 }, { "epoch": 1.8, "learning_rate": 0.00017619412515964243, "loss": 1.0556, "step": 470 }, { "epoch": 1.8, "eval_accuracy": 0.45977011494252873, "eval_loss": 1.3851169347763062, "eval_runtime": 132.8284, "eval_samples_per_second": 1.31, "eval_steps_per_second": 0.166, "step": 470 }, { "epoch": 1.82, "learning_rate": 0.00017593869731800767, "loss": 1.0506, "step": 475 }, { "epoch": 1.82, "eval_accuracy": 0.4367816091954023, "eval_loss": 1.5636210441589355, "eval_runtime": 131.5546, "eval_samples_per_second": 1.323, "eval_steps_per_second": 0.167, "step": 475 }, { "epoch": 1.84, "learning_rate": 0.00017568326947637293, "loss": 1.6747, "step": 480 }, { "epoch": 1.84, "eval_accuracy": 0.47701149425287354, "eval_loss": 1.355259895324707, "eval_runtime": 131.7784, "eval_samples_per_second": 1.32, "eval_steps_per_second": 0.167, "step": 480 }, { "epoch": 1.86, "learning_rate": 0.0001754278416347382, "loss": 1.3546, "step": 485 }, { "epoch": 1.86, "eval_accuracy": 0.5172413793103449, "eval_loss": 1.2008614540100098, "eval_runtime": 131.685, "eval_samples_per_second": 1.321, "eval_steps_per_second": 0.167, "step": 485 }, { "epoch": 1.88, "learning_rate": 0.00017517241379310346, "loss": 1.1251, "step": 490 }, { "epoch": 1.88, "eval_accuracy": 0.632183908045977, "eval_loss": 1.0508471727371216, "eval_runtime": 134.9935, "eval_samples_per_second": 1.289, "eval_steps_per_second": 0.163, "step": 490 }, { "epoch": 1.9, "learning_rate": 0.00017491698595146872, "loss": 1.1712, "step": 495 }, { "epoch": 1.9, "eval_accuracy": 0.5747126436781609, "eval_loss": 1.0730408430099487, "eval_runtime": 131.6035, "eval_samples_per_second": 1.322, "eval_steps_per_second": 0.167, "step": 495 }, { "epoch": 1.92, "learning_rate": 0.00017466155810983398, "loss": 1.1422, "step": 500 }, { "epoch": 1.92, "eval_accuracy": 0.6551724137931034, "eval_loss": 0.9529359936714172, "eval_runtime": 131.4783, "eval_samples_per_second": 1.323, "eval_steps_per_second": 0.167, "step": 500 }, { "epoch": 1.93, "learning_rate": 0.00017440613026819925, "loss": 0.7611, "step": 505 }, { "epoch": 1.93, "eval_accuracy": 0.6264367816091954, "eval_loss": 0.8788040280342102, "eval_runtime": 131.3992, "eval_samples_per_second": 1.324, "eval_steps_per_second": 0.167, "step": 505 }, { "epoch": 1.95, "learning_rate": 0.0001741507024265645, "loss": 0.8955, "step": 510 }, { "epoch": 1.95, "eval_accuracy": 0.5919540229885057, "eval_loss": 0.9696133732795715, "eval_runtime": 131.6903, "eval_samples_per_second": 1.321, "eval_steps_per_second": 0.167, "step": 510 }, { "epoch": 1.97, "learning_rate": 0.00017389527458492975, "loss": 1.1928, "step": 515 }, { "epoch": 1.97, "eval_accuracy": 0.5977011494252874, "eval_loss": 0.977599561214447, "eval_runtime": 131.6782, "eval_samples_per_second": 1.321, "eval_steps_per_second": 0.167, "step": 515 }, { "epoch": 1.99, "learning_rate": 0.00017363984674329504, "loss": 1.0149, "step": 520 }, { "epoch": 1.99, "eval_accuracy": 0.5172413793103449, "eval_loss": 1.3859111070632935, "eval_runtime": 134.5921, "eval_samples_per_second": 1.293, "eval_steps_per_second": 0.163, "step": 520 }, { "epoch": 2.01, "learning_rate": 0.00017338441890166027, "loss": 1.3012, "step": 525 }, { "epoch": 2.01, "eval_accuracy": 0.5747126436781609, "eval_loss": 1.179227352142334, "eval_runtime": 131.5454, "eval_samples_per_second": 1.323, "eval_steps_per_second": 0.167, "step": 525 }, { "epoch": 2.03, "learning_rate": 0.00017312899106002556, "loss": 1.1046, "step": 530 }, { "epoch": 2.03, "eval_accuracy": 0.5402298850574713, "eval_loss": 1.3357328176498413, "eval_runtime": 135.5522, "eval_samples_per_second": 1.284, "eval_steps_per_second": 0.162, "step": 530 }, { "epoch": 2.05, "learning_rate": 0.0001728735632183908, "loss": 0.818, "step": 535 }, { "epoch": 2.05, "eval_accuracy": 0.5402298850574713, "eval_loss": 1.2086304426193237, "eval_runtime": 131.7474, "eval_samples_per_second": 1.321, "eval_steps_per_second": 0.167, "step": 535 }, { "epoch": 2.07, "learning_rate": 0.0001726181353767561, "loss": 0.9141, "step": 540 }, { "epoch": 2.07, "eval_accuracy": 0.5229885057471264, "eval_loss": 1.3623217344284058, "eval_runtime": 131.7011, "eval_samples_per_second": 1.321, "eval_steps_per_second": 0.167, "step": 540 }, { "epoch": 2.09, "learning_rate": 0.00017236270753512133, "loss": 1.3053, "step": 545 }, { "epoch": 2.09, "eval_accuracy": 0.5919540229885057, "eval_loss": 1.2260451316833496, "eval_runtime": 131.6132, "eval_samples_per_second": 1.322, "eval_steps_per_second": 0.167, "step": 545 }, { "epoch": 2.11, "learning_rate": 0.0001721072796934866, "loss": 0.8546, "step": 550 }, { "epoch": 2.11, "eval_accuracy": 0.5919540229885057, "eval_loss": 1.1196646690368652, "eval_runtime": 131.5362, "eval_samples_per_second": 1.323, "eval_steps_per_second": 0.167, "step": 550 }, { "epoch": 2.13, "learning_rate": 0.00017185185185185185, "loss": 1.0537, "step": 555 }, { "epoch": 2.13, "eval_accuracy": 0.5229885057471264, "eval_loss": 1.2455346584320068, "eval_runtime": 131.6252, "eval_samples_per_second": 1.322, "eval_steps_per_second": 0.167, "step": 555 }, { "epoch": 2.15, "learning_rate": 0.00017159642401021712, "loss": 1.1647, "step": 560 }, { "epoch": 2.15, "eval_accuracy": 0.5229885057471264, "eval_loss": 1.205663800239563, "eval_runtime": 131.6224, "eval_samples_per_second": 1.322, "eval_steps_per_second": 0.167, "step": 560 }, { "epoch": 2.16, "learning_rate": 0.00017134099616858238, "loss": 1.3766, "step": 565 }, { "epoch": 2.16, "eval_accuracy": 0.6149425287356322, "eval_loss": 1.0733550786972046, "eval_runtime": 131.7488, "eval_samples_per_second": 1.321, "eval_steps_per_second": 0.167, "step": 565 }, { "epoch": 2.18, "learning_rate": 0.00017108556832694764, "loss": 1.0225, "step": 570 }, { "epoch": 2.18, "eval_accuracy": 0.5689655172413793, "eval_loss": 1.123944878578186, "eval_runtime": 132.6409, "eval_samples_per_second": 1.312, "eval_steps_per_second": 0.166, "step": 570 }, { "epoch": 2.2, "learning_rate": 0.0001708301404853129, "loss": 1.6453, "step": 575 }, { "epoch": 2.2, "eval_accuracy": 0.5229885057471264, "eval_loss": 1.2486422061920166, "eval_runtime": 131.72, "eval_samples_per_second": 1.321, "eval_steps_per_second": 0.167, "step": 575 }, { "epoch": 2.22, "learning_rate": 0.00017057471264367817, "loss": 1.0113, "step": 580 }, { "epoch": 2.22, "eval_accuracy": 0.5459770114942529, "eval_loss": 1.1964409351348877, "eval_runtime": 131.7543, "eval_samples_per_second": 1.321, "eval_steps_per_second": 0.167, "step": 580 }, { "epoch": 2.24, "learning_rate": 0.00017031928480204343, "loss": 1.0514, "step": 585 }, { "epoch": 2.24, "eval_accuracy": 0.5287356321839081, "eval_loss": 1.1057004928588867, "eval_runtime": 131.7576, "eval_samples_per_second": 1.321, "eval_steps_per_second": 0.167, "step": 585 }, { "epoch": 2.26, "learning_rate": 0.0001700638569604087, "loss": 0.9404, "step": 590 }, { "epoch": 2.26, "eval_accuracy": 0.5919540229885057, "eval_loss": 0.9724916219711304, "eval_runtime": 132.3, "eval_samples_per_second": 1.315, "eval_steps_per_second": 0.166, "step": 590 }, { "epoch": 2.28, "learning_rate": 0.00016980842911877396, "loss": 1.0388, "step": 595 }, { "epoch": 2.28, "eval_accuracy": 0.5517241379310345, "eval_loss": 1.1437703371047974, "eval_runtime": 131.5125, "eval_samples_per_second": 1.323, "eval_steps_per_second": 0.167, "step": 595 }, { "epoch": 2.3, "learning_rate": 0.00016955300127713922, "loss": 1.0182, "step": 600 }, { "epoch": 2.3, "eval_accuracy": 0.5919540229885057, "eval_loss": 1.0556267499923706, "eval_runtime": 131.6259, "eval_samples_per_second": 1.322, "eval_steps_per_second": 0.167, "step": 600 }, { "epoch": 2.32, "learning_rate": 0.0001692975734355045, "loss": 0.894, "step": 605 }, { "epoch": 2.32, "eval_accuracy": 0.4885057471264368, "eval_loss": 1.2667330503463745, "eval_runtime": 134.6245, "eval_samples_per_second": 1.292, "eval_steps_per_second": 0.163, "step": 605 }, { "epoch": 2.34, "learning_rate": 0.00016904214559386975, "loss": 0.8542, "step": 610 }, { "epoch": 2.34, "eval_accuracy": 0.5229885057471264, "eval_loss": 1.1101963520050049, "eval_runtime": 131.7211, "eval_samples_per_second": 1.321, "eval_steps_per_second": 0.167, "step": 610 }, { "epoch": 2.36, "learning_rate": 0.000168786717752235, "loss": 1.1322, "step": 615 }, { "epoch": 2.36, "eval_accuracy": 0.5919540229885057, "eval_loss": 1.0164850950241089, "eval_runtime": 131.7375, "eval_samples_per_second": 1.321, "eval_steps_per_second": 0.167, "step": 615 }, { "epoch": 2.38, "learning_rate": 0.00016853128991060025, "loss": 1.0992, "step": 620 }, { "epoch": 2.38, "eval_accuracy": 0.6264367816091954, "eval_loss": 1.0806705951690674, "eval_runtime": 131.6335, "eval_samples_per_second": 1.322, "eval_steps_per_second": 0.167, "step": 620 }, { "epoch": 2.39, "learning_rate": 0.00016827586206896554, "loss": 0.7137, "step": 625 }, { "epoch": 2.39, "eval_accuracy": 0.5919540229885057, "eval_loss": 1.1090716123580933, "eval_runtime": 135.3852, "eval_samples_per_second": 1.285, "eval_steps_per_second": 0.162, "step": 625 }, { "epoch": 2.41, "learning_rate": 0.00016802043422733078, "loss": 0.8266, "step": 630 }, { "epoch": 2.41, "eval_accuracy": 0.5517241379310345, "eval_loss": 1.1446123123168945, "eval_runtime": 131.8178, "eval_samples_per_second": 1.32, "eval_steps_per_second": 0.167, "step": 630 }, { "epoch": 2.43, "learning_rate": 0.00016776500638569607, "loss": 0.7162, "step": 635 }, { "epoch": 2.43, "eval_accuracy": 0.5574712643678161, "eval_loss": 1.2635241746902466, "eval_runtime": 131.6312, "eval_samples_per_second": 1.322, "eval_steps_per_second": 0.167, "step": 635 }, { "epoch": 2.45, "learning_rate": 0.0001675095785440613, "loss": 1.3462, "step": 640 }, { "epoch": 2.45, "eval_accuracy": 0.5632183908045977, "eval_loss": 1.2049150466918945, "eval_runtime": 131.4819, "eval_samples_per_second": 1.323, "eval_steps_per_second": 0.167, "step": 640 }, { "epoch": 2.47, "learning_rate": 0.0001672541507024266, "loss": 1.1599, "step": 645 }, { "epoch": 2.47, "eval_accuracy": 0.5459770114942529, "eval_loss": 1.0467811822891235, "eval_runtime": 131.4566, "eval_samples_per_second": 1.324, "eval_steps_per_second": 0.167, "step": 645 }, { "epoch": 2.49, "learning_rate": 0.00016699872286079183, "loss": 0.9418, "step": 650 }, { "epoch": 2.49, "eval_accuracy": 0.5862068965517241, "eval_loss": 1.0609161853790283, "eval_runtime": 131.6905, "eval_samples_per_second": 1.321, "eval_steps_per_second": 0.167, "step": 650 }, { "epoch": 2.51, "learning_rate": 0.0001667432950191571, "loss": 0.6358, "step": 655 }, { "epoch": 2.51, "eval_accuracy": 0.6264367816091954, "eval_loss": 1.0026150941848755, "eval_runtime": 131.8875, "eval_samples_per_second": 1.319, "eval_steps_per_second": 0.167, "step": 655 }, { "epoch": 2.53, "learning_rate": 0.00016648786717752236, "loss": 0.9866, "step": 660 }, { "epoch": 2.53, "eval_accuracy": 0.5287356321839081, "eval_loss": 1.2802002429962158, "eval_runtime": 131.604, "eval_samples_per_second": 1.322, "eval_steps_per_second": 0.167, "step": 660 }, { "epoch": 2.55, "learning_rate": 0.00016623243933588762, "loss": 0.7078, "step": 665 }, { "epoch": 2.55, "eval_accuracy": 0.5747126436781609, "eval_loss": 1.2605894804000854, "eval_runtime": 131.9489, "eval_samples_per_second": 1.319, "eval_steps_per_second": 0.167, "step": 665 }, { "epoch": 2.57, "learning_rate": 0.00016597701149425288, "loss": 1.317, "step": 670 }, { "epoch": 2.57, "eval_accuracy": 0.5747126436781609, "eval_loss": 1.24140465259552, "eval_runtime": 133.607, "eval_samples_per_second": 1.302, "eval_steps_per_second": 0.165, "step": 670 }, { "epoch": 2.59, "learning_rate": 0.00016572158365261815, "loss": 1.725, "step": 675 }, { "epoch": 2.59, "eval_accuracy": 0.6149425287356322, "eval_loss": 0.9985790252685547, "eval_runtime": 132.9682, "eval_samples_per_second": 1.309, "eval_steps_per_second": 0.165, "step": 675 }, { "epoch": 2.61, "learning_rate": 0.0001654661558109834, "loss": 1.1582, "step": 680 }, { "epoch": 2.61, "eval_accuracy": 0.5574712643678161, "eval_loss": 1.0938341617584229, "eval_runtime": 133.1611, "eval_samples_per_second": 1.307, "eval_steps_per_second": 0.165, "step": 680 }, { "epoch": 2.62, "learning_rate": 0.00016521072796934867, "loss": 0.9204, "step": 685 }, { "epoch": 2.62, "eval_accuracy": 0.5862068965517241, "eval_loss": 1.0171951055526733, "eval_runtime": 133.1482, "eval_samples_per_second": 1.307, "eval_steps_per_second": 0.165, "step": 685 }, { "epoch": 2.64, "learning_rate": 0.0001649553001277139, "loss": 0.8029, "step": 690 }, { "epoch": 2.64, "eval_accuracy": 0.6609195402298851, "eval_loss": 0.9562932252883911, "eval_runtime": 135.7074, "eval_samples_per_second": 1.282, "eval_steps_per_second": 0.162, "step": 690 }, { "epoch": 2.66, "learning_rate": 0.0001646998722860792, "loss": 1.0699, "step": 695 }, { "epoch": 2.66, "eval_accuracy": 0.6724137931034483, "eval_loss": 0.8641893863677979, "eval_runtime": 137.258, "eval_samples_per_second": 1.268, "eval_steps_per_second": 0.16, "step": 695 }, { "epoch": 2.68, "learning_rate": 0.00016444444444444444, "loss": 0.8689, "step": 700 }, { "epoch": 2.68, "eval_accuracy": 0.6666666666666666, "eval_loss": 0.8628016710281372, "eval_runtime": 136.3069, "eval_samples_per_second": 1.277, "eval_steps_per_second": 0.161, "step": 700 }, { "epoch": 2.7, "learning_rate": 0.00016418901660280973, "loss": 0.8868, "step": 705 }, { "epoch": 2.7, "eval_accuracy": 0.6379310344827587, "eval_loss": 0.924765944480896, "eval_runtime": 133.9642, "eval_samples_per_second": 1.299, "eval_steps_per_second": 0.164, "step": 705 }, { "epoch": 2.72, "learning_rate": 0.00016393358876117496, "loss": 1.1199, "step": 710 }, { "epoch": 2.72, "eval_accuracy": 0.6551724137931034, "eval_loss": 0.87518310546875, "eval_runtime": 134.5558, "eval_samples_per_second": 1.293, "eval_steps_per_second": 0.164, "step": 710 }, { "epoch": 2.74, "learning_rate": 0.00016367816091954025, "loss": 0.8855, "step": 715 }, { "epoch": 2.74, "eval_accuracy": 0.4942528735632184, "eval_loss": 1.2723013162612915, "eval_runtime": 133.0158, "eval_samples_per_second": 1.308, "eval_steps_per_second": 0.165, "step": 715 }, { "epoch": 2.76, "learning_rate": 0.0001634227330779055, "loss": 0.8273, "step": 720 }, { "epoch": 2.76, "eval_accuracy": 0.5804597701149425, "eval_loss": 1.177869200706482, "eval_runtime": 133.1085, "eval_samples_per_second": 1.307, "eval_steps_per_second": 0.165, "step": 720 }, { "epoch": 2.78, "learning_rate": 0.00016316730523627075, "loss": 1.1579, "step": 725 }, { "epoch": 2.78, "eval_accuracy": 0.6149425287356322, "eval_loss": 0.9605039358139038, "eval_runtime": 133.5111, "eval_samples_per_second": 1.303, "eval_steps_per_second": 0.165, "step": 725 }, { "epoch": 2.8, "learning_rate": 0.00016291187739463602, "loss": 1.2568, "step": 730 }, { "epoch": 2.8, "eval_accuracy": 0.6264367816091954, "eval_loss": 1.0269392728805542, "eval_runtime": 136.0303, "eval_samples_per_second": 1.279, "eval_steps_per_second": 0.162, "step": 730 }, { "epoch": 2.82, "learning_rate": 0.00016265644955300128, "loss": 1.4258, "step": 735 }, { "epoch": 2.82, "eval_accuracy": 0.5747126436781609, "eval_loss": 1.103205919265747, "eval_runtime": 133.1609, "eval_samples_per_second": 1.307, "eval_steps_per_second": 0.165, "step": 735 }, { "epoch": 2.84, "learning_rate": 0.00016240102171136654, "loss": 0.9129, "step": 740 }, { "epoch": 2.84, "eval_accuracy": 0.6551724137931034, "eval_loss": 0.9196251034736633, "eval_runtime": 132.8322, "eval_samples_per_second": 1.31, "eval_steps_per_second": 0.166, "step": 740 }, { "epoch": 2.85, "learning_rate": 0.0001621455938697318, "loss": 1.1291, "step": 745 }, { "epoch": 2.85, "eval_accuracy": 0.5804597701149425, "eval_loss": 1.008170485496521, "eval_runtime": 132.7388, "eval_samples_per_second": 1.311, "eval_steps_per_second": 0.166, "step": 745 }, { "epoch": 2.87, "learning_rate": 0.00016189016602809707, "loss": 0.679, "step": 750 }, { "epoch": 2.87, "eval_accuracy": 0.6206896551724138, "eval_loss": 0.9392971992492676, "eval_runtime": 133.0347, "eval_samples_per_second": 1.308, "eval_steps_per_second": 0.165, "step": 750 }, { "epoch": 2.89, "learning_rate": 0.00016163473818646233, "loss": 0.8524, "step": 755 }, { "epoch": 2.89, "eval_accuracy": 0.632183908045977, "eval_loss": 0.9525014162063599, "eval_runtime": 133.0788, "eval_samples_per_second": 1.307, "eval_steps_per_second": 0.165, "step": 755 }, { "epoch": 2.91, "learning_rate": 0.0001613793103448276, "loss": 0.8104, "step": 760 }, { "epoch": 2.91, "eval_accuracy": 0.5977011494252874, "eval_loss": 1.1698288917541504, "eval_runtime": 131.9602, "eval_samples_per_second": 1.319, "eval_steps_per_second": 0.167, "step": 760 }, { "epoch": 2.93, "learning_rate": 0.00016112388250319286, "loss": 0.8986, "step": 765 }, { "epoch": 2.93, "eval_accuracy": 0.6379310344827587, "eval_loss": 1.038144826889038, "eval_runtime": 131.7177, "eval_samples_per_second": 1.321, "eval_steps_per_second": 0.167, "step": 765 }, { "epoch": 2.95, "learning_rate": 0.00016086845466155812, "loss": 0.7805, "step": 770 }, { "epoch": 2.95, "eval_accuracy": 0.6781609195402298, "eval_loss": 0.8878708481788635, "eval_runtime": 131.7896, "eval_samples_per_second": 1.32, "eval_steps_per_second": 0.167, "step": 770 }, { "epoch": 2.97, "learning_rate": 0.0001606130268199234, "loss": 0.9715, "step": 775 }, { "epoch": 2.97, "eval_accuracy": 0.6724137931034483, "eval_loss": 0.8562762141227722, "eval_runtime": 132.4485, "eval_samples_per_second": 1.314, "eval_steps_per_second": 0.166, "step": 775 }, { "epoch": 2.99, "learning_rate": 0.00016035759897828865, "loss": 0.9432, "step": 780 }, { "epoch": 2.99, "eval_accuracy": 0.7011494252873564, "eval_loss": 0.8954753875732422, "eval_runtime": 132.4602, "eval_samples_per_second": 1.314, "eval_steps_per_second": 0.166, "step": 780 }, { "epoch": 3.01, "learning_rate": 0.0001601021711366539, "loss": 1.0624, "step": 785 }, { "epoch": 3.01, "eval_accuracy": 0.632183908045977, "eval_loss": 0.9727337956428528, "eval_runtime": 134.6432, "eval_samples_per_second": 1.292, "eval_steps_per_second": 0.163, "step": 785 }, { "epoch": 3.03, "learning_rate": 0.00015984674329501918, "loss": 0.9685, "step": 790 }, { "epoch": 3.03, "eval_accuracy": 0.632183908045977, "eval_loss": 1.0076591968536377, "eval_runtime": 131.6852, "eval_samples_per_second": 1.321, "eval_steps_per_second": 0.167, "step": 790 }, { "epoch": 3.05, "learning_rate": 0.0001595913154533844, "loss": 0.7053, "step": 795 }, { "epoch": 3.05, "eval_accuracy": 0.5862068965517241, "eval_loss": 1.068995475769043, "eval_runtime": 134.9079, "eval_samples_per_second": 1.29, "eval_steps_per_second": 0.163, "step": 795 }, { "epoch": 3.07, "learning_rate": 0.0001593358876117497, "loss": 0.7795, "step": 800 }, { "epoch": 3.07, "eval_accuracy": 0.6436781609195402, "eval_loss": 0.9434211254119873, "eval_runtime": 131.5148, "eval_samples_per_second": 1.323, "eval_steps_per_second": 0.167, "step": 800 }, { "epoch": 3.08, "learning_rate": 0.00015908045977011494, "loss": 0.7404, "step": 805 }, { "epoch": 3.08, "eval_accuracy": 0.6264367816091954, "eval_loss": 0.9875686764717102, "eval_runtime": 131.6597, "eval_samples_per_second": 1.322, "eval_steps_per_second": 0.167, "step": 805 }, { "epoch": 3.1, "learning_rate": 0.00015882503192848023, "loss": 0.6817, "step": 810 }, { "epoch": 3.1, "eval_accuracy": 0.6551724137931034, "eval_loss": 0.9223694801330566, "eval_runtime": 131.6839, "eval_samples_per_second": 1.321, "eval_steps_per_second": 0.167, "step": 810 }, { "epoch": 3.12, "learning_rate": 0.00015856960408684547, "loss": 0.4526, "step": 815 }, { "epoch": 3.12, "eval_accuracy": 0.6436781609195402, "eval_loss": 1.0036951303482056, "eval_runtime": 131.8156, "eval_samples_per_second": 1.32, "eval_steps_per_second": 0.167, "step": 815 }, { "epoch": 3.14, "learning_rate": 0.00015831417624521076, "loss": 0.8705, "step": 820 }, { "epoch": 3.14, "eval_accuracy": 0.5747126436781609, "eval_loss": 1.1535873413085938, "eval_runtime": 131.6451, "eval_samples_per_second": 1.322, "eval_steps_per_second": 0.167, "step": 820 }, { "epoch": 3.16, "learning_rate": 0.000158058748403576, "loss": 0.9392, "step": 825 }, { "epoch": 3.16, "eval_accuracy": 0.632183908045977, "eval_loss": 1.0609076023101807, "eval_runtime": 131.5131, "eval_samples_per_second": 1.323, "eval_steps_per_second": 0.167, "step": 825 }, { "epoch": 3.18, "learning_rate": 0.00015780332056194128, "loss": 1.0781, "step": 830 }, { "epoch": 3.18, "eval_accuracy": 0.6494252873563219, "eval_loss": 0.9658277034759521, "eval_runtime": 131.5389, "eval_samples_per_second": 1.323, "eval_steps_per_second": 0.167, "step": 830 }, { "epoch": 3.2, "learning_rate": 0.00015754789272030652, "loss": 0.7314, "step": 835 }, { "epoch": 3.2, "eval_accuracy": 0.6379310344827587, "eval_loss": 1.0462124347686768, "eval_runtime": 131.6181, "eval_samples_per_second": 1.322, "eval_steps_per_second": 0.167, "step": 835 }, { "epoch": 3.22, "learning_rate": 0.00015729246487867178, "loss": 0.8784, "step": 840 }, { "epoch": 3.22, "eval_accuracy": 0.6666666666666666, "eval_loss": 0.9317790269851685, "eval_runtime": 131.6835, "eval_samples_per_second": 1.321, "eval_steps_per_second": 0.167, "step": 840 }, { "epoch": 3.24, "learning_rate": 0.00015703703703703705, "loss": 1.361, "step": 845 }, { "epoch": 3.24, "eval_accuracy": 0.5862068965517241, "eval_loss": 1.1364516019821167, "eval_runtime": 131.5188, "eval_samples_per_second": 1.323, "eval_steps_per_second": 0.167, "step": 845 }, { "epoch": 3.26, "learning_rate": 0.0001567816091954023, "loss": 0.6983, "step": 850 }, { "epoch": 3.26, "eval_accuracy": 0.5689655172413793, "eval_loss": 1.2935972213745117, "eval_runtime": 131.8128, "eval_samples_per_second": 1.32, "eval_steps_per_second": 0.167, "step": 850 }, { "epoch": 3.28, "learning_rate": 0.00015652618135376757, "loss": 1.051, "step": 855 }, { "epoch": 3.28, "eval_accuracy": 0.5459770114942529, "eval_loss": 1.2600092887878418, "eval_runtime": 131.621, "eval_samples_per_second": 1.322, "eval_steps_per_second": 0.167, "step": 855 }, { "epoch": 3.3, "learning_rate": 0.00015627075351213284, "loss": 0.6664, "step": 860 }, { "epoch": 3.3, "eval_accuracy": 0.603448275862069, "eval_loss": 1.1138763427734375, "eval_runtime": 131.7652, "eval_samples_per_second": 1.321, "eval_steps_per_second": 0.167, "step": 860 }, { "epoch": 3.31, "learning_rate": 0.0001560153256704981, "loss": 1.077, "step": 865 }, { "epoch": 3.31, "eval_accuracy": 0.6091954022988506, "eval_loss": 1.1129592657089233, "eval_runtime": 131.519, "eval_samples_per_second": 1.323, "eval_steps_per_second": 0.167, "step": 865 }, { "epoch": 3.33, "learning_rate": 0.00015575989782886336, "loss": 1.2009, "step": 870 }, { "epoch": 3.33, "eval_accuracy": 0.6264367816091954, "eval_loss": 0.9561058282852173, "eval_runtime": 131.4719, "eval_samples_per_second": 1.323, "eval_steps_per_second": 0.167, "step": 870 }, { "epoch": 3.35, "learning_rate": 0.0001555044699872286, "loss": 0.8938, "step": 875 }, { "epoch": 3.35, "eval_accuracy": 0.6149425287356322, "eval_loss": 0.999401867389679, "eval_runtime": 134.2227, "eval_samples_per_second": 1.296, "eval_steps_per_second": 0.164, "step": 875 }, { "epoch": 3.37, "learning_rate": 0.0001552490421455939, "loss": 0.6466, "step": 880 }, { "epoch": 3.37, "eval_accuracy": 0.6724137931034483, "eval_loss": 0.9206087589263916, "eval_runtime": 131.6728, "eval_samples_per_second": 1.321, "eval_steps_per_second": 0.167, "step": 880 }, { "epoch": 3.39, "learning_rate": 0.00015499361430395913, "loss": 1.0424, "step": 885 }, { "epoch": 3.39, "eval_accuracy": 0.6264367816091954, "eval_loss": 0.9985089302062988, "eval_runtime": 135.0805, "eval_samples_per_second": 1.288, "eval_steps_per_second": 0.163, "step": 885 }, { "epoch": 3.41, "learning_rate": 0.00015473818646232442, "loss": 0.9582, "step": 890 }, { "epoch": 3.41, "eval_accuracy": 0.6896551724137931, "eval_loss": 0.9164769649505615, "eval_runtime": 134.7066, "eval_samples_per_second": 1.292, "eval_steps_per_second": 0.163, "step": 890 }, { "epoch": 3.43, "learning_rate": 0.00015448275862068965, "loss": 0.5003, "step": 895 }, { "epoch": 3.43, "eval_accuracy": 0.6609195402298851, "eval_loss": 0.906107485294342, "eval_runtime": 131.7569, "eval_samples_per_second": 1.321, "eval_steps_per_second": 0.167, "step": 895 }, { "epoch": 3.45, "learning_rate": 0.00015422733077905494, "loss": 0.5526, "step": 900 }, { "epoch": 3.45, "eval_accuracy": 0.6666666666666666, "eval_loss": 0.9473897218704224, "eval_runtime": 131.708, "eval_samples_per_second": 1.321, "eval_steps_per_second": 0.167, "step": 900 }, { "epoch": 3.47, "learning_rate": 0.00015397190293742018, "loss": 0.5755, "step": 905 }, { "epoch": 3.47, "eval_accuracy": 0.6896551724137931, "eval_loss": 0.8715148568153381, "eval_runtime": 131.756, "eval_samples_per_second": 1.321, "eval_steps_per_second": 0.167, "step": 905 }, { "epoch": 3.49, "learning_rate": 0.00015371647509578544, "loss": 1.1355, "step": 910 }, { "epoch": 3.49, "eval_accuracy": 0.6781609195402298, "eval_loss": 0.8548762798309326, "eval_runtime": 131.7286, "eval_samples_per_second": 1.321, "eval_steps_per_second": 0.167, "step": 910 }, { "epoch": 3.51, "learning_rate": 0.0001534610472541507, "loss": 1.3261, "step": 915 }, { "epoch": 3.51, "eval_accuracy": 0.5229885057471264, "eval_loss": 1.3770815134048462, "eval_runtime": 131.6422, "eval_samples_per_second": 1.322, "eval_steps_per_second": 0.167, "step": 915 }, { "epoch": 3.52, "learning_rate": 0.00015320561941251597, "loss": 1.3727, "step": 920 }, { "epoch": 3.52, "eval_accuracy": 0.6206896551724138, "eval_loss": 0.981780469417572, "eval_runtime": 131.4313, "eval_samples_per_second": 1.324, "eval_steps_per_second": 0.167, "step": 920 }, { "epoch": 3.54, "learning_rate": 0.00015295019157088123, "loss": 0.6203, "step": 925 }, { "epoch": 3.54, "eval_accuracy": 0.6551724137931034, "eval_loss": 0.9016448855400085, "eval_runtime": 131.6346, "eval_samples_per_second": 1.322, "eval_steps_per_second": 0.167, "step": 925 }, { "epoch": 3.56, "learning_rate": 0.0001526947637292465, "loss": 1.0178, "step": 930 }, { "epoch": 3.56, "eval_accuracy": 0.5919540229885057, "eval_loss": 0.9804055094718933, "eval_runtime": 131.7044, "eval_samples_per_second": 1.321, "eval_steps_per_second": 0.167, "step": 930 }, { "epoch": 3.58, "learning_rate": 0.00015243933588761176, "loss": 0.5602, "step": 935 }, { "epoch": 3.58, "eval_accuracy": 0.632183908045977, "eval_loss": 1.017970085144043, "eval_runtime": 131.7459, "eval_samples_per_second": 1.321, "eval_steps_per_second": 0.167, "step": 935 }, { "epoch": 3.6, "learning_rate": 0.00015218390804597702, "loss": 0.9365, "step": 940 }, { "epoch": 3.6, "eval_accuracy": 0.6379310344827587, "eval_loss": 0.9289452433586121, "eval_runtime": 135.6119, "eval_samples_per_second": 1.283, "eval_steps_per_second": 0.162, "step": 940 }, { "epoch": 3.62, "learning_rate": 0.00015192848020434226, "loss": 0.8331, "step": 945 }, { "epoch": 3.62, "eval_accuracy": 0.6091954022988506, "eval_loss": 1.1076228618621826, "eval_runtime": 135.5696, "eval_samples_per_second": 1.283, "eval_steps_per_second": 0.162, "step": 945 }, { "epoch": 3.64, "learning_rate": 0.00015167305236270755, "loss": 0.8512, "step": 950 }, { "epoch": 3.64, "eval_accuracy": 0.7011494252873564, "eval_loss": 0.8155642747879028, "eval_runtime": 131.7614, "eval_samples_per_second": 1.321, "eval_steps_per_second": 0.167, "step": 950 }, { "epoch": 3.66, "learning_rate": 0.00015141762452107279, "loss": 0.8797, "step": 955 }, { "epoch": 3.66, "eval_accuracy": 0.632183908045977, "eval_loss": 1.0940409898757935, "eval_runtime": 132.2455, "eval_samples_per_second": 1.316, "eval_steps_per_second": 0.166, "step": 955 }, { "epoch": 3.68, "learning_rate": 0.00015116219667943808, "loss": 0.914, "step": 960 }, { "epoch": 3.68, "eval_accuracy": 0.7011494252873564, "eval_loss": 0.8316068053245544, "eval_runtime": 131.4826, "eval_samples_per_second": 1.323, "eval_steps_per_second": 0.167, "step": 960 }, { "epoch": 3.7, "learning_rate": 0.0001509067688378033, "loss": 0.6969, "step": 965 }, { "epoch": 3.7, "eval_accuracy": 0.6839080459770115, "eval_loss": 0.8965338468551636, "eval_runtime": 131.5912, "eval_samples_per_second": 1.322, "eval_steps_per_second": 0.167, "step": 965 }, { "epoch": 3.72, "learning_rate": 0.0001506513409961686, "loss": 0.7973, "step": 970 }, { "epoch": 3.72, "eval_accuracy": 0.6436781609195402, "eval_loss": 0.9823299646377563, "eval_runtime": 131.6359, "eval_samples_per_second": 1.322, "eval_steps_per_second": 0.167, "step": 970 }, { "epoch": 3.74, "learning_rate": 0.00015039591315453384, "loss": 0.8665, "step": 975 }, { "epoch": 3.74, "eval_accuracy": 0.632183908045977, "eval_loss": 0.9927699565887451, "eval_runtime": 131.5757, "eval_samples_per_second": 1.322, "eval_steps_per_second": 0.167, "step": 975 }, { "epoch": 3.75, "learning_rate": 0.0001501404853128991, "loss": 0.8057, "step": 980 }, { "epoch": 3.75, "eval_accuracy": 0.6149425287356322, "eval_loss": 0.9669252634048462, "eval_runtime": 131.6483, "eval_samples_per_second": 1.322, "eval_steps_per_second": 0.167, "step": 980 }, { "epoch": 3.77, "learning_rate": 0.00014988505747126437, "loss": 0.8764, "step": 985 }, { "epoch": 3.77, "eval_accuracy": 0.5919540229885057, "eval_loss": 1.1127873659133911, "eval_runtime": 131.6238, "eval_samples_per_second": 1.322, "eval_steps_per_second": 0.167, "step": 985 }, { "epoch": 3.79, "learning_rate": 0.00014962962962962963, "loss": 0.9281, "step": 990 }, { "epoch": 3.79, "eval_accuracy": 0.6551724137931034, "eval_loss": 0.9737154245376587, "eval_runtime": 131.6838, "eval_samples_per_second": 1.321, "eval_steps_per_second": 0.167, "step": 990 }, { "epoch": 3.81, "learning_rate": 0.0001493742017879949, "loss": 0.7301, "step": 995 }, { "epoch": 3.81, "eval_accuracy": 0.6609195402298851, "eval_loss": 0.9511990547180176, "eval_runtime": 131.6945, "eval_samples_per_second": 1.321, "eval_steps_per_second": 0.167, "step": 995 }, { "epoch": 3.83, "learning_rate": 0.00014911877394636016, "loss": 0.5308, "step": 1000 }, { "epoch": 3.83, "eval_accuracy": 0.6149425287356322, "eval_loss": 1.1504030227661133, "eval_runtime": 131.6388, "eval_samples_per_second": 1.322, "eval_steps_per_second": 0.167, "step": 1000 }, { "epoch": 3.85, "learning_rate": 0.00014886334610472542, "loss": 0.6637, "step": 1005 }, { "epoch": 3.85, "eval_accuracy": 0.6149425287356322, "eval_loss": 1.2047678232192993, "eval_runtime": 131.9226, "eval_samples_per_second": 1.319, "eval_steps_per_second": 0.167, "step": 1005 }, { "epoch": 3.87, "learning_rate": 0.00014860791826309068, "loss": 0.6633, "step": 1010 }, { "epoch": 3.87, "eval_accuracy": 0.6264367816091954, "eval_loss": 1.1038755178451538, "eval_runtime": 131.5702, "eval_samples_per_second": 1.322, "eval_steps_per_second": 0.167, "step": 1010 }, { "epoch": 3.89, "learning_rate": 0.00014835249042145595, "loss": 1.0671, "step": 1015 }, { "epoch": 3.89, "eval_accuracy": 0.5862068965517241, "eval_loss": 1.1816108226776123, "eval_runtime": 131.3997, "eval_samples_per_second": 1.324, "eval_steps_per_second": 0.167, "step": 1015 }, { "epoch": 3.91, "learning_rate": 0.0001480970625798212, "loss": 0.7547, "step": 1020 }, { "epoch": 3.91, "eval_accuracy": 0.6264367816091954, "eval_loss": 0.9733718633651733, "eval_runtime": 131.6592, "eval_samples_per_second": 1.322, "eval_steps_per_second": 0.167, "step": 1020 }, { "epoch": 3.93, "learning_rate": 0.00014784163473818647, "loss": 1.3147, "step": 1025 }, { "epoch": 3.93, "eval_accuracy": 0.6781609195402298, "eval_loss": 0.8259694576263428, "eval_runtime": 132.7872, "eval_samples_per_second": 1.31, "eval_steps_per_second": 0.166, "step": 1025 }, { "epoch": 3.95, "learning_rate": 0.00014758620689655174, "loss": 1.0075, "step": 1030 }, { "epoch": 3.95, "eval_accuracy": 0.6609195402298851, "eval_loss": 0.8831397891044617, "eval_runtime": 131.4769, "eval_samples_per_second": 1.323, "eval_steps_per_second": 0.167, "step": 1030 }, { "epoch": 3.97, "learning_rate": 0.000147330779054917, "loss": 0.5925, "step": 1035 }, { "epoch": 3.97, "eval_accuracy": 0.6379310344827587, "eval_loss": 1.0289727449417114, "eval_runtime": 131.759, "eval_samples_per_second": 1.321, "eval_steps_per_second": 0.167, "step": 1035 }, { "epoch": 3.98, "learning_rate": 0.00014707535121328226, "loss": 0.8289, "step": 1040 }, { "epoch": 3.98, "eval_accuracy": 0.6724137931034483, "eval_loss": 0.9349720478057861, "eval_runtime": 133.5695, "eval_samples_per_second": 1.303, "eval_steps_per_second": 0.165, "step": 1040 }, { "epoch": 4.0, "learning_rate": 0.00014681992337164753, "loss": 1.0505, "step": 1045 }, { "epoch": 4.0, "eval_accuracy": 0.6609195402298851, "eval_loss": 0.9338142275810242, "eval_runtime": 135.7737, "eval_samples_per_second": 1.282, "eval_steps_per_second": 0.162, "step": 1045 }, { "epoch": 4.02, "learning_rate": 0.00014656449553001276, "loss": 0.329, "step": 1050 }, { "epoch": 4.02, "eval_accuracy": 0.6494252873563219, "eval_loss": 0.9524909853935242, "eval_runtime": 134.3827, "eval_samples_per_second": 1.295, "eval_steps_per_second": 0.164, "step": 1050 }, { "epoch": 4.04, "learning_rate": 0.00014630906768837805, "loss": 0.3039, "step": 1055 }, { "epoch": 4.04, "eval_accuracy": 0.6609195402298851, "eval_loss": 1.0120184421539307, "eval_runtime": 133.1888, "eval_samples_per_second": 1.306, "eval_steps_per_second": 0.165, "step": 1055 }, { "epoch": 4.06, "learning_rate": 0.0001460536398467433, "loss": 0.5585, "step": 1060 }, { "epoch": 4.06, "eval_accuracy": 0.6149425287356322, "eval_loss": 1.1452168226242065, "eval_runtime": 132.8053, "eval_samples_per_second": 1.31, "eval_steps_per_second": 0.166, "step": 1060 }, { "epoch": 4.08, "learning_rate": 0.00014579821200510858, "loss": 0.6825, "step": 1065 }, { "epoch": 4.08, "eval_accuracy": 0.6609195402298851, "eval_loss": 1.0479168891906738, "eval_runtime": 137.784, "eval_samples_per_second": 1.263, "eval_steps_per_second": 0.16, "step": 1065 }, { "epoch": 4.1, "learning_rate": 0.00014554278416347382, "loss": 0.5304, "step": 1070 }, { "epoch": 4.1, "eval_accuracy": 0.6494252873563219, "eval_loss": 1.1510097980499268, "eval_runtime": 133.2682, "eval_samples_per_second": 1.306, "eval_steps_per_second": 0.165, "step": 1070 }, { "epoch": 4.12, "learning_rate": 0.0001452873563218391, "loss": 1.0301, "step": 1075 }, { "epoch": 4.12, "eval_accuracy": 0.6954022988505747, "eval_loss": 0.9221396446228027, "eval_runtime": 134.0831, "eval_samples_per_second": 1.298, "eval_steps_per_second": 0.164, "step": 1075 }, { "epoch": 4.14, "learning_rate": 0.00014503192848020434, "loss": 0.3273, "step": 1080 }, { "epoch": 4.14, "eval_accuracy": 0.6551724137931034, "eval_loss": 0.9179468154907227, "eval_runtime": 136.8309, "eval_samples_per_second": 1.272, "eval_steps_per_second": 0.161, "step": 1080 }, { "epoch": 4.16, "learning_rate": 0.0001447765006385696, "loss": 0.7577, "step": 1085 }, { "epoch": 4.16, "eval_accuracy": 0.6091954022988506, "eval_loss": 1.1157200336456299, "eval_runtime": 132.9971, "eval_samples_per_second": 1.308, "eval_steps_per_second": 0.165, "step": 1085 }, { "epoch": 4.18, "learning_rate": 0.00014452107279693487, "loss": 1.0743, "step": 1090 }, { "epoch": 4.18, "eval_accuracy": 0.6666666666666666, "eval_loss": 0.9640018939971924, "eval_runtime": 133.1707, "eval_samples_per_second": 1.307, "eval_steps_per_second": 0.165, "step": 1090 }, { "epoch": 4.2, "learning_rate": 0.00014426564495530013, "loss": 0.7973, "step": 1095 }, { "epoch": 4.2, "eval_accuracy": 0.6264367816091954, "eval_loss": 1.0484741926193237, "eval_runtime": 133.7107, "eval_samples_per_second": 1.301, "eval_steps_per_second": 0.165, "step": 1095 }, { "epoch": 4.21, "learning_rate": 0.0001440102171136654, "loss": 0.5881, "step": 1100 }, { "epoch": 4.21, "eval_accuracy": 0.6379310344827587, "eval_loss": 0.947981059551239, "eval_runtime": 136.1859, "eval_samples_per_second": 1.278, "eval_steps_per_second": 0.162, "step": 1100 }, { "epoch": 4.23, "learning_rate": 0.00014375478927203066, "loss": 0.5461, "step": 1105 }, { "epoch": 4.23, "eval_accuracy": 0.6206896551724138, "eval_loss": 1.017594814300537, "eval_runtime": 133.6971, "eval_samples_per_second": 1.301, "eval_steps_per_second": 0.165, "step": 1105 }, { "epoch": 4.25, "learning_rate": 0.00014349936143039592, "loss": 0.997, "step": 1110 }, { "epoch": 4.25, "eval_accuracy": 0.6551724137931034, "eval_loss": 0.9593090415000916, "eval_runtime": 133.0295, "eval_samples_per_second": 1.308, "eval_steps_per_second": 0.165, "step": 1110 }, { "epoch": 4.27, "learning_rate": 0.00014324393358876119, "loss": 0.7955, "step": 1115 }, { "epoch": 4.27, "eval_accuracy": 0.6379310344827587, "eval_loss": 1.090934157371521, "eval_runtime": 133.0528, "eval_samples_per_second": 1.308, "eval_steps_per_second": 0.165, "step": 1115 }, { "epoch": 4.29, "learning_rate": 0.00014298850574712642, "loss": 0.6282, "step": 1120 }, { "epoch": 4.29, "eval_accuracy": 0.6436781609195402, "eval_loss": 0.9476028680801392, "eval_runtime": 136.0334, "eval_samples_per_second": 1.279, "eval_steps_per_second": 0.162, "step": 1120 }, { "epoch": 4.31, "learning_rate": 0.0001427330779054917, "loss": 0.4928, "step": 1125 }, { "epoch": 4.31, "eval_accuracy": 0.6609195402298851, "eval_loss": 0.8954192399978638, "eval_runtime": 135.9877, "eval_samples_per_second": 1.28, "eval_steps_per_second": 0.162, "step": 1125 }, { "epoch": 4.33, "learning_rate": 0.00014247765006385695, "loss": 0.6038, "step": 1130 }, { "epoch": 4.33, "eval_accuracy": 0.632183908045977, "eval_loss": 1.013425588607788, "eval_runtime": 135.7265, "eval_samples_per_second": 1.282, "eval_steps_per_second": 0.162, "step": 1130 }, { "epoch": 4.35, "learning_rate": 0.00014222222222222224, "loss": 0.5476, "step": 1135 }, { "epoch": 4.35, "eval_accuracy": 0.6206896551724138, "eval_loss": 1.1263020038604736, "eval_runtime": 131.5323, "eval_samples_per_second": 1.323, "eval_steps_per_second": 0.167, "step": 1135 }, { "epoch": 4.37, "learning_rate": 0.00014196679438058748, "loss": 0.6955, "step": 1140 }, { "epoch": 4.37, "eval_accuracy": 0.6494252873563219, "eval_loss": 0.9447872042655945, "eval_runtime": 131.7146, "eval_samples_per_second": 1.321, "eval_steps_per_second": 0.167, "step": 1140 }, { "epoch": 4.39, "learning_rate": 0.00014171136653895277, "loss": 0.5385, "step": 1145 }, { "epoch": 4.39, "eval_accuracy": 0.6551724137931034, "eval_loss": 0.9447667002677917, "eval_runtime": 131.6399, "eval_samples_per_second": 1.322, "eval_steps_per_second": 0.167, "step": 1145 }, { "epoch": 4.41, "learning_rate": 0.000141455938697318, "loss": 0.4345, "step": 1150 }, { "epoch": 4.41, "eval_accuracy": 0.6896551724137931, "eval_loss": 0.9276512265205383, "eval_runtime": 131.3205, "eval_samples_per_second": 1.325, "eval_steps_per_second": 0.168, "step": 1150 }, { "epoch": 4.43, "learning_rate": 0.00014120051085568327, "loss": 0.7115, "step": 1155 }, { "epoch": 4.43, "eval_accuracy": 0.6609195402298851, "eval_loss": 1.017700433731079, "eval_runtime": 131.4128, "eval_samples_per_second": 1.324, "eval_steps_per_second": 0.167, "step": 1155 }, { "epoch": 4.44, "learning_rate": 0.00014094508301404853, "loss": 0.6605, "step": 1160 }, { "epoch": 4.44, "eval_accuracy": 0.6206896551724138, "eval_loss": 1.289931058883667, "eval_runtime": 131.6646, "eval_samples_per_second": 1.322, "eval_steps_per_second": 0.167, "step": 1160 }, { "epoch": 4.46, "learning_rate": 0.0001406896551724138, "loss": 0.7257, "step": 1165 }, { "epoch": 4.46, "eval_accuracy": 0.603448275862069, "eval_loss": 1.2731506824493408, "eval_runtime": 131.6448, "eval_samples_per_second": 1.322, "eval_steps_per_second": 0.167, "step": 1165 }, { "epoch": 4.48, "learning_rate": 0.00014043422733077906, "loss": 0.8842, "step": 1170 }, { "epoch": 4.48, "eval_accuracy": 0.6206896551724138, "eval_loss": 1.1442424058914185, "eval_runtime": 131.62, "eval_samples_per_second": 1.322, "eval_steps_per_second": 0.167, "step": 1170 }, { "epoch": 4.5, "learning_rate": 0.00014017879948914432, "loss": 0.7097, "step": 1175 }, { "epoch": 4.5, "eval_accuracy": 0.6494252873563219, "eval_loss": 0.9837198257446289, "eval_runtime": 131.6089, "eval_samples_per_second": 1.322, "eval_steps_per_second": 0.167, "step": 1175 }, { "epoch": 4.52, "learning_rate": 0.00013992337164750958, "loss": 0.6335, "step": 1180 }, { "epoch": 4.52, "eval_accuracy": 0.6666666666666666, "eval_loss": 0.9646813273429871, "eval_runtime": 134.7732, "eval_samples_per_second": 1.291, "eval_steps_per_second": 0.163, "step": 1180 }, { "epoch": 4.54, "learning_rate": 0.00013966794380587485, "loss": 0.7804, "step": 1185 }, { "epoch": 4.54, "eval_accuracy": 0.6609195402298851, "eval_loss": 0.8581375479698181, "eval_runtime": 134.6733, "eval_samples_per_second": 1.292, "eval_steps_per_second": 0.163, "step": 1185 }, { "epoch": 4.56, "learning_rate": 0.0001394125159642401, "loss": 0.5464, "step": 1190 }, { "epoch": 4.56, "eval_accuracy": 0.6724137931034483, "eval_loss": 0.8662963509559631, "eval_runtime": 131.6239, "eval_samples_per_second": 1.322, "eval_steps_per_second": 0.167, "step": 1190 }, { "epoch": 4.58, "learning_rate": 0.00013915708812260537, "loss": 0.2931, "step": 1195 }, { "epoch": 4.58, "eval_accuracy": 0.6551724137931034, "eval_loss": 1.0258806943893433, "eval_runtime": 131.4426, "eval_samples_per_second": 1.324, "eval_steps_per_second": 0.167, "step": 1195 }, { "epoch": 4.6, "learning_rate": 0.00013890166028097064, "loss": 0.5038, "step": 1200 }, { "epoch": 4.6, "eval_accuracy": 0.5689655172413793, "eval_loss": 1.886447548866272, "eval_runtime": 134.426, "eval_samples_per_second": 1.294, "eval_steps_per_second": 0.164, "step": 1200 }, { "epoch": 4.62, "learning_rate": 0.0001386462324393359, "loss": 1.0206, "step": 1205 }, { "epoch": 4.62, "eval_accuracy": 0.5747126436781609, "eval_loss": 1.2909624576568604, "eval_runtime": 134.5453, "eval_samples_per_second": 1.293, "eval_steps_per_second": 0.164, "step": 1205 }, { "epoch": 4.64, "learning_rate": 0.00013839080459770116, "loss": 0.5508, "step": 1210 }, { "epoch": 4.64, "eval_accuracy": 0.5632183908045977, "eval_loss": 1.152716040611267, "eval_runtime": 131.5963, "eval_samples_per_second": 1.322, "eval_steps_per_second": 0.167, "step": 1210 }, { "epoch": 4.66, "learning_rate": 0.00013813537675606643, "loss": 1.2308, "step": 1215 }, { "epoch": 4.66, "eval_accuracy": 0.6091954022988506, "eval_loss": 1.0279332399368286, "eval_runtime": 135.4076, "eval_samples_per_second": 1.285, "eval_steps_per_second": 0.162, "step": 1215 }, { "epoch": 4.67, "learning_rate": 0.0001378799489144317, "loss": 0.388, "step": 1220 }, { "epoch": 4.67, "eval_accuracy": 0.632183908045977, "eval_loss": 0.9292969703674316, "eval_runtime": 135.6227, "eval_samples_per_second": 1.283, "eval_steps_per_second": 0.162, "step": 1220 }, { "epoch": 4.69, "learning_rate": 0.00013762452107279695, "loss": 0.4747, "step": 1225 }, { "epoch": 4.69, "eval_accuracy": 0.6379310344827587, "eval_loss": 1.0777442455291748, "eval_runtime": 131.6533, "eval_samples_per_second": 1.322, "eval_steps_per_second": 0.167, "step": 1225 }, { "epoch": 4.71, "learning_rate": 0.00013736909323116222, "loss": 1.0655, "step": 1230 }, { "epoch": 4.71, "eval_accuracy": 0.6954022988505747, "eval_loss": 0.9733009934425354, "eval_runtime": 131.6904, "eval_samples_per_second": 1.321, "eval_steps_per_second": 0.167, "step": 1230 }, { "epoch": 4.73, "learning_rate": 0.00013711366538952745, "loss": 0.7551, "step": 1235 }, { "epoch": 4.73, "eval_accuracy": 0.6896551724137931, "eval_loss": 0.8783059120178223, "eval_runtime": 131.5129, "eval_samples_per_second": 1.323, "eval_steps_per_second": 0.167, "step": 1235 }, { "epoch": 4.75, "learning_rate": 0.00013685823754789274, "loss": 0.5262, "step": 1240 }, { "epoch": 4.75, "eval_accuracy": 0.6839080459770115, "eval_loss": 0.9284844994544983, "eval_runtime": 131.5523, "eval_samples_per_second": 1.323, "eval_steps_per_second": 0.167, "step": 1240 }, { "epoch": 4.77, "learning_rate": 0.00013660280970625798, "loss": 0.6098, "step": 1245 }, { "epoch": 4.77, "eval_accuracy": 0.7068965517241379, "eval_loss": 0.841589629650116, "eval_runtime": 131.461, "eval_samples_per_second": 1.324, "eval_steps_per_second": 0.167, "step": 1245 }, { "epoch": 4.79, "learning_rate": 0.00013634738186462327, "loss": 0.8836, "step": 1250 }, { "epoch": 4.79, "eval_accuracy": 0.6896551724137931, "eval_loss": 0.8214066624641418, "eval_runtime": 131.6847, "eval_samples_per_second": 1.321, "eval_steps_per_second": 0.167, "step": 1250 }, { "epoch": 4.81, "learning_rate": 0.0001360919540229885, "loss": 0.5507, "step": 1255 }, { "epoch": 4.81, "eval_accuracy": 0.735632183908046, "eval_loss": 0.7416496872901917, "eval_runtime": 131.6767, "eval_samples_per_second": 1.321, "eval_steps_per_second": 0.167, "step": 1255 }, { "epoch": 4.83, "learning_rate": 0.0001358365261813538, "loss": 0.1952, "step": 1260 }, { "epoch": 4.83, "eval_accuracy": 0.6896551724137931, "eval_loss": 0.8377039432525635, "eval_runtime": 131.5954, "eval_samples_per_second": 1.322, "eval_steps_per_second": 0.167, "step": 1260 }, { "epoch": 4.85, "learning_rate": 0.00013558109833971903, "loss": 0.7898, "step": 1265 }, { "epoch": 4.85, "eval_accuracy": 0.735632183908046, "eval_loss": 0.7278856039047241, "eval_runtime": 131.6799, "eval_samples_per_second": 1.321, "eval_steps_per_second": 0.167, "step": 1265 }, { "epoch": 4.87, "learning_rate": 0.0001353256704980843, "loss": 0.7374, "step": 1270 }, { "epoch": 4.87, "eval_accuracy": 0.6551724137931034, "eval_loss": 0.9924662113189697, "eval_runtime": 131.6929, "eval_samples_per_second": 1.321, "eval_steps_per_second": 0.167, "step": 1270 }, { "epoch": 4.89, "learning_rate": 0.00013507024265644956, "loss": 0.5315, "step": 1275 }, { "epoch": 4.89, "eval_accuracy": 0.6724137931034483, "eval_loss": 0.9290440082550049, "eval_runtime": 131.6471, "eval_samples_per_second": 1.322, "eval_steps_per_second": 0.167, "step": 1275 }, { "epoch": 4.9, "learning_rate": 0.00013481481481481482, "loss": 0.3167, "step": 1280 }, { "epoch": 4.9, "eval_accuracy": 0.7011494252873564, "eval_loss": 0.7473086714744568, "eval_runtime": 131.7751, "eval_samples_per_second": 1.32, "eval_steps_per_second": 0.167, "step": 1280 }, { "epoch": 4.92, "learning_rate": 0.00013455938697318009, "loss": 0.9122, "step": 1285 }, { "epoch": 4.92, "eval_accuracy": 0.7471264367816092, "eval_loss": 0.8101300001144409, "eval_runtime": 131.6511, "eval_samples_per_second": 1.322, "eval_steps_per_second": 0.167, "step": 1285 }, { "epoch": 4.94, "learning_rate": 0.00013430395913154535, "loss": 0.5306, "step": 1290 }, { "epoch": 4.94, "eval_accuracy": 0.7183908045977011, "eval_loss": 0.7988009452819824, "eval_runtime": 131.5511, "eval_samples_per_second": 1.323, "eval_steps_per_second": 0.167, "step": 1290 }, { "epoch": 4.96, "learning_rate": 0.0001340485312899106, "loss": 0.9036, "step": 1295 }, { "epoch": 4.96, "eval_accuracy": 0.6896551724137931, "eval_loss": 0.9747323393821716, "eval_runtime": 131.6304, "eval_samples_per_second": 1.322, "eval_steps_per_second": 0.167, "step": 1295 }, { "epoch": 4.98, "learning_rate": 0.00013379310344827588, "loss": 0.4449, "step": 1300 }, { "epoch": 4.98, "eval_accuracy": 0.7011494252873564, "eval_loss": 0.7531276345252991, "eval_runtime": 131.6404, "eval_samples_per_second": 1.322, "eval_steps_per_second": 0.167, "step": 1300 }, { "epoch": 5.0, "learning_rate": 0.0001335376756066411, "loss": 0.761, "step": 1305 }, { "epoch": 5.0, "eval_accuracy": 0.6781609195402298, "eval_loss": 0.8166816234588623, "eval_runtime": 131.8052, "eval_samples_per_second": 1.32, "eval_steps_per_second": 0.167, "step": 1305 }, { "epoch": 5.02, "learning_rate": 0.0001332822477650064, "loss": 0.5163, "step": 1310 }, { "epoch": 5.02, "eval_accuracy": 0.6666666666666666, "eval_loss": 1.006870985031128, "eval_runtime": 131.6867, "eval_samples_per_second": 1.321, "eval_steps_per_second": 0.167, "step": 1310 }, { "epoch": 5.04, "learning_rate": 0.00013302681992337164, "loss": 0.2701, "step": 1315 }, { "epoch": 5.04, "eval_accuracy": 0.7126436781609196, "eval_loss": 0.8416351675987244, "eval_runtime": 131.8363, "eval_samples_per_second": 1.32, "eval_steps_per_second": 0.167, "step": 1315 }, { "epoch": 5.06, "learning_rate": 0.00013277139208173693, "loss": 0.1513, "step": 1320 }, { "epoch": 5.06, "eval_accuracy": 0.7758620689655172, "eval_loss": 0.6530519723892212, "eval_runtime": 134.8529, "eval_samples_per_second": 1.29, "eval_steps_per_second": 0.163, "step": 1320 }, { "epoch": 5.08, "learning_rate": 0.00013251596424010217, "loss": 0.3641, "step": 1325 }, { "epoch": 5.08, "eval_accuracy": 0.7126436781609196, "eval_loss": 0.8022194504737854, "eval_runtime": 131.7261, "eval_samples_per_second": 1.321, "eval_steps_per_second": 0.167, "step": 1325 }, { "epoch": 5.1, "learning_rate": 0.00013226053639846746, "loss": 0.6804, "step": 1330 }, { "epoch": 5.1, "eval_accuracy": 0.6954022988505747, "eval_loss": 0.9907580614089966, "eval_runtime": 131.5946, "eval_samples_per_second": 1.322, "eval_steps_per_second": 0.167, "step": 1330 }, { "epoch": 5.11, "learning_rate": 0.0001320051085568327, "loss": 0.9176, "step": 1335 }, { "epoch": 5.11, "eval_accuracy": 0.6896551724137931, "eval_loss": 0.8896319270133972, "eval_runtime": 131.5913, "eval_samples_per_second": 1.322, "eval_steps_per_second": 0.167, "step": 1335 }, { "epoch": 5.13, "learning_rate": 0.00013174968071519796, "loss": 0.4943, "step": 1340 }, { "epoch": 5.13, "eval_accuracy": 0.7298850574712644, "eval_loss": 0.8112825155258179, "eval_runtime": 132.8289, "eval_samples_per_second": 1.31, "eval_steps_per_second": 0.166, "step": 1340 }, { "epoch": 5.15, "learning_rate": 0.00013149425287356322, "loss": 0.4788, "step": 1345 }, { "epoch": 5.15, "eval_accuracy": 0.6839080459770115, "eval_loss": 0.9182735681533813, "eval_runtime": 131.6068, "eval_samples_per_second": 1.322, "eval_steps_per_second": 0.167, "step": 1345 }, { "epoch": 5.17, "learning_rate": 0.00013123882503192848, "loss": 0.2654, "step": 1350 }, { "epoch": 5.17, "eval_accuracy": 0.7183908045977011, "eval_loss": 0.8970803618431091, "eval_runtime": 131.7165, "eval_samples_per_second": 1.321, "eval_steps_per_second": 0.167, "step": 1350 }, { "epoch": 5.19, "learning_rate": 0.00013098339719029375, "loss": 0.5567, "step": 1355 }, { "epoch": 5.19, "eval_accuracy": 0.6896551724137931, "eval_loss": 1.0155576467514038, "eval_runtime": 131.4848, "eval_samples_per_second": 1.323, "eval_steps_per_second": 0.167, "step": 1355 }, { "epoch": 5.21, "learning_rate": 0.000130727969348659, "loss": 0.5702, "step": 1360 }, { "epoch": 5.21, "eval_accuracy": 0.6896551724137931, "eval_loss": 0.8736016154289246, "eval_runtime": 134.812, "eval_samples_per_second": 1.291, "eval_steps_per_second": 0.163, "step": 1360 }, { "epoch": 5.23, "learning_rate": 0.00013047254150702427, "loss": 0.7055, "step": 1365 }, { "epoch": 5.23, "eval_accuracy": 0.6206896551724138, "eval_loss": 1.0454132556915283, "eval_runtime": 131.6517, "eval_samples_per_second": 1.322, "eval_steps_per_second": 0.167, "step": 1365 }, { "epoch": 5.25, "learning_rate": 0.00013021711366538954, "loss": 0.5203, "step": 1370 }, { "epoch": 5.25, "eval_accuracy": 0.632183908045977, "eval_loss": 1.1284202337265015, "eval_runtime": 131.6682, "eval_samples_per_second": 1.322, "eval_steps_per_second": 0.167, "step": 1370 }, { "epoch": 5.27, "learning_rate": 0.0001299616858237548, "loss": 0.512, "step": 1375 }, { "epoch": 5.27, "eval_accuracy": 0.632183908045977, "eval_loss": 1.0251713991165161, "eval_runtime": 134.5714, "eval_samples_per_second": 1.293, "eval_steps_per_second": 0.163, "step": 1375 }, { "epoch": 5.29, "learning_rate": 0.00012970625798212006, "loss": 0.8081, "step": 1380 }, { "epoch": 5.29, "eval_accuracy": 0.6724137931034483, "eval_loss": 0.8887814283370972, "eval_runtime": 131.5304, "eval_samples_per_second": 1.323, "eval_steps_per_second": 0.167, "step": 1380 }, { "epoch": 5.31, "learning_rate": 0.00012945083014048533, "loss": 0.3513, "step": 1385 }, { "epoch": 5.31, "eval_accuracy": 0.7298850574712644, "eval_loss": 0.7672654986381531, "eval_runtime": 131.5428, "eval_samples_per_second": 1.323, "eval_steps_per_second": 0.167, "step": 1385 }, { "epoch": 5.33, "learning_rate": 0.0001291954022988506, "loss": 0.3606, "step": 1390 }, { "epoch": 5.33, "eval_accuracy": 0.7413793103448276, "eval_loss": 0.7865743637084961, "eval_runtime": 131.6146, "eval_samples_per_second": 1.322, "eval_steps_per_second": 0.167, "step": 1390 }, { "epoch": 5.34, "learning_rate": 0.00012893997445721583, "loss": 0.539, "step": 1395 }, { "epoch": 5.34, "eval_accuracy": 0.6839080459770115, "eval_loss": 0.8202821612358093, "eval_runtime": 131.7497, "eval_samples_per_second": 1.321, "eval_steps_per_second": 0.167, "step": 1395 }, { "epoch": 5.36, "learning_rate": 0.00012868454661558112, "loss": 0.5642, "step": 1400 }, { "epoch": 5.36, "eval_accuracy": 0.6436781609195402, "eval_loss": 1.1715248823165894, "eval_runtime": 131.6954, "eval_samples_per_second": 1.321, "eval_steps_per_second": 0.167, "step": 1400 }, { "epoch": 5.38, "learning_rate": 0.00012842911877394635, "loss": 0.5947, "step": 1405 }, { "epoch": 5.38, "eval_accuracy": 0.6609195402298851, "eval_loss": 0.9037488102912903, "eval_runtime": 134.8795, "eval_samples_per_second": 1.29, "eval_steps_per_second": 0.163, "step": 1405 }, { "epoch": 5.4, "learning_rate": 0.00012817369093231162, "loss": 0.4159, "step": 1410 }, { "epoch": 5.4, "eval_accuracy": 0.735632183908046, "eval_loss": 0.8044033050537109, "eval_runtime": 131.6438, "eval_samples_per_second": 1.322, "eval_steps_per_second": 0.167, "step": 1410 }, { "epoch": 5.42, "learning_rate": 0.00012791826309067688, "loss": 0.537, "step": 1415 }, { "epoch": 5.42, "eval_accuracy": 0.7298850574712644, "eval_loss": 0.9017471075057983, "eval_runtime": 131.673, "eval_samples_per_second": 1.321, "eval_steps_per_second": 0.167, "step": 1415 }, { "epoch": 5.44, "learning_rate": 0.00012766283524904214, "loss": 0.3371, "step": 1420 }, { "epoch": 5.44, "eval_accuracy": 0.6781609195402298, "eval_loss": 1.0497161149978638, "eval_runtime": 131.6194, "eval_samples_per_second": 1.322, "eval_steps_per_second": 0.167, "step": 1420 }, { "epoch": 5.46, "learning_rate": 0.0001274074074074074, "loss": 0.5586, "step": 1425 }, { "epoch": 5.46, "eval_accuracy": 0.735632183908046, "eval_loss": 0.8958852291107178, "eval_runtime": 131.522, "eval_samples_per_second": 1.323, "eval_steps_per_second": 0.167, "step": 1425 }, { "epoch": 5.48, "learning_rate": 0.00012715197956577267, "loss": 0.7869, "step": 1430 }, { "epoch": 5.48, "eval_accuracy": 0.7241379310344828, "eval_loss": 0.8901216387748718, "eval_runtime": 131.4981, "eval_samples_per_second": 1.323, "eval_steps_per_second": 0.167, "step": 1430 }, { "epoch": 5.5, "learning_rate": 0.00012689655172413793, "loss": 0.2459, "step": 1435 }, { "epoch": 5.5, "eval_accuracy": 0.7413793103448276, "eval_loss": 0.7955420613288879, "eval_runtime": 131.5057, "eval_samples_per_second": 1.323, "eval_steps_per_second": 0.167, "step": 1435 }, { "epoch": 5.52, "learning_rate": 0.0001266411238825032, "loss": 0.0927, "step": 1440 }, { "epoch": 5.52, "eval_accuracy": 0.6954022988505747, "eval_loss": 0.9924725294113159, "eval_runtime": 131.6887, "eval_samples_per_second": 1.321, "eval_steps_per_second": 0.167, "step": 1440 }, { "epoch": 5.54, "learning_rate": 0.00012638569604086846, "loss": 0.4005, "step": 1445 }, { "epoch": 5.54, "eval_accuracy": 0.6839080459770115, "eval_loss": 1.1416829824447632, "eval_runtime": 131.6825, "eval_samples_per_second": 1.321, "eval_steps_per_second": 0.167, "step": 1445 }, { "epoch": 5.56, "learning_rate": 0.00012613026819923372, "loss": 0.4054, "step": 1450 }, { "epoch": 5.56, "eval_accuracy": 0.7183908045977011, "eval_loss": 0.9588910937309265, "eval_runtime": 131.6228, "eval_samples_per_second": 1.322, "eval_steps_per_second": 0.167, "step": 1450 }, { "epoch": 5.57, "learning_rate": 0.00012587484035759899, "loss": 0.5116, "step": 1455 }, { "epoch": 5.57, "eval_accuracy": 0.6896551724137931, "eval_loss": 1.0598994493484497, "eval_runtime": 131.7436, "eval_samples_per_second": 1.321, "eval_steps_per_second": 0.167, "step": 1455 }, { "epoch": 5.59, "learning_rate": 0.00012561941251596425, "loss": 0.257, "step": 1460 }, { "epoch": 5.59, "eval_accuracy": 0.6666666666666666, "eval_loss": 1.162794828414917, "eval_runtime": 131.7132, "eval_samples_per_second": 1.321, "eval_steps_per_second": 0.167, "step": 1460 }, { "epoch": 5.61, "learning_rate": 0.0001253639846743295, "loss": 0.2287, "step": 1465 }, { "epoch": 5.61, "eval_accuracy": 0.6551724137931034, "eval_loss": 1.2925313711166382, "eval_runtime": 131.7156, "eval_samples_per_second": 1.321, "eval_steps_per_second": 0.167, "step": 1465 }, { "epoch": 5.63, "learning_rate": 0.00012510855683269478, "loss": 0.8024, "step": 1470 }, { "epoch": 5.63, "eval_accuracy": 0.6954022988505747, "eval_loss": 1.0764447450637817, "eval_runtime": 134.9292, "eval_samples_per_second": 1.29, "eval_steps_per_second": 0.163, "step": 1470 }, { "epoch": 5.65, "learning_rate": 0.00012485312899106004, "loss": 0.4949, "step": 1475 }, { "epoch": 5.65, "eval_accuracy": 0.6896551724137931, "eval_loss": 1.1142551898956299, "eval_runtime": 131.5409, "eval_samples_per_second": 1.323, "eval_steps_per_second": 0.167, "step": 1475 }, { "epoch": 5.67, "learning_rate": 0.00012459770114942528, "loss": 0.4655, "step": 1480 }, { "epoch": 5.67, "eval_accuracy": 0.6494252873563219, "eval_loss": 1.353607177734375, "eval_runtime": 131.5492, "eval_samples_per_second": 1.323, "eval_steps_per_second": 0.167, "step": 1480 }, { "epoch": 5.69, "learning_rate": 0.00012434227330779057, "loss": 0.7536, "step": 1485 }, { "epoch": 5.69, "eval_accuracy": 0.7183908045977011, "eval_loss": 1.0325322151184082, "eval_runtime": 134.5218, "eval_samples_per_second": 1.293, "eval_steps_per_second": 0.164, "step": 1485 }, { "epoch": 5.71, "learning_rate": 0.0001240868454661558, "loss": 0.326, "step": 1490 }, { "epoch": 5.71, "eval_accuracy": 0.7011494252873564, "eval_loss": 1.112859845161438, "eval_runtime": 131.7407, "eval_samples_per_second": 1.321, "eval_steps_per_second": 0.167, "step": 1490 }, { "epoch": 5.73, "learning_rate": 0.0001238314176245211, "loss": 0.5502, "step": 1495 }, { "epoch": 5.73, "eval_accuracy": 0.6551724137931034, "eval_loss": 1.3975075483322144, "eval_runtime": 131.6069, "eval_samples_per_second": 1.322, "eval_steps_per_second": 0.167, "step": 1495 }, { "epoch": 5.75, "learning_rate": 0.00012357598978288633, "loss": 0.7814, "step": 1500 }, { "epoch": 5.75, "eval_accuracy": 0.6436781609195402, "eval_loss": 1.479273796081543, "eval_runtime": 132.0973, "eval_samples_per_second": 1.317, "eval_steps_per_second": 0.167, "step": 1500 }, { "epoch": 5.77, "learning_rate": 0.00012332056194125162, "loss": 0.8521, "step": 1505 }, { "epoch": 5.77, "eval_accuracy": 0.6379310344827587, "eval_loss": 1.2535182237625122, "eval_runtime": 131.6457, "eval_samples_per_second": 1.322, "eval_steps_per_second": 0.167, "step": 1505 }, { "epoch": 5.79, "learning_rate": 0.00012306513409961686, "loss": 0.452, "step": 1510 }, { "epoch": 5.79, "eval_accuracy": 0.6206896551724138, "eval_loss": 1.3425843715667725, "eval_runtime": 135.2782, "eval_samples_per_second": 1.286, "eval_steps_per_second": 0.163, "step": 1510 }, { "epoch": 5.8, "learning_rate": 0.00012280970625798212, "loss": 1.1244, "step": 1515 }, { "epoch": 5.8, "eval_accuracy": 0.6839080459770115, "eval_loss": 1.1074714660644531, "eval_runtime": 131.544, "eval_samples_per_second": 1.323, "eval_steps_per_second": 0.167, "step": 1515 }, { "epoch": 5.82, "learning_rate": 0.00012255427841634738, "loss": 0.5211, "step": 1520 }, { "epoch": 5.82, "eval_accuracy": 0.6666666666666666, "eval_loss": 1.0719375610351562, "eval_runtime": 131.5325, "eval_samples_per_second": 1.323, "eval_steps_per_second": 0.167, "step": 1520 }, { "epoch": 5.84, "learning_rate": 0.00012229885057471265, "loss": 0.4944, "step": 1525 }, { "epoch": 5.84, "eval_accuracy": 0.6494252873563219, "eval_loss": 1.1987359523773193, "eval_runtime": 131.5625, "eval_samples_per_second": 1.323, "eval_steps_per_second": 0.167, "step": 1525 }, { "epoch": 5.86, "learning_rate": 0.00012204342273307792, "loss": 0.619, "step": 1530 }, { "epoch": 5.86, "eval_accuracy": 0.6839080459770115, "eval_loss": 1.0625687837600708, "eval_runtime": 131.5921, "eval_samples_per_second": 1.322, "eval_steps_per_second": 0.167, "step": 1530 }, { "epoch": 5.88, "learning_rate": 0.00012178799489144317, "loss": 0.3932, "step": 1535 }, { "epoch": 5.88, "eval_accuracy": 0.6149425287356322, "eval_loss": 1.371453881263733, "eval_runtime": 134.6534, "eval_samples_per_second": 1.292, "eval_steps_per_second": 0.163, "step": 1535 }, { "epoch": 5.9, "learning_rate": 0.00012153256704980845, "loss": 1.001, "step": 1540 }, { "epoch": 5.9, "eval_accuracy": 0.6206896551724138, "eval_loss": 1.1620148420333862, "eval_runtime": 131.6762, "eval_samples_per_second": 1.321, "eval_steps_per_second": 0.167, "step": 1540 }, { "epoch": 5.92, "learning_rate": 0.0001212771392081737, "loss": 0.4258, "step": 1545 }, { "epoch": 5.92, "eval_accuracy": 0.6724137931034483, "eval_loss": 1.0935604572296143, "eval_runtime": 131.6743, "eval_samples_per_second": 1.321, "eval_steps_per_second": 0.167, "step": 1545 }, { "epoch": 5.94, "learning_rate": 0.00012102171136653895, "loss": 0.6611, "step": 1550 }, { "epoch": 5.94, "eval_accuracy": 0.6494252873563219, "eval_loss": 1.1292699575424194, "eval_runtime": 134.8793, "eval_samples_per_second": 1.29, "eval_steps_per_second": 0.163, "step": 1550 }, { "epoch": 5.96, "learning_rate": 0.00012076628352490423, "loss": 0.6265, "step": 1555 }, { "epoch": 5.96, "eval_accuracy": 0.6666666666666666, "eval_loss": 0.937195360660553, "eval_runtime": 131.5531, "eval_samples_per_second": 1.323, "eval_steps_per_second": 0.167, "step": 1555 }, { "epoch": 5.98, "learning_rate": 0.00012051085568326948, "loss": 0.5634, "step": 1560 }, { "epoch": 5.98, "eval_accuracy": 0.7298850574712644, "eval_loss": 0.8594533205032349, "eval_runtime": 131.654, "eval_samples_per_second": 1.322, "eval_steps_per_second": 0.167, "step": 1560 }, { "epoch": 6.0, "learning_rate": 0.00012025542784163475, "loss": 0.3986, "step": 1565 }, { "epoch": 6.0, "eval_accuracy": 0.6839080459770115, "eval_loss": 0.9495770931243896, "eval_runtime": 131.7261, "eval_samples_per_second": 1.321, "eval_steps_per_second": 0.167, "step": 1565 }, { "epoch": 6.02, "learning_rate": 0.00012, "loss": 0.2532, "step": 1570 }, { "epoch": 6.02, "eval_accuracy": 0.6724137931034483, "eval_loss": 1.0734517574310303, "eval_runtime": 131.5139, "eval_samples_per_second": 1.323, "eval_steps_per_second": 0.167, "step": 1570 }, { "epoch": 6.03, "learning_rate": 0.00011974457215836528, "loss": 0.1247, "step": 1575 }, { "epoch": 6.03, "eval_accuracy": 0.6551724137931034, "eval_loss": 1.0141799449920654, "eval_runtime": 131.6291, "eval_samples_per_second": 1.322, "eval_steps_per_second": 0.167, "step": 1575 }, { "epoch": 6.05, "learning_rate": 0.00011948914431673053, "loss": 0.2655, "step": 1580 }, { "epoch": 6.05, "eval_accuracy": 0.7011494252873564, "eval_loss": 0.9764288663864136, "eval_runtime": 131.78, "eval_samples_per_second": 1.32, "eval_steps_per_second": 0.167, "step": 1580 }, { "epoch": 6.07, "learning_rate": 0.00011923371647509578, "loss": 0.5073, "step": 1585 }, { "epoch": 6.07, "eval_accuracy": 0.7011494252873564, "eval_loss": 1.0659440755844116, "eval_runtime": 131.6962, "eval_samples_per_second": 1.321, "eval_steps_per_second": 0.167, "step": 1585 }, { "epoch": 6.09, "learning_rate": 0.00011897828863346106, "loss": 0.5426, "step": 1590 }, { "epoch": 6.09, "eval_accuracy": 0.6666666666666666, "eval_loss": 1.1733648777008057, "eval_runtime": 131.6005, "eval_samples_per_second": 1.322, "eval_steps_per_second": 0.167, "step": 1590 }, { "epoch": 6.11, "learning_rate": 0.0001187228607918263, "loss": 0.3363, "step": 1595 }, { "epoch": 6.11, "eval_accuracy": 0.6666666666666666, "eval_loss": 1.0996015071868896, "eval_runtime": 131.7781, "eval_samples_per_second": 1.32, "eval_steps_per_second": 0.167, "step": 1595 }, { "epoch": 6.13, "learning_rate": 0.00011846743295019158, "loss": 0.1399, "step": 1600 }, { "epoch": 6.13, "eval_accuracy": 0.6609195402298851, "eval_loss": 1.1617628335952759, "eval_runtime": 131.6546, "eval_samples_per_second": 1.322, "eval_steps_per_second": 0.167, "step": 1600 }, { "epoch": 6.15, "learning_rate": 0.00011821200510855683, "loss": 0.1777, "step": 1605 }, { "epoch": 6.15, "eval_accuracy": 0.6839080459770115, "eval_loss": 1.0963499546051025, "eval_runtime": 131.6556, "eval_samples_per_second": 1.322, "eval_steps_per_second": 0.167, "step": 1605 }, { "epoch": 6.17, "learning_rate": 0.00011795657726692211, "loss": 0.2072, "step": 1610 }, { "epoch": 6.17, "eval_accuracy": 0.7126436781609196, "eval_loss": 1.0916483402252197, "eval_runtime": 131.5815, "eval_samples_per_second": 1.322, "eval_steps_per_second": 0.167, "step": 1610 }, { "epoch": 6.19, "learning_rate": 0.00011770114942528736, "loss": 0.3631, "step": 1615 }, { "epoch": 6.19, "eval_accuracy": 0.6724137931034483, "eval_loss": 1.227651596069336, "eval_runtime": 135.4252, "eval_samples_per_second": 1.285, "eval_steps_per_second": 0.162, "step": 1615 }, { "epoch": 6.21, "learning_rate": 0.00011744572158365264, "loss": 0.2738, "step": 1620 }, { "epoch": 6.21, "eval_accuracy": 0.7126436781609196, "eval_loss": 1.0606082677841187, "eval_runtime": 131.4727, "eval_samples_per_second": 1.323, "eval_steps_per_second": 0.167, "step": 1620 }, { "epoch": 6.23, "learning_rate": 0.00011719029374201789, "loss": 0.2884, "step": 1625 }, { "epoch": 6.23, "eval_accuracy": 0.5919540229885057, "eval_loss": 1.3723126649856567, "eval_runtime": 131.7429, "eval_samples_per_second": 1.321, "eval_steps_per_second": 0.167, "step": 1625 }, { "epoch": 6.25, "learning_rate": 0.00011693486590038314, "loss": 0.5021, "step": 1630 }, { "epoch": 6.25, "eval_accuracy": 0.5977011494252874, "eval_loss": 1.417180061340332, "eval_runtime": 132.2401, "eval_samples_per_second": 1.316, "eval_steps_per_second": 0.166, "step": 1630 }, { "epoch": 6.26, "learning_rate": 0.00011667943805874841, "loss": 0.1844, "step": 1635 }, { "epoch": 6.26, "eval_accuracy": 0.6896551724137931, "eval_loss": 1.007380723953247, "eval_runtime": 135.4247, "eval_samples_per_second": 1.285, "eval_steps_per_second": 0.162, "step": 1635 }, { "epoch": 6.28, "learning_rate": 0.00011642401021711366, "loss": 0.5767, "step": 1640 }, { "epoch": 6.28, "eval_accuracy": 0.7011494252873564, "eval_loss": 1.0852534770965576, "eval_runtime": 131.63, "eval_samples_per_second": 1.322, "eval_steps_per_second": 0.167, "step": 1640 }, { "epoch": 6.3, "learning_rate": 0.00011616858237547894, "loss": 0.5302, "step": 1645 }, { "epoch": 6.3, "eval_accuracy": 0.735632183908046, "eval_loss": 0.877869188785553, "eval_runtime": 131.6419, "eval_samples_per_second": 1.322, "eval_steps_per_second": 0.167, "step": 1645 }, { "epoch": 6.32, "learning_rate": 0.00011591315453384419, "loss": 0.3801, "step": 1650 }, { "epoch": 6.32, "eval_accuracy": 0.7241379310344828, "eval_loss": 0.822873592376709, "eval_runtime": 134.6907, "eval_samples_per_second": 1.292, "eval_steps_per_second": 0.163, "step": 1650 }, { "epoch": 6.34, "learning_rate": 0.00011565772669220947, "loss": 0.3961, "step": 1655 }, { "epoch": 6.34, "eval_accuracy": 0.7701149425287356, "eval_loss": 0.7614623308181763, "eval_runtime": 131.6409, "eval_samples_per_second": 1.322, "eval_steps_per_second": 0.167, "step": 1655 }, { "epoch": 6.36, "learning_rate": 0.00011540229885057472, "loss": 0.2774, "step": 1660 }, { "epoch": 6.36, "eval_accuracy": 0.7298850574712644, "eval_loss": 0.8031173944473267, "eval_runtime": 131.543, "eval_samples_per_second": 1.323, "eval_steps_per_second": 0.167, "step": 1660 }, { "epoch": 6.38, "learning_rate": 0.00011514687100893997, "loss": 0.4219, "step": 1665 }, { "epoch": 6.38, "eval_accuracy": 0.7126436781609196, "eval_loss": 0.8798435926437378, "eval_runtime": 131.6434, "eval_samples_per_second": 1.322, "eval_steps_per_second": 0.167, "step": 1665 }, { "epoch": 6.4, "learning_rate": 0.00011489144316730524, "loss": 0.4269, "step": 1670 }, { "epoch": 6.4, "eval_accuracy": 0.7758620689655172, "eval_loss": 0.7993461489677429, "eval_runtime": 131.6926, "eval_samples_per_second": 1.321, "eval_steps_per_second": 0.167, "step": 1670 }, { "epoch": 6.42, "learning_rate": 0.00011463601532567049, "loss": 0.0621, "step": 1675 }, { "epoch": 6.42, "eval_accuracy": 0.7298850574712644, "eval_loss": 0.8598664999008179, "eval_runtime": 131.7999, "eval_samples_per_second": 1.32, "eval_steps_per_second": 0.167, "step": 1675 }, { "epoch": 6.44, "learning_rate": 0.00011438058748403577, "loss": 0.1985, "step": 1680 }, { "epoch": 6.44, "eval_accuracy": 0.6724137931034483, "eval_loss": 1.0499699115753174, "eval_runtime": 134.7139, "eval_samples_per_second": 1.292, "eval_steps_per_second": 0.163, "step": 1680 }, { "epoch": 6.46, "learning_rate": 0.00011412515964240102, "loss": 0.2481, "step": 1685 }, { "epoch": 6.46, "eval_accuracy": 0.6494252873563219, "eval_loss": 1.2009177207946777, "eval_runtime": 134.8296, "eval_samples_per_second": 1.291, "eval_steps_per_second": 0.163, "step": 1685 }, { "epoch": 6.48, "learning_rate": 0.0001138697318007663, "loss": 0.3036, "step": 1690 }, { "epoch": 6.48, "eval_accuracy": 0.6666666666666666, "eval_loss": 1.1416090726852417, "eval_runtime": 131.6305, "eval_samples_per_second": 1.322, "eval_steps_per_second": 0.167, "step": 1690 }, { "epoch": 6.49, "learning_rate": 0.00011361430395913155, "loss": 0.4456, "step": 1695 }, { "epoch": 6.49, "eval_accuracy": 0.7068965517241379, "eval_loss": 0.9773580431938171, "eval_runtime": 131.6527, "eval_samples_per_second": 1.322, "eval_steps_per_second": 0.167, "step": 1695 }, { "epoch": 6.51, "learning_rate": 0.00011335887611749681, "loss": 0.1675, "step": 1700 }, { "epoch": 6.51, "eval_accuracy": 0.7126436781609196, "eval_loss": 1.015581488609314, "eval_runtime": 133.994, "eval_samples_per_second": 1.299, "eval_steps_per_second": 0.164, "step": 1700 }, { "epoch": 6.53, "learning_rate": 0.00011310344827586207, "loss": 0.0375, "step": 1705 }, { "epoch": 6.53, "eval_accuracy": 0.7126436781609196, "eval_loss": 1.1146481037139893, "eval_runtime": 131.6013, "eval_samples_per_second": 1.322, "eval_steps_per_second": 0.167, "step": 1705 }, { "epoch": 6.55, "learning_rate": 0.00011284802043422734, "loss": 0.4738, "step": 1710 }, { "epoch": 6.55, "eval_accuracy": 0.6896551724137931, "eval_loss": 1.221787691116333, "eval_runtime": 131.6201, "eval_samples_per_second": 1.322, "eval_steps_per_second": 0.167, "step": 1710 }, { "epoch": 6.57, "learning_rate": 0.0001125925925925926, "loss": 0.2477, "step": 1715 }, { "epoch": 6.57, "eval_accuracy": 0.7011494252873564, "eval_loss": 1.130721926689148, "eval_runtime": 131.564, "eval_samples_per_second": 1.323, "eval_steps_per_second": 0.167, "step": 1715 }, { "epoch": 6.59, "learning_rate": 0.00011233716475095786, "loss": 0.4616, "step": 1720 }, { "epoch": 6.59, "eval_accuracy": 0.6609195402298851, "eval_loss": 1.2159687280654907, "eval_runtime": 131.7356, "eval_samples_per_second": 1.321, "eval_steps_per_second": 0.167, "step": 1720 }, { "epoch": 6.61, "learning_rate": 0.00011208173690932313, "loss": 0.2249, "step": 1725 }, { "epoch": 6.61, "eval_accuracy": 0.6839080459770115, "eval_loss": 1.1174477338790894, "eval_runtime": 131.6794, "eval_samples_per_second": 1.321, "eval_steps_per_second": 0.167, "step": 1725 }, { "epoch": 6.63, "learning_rate": 0.00011182630906768839, "loss": 0.3522, "step": 1730 }, { "epoch": 6.63, "eval_accuracy": 0.6609195402298851, "eval_loss": 1.5797643661499023, "eval_runtime": 135.457, "eval_samples_per_second": 1.285, "eval_steps_per_second": 0.162, "step": 1730 }, { "epoch": 6.65, "learning_rate": 0.00011157088122605364, "loss": 1.0043, "step": 1735 }, { "epoch": 6.65, "eval_accuracy": 0.6724137931034483, "eval_loss": 1.6363530158996582, "eval_runtime": 131.5665, "eval_samples_per_second": 1.323, "eval_steps_per_second": 0.167, "step": 1735 }, { "epoch": 6.67, "learning_rate": 0.00011131545338441892, "loss": 0.6924, "step": 1740 }, { "epoch": 6.67, "eval_accuracy": 0.7068965517241379, "eval_loss": 1.2205984592437744, "eval_runtime": 131.6305, "eval_samples_per_second": 1.322, "eval_steps_per_second": 0.167, "step": 1740 }, { "epoch": 6.69, "learning_rate": 0.00011106002554278417, "loss": 0.4349, "step": 1745 }, { "epoch": 6.69, "eval_accuracy": 0.6379310344827587, "eval_loss": 1.386996865272522, "eval_runtime": 131.7762, "eval_samples_per_second": 1.32, "eval_steps_per_second": 0.167, "step": 1745 }, { "epoch": 6.7, "learning_rate": 0.00011080459770114944, "loss": 0.5771, "step": 1750 }, { "epoch": 6.7, "eval_accuracy": 0.6839080459770115, "eval_loss": 1.1229805946350098, "eval_runtime": 131.6963, "eval_samples_per_second": 1.321, "eval_steps_per_second": 0.167, "step": 1750 }, { "epoch": 6.72, "learning_rate": 0.00011054916985951469, "loss": 0.7905, "step": 1755 }, { "epoch": 6.72, "eval_accuracy": 0.7528735632183908, "eval_loss": 0.8128155469894409, "eval_runtime": 131.306, "eval_samples_per_second": 1.325, "eval_steps_per_second": 0.168, "step": 1755 }, { "epoch": 6.74, "learning_rate": 0.00011029374201787997, "loss": 0.3866, "step": 1760 }, { "epoch": 6.74, "eval_accuracy": 0.735632183908046, "eval_loss": 0.9595608711242676, "eval_runtime": 131.4978, "eval_samples_per_second": 1.323, "eval_steps_per_second": 0.167, "step": 1760 }, { "epoch": 6.76, "learning_rate": 0.00011003831417624522, "loss": 0.4893, "step": 1765 }, { "epoch": 6.76, "eval_accuracy": 0.735632183908046, "eval_loss": 0.9293403029441833, "eval_runtime": 131.571, "eval_samples_per_second": 1.322, "eval_steps_per_second": 0.167, "step": 1765 }, { "epoch": 6.78, "learning_rate": 0.00010978288633461047, "loss": 0.0439, "step": 1770 }, { "epoch": 6.78, "eval_accuracy": 0.735632183908046, "eval_loss": 0.8708416223526001, "eval_runtime": 131.7352, "eval_samples_per_second": 1.321, "eval_steps_per_second": 0.167, "step": 1770 }, { "epoch": 6.8, "learning_rate": 0.00010952745849297575, "loss": 0.1671, "step": 1775 }, { "epoch": 6.8, "eval_accuracy": 0.7126436781609196, "eval_loss": 0.9392874836921692, "eval_runtime": 131.5959, "eval_samples_per_second": 1.322, "eval_steps_per_second": 0.167, "step": 1775 }, { "epoch": 6.82, "learning_rate": 0.000109272030651341, "loss": 0.3718, "step": 1780 }, { "epoch": 6.82, "eval_accuracy": 0.7298850574712644, "eval_loss": 1.0529624223709106, "eval_runtime": 131.7631, "eval_samples_per_second": 1.321, "eval_steps_per_second": 0.167, "step": 1780 }, { "epoch": 6.84, "learning_rate": 0.00010901660280970627, "loss": 0.5083, "step": 1785 }, { "epoch": 6.84, "eval_accuracy": 0.7183908045977011, "eval_loss": 1.0124198198318481, "eval_runtime": 131.6354, "eval_samples_per_second": 1.322, "eval_steps_per_second": 0.167, "step": 1785 }, { "epoch": 6.86, "learning_rate": 0.00010876117496807152, "loss": 0.4628, "step": 1790 }, { "epoch": 6.86, "eval_accuracy": 0.7011494252873564, "eval_loss": 1.1633274555206299, "eval_runtime": 135.5536, "eval_samples_per_second": 1.284, "eval_steps_per_second": 0.162, "step": 1790 }, { "epoch": 6.88, "learning_rate": 0.0001085057471264368, "loss": 0.3202, "step": 1795 }, { "epoch": 6.88, "eval_accuracy": 0.7528735632183908, "eval_loss": 0.9682561159133911, "eval_runtime": 131.5899, "eval_samples_per_second": 1.322, "eval_steps_per_second": 0.167, "step": 1795 }, { "epoch": 6.9, "learning_rate": 0.00010825031928480205, "loss": 0.1311, "step": 1800 }, { "epoch": 6.9, "eval_accuracy": 0.7528735632183908, "eval_loss": 0.9268329739570618, "eval_runtime": 131.5503, "eval_samples_per_second": 1.323, "eval_steps_per_second": 0.167, "step": 1800 }, { "epoch": 6.92, "learning_rate": 0.0001079948914431673, "loss": 0.4311, "step": 1805 }, { "epoch": 6.92, "eval_accuracy": 0.7471264367816092, "eval_loss": 0.889406144618988, "eval_runtime": 131.7168, "eval_samples_per_second": 1.321, "eval_steps_per_second": 0.167, "step": 1805 }, { "epoch": 6.93, "learning_rate": 0.00010773946360153258, "loss": 0.4067, "step": 1810 }, { "epoch": 6.93, "eval_accuracy": 0.7528735632183908, "eval_loss": 0.9293746948242188, "eval_runtime": 131.5506, "eval_samples_per_second": 1.323, "eval_steps_per_second": 0.167, "step": 1810 }, { "epoch": 6.95, "learning_rate": 0.00010748403575989783, "loss": 0.1898, "step": 1815 }, { "epoch": 6.95, "eval_accuracy": 0.7183908045977011, "eval_loss": 1.1521073579788208, "eval_runtime": 131.7195, "eval_samples_per_second": 1.321, "eval_steps_per_second": 0.167, "step": 1815 }, { "epoch": 6.97, "learning_rate": 0.0001072286079182631, "loss": 0.695, "step": 1820 }, { "epoch": 6.97, "eval_accuracy": 0.735632183908046, "eval_loss": 0.9605730175971985, "eval_runtime": 131.5115, "eval_samples_per_second": 1.323, "eval_steps_per_second": 0.167, "step": 1820 }, { "epoch": 6.99, "learning_rate": 0.00010697318007662835, "loss": 0.0965, "step": 1825 }, { "epoch": 6.99, "eval_accuracy": 0.735632183908046, "eval_loss": 1.0009174346923828, "eval_runtime": 131.6509, "eval_samples_per_second": 1.322, "eval_steps_per_second": 0.167, "step": 1825 }, { "epoch": 7.01, "learning_rate": 0.00010671775223499363, "loss": 0.5734, "step": 1830 }, { "epoch": 7.01, "eval_accuracy": 0.7126436781609196, "eval_loss": 0.9490659832954407, "eval_runtime": 131.7585, "eval_samples_per_second": 1.321, "eval_steps_per_second": 0.167, "step": 1830 }, { "epoch": 7.03, "learning_rate": 0.00010646232439335888, "loss": 0.4251, "step": 1835 }, { "epoch": 7.03, "eval_accuracy": 0.7471264367816092, "eval_loss": 0.9266923666000366, "eval_runtime": 131.6985, "eval_samples_per_second": 1.321, "eval_steps_per_second": 0.167, "step": 1835 }, { "epoch": 7.05, "learning_rate": 0.00010620689655172413, "loss": 0.027, "step": 1840 }, { "epoch": 7.05, "eval_accuracy": 0.7528735632183908, "eval_loss": 0.8665022850036621, "eval_runtime": 131.6926, "eval_samples_per_second": 1.321, "eval_steps_per_second": 0.167, "step": 1840 }, { "epoch": 7.07, "learning_rate": 0.0001059514687100894, "loss": 0.0263, "step": 1845 }, { "epoch": 7.07, "eval_accuracy": 0.7528735632183908, "eval_loss": 0.9958654642105103, "eval_runtime": 131.7555, "eval_samples_per_second": 1.321, "eval_steps_per_second": 0.167, "step": 1845 }, { "epoch": 7.09, "learning_rate": 0.00010569604086845466, "loss": 0.0401, "step": 1850 }, { "epoch": 7.09, "eval_accuracy": 0.7241379310344828, "eval_loss": 1.1048107147216797, "eval_runtime": 131.5719, "eval_samples_per_second": 1.322, "eval_steps_per_second": 0.167, "step": 1850 }, { "epoch": 7.11, "learning_rate": 0.00010544061302681993, "loss": 0.1228, "step": 1855 }, { "epoch": 7.11, "eval_accuracy": 0.7011494252873564, "eval_loss": 1.3274551630020142, "eval_runtime": 131.6336, "eval_samples_per_second": 1.322, "eval_steps_per_second": 0.167, "step": 1855 }, { "epoch": 7.13, "learning_rate": 0.00010518518518518518, "loss": 0.0653, "step": 1860 }, { "epoch": 7.13, "eval_accuracy": 0.6954022988505747, "eval_loss": 1.5582555532455444, "eval_runtime": 131.7409, "eval_samples_per_second": 1.321, "eval_steps_per_second": 0.167, "step": 1860 }, { "epoch": 7.15, "learning_rate": 0.00010492975734355046, "loss": 0.3934, "step": 1865 }, { "epoch": 7.15, "eval_accuracy": 0.6839080459770115, "eval_loss": 1.6236568689346313, "eval_runtime": 131.8161, "eval_samples_per_second": 1.32, "eval_steps_per_second": 0.167, "step": 1865 }, { "epoch": 7.16, "learning_rate": 0.00010467432950191571, "loss": 0.0691, "step": 1870 }, { "epoch": 7.16, "eval_accuracy": 0.6781609195402298, "eval_loss": 1.592093586921692, "eval_runtime": 131.5384, "eval_samples_per_second": 1.323, "eval_steps_per_second": 0.167, "step": 1870 }, { "epoch": 7.18, "learning_rate": 0.00010441890166028096, "loss": 0.2809, "step": 1875 }, { "epoch": 7.18, "eval_accuracy": 0.6896551724137931, "eval_loss": 1.475704312324524, "eval_runtime": 136.9559, "eval_samples_per_second": 1.27, "eval_steps_per_second": 0.161, "step": 1875 }, { "epoch": 7.2, "learning_rate": 0.00010416347381864624, "loss": 0.0979, "step": 1880 }, { "epoch": 7.2, "eval_accuracy": 0.7011494252873564, "eval_loss": 1.3467644453048706, "eval_runtime": 135.9935, "eval_samples_per_second": 1.279, "eval_steps_per_second": 0.162, "step": 1880 }, { "epoch": 7.22, "learning_rate": 0.00010390804597701149, "loss": 0.1615, "step": 1885 }, { "epoch": 7.22, "eval_accuracy": 0.6724137931034483, "eval_loss": 1.4109445810317993, "eval_runtime": 133.1481, "eval_samples_per_second": 1.307, "eval_steps_per_second": 0.165, "step": 1885 }, { "epoch": 7.24, "learning_rate": 0.00010365261813537676, "loss": 0.3491, "step": 1890 }, { "epoch": 7.24, "eval_accuracy": 0.7183908045977011, "eval_loss": 1.2932188510894775, "eval_runtime": 132.6976, "eval_samples_per_second": 1.311, "eval_steps_per_second": 0.166, "step": 1890 }, { "epoch": 7.26, "learning_rate": 0.00010339719029374201, "loss": 0.0214, "step": 1895 }, { "epoch": 7.26, "eval_accuracy": 0.7528735632183908, "eval_loss": 1.277075171470642, "eval_runtime": 136.2776, "eval_samples_per_second": 1.277, "eval_steps_per_second": 0.161, "step": 1895 }, { "epoch": 7.28, "learning_rate": 0.00010314176245210729, "loss": 0.2161, "step": 1900 }, { "epoch": 7.28, "eval_accuracy": 0.7011494252873564, "eval_loss": 1.2838515043258667, "eval_runtime": 135.9251, "eval_samples_per_second": 1.28, "eval_steps_per_second": 0.162, "step": 1900 }, { "epoch": 7.3, "learning_rate": 0.00010288633461047254, "loss": 0.129, "step": 1905 }, { "epoch": 7.3, "eval_accuracy": 0.7126436781609196, "eval_loss": 1.3630799055099487, "eval_runtime": 134.44, "eval_samples_per_second": 1.294, "eval_steps_per_second": 0.164, "step": 1905 }, { "epoch": 7.32, "learning_rate": 0.0001026309067688378, "loss": 0.174, "step": 1910 }, { "epoch": 7.32, "eval_accuracy": 0.735632183908046, "eval_loss": 1.1274833679199219, "eval_runtime": 132.7649, "eval_samples_per_second": 1.311, "eval_steps_per_second": 0.166, "step": 1910 }, { "epoch": 7.34, "learning_rate": 0.00010237547892720307, "loss": 0.0376, "step": 1915 }, { "epoch": 7.34, "eval_accuracy": 0.7528735632183908, "eval_loss": 1.2306206226348877, "eval_runtime": 133.5924, "eval_samples_per_second": 1.302, "eval_steps_per_second": 0.165, "step": 1915 }, { "epoch": 7.36, "learning_rate": 0.00010212005108556833, "loss": 0.7968, "step": 1920 }, { "epoch": 7.36, "eval_accuracy": 0.764367816091954, "eval_loss": 1.3550326824188232, "eval_runtime": 133.8914, "eval_samples_per_second": 1.3, "eval_steps_per_second": 0.164, "step": 1920 }, { "epoch": 7.38, "learning_rate": 0.00010186462324393359, "loss": 0.1575, "step": 1925 }, { "epoch": 7.38, "eval_accuracy": 0.7183908045977011, "eval_loss": 1.4989960193634033, "eval_runtime": 132.9481, "eval_samples_per_second": 1.309, "eval_steps_per_second": 0.165, "step": 1925 }, { "epoch": 7.39, "learning_rate": 0.00010160919540229886, "loss": 0.1735, "step": 1930 }, { "epoch": 7.39, "eval_accuracy": 0.6896551724137931, "eval_loss": 1.6739168167114258, "eval_runtime": 133.8236, "eval_samples_per_second": 1.3, "eval_steps_per_second": 0.164, "step": 1930 }, { "epoch": 7.41, "learning_rate": 0.00010135376756066412, "loss": 0.4192, "step": 1935 }, { "epoch": 7.41, "eval_accuracy": 0.6551724137931034, "eval_loss": 1.6104061603546143, "eval_runtime": 133.0367, "eval_samples_per_second": 1.308, "eval_steps_per_second": 0.165, "step": 1935 }, { "epoch": 7.43, "learning_rate": 0.00010109833971902938, "loss": 0.3096, "step": 1940 }, { "epoch": 7.43, "eval_accuracy": 0.6954022988505747, "eval_loss": 1.521593689918518, "eval_runtime": 132.8765, "eval_samples_per_second": 1.309, "eval_steps_per_second": 0.166, "step": 1940 }, { "epoch": 7.45, "learning_rate": 0.00010084291187739463, "loss": 0.3327, "step": 1945 }, { "epoch": 7.45, "eval_accuracy": 0.6781609195402298, "eval_loss": 1.6830250024795532, "eval_runtime": 133.0706, "eval_samples_per_second": 1.308, "eval_steps_per_second": 0.165, "step": 1945 }, { "epoch": 7.47, "learning_rate": 0.00010058748403575991, "loss": 0.276, "step": 1950 }, { "epoch": 7.47, "eval_accuracy": 0.7183908045977011, "eval_loss": 1.4198530912399292, "eval_runtime": 133.3314, "eval_samples_per_second": 1.305, "eval_steps_per_second": 0.165, "step": 1950 }, { "epoch": 7.49, "learning_rate": 0.00010033205619412516, "loss": 0.0554, "step": 1955 }, { "epoch": 7.49, "eval_accuracy": 0.7126436781609196, "eval_loss": 1.2515894174575806, "eval_runtime": 132.6084, "eval_samples_per_second": 1.312, "eval_steps_per_second": 0.166, "step": 1955 }, { "epoch": 7.51, "learning_rate": 0.00010007662835249044, "loss": 0.2187, "step": 1960 }, { "epoch": 7.51, "eval_accuracy": 0.7241379310344828, "eval_loss": 1.191728115081787, "eval_runtime": 133.6638, "eval_samples_per_second": 1.302, "eval_steps_per_second": 0.165, "step": 1960 }, { "epoch": 7.53, "learning_rate": 9.982120051085569e-05, "loss": 0.2532, "step": 1965 }, { "epoch": 7.53, "eval_accuracy": 0.735632183908046, "eval_loss": 1.1683300733566284, "eval_runtime": 131.8102, "eval_samples_per_second": 1.32, "eval_steps_per_second": 0.167, "step": 1965 }, { "epoch": 7.55, "learning_rate": 9.956577266922095e-05, "loss": 0.2872, "step": 1970 }, { "epoch": 7.55, "eval_accuracy": 0.7183908045977011, "eval_loss": 1.311906337738037, "eval_runtime": 131.5674, "eval_samples_per_second": 1.323, "eval_steps_per_second": 0.167, "step": 1970 }, { "epoch": 7.57, "learning_rate": 9.931034482758621e-05, "loss": 0.1039, "step": 1975 }, { "epoch": 7.57, "eval_accuracy": 0.6839080459770115, "eval_loss": 1.475795030593872, "eval_runtime": 131.7442, "eval_samples_per_second": 1.321, "eval_steps_per_second": 0.167, "step": 1975 }, { "epoch": 7.59, "learning_rate": 9.905491698595148e-05, "loss": 0.3423, "step": 1980 }, { "epoch": 7.59, "eval_accuracy": 0.7413793103448276, "eval_loss": 1.3955832719802856, "eval_runtime": 131.4992, "eval_samples_per_second": 1.323, "eval_steps_per_second": 0.167, "step": 1980 }, { "epoch": 7.61, "learning_rate": 9.879948914431674e-05, "loss": 0.0705, "step": 1985 }, { "epoch": 7.61, "eval_accuracy": 0.6724137931034483, "eval_loss": 1.6385270357131958, "eval_runtime": 131.4457, "eval_samples_per_second": 1.324, "eval_steps_per_second": 0.167, "step": 1985 }, { "epoch": 7.62, "learning_rate": 9.8544061302682e-05, "loss": 0.0537, "step": 1990 }, { "epoch": 7.62, "eval_accuracy": 0.6666666666666666, "eval_loss": 1.8518561124801636, "eval_runtime": 131.5342, "eval_samples_per_second": 1.323, "eval_steps_per_second": 0.167, "step": 1990 }, { "epoch": 7.64, "learning_rate": 9.828863346104727e-05, "loss": 0.3629, "step": 1995 }, { "epoch": 7.64, "eval_accuracy": 0.6724137931034483, "eval_loss": 1.7972184419631958, "eval_runtime": 131.7087, "eval_samples_per_second": 1.321, "eval_steps_per_second": 0.167, "step": 1995 }, { "epoch": 7.66, "learning_rate": 9.803320561941252e-05, "loss": 0.7452, "step": 2000 }, { "epoch": 7.66, "eval_accuracy": 0.6839080459770115, "eval_loss": 1.5261802673339844, "eval_runtime": 131.6943, "eval_samples_per_second": 1.321, "eval_steps_per_second": 0.167, "step": 2000 }, { "epoch": 7.68, "learning_rate": 9.777777777777778e-05, "loss": 0.4275, "step": 2005 }, { "epoch": 7.68, "eval_accuracy": 0.7126436781609196, "eval_loss": 1.3043127059936523, "eval_runtime": 131.6861, "eval_samples_per_second": 1.321, "eval_steps_per_second": 0.167, "step": 2005 }, { "epoch": 7.7, "learning_rate": 9.752234993614304e-05, "loss": 0.1486, "step": 2010 }, { "epoch": 7.7, "eval_accuracy": 0.7183908045977011, "eval_loss": 1.1594352722167969, "eval_runtime": 131.7597, "eval_samples_per_second": 1.321, "eval_steps_per_second": 0.167, "step": 2010 }, { "epoch": 7.72, "learning_rate": 9.72669220945083e-05, "loss": 0.1045, "step": 2015 }, { "epoch": 7.72, "eval_accuracy": 0.7068965517241379, "eval_loss": 1.1598647832870483, "eval_runtime": 131.5067, "eval_samples_per_second": 1.323, "eval_steps_per_second": 0.167, "step": 2015 }, { "epoch": 7.74, "learning_rate": 9.701149425287357e-05, "loss": 0.5649, "step": 2020 }, { "epoch": 7.74, "eval_accuracy": 0.7413793103448276, "eval_loss": 1.1549986600875854, "eval_runtime": 131.7512, "eval_samples_per_second": 1.321, "eval_steps_per_second": 0.167, "step": 2020 }, { "epoch": 7.76, "learning_rate": 9.675606641123883e-05, "loss": 0.1445, "step": 2025 }, { "epoch": 7.76, "eval_accuracy": 0.7183908045977011, "eval_loss": 1.3097654581069946, "eval_runtime": 131.7447, "eval_samples_per_second": 1.321, "eval_steps_per_second": 0.167, "step": 2025 }, { "epoch": 7.78, "learning_rate": 9.65006385696041e-05, "loss": 0.0164, "step": 2030 }, { "epoch": 7.78, "eval_accuracy": 0.7068965517241379, "eval_loss": 1.50651216506958, "eval_runtime": 131.8848, "eval_samples_per_second": 1.319, "eval_steps_per_second": 0.167, "step": 2030 }, { "epoch": 7.8, "learning_rate": 9.624521072796935e-05, "loss": 0.1815, "step": 2035 }, { "epoch": 7.8, "eval_accuracy": 0.7126436781609196, "eval_loss": 1.5009325742721558, "eval_runtime": 135.4923, "eval_samples_per_second": 1.284, "eval_steps_per_second": 0.162, "step": 2035 }, { "epoch": 7.82, "learning_rate": 9.598978288633461e-05, "loss": 0.8265, "step": 2040 }, { "epoch": 7.82, "eval_accuracy": 0.7471264367816092, "eval_loss": 1.1351158618927002, "eval_runtime": 131.6271, "eval_samples_per_second": 1.322, "eval_steps_per_second": 0.167, "step": 2040 }, { "epoch": 7.84, "learning_rate": 9.573435504469987e-05, "loss": 0.3273, "step": 2045 }, { "epoch": 7.84, "eval_accuracy": 0.7241379310344828, "eval_loss": 1.1325792074203491, "eval_runtime": 131.7848, "eval_samples_per_second": 1.32, "eval_steps_per_second": 0.167, "step": 2045 }, { "epoch": 7.85, "learning_rate": 9.547892720306514e-05, "loss": 0.0314, "step": 2050 }, { "epoch": 7.85, "eval_accuracy": 0.7126436781609196, "eval_loss": 1.173128366470337, "eval_runtime": 135.703, "eval_samples_per_second": 1.282, "eval_steps_per_second": 0.162, "step": 2050 }, { "epoch": 7.87, "learning_rate": 9.52234993614304e-05, "loss": 0.1498, "step": 2055 }, { "epoch": 7.87, "eval_accuracy": 0.7126436781609196, "eval_loss": 1.2958407402038574, "eval_runtime": 132.023, "eval_samples_per_second": 1.318, "eval_steps_per_second": 0.167, "step": 2055 }, { "epoch": 7.89, "learning_rate": 9.496807151979566e-05, "loss": 0.0174, "step": 2060 }, { "epoch": 7.89, "eval_accuracy": 0.7126436781609196, "eval_loss": 1.4961018562316895, "eval_runtime": 131.6124, "eval_samples_per_second": 1.322, "eval_steps_per_second": 0.167, "step": 2060 }, { "epoch": 7.91, "learning_rate": 9.471264367816093e-05, "loss": 0.142, "step": 2065 }, { "epoch": 7.91, "eval_accuracy": 0.7298850574712644, "eval_loss": 1.4059162139892578, "eval_runtime": 134.8481, "eval_samples_per_second": 1.29, "eval_steps_per_second": 0.163, "step": 2065 }, { "epoch": 7.93, "learning_rate": 9.445721583652618e-05, "loss": 0.3848, "step": 2070 }, { "epoch": 7.93, "eval_accuracy": 0.735632183908046, "eval_loss": 1.1554484367370605, "eval_runtime": 134.5446, "eval_samples_per_second": 1.293, "eval_steps_per_second": 0.164, "step": 2070 }, { "epoch": 7.95, "learning_rate": 9.420178799489144e-05, "loss": 0.1568, "step": 2075 }, { "epoch": 7.95, "eval_accuracy": 0.7471264367816092, "eval_loss": 1.1746104955673218, "eval_runtime": 131.6333, "eval_samples_per_second": 1.322, "eval_steps_per_second": 0.167, "step": 2075 }, { "epoch": 7.97, "learning_rate": 9.39463601532567e-05, "loss": 0.0018, "step": 2080 }, { "epoch": 7.97, "eval_accuracy": 0.735632183908046, "eval_loss": 1.2762763500213623, "eval_runtime": 131.5438, "eval_samples_per_second": 1.323, "eval_steps_per_second": 0.167, "step": 2080 }, { "epoch": 7.99, "learning_rate": 9.369093231162197e-05, "loss": 0.209, "step": 2085 }, { "epoch": 7.99, "eval_accuracy": 0.7471264367816092, "eval_loss": 1.409183144569397, "eval_runtime": 131.4653, "eval_samples_per_second": 1.324, "eval_steps_per_second": 0.167, "step": 2085 }, { "epoch": 8.01, "learning_rate": 9.343550446998723e-05, "loss": 0.1243, "step": 2090 }, { "epoch": 8.01, "eval_accuracy": 0.7068965517241379, "eval_loss": 1.392285943031311, "eval_runtime": 131.5546, "eval_samples_per_second": 1.323, "eval_steps_per_second": 0.167, "step": 2090 }, { "epoch": 8.03, "learning_rate": 9.318007662835249e-05, "loss": 0.1023, "step": 2095 }, { "epoch": 8.03, "eval_accuracy": 0.7126436781609196, "eval_loss": 1.2922409772872925, "eval_runtime": 131.7665, "eval_samples_per_second": 1.321, "eval_steps_per_second": 0.167, "step": 2095 }, { "epoch": 8.05, "learning_rate": 9.292464878671776e-05, "loss": 0.0129, "step": 2100 }, { "epoch": 8.05, "eval_accuracy": 0.7126436781609196, "eval_loss": 1.3522768020629883, "eval_runtime": 131.5422, "eval_samples_per_second": 1.323, "eval_steps_per_second": 0.167, "step": 2100 }, { "epoch": 8.07, "learning_rate": 9.266922094508302e-05, "loss": 0.0224, "step": 2105 }, { "epoch": 8.07, "eval_accuracy": 0.7126436781609196, "eval_loss": 1.5482807159423828, "eval_runtime": 131.7205, "eval_samples_per_second": 1.321, "eval_steps_per_second": 0.167, "step": 2105 }, { "epoch": 8.08, "learning_rate": 9.241379310344827e-05, "loss": 0.2608, "step": 2110 }, { "epoch": 8.08, "eval_accuracy": 0.7298850574712644, "eval_loss": 1.3694931268692017, "eval_runtime": 131.5827, "eval_samples_per_second": 1.322, "eval_steps_per_second": 0.167, "step": 2110 }, { "epoch": 8.1, "learning_rate": 9.215836526181353e-05, "loss": 0.0016, "step": 2115 }, { "epoch": 8.1, "eval_accuracy": 0.735632183908046, "eval_loss": 1.4523777961730957, "eval_runtime": 131.7817, "eval_samples_per_second": 1.32, "eval_steps_per_second": 0.167, "step": 2115 }, { "epoch": 8.12, "learning_rate": 9.19029374201788e-05, "loss": 0.0384, "step": 2120 }, { "epoch": 8.12, "eval_accuracy": 0.7183908045977011, "eval_loss": 1.5808136463165283, "eval_runtime": 134.3004, "eval_samples_per_second": 1.296, "eval_steps_per_second": 0.164, "step": 2120 }, { "epoch": 8.14, "learning_rate": 9.164750957854406e-05, "loss": 0.1079, "step": 2125 }, { "epoch": 8.14, "eval_accuracy": 0.6896551724137931, "eval_loss": 1.8028441667556763, "eval_runtime": 131.7133, "eval_samples_per_second": 1.321, "eval_steps_per_second": 0.167, "step": 2125 }, { "epoch": 8.16, "learning_rate": 9.139208173690932e-05, "loss": 0.314, "step": 2130 }, { "epoch": 8.16, "eval_accuracy": 0.6954022988505747, "eval_loss": 1.6579396724700928, "eval_runtime": 131.38, "eval_samples_per_second": 1.324, "eval_steps_per_second": 0.167, "step": 2130 }, { "epoch": 8.18, "learning_rate": 9.113665389527459e-05, "loss": 0.0119, "step": 2135 }, { "epoch": 8.18, "eval_accuracy": 0.7471264367816092, "eval_loss": 1.4466235637664795, "eval_runtime": 131.4616, "eval_samples_per_second": 1.324, "eval_steps_per_second": 0.167, "step": 2135 }, { "epoch": 8.2, "learning_rate": 9.088122605363985e-05, "loss": 0.0145, "step": 2140 }, { "epoch": 8.2, "eval_accuracy": 0.735632183908046, "eval_loss": 1.4529680013656616, "eval_runtime": 131.7249, "eval_samples_per_second": 1.321, "eval_steps_per_second": 0.167, "step": 2140 }, { "epoch": 8.22, "learning_rate": 9.062579821200511e-05, "loss": 0.2998, "step": 2145 }, { "epoch": 8.22, "eval_accuracy": 0.7413793103448276, "eval_loss": 1.4054944515228271, "eval_runtime": 131.6772, "eval_samples_per_second": 1.321, "eval_steps_per_second": 0.167, "step": 2145 }, { "epoch": 8.24, "learning_rate": 9.037037037037038e-05, "loss": 0.007, "step": 2150 }, { "epoch": 8.24, "eval_accuracy": 0.7068965517241379, "eval_loss": 1.3814021348953247, "eval_runtime": 131.7918, "eval_samples_per_second": 1.32, "eval_steps_per_second": 0.167, "step": 2150 }, { "epoch": 8.26, "learning_rate": 9.011494252873564e-05, "loss": 0.0243, "step": 2155 }, { "epoch": 8.26, "eval_accuracy": 0.7241379310344828, "eval_loss": 1.4890694618225098, "eval_runtime": 133.8246, "eval_samples_per_second": 1.3, "eval_steps_per_second": 0.164, "step": 2155 }, { "epoch": 8.28, "learning_rate": 8.98595146871009e-05, "loss": 0.0201, "step": 2160 }, { "epoch": 8.28, "eval_accuracy": 0.7126436781609196, "eval_loss": 1.6387782096862793, "eval_runtime": 135.8171, "eval_samples_per_second": 1.281, "eval_steps_per_second": 0.162, "step": 2160 }, { "epoch": 8.3, "learning_rate": 8.960408684546617e-05, "loss": 0.0394, "step": 2165 }, { "epoch": 8.3, "eval_accuracy": 0.7068965517241379, "eval_loss": 1.6442043781280518, "eval_runtime": 136.9375, "eval_samples_per_second": 1.271, "eval_steps_per_second": 0.161, "step": 2165 }, { "epoch": 8.31, "learning_rate": 8.934865900383143e-05, "loss": 0.049, "step": 2170 }, { "epoch": 8.31, "eval_accuracy": 0.7126436781609196, "eval_loss": 1.4437042474746704, "eval_runtime": 136.8983, "eval_samples_per_second": 1.271, "eval_steps_per_second": 0.161, "step": 2170 }, { "epoch": 8.33, "learning_rate": 8.90932311621967e-05, "loss": 0.0045, "step": 2175 }, { "epoch": 8.33, "eval_accuracy": 0.7126436781609196, "eval_loss": 1.6671456098556519, "eval_runtime": 133.0309, "eval_samples_per_second": 1.308, "eval_steps_per_second": 0.165, "step": 2175 }, { "epoch": 8.35, "learning_rate": 8.883780332056194e-05, "loss": 0.0227, "step": 2180 }, { "epoch": 8.35, "eval_accuracy": 0.7068965517241379, "eval_loss": 1.811476230621338, "eval_runtime": 133.9084, "eval_samples_per_second": 1.299, "eval_steps_per_second": 0.164, "step": 2180 }, { "epoch": 8.37, "learning_rate": 8.85823754789272e-05, "loss": 0.0484, "step": 2185 }, { "epoch": 8.37, "eval_accuracy": 0.735632183908046, "eval_loss": 1.5887384414672852, "eval_runtime": 133.5744, "eval_samples_per_second": 1.303, "eval_steps_per_second": 0.165, "step": 2185 }, { "epoch": 8.39, "learning_rate": 8.832694763729247e-05, "loss": 0.2142, "step": 2190 }, { "epoch": 8.39, "eval_accuracy": 0.7413793103448276, "eval_loss": 1.5692673921585083, "eval_runtime": 134.0447, "eval_samples_per_second": 1.298, "eval_steps_per_second": 0.164, "step": 2190 }, { "epoch": 8.41, "learning_rate": 8.807151979565773e-05, "loss": 0.038, "step": 2195 }, { "epoch": 8.41, "eval_accuracy": 0.7068965517241379, "eval_loss": 1.6501433849334717, "eval_runtime": 133.022, "eval_samples_per_second": 1.308, "eval_steps_per_second": 0.165, "step": 2195 }, { "epoch": 8.43, "learning_rate": 8.7816091954023e-05, "loss": 0.1527, "step": 2200 }, { "epoch": 8.43, "eval_accuracy": 0.7183908045977011, "eval_loss": 1.6354538202285767, "eval_runtime": 135.8146, "eval_samples_per_second": 1.281, "eval_steps_per_second": 0.162, "step": 2200 }, { "epoch": 8.45, "learning_rate": 8.756066411238826e-05, "loss": 0.0143, "step": 2205 }, { "epoch": 8.45, "eval_accuracy": 0.7471264367816092, "eval_loss": 1.524357795715332, "eval_runtime": 134.4439, "eval_samples_per_second": 1.294, "eval_steps_per_second": 0.164, "step": 2205 }, { "epoch": 8.47, "learning_rate": 8.730523627075352e-05, "loss": 0.019, "step": 2210 }, { "epoch": 8.47, "eval_accuracy": 0.7471264367816092, "eval_loss": 1.5004924535751343, "eval_runtime": 136.0359, "eval_samples_per_second": 1.279, "eval_steps_per_second": 0.162, "step": 2210 }, { "epoch": 8.49, "learning_rate": 8.704980842911877e-05, "loss": 0.007, "step": 2215 }, { "epoch": 8.49, "eval_accuracy": 0.7413793103448276, "eval_loss": 1.5427993535995483, "eval_runtime": 133.7594, "eval_samples_per_second": 1.301, "eval_steps_per_second": 0.164, "step": 2215 }, { "epoch": 8.51, "learning_rate": 8.679438058748404e-05, "loss": 0.2862, "step": 2220 }, { "epoch": 8.51, "eval_accuracy": 0.7298850574712644, "eval_loss": 1.5085382461547852, "eval_runtime": 133.1685, "eval_samples_per_second": 1.307, "eval_steps_per_second": 0.165, "step": 2220 }, { "epoch": 8.52, "learning_rate": 8.65389527458493e-05, "loss": 0.1645, "step": 2225 }, { "epoch": 8.52, "eval_accuracy": 0.7471264367816092, "eval_loss": 1.494451642036438, "eval_runtime": 135.845, "eval_samples_per_second": 1.281, "eval_steps_per_second": 0.162, "step": 2225 }, { "epoch": 8.54, "learning_rate": 8.628352490421456e-05, "loss": 0.0377, "step": 2230 }, { "epoch": 8.54, "eval_accuracy": 0.7528735632183908, "eval_loss": 1.435677409172058, "eval_runtime": 137.2025, "eval_samples_per_second": 1.268, "eval_steps_per_second": 0.16, "step": 2230 }, { "epoch": 8.56, "learning_rate": 8.602809706257983e-05, "loss": 0.0011, "step": 2235 }, { "epoch": 8.56, "eval_accuracy": 0.7471264367816092, "eval_loss": 1.50213623046875, "eval_runtime": 134.1377, "eval_samples_per_second": 1.297, "eval_steps_per_second": 0.164, "step": 2235 }, { "epoch": 8.58, "learning_rate": 8.577266922094509e-05, "loss": 0.0195, "step": 2240 }, { "epoch": 8.58, "eval_accuracy": 0.7586206896551724, "eval_loss": 1.4815926551818848, "eval_runtime": 133.7184, "eval_samples_per_second": 1.301, "eval_steps_per_second": 0.165, "step": 2240 }, { "epoch": 8.6, "learning_rate": 8.551724137931035e-05, "loss": 0.0038, "step": 2245 }, { "epoch": 8.6, "eval_accuracy": 0.735632183908046, "eval_loss": 1.5892810821533203, "eval_runtime": 131.8657, "eval_samples_per_second": 1.32, "eval_steps_per_second": 0.167, "step": 2245 }, { "epoch": 8.62, "learning_rate": 8.52618135376756e-05, "loss": 0.0028, "step": 2250 }, { "epoch": 8.62, "eval_accuracy": 0.7413793103448276, "eval_loss": 1.6429439783096313, "eval_runtime": 131.6556, "eval_samples_per_second": 1.322, "eval_steps_per_second": 0.167, "step": 2250 }, { "epoch": 8.64, "learning_rate": 8.500638569604087e-05, "loss": 0.2262, "step": 2255 }, { "epoch": 8.64, "eval_accuracy": 0.7413793103448276, "eval_loss": 1.6773968935012817, "eval_runtime": 135.4188, "eval_samples_per_second": 1.285, "eval_steps_per_second": 0.162, "step": 2255 }, { "epoch": 8.66, "learning_rate": 8.475095785440613e-05, "loss": 0.4226, "step": 2260 }, { "epoch": 8.66, "eval_accuracy": 0.735632183908046, "eval_loss": 1.4581010341644287, "eval_runtime": 131.687, "eval_samples_per_second": 1.321, "eval_steps_per_second": 0.167, "step": 2260 }, { "epoch": 8.68, "learning_rate": 8.449553001277139e-05, "loss": 0.0025, "step": 2265 }, { "epoch": 8.68, "eval_accuracy": 0.7241379310344828, "eval_loss": 1.3616026639938354, "eval_runtime": 131.5724, "eval_samples_per_second": 1.322, "eval_steps_per_second": 0.167, "step": 2265 }, { "epoch": 8.7, "learning_rate": 8.424010217113666e-05, "loss": 0.0549, "step": 2270 }, { "epoch": 8.7, "eval_accuracy": 0.7528735632183908, "eval_loss": 1.3944165706634521, "eval_runtime": 135.3983, "eval_samples_per_second": 1.285, "eval_steps_per_second": 0.162, "step": 2270 }, { "epoch": 8.72, "learning_rate": 8.398467432950192e-05, "loss": 0.2475, "step": 2275 }, { "epoch": 8.72, "eval_accuracy": 0.735632183908046, "eval_loss": 1.4397807121276855, "eval_runtime": 131.6047, "eval_samples_per_second": 1.322, "eval_steps_per_second": 0.167, "step": 2275 }, { "epoch": 8.74, "learning_rate": 8.372924648786718e-05, "loss": 0.0432, "step": 2280 }, { "epoch": 8.74, "eval_accuracy": 0.7298850574712644, "eval_loss": 1.4615994691848755, "eval_runtime": 134.6188, "eval_samples_per_second": 1.293, "eval_steps_per_second": 0.163, "step": 2280 }, { "epoch": 8.75, "learning_rate": 8.347381864623243e-05, "loss": 0.0076, "step": 2285 }, { "epoch": 8.75, "eval_accuracy": 0.7298850574712644, "eval_loss": 1.4265929460525513, "eval_runtime": 131.5081, "eval_samples_per_second": 1.323, "eval_steps_per_second": 0.167, "step": 2285 }, { "epoch": 8.77, "learning_rate": 8.32183908045977e-05, "loss": 0.1605, "step": 2290 }, { "epoch": 8.77, "eval_accuracy": 0.7471264367816092, "eval_loss": 1.3677057027816772, "eval_runtime": 134.1102, "eval_samples_per_second": 1.297, "eval_steps_per_second": 0.164, "step": 2290 }, { "epoch": 8.79, "learning_rate": 8.296296296296296e-05, "loss": 0.0192, "step": 2295 }, { "epoch": 8.79, "eval_accuracy": 0.7413793103448276, "eval_loss": 1.3508528470993042, "eval_runtime": 131.6443, "eval_samples_per_second": 1.322, "eval_steps_per_second": 0.167, "step": 2295 }, { "epoch": 8.81, "learning_rate": 8.270753512132822e-05, "loss": 0.0123, "step": 2300 }, { "epoch": 8.81, "eval_accuracy": 0.7471264367816092, "eval_loss": 1.3890247344970703, "eval_runtime": 131.7602, "eval_samples_per_second": 1.321, "eval_steps_per_second": 0.167, "step": 2300 }, { "epoch": 8.83, "learning_rate": 8.245210727969349e-05, "loss": 0.0292, "step": 2305 }, { "epoch": 8.83, "eval_accuracy": 0.7183908045977011, "eval_loss": 1.5051121711730957, "eval_runtime": 131.7302, "eval_samples_per_second": 1.321, "eval_steps_per_second": 0.167, "step": 2305 }, { "epoch": 8.85, "learning_rate": 8.219667943805875e-05, "loss": 0.1464, "step": 2310 }, { "epoch": 8.85, "eval_accuracy": 0.7241379310344828, "eval_loss": 1.6223372220993042, "eval_runtime": 131.7641, "eval_samples_per_second": 1.321, "eval_steps_per_second": 0.167, "step": 2310 }, { "epoch": 8.87, "learning_rate": 8.194125159642401e-05, "loss": 0.3433, "step": 2315 }, { "epoch": 8.87, "eval_accuracy": 0.7183908045977011, "eval_loss": 1.6563609838485718, "eval_runtime": 131.5136, "eval_samples_per_second": 1.323, "eval_steps_per_second": 0.167, "step": 2315 }, { "epoch": 8.89, "learning_rate": 8.168582375478928e-05, "loss": 0.1714, "step": 2320 }, { "epoch": 8.89, "eval_accuracy": 0.7298850574712644, "eval_loss": 1.622618556022644, "eval_runtime": 131.4805, "eval_samples_per_second": 1.323, "eval_steps_per_second": 0.167, "step": 2320 }, { "epoch": 8.91, "learning_rate": 8.143039591315454e-05, "loss": 0.0059, "step": 2325 }, { "epoch": 8.91, "eval_accuracy": 0.7068965517241379, "eval_loss": 1.819935917854309, "eval_runtime": 134.5852, "eval_samples_per_second": 1.293, "eval_steps_per_second": 0.163, "step": 2325 }, { "epoch": 8.93, "learning_rate": 8.11749680715198e-05, "loss": 0.0172, "step": 2330 }, { "epoch": 8.93, "eval_accuracy": 0.6666666666666666, "eval_loss": 2.0601022243499756, "eval_runtime": 131.6123, "eval_samples_per_second": 1.322, "eval_steps_per_second": 0.167, "step": 2330 }, { "epoch": 8.95, "learning_rate": 8.091954022988507e-05, "loss": 0.9032, "step": 2335 }, { "epoch": 8.95, "eval_accuracy": 0.6896551724137931, "eval_loss": 1.911239743232727, "eval_runtime": 132.3205, "eval_samples_per_second": 1.315, "eval_steps_per_second": 0.166, "step": 2335 }, { "epoch": 8.97, "learning_rate": 8.066411238825033e-05, "loss": 0.2749, "step": 2340 }, { "epoch": 8.97, "eval_accuracy": 0.7126436781609196, "eval_loss": 1.5849545001983643, "eval_runtime": 131.5581, "eval_samples_per_second": 1.323, "eval_steps_per_second": 0.167, "step": 2340 }, { "epoch": 8.98, "learning_rate": 8.04086845466156e-05, "loss": 0.0033, "step": 2345 }, { "epoch": 8.98, "eval_accuracy": 0.7183908045977011, "eval_loss": 1.447026014328003, "eval_runtime": 131.5482, "eval_samples_per_second": 1.323, "eval_steps_per_second": 0.167, "step": 2345 }, { "epoch": 9.0, "learning_rate": 8.015325670498086e-05, "loss": 0.1976, "step": 2350 }, { "epoch": 9.0, "eval_accuracy": 0.7471264367816092, "eval_loss": 1.5511088371276855, "eval_runtime": 131.6876, "eval_samples_per_second": 1.321, "eval_steps_per_second": 0.167, "step": 2350 }, { "epoch": 9.02, "learning_rate": 7.989782886334612e-05, "loss": 0.0023, "step": 2355 }, { "epoch": 9.02, "eval_accuracy": 0.735632183908046, "eval_loss": 1.6997263431549072, "eval_runtime": 131.6445, "eval_samples_per_second": 1.322, "eval_steps_per_second": 0.167, "step": 2355 }, { "epoch": 9.04, "learning_rate": 7.964240102171137e-05, "loss": 0.0125, "step": 2360 }, { "epoch": 9.04, "eval_accuracy": 0.7011494252873564, "eval_loss": 1.8850693702697754, "eval_runtime": 131.5131, "eval_samples_per_second": 1.323, "eval_steps_per_second": 0.167, "step": 2360 }, { "epoch": 9.06, "learning_rate": 7.938697318007663e-05, "loss": 0.0023, "step": 2365 }, { "epoch": 9.06, "eval_accuracy": 0.6839080459770115, "eval_loss": 2.049333333969116, "eval_runtime": 134.4132, "eval_samples_per_second": 1.295, "eval_steps_per_second": 0.164, "step": 2365 }, { "epoch": 9.08, "learning_rate": 7.91315453384419e-05, "loss": 0.1226, "step": 2370 }, { "epoch": 9.08, "eval_accuracy": 0.7413793103448276, "eval_loss": 1.6171669960021973, "eval_runtime": 132.176, "eval_samples_per_second": 1.316, "eval_steps_per_second": 0.166, "step": 2370 }, { "epoch": 9.1, "learning_rate": 7.887611749680716e-05, "loss": 0.0011, "step": 2375 }, { "epoch": 9.1, "eval_accuracy": 0.7413793103448276, "eval_loss": 1.6242951154708862, "eval_runtime": 131.7057, "eval_samples_per_second": 1.321, "eval_steps_per_second": 0.167, "step": 2375 }, { "epoch": 9.12, "learning_rate": 7.862068965517242e-05, "loss": 0.001, "step": 2380 }, { "epoch": 9.12, "eval_accuracy": 0.7298850574712644, "eval_loss": 1.707185983657837, "eval_runtime": 131.6149, "eval_samples_per_second": 1.322, "eval_steps_per_second": 0.167, "step": 2380 }, { "epoch": 9.14, "learning_rate": 7.836526181353769e-05, "loss": 0.0176, "step": 2385 }, { "epoch": 9.14, "eval_accuracy": 0.735632183908046, "eval_loss": 1.7006449699401855, "eval_runtime": 131.5584, "eval_samples_per_second": 1.323, "eval_steps_per_second": 0.167, "step": 2385 }, { "epoch": 9.16, "learning_rate": 7.810983397190295e-05, "loss": 0.0291, "step": 2390 }, { "epoch": 9.16, "eval_accuracy": 0.735632183908046, "eval_loss": 1.6260244846343994, "eval_runtime": 131.6961, "eval_samples_per_second": 1.321, "eval_steps_per_second": 0.167, "step": 2390 }, { "epoch": 9.18, "learning_rate": 7.78544061302682e-05, "loss": 0.0003, "step": 2395 }, { "epoch": 9.18, "eval_accuracy": 0.7241379310344828, "eval_loss": 1.6396585702896118, "eval_runtime": 135.5626, "eval_samples_per_second": 1.284, "eval_steps_per_second": 0.162, "step": 2395 }, { "epoch": 9.2, "learning_rate": 7.759897828863346e-05, "loss": 0.0002, "step": 2400 }, { "epoch": 9.2, "eval_accuracy": 0.7183908045977011, "eval_loss": 1.6500298976898193, "eval_runtime": 131.8182, "eval_samples_per_second": 1.32, "eval_steps_per_second": 0.167, "step": 2400 }, { "epoch": 9.21, "learning_rate": 7.734355044699873e-05, "loss": 0.0002, "step": 2405 }, { "epoch": 9.21, "eval_accuracy": 0.7183908045977011, "eval_loss": 1.654776692390442, "eval_runtime": 131.6817, "eval_samples_per_second": 1.321, "eval_steps_per_second": 0.167, "step": 2405 }, { "epoch": 9.23, "learning_rate": 7.708812260536399e-05, "loss": 0.0002, "step": 2410 }, { "epoch": 9.23, "eval_accuracy": 0.7183908045977011, "eval_loss": 1.6623882055282593, "eval_runtime": 131.6808, "eval_samples_per_second": 1.321, "eval_steps_per_second": 0.167, "step": 2410 }, { "epoch": 9.25, "learning_rate": 7.683269476372925e-05, "loss": 0.0014, "step": 2415 }, { "epoch": 9.25, "eval_accuracy": 0.7126436781609196, "eval_loss": 1.663640022277832, "eval_runtime": 131.723, "eval_samples_per_second": 1.321, "eval_steps_per_second": 0.167, "step": 2415 }, { "epoch": 9.27, "learning_rate": 7.657726692209452e-05, "loss": 0.0007, "step": 2420 }, { "epoch": 9.27, "eval_accuracy": 0.7183908045977011, "eval_loss": 1.667425513267517, "eval_runtime": 131.8231, "eval_samples_per_second": 1.32, "eval_steps_per_second": 0.167, "step": 2420 }, { "epoch": 9.29, "learning_rate": 7.632183908045978e-05, "loss": 0.171, "step": 2425 }, { "epoch": 9.29, "eval_accuracy": 0.7241379310344828, "eval_loss": 1.6711957454681396, "eval_runtime": 135.4213, "eval_samples_per_second": 1.285, "eval_steps_per_second": 0.162, "step": 2425 }, { "epoch": 9.31, "learning_rate": 7.606641123882503e-05, "loss": 0.0019, "step": 2430 }, { "epoch": 9.31, "eval_accuracy": 0.7241379310344828, "eval_loss": 1.6811691522598267, "eval_runtime": 131.6252, "eval_samples_per_second": 1.322, "eval_steps_per_second": 0.167, "step": 2430 }, { "epoch": 9.33, "learning_rate": 7.581098339719029e-05, "loss": 0.1125, "step": 2435 }, { "epoch": 9.33, "eval_accuracy": 0.7241379310344828, "eval_loss": 1.6472193002700806, "eval_runtime": 131.6396, "eval_samples_per_second": 1.322, "eval_steps_per_second": 0.167, "step": 2435 }, { "epoch": 9.35, "learning_rate": 7.555555555555556e-05, "loss": 0.0084, "step": 2440 }, { "epoch": 9.35, "eval_accuracy": 0.735632183908046, "eval_loss": 1.684321403503418, "eval_runtime": 134.6915, "eval_samples_per_second": 1.292, "eval_steps_per_second": 0.163, "step": 2440 }, { "epoch": 9.37, "learning_rate": 7.530012771392082e-05, "loss": 0.0039, "step": 2445 }, { "epoch": 9.37, "eval_accuracy": 0.7471264367816092, "eval_loss": 1.7159420251846313, "eval_runtime": 131.7929, "eval_samples_per_second": 1.32, "eval_steps_per_second": 0.167, "step": 2445 }, { "epoch": 9.39, "learning_rate": 7.504469987228608e-05, "loss": 0.1233, "step": 2450 }, { "epoch": 9.39, "eval_accuracy": 0.7586206896551724, "eval_loss": 1.693913221359253, "eval_runtime": 131.6221, "eval_samples_per_second": 1.322, "eval_steps_per_second": 0.167, "step": 2450 }, { "epoch": 9.41, "learning_rate": 7.478927203065135e-05, "loss": 0.0015, "step": 2455 }, { "epoch": 9.41, "eval_accuracy": 0.7586206896551724, "eval_loss": 1.6599289178848267, "eval_runtime": 131.5324, "eval_samples_per_second": 1.323, "eval_steps_per_second": 0.167, "step": 2455 }, { "epoch": 9.43, "learning_rate": 7.453384418901661e-05, "loss": 0.0031, "step": 2460 }, { "epoch": 9.43, "eval_accuracy": 0.735632183908046, "eval_loss": 1.6476054191589355, "eval_runtime": 131.5993, "eval_samples_per_second": 1.322, "eval_steps_per_second": 0.167, "step": 2460 }, { "epoch": 9.44, "learning_rate": 7.427841634738186e-05, "loss": 0.0059, "step": 2465 }, { "epoch": 9.44, "eval_accuracy": 0.735632183908046, "eval_loss": 1.7214477062225342, "eval_runtime": 134.7166, "eval_samples_per_second": 1.292, "eval_steps_per_second": 0.163, "step": 2465 }, { "epoch": 9.46, "learning_rate": 7.402298850574712e-05, "loss": 0.1826, "step": 2470 }, { "epoch": 9.46, "eval_accuracy": 0.7183908045977011, "eval_loss": 1.7813475131988525, "eval_runtime": 131.5182, "eval_samples_per_second": 1.323, "eval_steps_per_second": 0.167, "step": 2470 }, { "epoch": 9.48, "learning_rate": 7.376756066411239e-05, "loss": 0.1108, "step": 2475 }, { "epoch": 9.48, "eval_accuracy": 0.7183908045977011, "eval_loss": 1.7746046781539917, "eval_runtime": 131.497, "eval_samples_per_second": 1.323, "eval_steps_per_second": 0.167, "step": 2475 }, { "epoch": 9.5, "learning_rate": 7.351213282247765e-05, "loss": 0.0244, "step": 2480 }, { "epoch": 9.5, "eval_accuracy": 0.735632183908046, "eval_loss": 1.7000945806503296, "eval_runtime": 134.8163, "eval_samples_per_second": 1.291, "eval_steps_per_second": 0.163, "step": 2480 }, { "epoch": 9.52, "learning_rate": 7.325670498084291e-05, "loss": 0.0004, "step": 2485 }, { "epoch": 9.52, "eval_accuracy": 0.7241379310344828, "eval_loss": 1.7654314041137695, "eval_runtime": 131.5898, "eval_samples_per_second": 1.322, "eval_steps_per_second": 0.167, "step": 2485 }, { "epoch": 9.54, "learning_rate": 7.300127713920818e-05, "loss": 0.0017, "step": 2490 }, { "epoch": 9.54, "eval_accuracy": 0.7183908045977011, "eval_loss": 1.8154675960540771, "eval_runtime": 134.6257, "eval_samples_per_second": 1.292, "eval_steps_per_second": 0.163, "step": 2490 }, { "epoch": 9.56, "learning_rate": 7.274584929757344e-05, "loss": 0.0048, "step": 2495 }, { "epoch": 9.56, "eval_accuracy": 0.7241379310344828, "eval_loss": 1.7586040496826172, "eval_runtime": 131.6022, "eval_samples_per_second": 1.322, "eval_steps_per_second": 0.167, "step": 2495 }, { "epoch": 9.58, "learning_rate": 7.24904214559387e-05, "loss": 0.0001, "step": 2500 }, { "epoch": 9.58, "eval_accuracy": 0.735632183908046, "eval_loss": 1.7478693723678589, "eval_runtime": 131.5333, "eval_samples_per_second": 1.323, "eval_steps_per_second": 0.167, "step": 2500 }, { "epoch": 9.6, "learning_rate": 7.223499361430395e-05, "loss": 0.0007, "step": 2505 }, { "epoch": 9.6, "eval_accuracy": 0.7298850574712644, "eval_loss": 1.74885892868042, "eval_runtime": 131.5823, "eval_samples_per_second": 1.322, "eval_steps_per_second": 0.167, "step": 2505 }, { "epoch": 9.62, "learning_rate": 7.197956577266922e-05, "loss": 0.1523, "step": 2510 }, { "epoch": 9.62, "eval_accuracy": 0.7298850574712644, "eval_loss": 1.7376835346221924, "eval_runtime": 131.6011, "eval_samples_per_second": 1.322, "eval_steps_per_second": 0.167, "step": 2510 }, { "epoch": 9.64, "learning_rate": 7.172413793103448e-05, "loss": 0.0032, "step": 2515 }, { "epoch": 9.64, "eval_accuracy": 0.7298850574712644, "eval_loss": 1.6583445072174072, "eval_runtime": 131.6093, "eval_samples_per_second": 1.322, "eval_steps_per_second": 0.167, "step": 2515 }, { "epoch": 9.66, "learning_rate": 7.146871008939974e-05, "loss": 0.0094, "step": 2520 }, { "epoch": 9.66, "eval_accuracy": 0.7413793103448276, "eval_loss": 1.5356999635696411, "eval_runtime": 135.3986, "eval_samples_per_second": 1.285, "eval_steps_per_second": 0.162, "step": 2520 }, { "epoch": 9.67, "learning_rate": 7.1213282247765e-05, "loss": 0.001, "step": 2525 }, { "epoch": 9.67, "eval_accuracy": 0.735632183908046, "eval_loss": 1.489303708076477, "eval_runtime": 131.5424, "eval_samples_per_second": 1.323, "eval_steps_per_second": 0.167, "step": 2525 }, { "epoch": 9.69, "learning_rate": 7.095785440613027e-05, "loss": 0.0053, "step": 2530 }, { "epoch": 9.69, "eval_accuracy": 0.7413793103448276, "eval_loss": 1.4710723161697388, "eval_runtime": 131.6132, "eval_samples_per_second": 1.322, "eval_steps_per_second": 0.167, "step": 2530 }, { "epoch": 9.71, "learning_rate": 7.070242656449553e-05, "loss": 0.0027, "step": 2535 }, { "epoch": 9.71, "eval_accuracy": 0.735632183908046, "eval_loss": 1.4847438335418701, "eval_runtime": 131.8018, "eval_samples_per_second": 1.32, "eval_steps_per_second": 0.167, "step": 2535 }, { "epoch": 9.73, "learning_rate": 7.04469987228608e-05, "loss": 0.0031, "step": 2540 }, { "epoch": 9.73, "eval_accuracy": 0.7183908045977011, "eval_loss": 1.5292288064956665, "eval_runtime": 131.6847, "eval_samples_per_second": 1.321, "eval_steps_per_second": 0.167, "step": 2540 }, { "epoch": 9.75, "learning_rate": 7.019157088122606e-05, "loss": 0.0168, "step": 2545 }, { "epoch": 9.75, "eval_accuracy": 0.7183908045977011, "eval_loss": 1.5313410758972168, "eval_runtime": 131.6087, "eval_samples_per_second": 1.322, "eval_steps_per_second": 0.167, "step": 2545 }, { "epoch": 9.77, "learning_rate": 6.993614303959132e-05, "loss": 0.0443, "step": 2550 }, { "epoch": 9.77, "eval_accuracy": 0.7413793103448276, "eval_loss": 1.5084540843963623, "eval_runtime": 134.9994, "eval_samples_per_second": 1.289, "eval_steps_per_second": 0.163, "step": 2550 }, { "epoch": 9.79, "learning_rate": 6.968071519795659e-05, "loss": 0.0237, "step": 2555 }, { "epoch": 9.79, "eval_accuracy": 0.7298850574712644, "eval_loss": 1.577832818031311, "eval_runtime": 131.7595, "eval_samples_per_second": 1.321, "eval_steps_per_second": 0.167, "step": 2555 }, { "epoch": 9.81, "learning_rate": 6.942528735632185e-05, "loss": 0.0026, "step": 2560 }, { "epoch": 9.81, "eval_accuracy": 0.7413793103448276, "eval_loss": 1.7100647687911987, "eval_runtime": 134.0966, "eval_samples_per_second": 1.298, "eval_steps_per_second": 0.164, "step": 2560 }, { "epoch": 9.83, "learning_rate": 6.916985951468711e-05, "loss": 0.0227, "step": 2565 }, { "epoch": 9.83, "eval_accuracy": 0.7528735632183908, "eval_loss": 1.719056248664856, "eval_runtime": 131.4435, "eval_samples_per_second": 1.324, "eval_steps_per_second": 0.167, "step": 2565 }, { "epoch": 9.85, "learning_rate": 6.891443167305238e-05, "loss": 0.0001, "step": 2570 }, { "epoch": 9.85, "eval_accuracy": 0.7528735632183908, "eval_loss": 1.7180625200271606, "eval_runtime": 131.4579, "eval_samples_per_second": 1.324, "eval_steps_per_second": 0.167, "step": 2570 }, { "epoch": 9.87, "learning_rate": 6.865900383141763e-05, "loss": 0.0013, "step": 2575 }, { "epoch": 9.87, "eval_accuracy": 0.7586206896551724, "eval_loss": 1.7265838384628296, "eval_runtime": 131.6065, "eval_samples_per_second": 1.322, "eval_steps_per_second": 0.167, "step": 2575 }, { "epoch": 9.89, "learning_rate": 6.840357598978289e-05, "loss": 0.0153, "step": 2580 }, { "epoch": 9.89, "eval_accuracy": 0.7586206896551724, "eval_loss": 1.721193552017212, "eval_runtime": 131.7875, "eval_samples_per_second": 1.32, "eval_steps_per_second": 0.167, "step": 2580 }, { "epoch": 9.9, "learning_rate": 6.814814814814815e-05, "loss": 0.3362, "step": 2585 }, { "epoch": 9.9, "eval_accuracy": 0.7528735632183908, "eval_loss": 1.6475728750228882, "eval_runtime": 131.8419, "eval_samples_per_second": 1.32, "eval_steps_per_second": 0.167, "step": 2585 }, { "epoch": 9.92, "learning_rate": 6.789272030651342e-05, "loss": 0.0002, "step": 2590 }, { "epoch": 9.92, "eval_accuracy": 0.7471264367816092, "eval_loss": 1.6617615222930908, "eval_runtime": 131.7141, "eval_samples_per_second": 1.321, "eval_steps_per_second": 0.167, "step": 2590 }, { "epoch": 9.94, "learning_rate": 6.763729246487868e-05, "loss": 0.0041, "step": 2595 }, { "epoch": 9.94, "eval_accuracy": 0.7586206896551724, "eval_loss": 1.5420438051223755, "eval_runtime": 131.461, "eval_samples_per_second": 1.324, "eval_steps_per_second": 0.167, "step": 2595 }, { "epoch": 9.96, "learning_rate": 6.738186462324394e-05, "loss": 0.0002, "step": 2600 }, { "epoch": 9.96, "eval_accuracy": 0.7586206896551724, "eval_loss": 1.5038961172103882, "eval_runtime": 132.3186, "eval_samples_per_second": 1.315, "eval_steps_per_second": 0.166, "step": 2600 }, { "epoch": 9.98, "learning_rate": 6.71264367816092e-05, "loss": 0.0215, "step": 2605 }, { "epoch": 9.98, "eval_accuracy": 0.7471264367816092, "eval_loss": 1.5120642185211182, "eval_runtime": 134.7063, "eval_samples_per_second": 1.292, "eval_steps_per_second": 0.163, "step": 2605 }, { "epoch": 10.0, "learning_rate": 6.687100893997446e-05, "loss": 0.1991, "step": 2610 }, { "epoch": 10.0, "eval_accuracy": 0.735632183908046, "eval_loss": 1.5479094982147217, "eval_runtime": 131.4277, "eval_samples_per_second": 1.324, "eval_steps_per_second": 0.167, "step": 2610 }, { "epoch": 10.02, "learning_rate": 6.661558109833972e-05, "loss": 0.0005, "step": 2615 }, { "epoch": 10.02, "eval_accuracy": 0.735632183908046, "eval_loss": 1.6146314144134521, "eval_runtime": 132.802, "eval_samples_per_second": 1.31, "eval_steps_per_second": 0.166, "step": 2615 }, { "epoch": 10.04, "learning_rate": 6.636015325670498e-05, "loss": 0.1721, "step": 2620 }, { "epoch": 10.04, "eval_accuracy": 0.7298850574712644, "eval_loss": 1.6754363775253296, "eval_runtime": 133.3319, "eval_samples_per_second": 1.305, "eval_steps_per_second": 0.165, "step": 2620 }, { "epoch": 10.06, "learning_rate": 6.610472541507025e-05, "loss": 0.0144, "step": 2625 }, { "epoch": 10.06, "eval_accuracy": 0.7068965517241379, "eval_loss": 1.727333664894104, "eval_runtime": 133.0937, "eval_samples_per_second": 1.307, "eval_steps_per_second": 0.165, "step": 2625 }, { "epoch": 10.08, "learning_rate": 6.584929757343551e-05, "loss": 0.0424, "step": 2630 }, { "epoch": 10.08, "eval_accuracy": 0.7126436781609196, "eval_loss": 1.7386857271194458, "eval_runtime": 133.3356, "eval_samples_per_second": 1.305, "eval_steps_per_second": 0.165, "step": 2630 }, { "epoch": 10.1, "learning_rate": 6.559386973180077e-05, "loss": 0.001, "step": 2635 }, { "epoch": 10.1, "eval_accuracy": 0.7183908045977011, "eval_loss": 1.6673250198364258, "eval_runtime": 132.9196, "eval_samples_per_second": 1.309, "eval_steps_per_second": 0.166, "step": 2635 }, { "epoch": 10.11, "learning_rate": 6.533844189016604e-05, "loss": 0.0187, "step": 2640 }, { "epoch": 10.11, "eval_accuracy": 0.7758620689655172, "eval_loss": 1.5967484712600708, "eval_runtime": 132.8447, "eval_samples_per_second": 1.31, "eval_steps_per_second": 0.166, "step": 2640 }, { "epoch": 10.13, "learning_rate": 6.508301404853129e-05, "loss": 0.0001, "step": 2645 }, { "epoch": 10.13, "eval_accuracy": 0.7413793103448276, "eval_loss": 1.65236234664917, "eval_runtime": 137.5143, "eval_samples_per_second": 1.265, "eval_steps_per_second": 0.16, "step": 2645 }, { "epoch": 10.15, "learning_rate": 6.482758620689655e-05, "loss": 0.0003, "step": 2650 }, { "epoch": 10.15, "eval_accuracy": 0.735632183908046, "eval_loss": 1.6943364143371582, "eval_runtime": 135.7335, "eval_samples_per_second": 1.282, "eval_steps_per_second": 0.162, "step": 2650 }, { "epoch": 10.17, "learning_rate": 6.457215836526181e-05, "loss": 0.0047, "step": 2655 }, { "epoch": 10.17, "eval_accuracy": 0.7413793103448276, "eval_loss": 1.7338972091674805, "eval_runtime": 134.0312, "eval_samples_per_second": 1.298, "eval_steps_per_second": 0.164, "step": 2655 }, { "epoch": 10.19, "learning_rate": 6.431673052362708e-05, "loss": 0.0105, "step": 2660 }, { "epoch": 10.19, "eval_accuracy": 0.7413793103448276, "eval_loss": 1.760598063468933, "eval_runtime": 134.4028, "eval_samples_per_second": 1.295, "eval_steps_per_second": 0.164, "step": 2660 }, { "epoch": 10.21, "learning_rate": 6.406130268199234e-05, "loss": 0.0015, "step": 2665 }, { "epoch": 10.21, "eval_accuracy": 0.735632183908046, "eval_loss": 1.768870234489441, "eval_runtime": 133.0003, "eval_samples_per_second": 1.308, "eval_steps_per_second": 0.165, "step": 2665 }, { "epoch": 10.23, "learning_rate": 6.38058748403576e-05, "loss": 0.0002, "step": 2670 }, { "epoch": 10.23, "eval_accuracy": 0.7068965517241379, "eval_loss": 1.791227102279663, "eval_runtime": 133.4851, "eval_samples_per_second": 1.304, "eval_steps_per_second": 0.165, "step": 2670 }, { "epoch": 10.25, "learning_rate": 6.355044699872287e-05, "loss": 0.002, "step": 2675 }, { "epoch": 10.25, "eval_accuracy": 0.7011494252873564, "eval_loss": 1.777532696723938, "eval_runtime": 133.0873, "eval_samples_per_second": 1.307, "eval_steps_per_second": 0.165, "step": 2675 }, { "epoch": 10.27, "learning_rate": 6.329501915708812e-05, "loss": 0.1839, "step": 2680 }, { "epoch": 10.27, "eval_accuracy": 0.7471264367816092, "eval_loss": 1.5833971500396729, "eval_runtime": 132.9348, "eval_samples_per_second": 1.309, "eval_steps_per_second": 0.165, "step": 2680 }, { "epoch": 10.29, "learning_rate": 6.303959131545338e-05, "loss": 0.0004, "step": 2685 }, { "epoch": 10.29, "eval_accuracy": 0.7471264367816092, "eval_loss": 1.5184221267700195, "eval_runtime": 135.1931, "eval_samples_per_second": 1.287, "eval_steps_per_second": 0.163, "step": 2685 }, { "epoch": 10.31, "learning_rate": 6.278416347381864e-05, "loss": 0.0001, "step": 2690 }, { "epoch": 10.31, "eval_accuracy": 0.7471264367816092, "eval_loss": 1.5530625581741333, "eval_runtime": 133.3044, "eval_samples_per_second": 1.305, "eval_steps_per_second": 0.165, "step": 2690 }, { "epoch": 10.33, "learning_rate": 6.25287356321839e-05, "loss": 0.0001, "step": 2695 }, { "epoch": 10.33, "eval_accuracy": 0.7471264367816092, "eval_loss": 1.597658395767212, "eval_runtime": 132.8657, "eval_samples_per_second": 1.31, "eval_steps_per_second": 0.166, "step": 2695 }, { "epoch": 10.34, "learning_rate": 6.227330779054917e-05, "loss": 0.2095, "step": 2700 }, { "epoch": 10.34, "eval_accuracy": 0.7528735632183908, "eval_loss": 1.6474486589431763, "eval_runtime": 134.5259, "eval_samples_per_second": 1.293, "eval_steps_per_second": 0.164, "step": 2700 }, { "epoch": 10.36, "learning_rate": 6.201787994891443e-05, "loss": 0.0044, "step": 2705 }, { "epoch": 10.36, "eval_accuracy": 0.7298850574712644, "eval_loss": 1.7372243404388428, "eval_runtime": 132.009, "eval_samples_per_second": 1.318, "eval_steps_per_second": 0.167, "step": 2705 }, { "epoch": 10.38, "learning_rate": 6.17624521072797e-05, "loss": 0.0242, "step": 2710 }, { "epoch": 10.38, "eval_accuracy": 0.7183908045977011, "eval_loss": 1.7031418085098267, "eval_runtime": 131.5485, "eval_samples_per_second": 1.323, "eval_steps_per_second": 0.167, "step": 2710 }, { "epoch": 10.4, "learning_rate": 6.150702426564496e-05, "loss": 0.0119, "step": 2715 }, { "epoch": 10.4, "eval_accuracy": 0.7413793103448276, "eval_loss": 1.6391687393188477, "eval_runtime": 131.5385, "eval_samples_per_second": 1.323, "eval_steps_per_second": 0.167, "step": 2715 }, { "epoch": 10.42, "learning_rate": 6.125159642401021e-05, "loss": 0.0003, "step": 2720 }, { "epoch": 10.42, "eval_accuracy": 0.7298850574712644, "eval_loss": 1.699074625968933, "eval_runtime": 131.8011, "eval_samples_per_second": 1.32, "eval_steps_per_second": 0.167, "step": 2720 }, { "epoch": 10.44, "learning_rate": 6.099616858237548e-05, "loss": 0.0001, "step": 2725 }, { "epoch": 10.44, "eval_accuracy": 0.7413793103448276, "eval_loss": 1.7589409351348877, "eval_runtime": 134.6631, "eval_samples_per_second": 1.292, "eval_steps_per_second": 0.163, "step": 2725 }, { "epoch": 10.46, "learning_rate": 6.074074074074074e-05, "loss": 0.0003, "step": 2730 }, { "epoch": 10.46, "eval_accuracy": 0.7413793103448276, "eval_loss": 1.809535264968872, "eval_runtime": 131.5414, "eval_samples_per_second": 1.323, "eval_steps_per_second": 0.167, "step": 2730 }, { "epoch": 10.48, "learning_rate": 6.0485312899106007e-05, "loss": 0.0113, "step": 2735 }, { "epoch": 10.48, "eval_accuracy": 0.735632183908046, "eval_loss": 1.7475957870483398, "eval_runtime": 131.6443, "eval_samples_per_second": 1.322, "eval_steps_per_second": 0.167, "step": 2735 }, { "epoch": 10.5, "learning_rate": 6.022988505747127e-05, "loss": 0.0253, "step": 2740 }, { "epoch": 10.5, "eval_accuracy": 0.7471264367816092, "eval_loss": 1.732975721359253, "eval_runtime": 131.7203, "eval_samples_per_second": 1.321, "eval_steps_per_second": 0.167, "step": 2740 }, { "epoch": 10.52, "learning_rate": 5.997445721583653e-05, "loss": 0.0001, "step": 2745 }, { "epoch": 10.52, "eval_accuracy": 0.7528735632183908, "eval_loss": 1.6785794496536255, "eval_runtime": 134.7004, "eval_samples_per_second": 1.292, "eval_steps_per_second": 0.163, "step": 2745 }, { "epoch": 10.54, "learning_rate": 5.97190293742018e-05, "loss": 0.0001, "step": 2750 }, { "epoch": 10.54, "eval_accuracy": 0.7471264367816092, "eval_loss": 1.6546560525894165, "eval_runtime": 131.4825, "eval_samples_per_second": 1.323, "eval_steps_per_second": 0.167, "step": 2750 }, { "epoch": 10.56, "learning_rate": 5.9463601532567046e-05, "loss": 0.0841, "step": 2755 }, { "epoch": 10.56, "eval_accuracy": 0.735632183908046, "eval_loss": 1.5616384744644165, "eval_runtime": 131.5563, "eval_samples_per_second": 1.323, "eval_steps_per_second": 0.167, "step": 2755 }, { "epoch": 10.57, "learning_rate": 5.920817369093231e-05, "loss": 0.0003, "step": 2760 }, { "epoch": 10.57, "eval_accuracy": 0.7586206896551724, "eval_loss": 1.4814727306365967, "eval_runtime": 131.5966, "eval_samples_per_second": 1.322, "eval_steps_per_second": 0.167, "step": 2760 }, { "epoch": 10.59, "learning_rate": 5.895274584929757e-05, "loss": 0.0064, "step": 2765 }, { "epoch": 10.59, "eval_accuracy": 0.764367816091954, "eval_loss": 1.4246087074279785, "eval_runtime": 135.6419, "eval_samples_per_second": 1.283, "eval_steps_per_second": 0.162, "step": 2765 }, { "epoch": 10.61, "learning_rate": 5.8697318007662837e-05, "loss": 0.0001, "step": 2770 }, { "epoch": 10.61, "eval_accuracy": 0.7528735632183908, "eval_loss": 1.4092402458190918, "eval_runtime": 131.8053, "eval_samples_per_second": 1.32, "eval_steps_per_second": 0.167, "step": 2770 }, { "epoch": 10.63, "learning_rate": 5.84418901660281e-05, "loss": 0.0015, "step": 2775 }, { "epoch": 10.63, "eval_accuracy": 0.7528735632183908, "eval_loss": 1.4107933044433594, "eval_runtime": 131.7192, "eval_samples_per_second": 1.321, "eval_steps_per_second": 0.167, "step": 2775 }, { "epoch": 10.65, "learning_rate": 5.818646232439336e-05, "loss": 0.0002, "step": 2780 }, { "epoch": 10.65, "eval_accuracy": 0.7471264367816092, "eval_loss": 1.4259847402572632, "eval_runtime": 131.5637, "eval_samples_per_second": 1.323, "eval_steps_per_second": 0.167, "step": 2780 }, { "epoch": 10.67, "learning_rate": 5.7931034482758627e-05, "loss": 0.0005, "step": 2785 }, { "epoch": 10.67, "eval_accuracy": 0.7528735632183908, "eval_loss": 1.4420133829116821, "eval_runtime": 131.7609, "eval_samples_per_second": 1.321, "eval_steps_per_second": 0.167, "step": 2785 }, { "epoch": 10.69, "learning_rate": 5.767560664112388e-05, "loss": 0.2767, "step": 2790 }, { "epoch": 10.69, "eval_accuracy": 0.7413793103448276, "eval_loss": 1.4646365642547607, "eval_runtime": 138.9314, "eval_samples_per_second": 1.252, "eval_steps_per_second": 0.158, "step": 2790 }, { "epoch": 10.71, "learning_rate": 5.7420178799489147e-05, "loss": 0.0721, "step": 2795 }, { "epoch": 10.71, "eval_accuracy": 0.7586206896551724, "eval_loss": 1.5203619003295898, "eval_runtime": 131.5083, "eval_samples_per_second": 1.323, "eval_steps_per_second": 0.167, "step": 2795 }, { "epoch": 10.73, "learning_rate": 5.716475095785441e-05, "loss": 0.0009, "step": 2800 }, { "epoch": 10.73, "eval_accuracy": 0.7586206896551724, "eval_loss": 1.6007354259490967, "eval_runtime": 131.8312, "eval_samples_per_second": 1.32, "eval_steps_per_second": 0.167, "step": 2800 }, { "epoch": 10.75, "learning_rate": 5.690932311621967e-05, "loss": 0.0003, "step": 2805 }, { "epoch": 10.75, "eval_accuracy": 0.7528735632183908, "eval_loss": 1.668529987335205, "eval_runtime": 131.6129, "eval_samples_per_second": 1.322, "eval_steps_per_second": 0.167, "step": 2805 }, { "epoch": 10.77, "learning_rate": 5.665389527458494e-05, "loss": 0.3266, "step": 2810 }, { "epoch": 10.77, "eval_accuracy": 0.7528735632183908, "eval_loss": 1.7353466749191284, "eval_runtime": 131.5759, "eval_samples_per_second": 1.322, "eval_steps_per_second": 0.167, "step": 2810 }, { "epoch": 10.79, "learning_rate": 5.63984674329502e-05, "loss": 0.0028, "step": 2815 }, { "epoch": 10.79, "eval_accuracy": 0.7528735632183908, "eval_loss": 1.747633457183838, "eval_runtime": 134.916, "eval_samples_per_second": 1.29, "eval_steps_per_second": 0.163, "step": 2815 }, { "epoch": 10.8, "learning_rate": 5.614303959131546e-05, "loss": 0.0, "step": 2820 }, { "epoch": 10.8, "eval_accuracy": 0.7471264367816092, "eval_loss": 1.7377413511276245, "eval_runtime": 131.5547, "eval_samples_per_second": 1.323, "eval_steps_per_second": 0.167, "step": 2820 }, { "epoch": 10.82, "learning_rate": 5.588761174968071e-05, "loss": 0.0001, "step": 2825 }, { "epoch": 10.82, "eval_accuracy": 0.7528735632183908, "eval_loss": 1.7389466762542725, "eval_runtime": 131.5452, "eval_samples_per_second": 1.323, "eval_steps_per_second": 0.167, "step": 2825 }, { "epoch": 10.84, "learning_rate": 5.5632183908045976e-05, "loss": 0.0004, "step": 2830 }, { "epoch": 10.84, "eval_accuracy": 0.7528735632183908, "eval_loss": 1.740808367729187, "eval_runtime": 131.5177, "eval_samples_per_second": 1.323, "eval_steps_per_second": 0.167, "step": 2830 }, { "epoch": 10.86, "learning_rate": 5.537675606641124e-05, "loss": 0.0794, "step": 2835 }, { "epoch": 10.86, "eval_accuracy": 0.7471264367816092, "eval_loss": 1.7411603927612305, "eval_runtime": 131.6661, "eval_samples_per_second": 1.322, "eval_steps_per_second": 0.167, "step": 2835 }, { "epoch": 10.88, "learning_rate": 5.51213282247765e-05, "loss": 0.0001, "step": 2840 }, { "epoch": 10.88, "eval_accuracy": 0.7413793103448276, "eval_loss": 1.747639775276184, "eval_runtime": 134.556, "eval_samples_per_second": 1.293, "eval_steps_per_second": 0.164, "step": 2840 }, { "epoch": 10.9, "learning_rate": 5.4865900383141767e-05, "loss": 0.0022, "step": 2845 }, { "epoch": 10.9, "eval_accuracy": 0.735632183908046, "eval_loss": 1.7501295804977417, "eval_runtime": 135.0416, "eval_samples_per_second": 1.288, "eval_steps_per_second": 0.163, "step": 2845 }, { "epoch": 10.92, "learning_rate": 5.461047254150703e-05, "loss": 0.0007, "step": 2850 }, { "epoch": 10.92, "eval_accuracy": 0.7471264367816092, "eval_loss": 1.7450084686279297, "eval_runtime": 131.6406, "eval_samples_per_second": 1.322, "eval_steps_per_second": 0.167, "step": 2850 }, { "epoch": 10.94, "learning_rate": 5.435504469987229e-05, "loss": 0.0162, "step": 2855 }, { "epoch": 10.94, "eval_accuracy": 0.7413793103448276, "eval_loss": 1.6570850610733032, "eval_runtime": 131.5925, "eval_samples_per_second": 1.322, "eval_steps_per_second": 0.167, "step": 2855 }, { "epoch": 10.96, "learning_rate": 5.409961685823754e-05, "loss": 0.0002, "step": 2860 }, { "epoch": 10.96, "eval_accuracy": 0.735632183908046, "eval_loss": 1.5867286920547485, "eval_runtime": 131.8387, "eval_samples_per_second": 1.32, "eval_steps_per_second": 0.167, "step": 2860 }, { "epoch": 10.98, "learning_rate": 5.3844189016602806e-05, "loss": 0.0004, "step": 2865 }, { "epoch": 10.98, "eval_accuracy": 0.7528735632183908, "eval_loss": 1.5566998720169067, "eval_runtime": 133.6513, "eval_samples_per_second": 1.302, "eval_steps_per_second": 0.165, "step": 2865 }, { "epoch": 11.0, "learning_rate": 5.358876117496807e-05, "loss": 0.007, "step": 2870 }, { "epoch": 11.0, "eval_accuracy": 0.7528735632183908, "eval_loss": 1.5519108772277832, "eval_runtime": 131.5593, "eval_samples_per_second": 1.323, "eval_steps_per_second": 0.167, "step": 2870 }, { "epoch": 11.02, "learning_rate": 5.333333333333333e-05, "loss": 0.222, "step": 2875 }, { "epoch": 11.02, "eval_accuracy": 0.764367816091954, "eval_loss": 1.5496468544006348, "eval_runtime": 131.4761, "eval_samples_per_second": 1.323, "eval_steps_per_second": 0.167, "step": 2875 }, { "epoch": 11.03, "learning_rate": 5.3077905491698597e-05, "loss": 0.175, "step": 2880 }, { "epoch": 11.03, "eval_accuracy": 0.764367816091954, "eval_loss": 1.576553463935852, "eval_runtime": 134.5929, "eval_samples_per_second": 1.293, "eval_steps_per_second": 0.163, "step": 2880 }, { "epoch": 11.05, "learning_rate": 5.282247765006386e-05, "loss": 0.0004, "step": 2885 }, { "epoch": 11.05, "eval_accuracy": 0.7471264367816092, "eval_loss": 1.611812949180603, "eval_runtime": 134.8494, "eval_samples_per_second": 1.29, "eval_steps_per_second": 0.163, "step": 2885 }, { "epoch": 11.07, "learning_rate": 5.256704980842912e-05, "loss": 0.0047, "step": 2890 }, { "epoch": 11.07, "eval_accuracy": 0.7471264367816092, "eval_loss": 1.635672926902771, "eval_runtime": 131.6624, "eval_samples_per_second": 1.322, "eval_steps_per_second": 0.167, "step": 2890 }, { "epoch": 11.09, "learning_rate": 5.231162196679439e-05, "loss": 0.0002, "step": 2895 }, { "epoch": 11.09, "eval_accuracy": 0.7528735632183908, "eval_loss": 1.646173119544983, "eval_runtime": 131.6251, "eval_samples_per_second": 1.322, "eval_steps_per_second": 0.167, "step": 2895 }, { "epoch": 11.11, "learning_rate": 5.205619412515964e-05, "loss": 0.0007, "step": 2900 }, { "epoch": 11.11, "eval_accuracy": 0.7528735632183908, "eval_loss": 1.6495263576507568, "eval_runtime": 131.5865, "eval_samples_per_second": 1.322, "eval_steps_per_second": 0.167, "step": 2900 }, { "epoch": 11.13, "learning_rate": 5.1800766283524907e-05, "loss": 0.0003, "step": 2905 }, { "epoch": 11.13, "eval_accuracy": 0.764367816091954, "eval_loss": 1.6477447748184204, "eval_runtime": 131.7735, "eval_samples_per_second": 1.32, "eval_steps_per_second": 0.167, "step": 2905 }, { "epoch": 11.15, "learning_rate": 5.154533844189017e-05, "loss": 0.002, "step": 2910 }, { "epoch": 11.15, "eval_accuracy": 0.764367816091954, "eval_loss": 1.644494652748108, "eval_runtime": 131.6514, "eval_samples_per_second": 1.322, "eval_steps_per_second": 0.167, "step": 2910 }, { "epoch": 11.17, "learning_rate": 5.128991060025543e-05, "loss": 0.0001, "step": 2915 }, { "epoch": 11.17, "eval_accuracy": 0.7701149425287356, "eval_loss": 1.650899052619934, "eval_runtime": 131.342, "eval_samples_per_second": 1.325, "eval_steps_per_second": 0.168, "step": 2915 }, { "epoch": 11.19, "learning_rate": 5.10344827586207e-05, "loss": 0.0026, "step": 2920 }, { "epoch": 11.19, "eval_accuracy": 0.7758620689655172, "eval_loss": 1.6433719396591187, "eval_runtime": 134.4263, "eval_samples_per_second": 1.294, "eval_steps_per_second": 0.164, "step": 2920 }, { "epoch": 11.21, "learning_rate": 5.077905491698596e-05, "loss": 0.0001, "step": 2925 }, { "epoch": 11.21, "eval_accuracy": 0.7701149425287356, "eval_loss": 1.6255303621292114, "eval_runtime": 131.6755, "eval_samples_per_second": 1.321, "eval_steps_per_second": 0.167, "step": 2925 }, { "epoch": 11.23, "learning_rate": 5.052362707535122e-05, "loss": 0.0003, "step": 2930 }, { "epoch": 11.23, "eval_accuracy": 0.7758620689655172, "eval_loss": 1.6109023094177246, "eval_runtime": 131.6429, "eval_samples_per_second": 1.322, "eval_steps_per_second": 0.167, "step": 2930 }, { "epoch": 11.25, "learning_rate": 5.026819923371647e-05, "loss": 0.0004, "step": 2935 }, { "epoch": 11.25, "eval_accuracy": 0.7758620689655172, "eval_loss": 1.6027604341506958, "eval_runtime": 134.4577, "eval_samples_per_second": 1.294, "eval_steps_per_second": 0.164, "step": 2935 }, { "epoch": 11.26, "learning_rate": 5.0012771392081737e-05, "loss": 0.0044, "step": 2940 }, { "epoch": 11.26, "eval_accuracy": 0.7701149425287356, "eval_loss": 1.6084132194519043, "eval_runtime": 134.0619, "eval_samples_per_second": 1.298, "eval_steps_per_second": 0.164, "step": 2940 }, { "epoch": 11.28, "learning_rate": 4.9757343550447e-05, "loss": 0.0001, "step": 2945 }, { "epoch": 11.28, "eval_accuracy": 0.7701149425287356, "eval_loss": 1.6188857555389404, "eval_runtime": 131.6719, "eval_samples_per_second": 1.321, "eval_steps_per_second": 0.167, "step": 2945 }, { "epoch": 11.3, "learning_rate": 4.950191570881226e-05, "loss": 0.0003, "step": 2950 }, { "epoch": 11.3, "eval_accuracy": 0.7701149425287356, "eval_loss": 1.6270678043365479, "eval_runtime": 131.7047, "eval_samples_per_second": 1.321, "eval_steps_per_second": 0.167, "step": 2950 }, { "epoch": 11.32, "learning_rate": 4.9246487867177527e-05, "loss": 0.2528, "step": 2955 }, { "epoch": 11.32, "eval_accuracy": 0.7701149425287356, "eval_loss": 1.6437561511993408, "eval_runtime": 131.8311, "eval_samples_per_second": 1.32, "eval_steps_per_second": 0.167, "step": 2955 }, { "epoch": 11.34, "learning_rate": 4.899106002554278e-05, "loss": 0.0001, "step": 2960 }, { "epoch": 11.34, "eval_accuracy": 0.7701149425287356, "eval_loss": 1.6640594005584717, "eval_runtime": 131.6151, "eval_samples_per_second": 1.322, "eval_steps_per_second": 0.167, "step": 2960 }, { "epoch": 11.36, "learning_rate": 4.8735632183908047e-05, "loss": 0.0035, "step": 2965 }, { "epoch": 11.36, "eval_accuracy": 0.7758620689655172, "eval_loss": 1.6824891567230225, "eval_runtime": 131.5262, "eval_samples_per_second": 1.323, "eval_steps_per_second": 0.167, "step": 2965 }, { "epoch": 11.38, "learning_rate": 4.848020434227331e-05, "loss": 0.0005, "step": 2970 }, { "epoch": 11.38, "eval_accuracy": 0.7586206896551724, "eval_loss": 1.7031219005584717, "eval_runtime": 135.3436, "eval_samples_per_second": 1.286, "eval_steps_per_second": 0.163, "step": 2970 }, { "epoch": 11.4, "learning_rate": 4.822477650063857e-05, "loss": 0.2952, "step": 2975 }, { "epoch": 11.4, "eval_accuracy": 0.7586206896551724, "eval_loss": 1.6996899843215942, "eval_runtime": 131.6532, "eval_samples_per_second": 1.322, "eval_steps_per_second": 0.167, "step": 2975 }, { "epoch": 11.42, "learning_rate": 4.796934865900383e-05, "loss": 0.0001, "step": 2980 }, { "epoch": 11.42, "eval_accuracy": 0.7471264367816092, "eval_loss": 1.6979587078094482, "eval_runtime": 131.4663, "eval_samples_per_second": 1.324, "eval_steps_per_second": 0.167, "step": 2980 }, { "epoch": 11.44, "learning_rate": 4.771392081736909e-05, "loss": 0.0004, "step": 2985 }, { "epoch": 11.44, "eval_accuracy": 0.7528735632183908, "eval_loss": 1.6984186172485352, "eval_runtime": 134.7298, "eval_samples_per_second": 1.291, "eval_steps_per_second": 0.163, "step": 2985 }, { "epoch": 11.46, "learning_rate": 4.7458492975734357e-05, "loss": 0.0015, "step": 2990 }, { "epoch": 11.46, "eval_accuracy": 0.7471264367816092, "eval_loss": 1.6948944330215454, "eval_runtime": 131.6251, "eval_samples_per_second": 1.322, "eval_steps_per_second": 0.167, "step": 2990 }, { "epoch": 11.48, "learning_rate": 4.720306513409962e-05, "loss": 0.0248, "step": 2995 }, { "epoch": 11.48, "eval_accuracy": 0.7586206896551724, "eval_loss": 1.6734544038772583, "eval_runtime": 131.6311, "eval_samples_per_second": 1.322, "eval_steps_per_second": 0.167, "step": 2995 }, { "epoch": 11.49, "learning_rate": 4.694763729246488e-05, "loss": 0.0, "step": 3000 }, { "epoch": 11.49, "eval_accuracy": 0.7586206896551724, "eval_loss": 1.658267617225647, "eval_runtime": 131.5592, "eval_samples_per_second": 1.323, "eval_steps_per_second": 0.167, "step": 3000 }, { "epoch": 11.51, "learning_rate": 4.669220945083015e-05, "loss": 0.0129, "step": 3005 }, { "epoch": 11.51, "eval_accuracy": 0.7586206896551724, "eval_loss": 1.6415587663650513, "eval_runtime": 131.567, "eval_samples_per_second": 1.323, "eval_steps_per_second": 0.167, "step": 3005 }, { "epoch": 11.53, "learning_rate": 4.643678160919541e-05, "loss": 0.1049, "step": 3010 }, { "epoch": 11.53, "eval_accuracy": 0.7586206896551724, "eval_loss": 1.6351646184921265, "eval_runtime": 134.0039, "eval_samples_per_second": 1.298, "eval_steps_per_second": 0.164, "step": 3010 }, { "epoch": 11.55, "learning_rate": 4.6181353767560667e-05, "loss": 0.0001, "step": 3015 }, { "epoch": 11.55, "eval_accuracy": 0.7586206896551724, "eval_loss": 1.6312400102615356, "eval_runtime": 131.5975, "eval_samples_per_second": 1.322, "eval_steps_per_second": 0.167, "step": 3015 }, { "epoch": 11.57, "learning_rate": 4.592592592592593e-05, "loss": 0.0001, "step": 3020 }, { "epoch": 11.57, "eval_accuracy": 0.7586206896551724, "eval_loss": 1.6293408870697021, "eval_runtime": 131.6558, "eval_samples_per_second": 1.322, "eval_steps_per_second": 0.167, "step": 3020 }, { "epoch": 11.59, "learning_rate": 4.567049808429119e-05, "loss": 0.0004, "step": 3025 }, { "epoch": 11.59, "eval_accuracy": 0.7586206896551724, "eval_loss": 1.5769641399383545, "eval_runtime": 131.6201, "eval_samples_per_second": 1.322, "eval_steps_per_second": 0.167, "step": 3025 }, { "epoch": 11.61, "learning_rate": 4.541507024265646e-05, "loss": 0.0001, "step": 3030 }, { "epoch": 11.61, "eval_accuracy": 0.7758620689655172, "eval_loss": 1.5390961170196533, "eval_runtime": 131.381, "eval_samples_per_second": 1.324, "eval_steps_per_second": 0.167, "step": 3030 }, { "epoch": 11.63, "learning_rate": 4.515964240102171e-05, "loss": 0.0005, "step": 3035 }, { "epoch": 11.63, "eval_accuracy": 0.7701149425287356, "eval_loss": 1.5512661933898926, "eval_runtime": 131.575, "eval_samples_per_second": 1.322, "eval_steps_per_second": 0.167, "step": 3035 }, { "epoch": 11.65, "learning_rate": 4.4904214559386977e-05, "loss": 0.0007, "step": 3040 }, { "epoch": 11.65, "eval_accuracy": 0.764367816091954, "eval_loss": 1.5458471775054932, "eval_runtime": 131.5999, "eval_samples_per_second": 1.322, "eval_steps_per_second": 0.167, "step": 3040 }, { "epoch": 11.67, "learning_rate": 4.464878671775224e-05, "loss": 0.0002, "step": 3045 }, { "epoch": 11.67, "eval_accuracy": 0.7586206896551724, "eval_loss": 1.5148214101791382, "eval_runtime": 131.7032, "eval_samples_per_second": 1.321, "eval_steps_per_second": 0.167, "step": 3045 }, { "epoch": 11.69, "learning_rate": 4.4393358876117497e-05, "loss": 0.0084, "step": 3050 }, { "epoch": 11.69, "eval_accuracy": 0.7758620689655172, "eval_loss": 1.4826661348342896, "eval_runtime": 134.959, "eval_samples_per_second": 1.289, "eval_steps_per_second": 0.163, "step": 3050 }, { "epoch": 11.7, "learning_rate": 4.413793103448276e-05, "loss": 0.0002, "step": 3055 }, { "epoch": 11.7, "eval_accuracy": 0.7701149425287356, "eval_loss": 1.4733022451400757, "eval_runtime": 131.4231, "eval_samples_per_second": 1.324, "eval_steps_per_second": 0.167, "step": 3055 }, { "epoch": 11.72, "learning_rate": 4.388250319284802e-05, "loss": 0.0001, "step": 3060 }, { "epoch": 11.72, "eval_accuracy": 0.7816091954022989, "eval_loss": 1.4764585494995117, "eval_runtime": 131.5735, "eval_samples_per_second": 1.322, "eval_steps_per_second": 0.167, "step": 3060 }, { "epoch": 11.74, "learning_rate": 4.362707535121329e-05, "loss": 0.0005, "step": 3065 }, { "epoch": 11.74, "eval_accuracy": 0.7816091954022989, "eval_loss": 1.48111891746521, "eval_runtime": 131.6303, "eval_samples_per_second": 1.322, "eval_steps_per_second": 0.167, "step": 3065 }, { "epoch": 11.76, "learning_rate": 4.337164750957854e-05, "loss": 0.0037, "step": 3070 }, { "epoch": 11.76, "eval_accuracy": 0.7816091954022989, "eval_loss": 1.452279806137085, "eval_runtime": 135.5032, "eval_samples_per_second": 1.284, "eval_steps_per_second": 0.162, "step": 3070 }, { "epoch": 11.78, "learning_rate": 4.3116219667943807e-05, "loss": 0.0001, "step": 3075 }, { "epoch": 11.78, "eval_accuracy": 0.7758620689655172, "eval_loss": 1.4755744934082031, "eval_runtime": 135.2628, "eval_samples_per_second": 1.286, "eval_steps_per_second": 0.163, "step": 3075 }, { "epoch": 11.8, "learning_rate": 4.286079182630907e-05, "loss": 0.0001, "step": 3080 }, { "epoch": 11.8, "eval_accuracy": 0.7758620689655172, "eval_loss": 1.498329997062683, "eval_runtime": 131.6123, "eval_samples_per_second": 1.322, "eval_steps_per_second": 0.167, "step": 3080 }, { "epoch": 11.82, "learning_rate": 4.2605363984674326e-05, "loss": 0.0002, "step": 3085 }, { "epoch": 11.82, "eval_accuracy": 0.7758620689655172, "eval_loss": 1.5150718688964844, "eval_runtime": 131.5818, "eval_samples_per_second": 1.322, "eval_steps_per_second": 0.167, "step": 3085 }, { "epoch": 11.84, "learning_rate": 4.234993614303959e-05, "loss": 0.0009, "step": 3090 }, { "epoch": 11.84, "eval_accuracy": 0.7758620689655172, "eval_loss": 1.5199931859970093, "eval_runtime": 131.7399, "eval_samples_per_second": 1.321, "eval_steps_per_second": 0.167, "step": 3090 }, { "epoch": 11.86, "learning_rate": 4.209450830140485e-05, "loss": 0.0001, "step": 3095 }, { "epoch": 11.86, "eval_accuracy": 0.7931034482758621, "eval_loss": 1.5113189220428467, "eval_runtime": 131.7523, "eval_samples_per_second": 1.321, "eval_steps_per_second": 0.167, "step": 3095 }, { "epoch": 11.88, "learning_rate": 4.1839080459770117e-05, "loss": 0.0001, "step": 3100 }, { "epoch": 11.88, "eval_accuracy": 0.7931034482758621, "eval_loss": 1.5138452053070068, "eval_runtime": 131.5049, "eval_samples_per_second": 1.323, "eval_steps_per_second": 0.167, "step": 3100 }, { "epoch": 11.9, "learning_rate": 4.158365261813538e-05, "loss": 0.001, "step": 3105 }, { "epoch": 11.9, "eval_accuracy": 0.7931034482758621, "eval_loss": 1.440112590789795, "eval_runtime": 131.3922, "eval_samples_per_second": 1.324, "eval_steps_per_second": 0.167, "step": 3105 }, { "epoch": 11.92, "learning_rate": 4.132822477650064e-05, "loss": 0.0001, "step": 3110 }, { "epoch": 11.92, "eval_accuracy": 0.7931034482758621, "eval_loss": 1.4551225900650024, "eval_runtime": 131.4821, "eval_samples_per_second": 1.323, "eval_steps_per_second": 0.167, "step": 3110 }, { "epoch": 11.93, "learning_rate": 4.107279693486591e-05, "loss": 0.0001, "step": 3115 }, { "epoch": 11.93, "eval_accuracy": 0.7873563218390804, "eval_loss": 1.4636459350585938, "eval_runtime": 132.1656, "eval_samples_per_second": 1.317, "eval_steps_per_second": 0.166, "step": 3115 }, { "epoch": 11.95, "learning_rate": 4.081736909323116e-05, "loss": 0.0007, "step": 3120 }, { "epoch": 11.95, "eval_accuracy": 0.7816091954022989, "eval_loss": 1.4635924100875854, "eval_runtime": 131.5512, "eval_samples_per_second": 1.323, "eval_steps_per_second": 0.167, "step": 3120 }, { "epoch": 11.97, "learning_rate": 4.0561941251596427e-05, "loss": 0.0002, "step": 3125 }, { "epoch": 11.97, "eval_accuracy": 0.7988505747126436, "eval_loss": 1.462807536125183, "eval_runtime": 131.5756, "eval_samples_per_second": 1.322, "eval_steps_per_second": 0.167, "step": 3125 }, { "epoch": 11.99, "learning_rate": 4.030651340996169e-05, "loss": 0.0001, "step": 3130 }, { "epoch": 11.99, "eval_accuracy": 0.7931034482758621, "eval_loss": 1.4629733562469482, "eval_runtime": 131.6187, "eval_samples_per_second": 1.322, "eval_steps_per_second": 0.167, "step": 3130 }, { "epoch": 12.01, "learning_rate": 4.005108556832695e-05, "loss": 0.0001, "step": 3135 }, { "epoch": 12.01, "eval_accuracy": 0.7931034482758621, "eval_loss": 1.463382601737976, "eval_runtime": 131.7186, "eval_samples_per_second": 1.321, "eval_steps_per_second": 0.167, "step": 3135 }, { "epoch": 12.03, "learning_rate": 3.979565772669221e-05, "loss": 0.0001, "step": 3140 }, { "epoch": 12.03, "eval_accuracy": 0.7931034482758621, "eval_loss": 1.4633333683013916, "eval_runtime": 131.6306, "eval_samples_per_second": 1.322, "eval_steps_per_second": 0.167, "step": 3140 }, { "epoch": 12.05, "learning_rate": 3.954022988505747e-05, "loss": 0.0001, "step": 3145 }, { "epoch": 12.05, "eval_accuracy": 0.7931034482758621, "eval_loss": 1.463578462600708, "eval_runtime": 131.6753, "eval_samples_per_second": 1.321, "eval_steps_per_second": 0.167, "step": 3145 }, { "epoch": 12.07, "learning_rate": 3.9284802043422737e-05, "loss": 0.1353, "step": 3150 }, { "epoch": 12.07, "eval_accuracy": 0.7931034482758621, "eval_loss": 1.4747191667556763, "eval_runtime": 131.6197, "eval_samples_per_second": 1.322, "eval_steps_per_second": 0.167, "step": 3150 }, { "epoch": 12.09, "learning_rate": 3.9029374201788e-05, "loss": 0.0001, "step": 3155 }, { "epoch": 12.09, "eval_accuracy": 0.7931034482758621, "eval_loss": 1.4827746152877808, "eval_runtime": 131.4765, "eval_samples_per_second": 1.323, "eval_steps_per_second": 0.167, "step": 3155 }, { "epoch": 12.11, "learning_rate": 3.8773946360153257e-05, "loss": 0.0001, "step": 3160 }, { "epoch": 12.11, "eval_accuracy": 0.7873563218390804, "eval_loss": 1.4878242015838623, "eval_runtime": 131.5682, "eval_samples_per_second": 1.323, "eval_steps_per_second": 0.167, "step": 3160 }, { "epoch": 12.13, "learning_rate": 3.851851851851852e-05, "loss": 0.001, "step": 3165 }, { "epoch": 12.13, "eval_accuracy": 0.7873563218390804, "eval_loss": 1.4895766973495483, "eval_runtime": 131.5675, "eval_samples_per_second": 1.323, "eval_steps_per_second": 0.167, "step": 3165 }, { "epoch": 12.15, "learning_rate": 3.826309067688378e-05, "loss": 0.0719, "step": 3170 }, { "epoch": 12.15, "eval_accuracy": 0.7701149425287356, "eval_loss": 1.5220507383346558, "eval_runtime": 134.8358, "eval_samples_per_second": 1.29, "eval_steps_per_second": 0.163, "step": 3170 }, { "epoch": 12.16, "learning_rate": 3.800766283524904e-05, "loss": 0.0002, "step": 3175 }, { "epoch": 12.16, "eval_accuracy": 0.764367816091954, "eval_loss": 1.595354437828064, "eval_runtime": 137.4557, "eval_samples_per_second": 1.266, "eval_steps_per_second": 0.16, "step": 3175 }, { "epoch": 12.18, "learning_rate": 3.77522349936143e-05, "loss": 0.0001, "step": 3180 }, { "epoch": 12.18, "eval_accuracy": 0.7586206896551724, "eval_loss": 1.636945366859436, "eval_runtime": 132.8292, "eval_samples_per_second": 1.31, "eval_steps_per_second": 0.166, "step": 3180 }, { "epoch": 12.2, "learning_rate": 3.7496807151979567e-05, "loss": 0.0005, "step": 3185 }, { "epoch": 12.2, "eval_accuracy": 0.7586206896551724, "eval_loss": 1.6581592559814453, "eval_runtime": 136.5024, "eval_samples_per_second": 1.275, "eval_steps_per_second": 0.161, "step": 3185 }, { "epoch": 12.22, "learning_rate": 3.724137931034483e-05, "loss": 0.0001, "step": 3190 }, { "epoch": 12.22, "eval_accuracy": 0.7586206896551724, "eval_loss": 1.6685248613357544, "eval_runtime": 133.3428, "eval_samples_per_second": 1.305, "eval_steps_per_second": 0.165, "step": 3190 }, { "epoch": 12.24, "learning_rate": 3.6985951468710087e-05, "loss": 0.0001, "step": 3195 }, { "epoch": 12.24, "eval_accuracy": 0.7586206896551724, "eval_loss": 1.6744290590286255, "eval_runtime": 133.4932, "eval_samples_per_second": 1.303, "eval_steps_per_second": 0.165, "step": 3195 }, { "epoch": 12.26, "learning_rate": 3.673052362707535e-05, "loss": 0.0001, "step": 3200 }, { "epoch": 12.26, "eval_accuracy": 0.7586206896551724, "eval_loss": 1.6778743267059326, "eval_runtime": 134.2586, "eval_samples_per_second": 1.296, "eval_steps_per_second": 0.164, "step": 3200 }, { "epoch": 12.28, "learning_rate": 3.647509578544061e-05, "loss": 0.0921, "step": 3205 }, { "epoch": 12.28, "eval_accuracy": 0.7586206896551724, "eval_loss": 1.6830893754959106, "eval_runtime": 134.2289, "eval_samples_per_second": 1.296, "eval_steps_per_second": 0.164, "step": 3205 }, { "epoch": 12.3, "learning_rate": 3.6219667943805877e-05, "loss": 0.0002, "step": 3210 }, { "epoch": 12.3, "eval_accuracy": 0.7528735632183908, "eval_loss": 1.698684573173523, "eval_runtime": 134.2954, "eval_samples_per_second": 1.296, "eval_steps_per_second": 0.164, "step": 3210 }, { "epoch": 12.32, "learning_rate": 3.596424010217114e-05, "loss": 0.0005, "step": 3215 }, { "epoch": 12.32, "eval_accuracy": 0.7528735632183908, "eval_loss": 1.705964207649231, "eval_runtime": 132.857, "eval_samples_per_second": 1.31, "eval_steps_per_second": 0.166, "step": 3215 }, { "epoch": 12.34, "learning_rate": 3.57088122605364e-05, "loss": 0.0006, "step": 3220 }, { "epoch": 12.34, "eval_accuracy": 0.7471264367816092, "eval_loss": 1.7057161331176758, "eval_runtime": 133.2456, "eval_samples_per_second": 1.306, "eval_steps_per_second": 0.165, "step": 3220 }, { "epoch": 12.36, "learning_rate": 3.545338441890167e-05, "loss": 0.0004, "step": 3225 }, { "epoch": 12.36, "eval_accuracy": 0.7528735632183908, "eval_loss": 1.7048311233520508, "eval_runtime": 132.9324, "eval_samples_per_second": 1.309, "eval_steps_per_second": 0.165, "step": 3225 }, { "epoch": 12.38, "learning_rate": 3.519795657726692e-05, "loss": 0.0, "step": 3230 }, { "epoch": 12.38, "eval_accuracy": 0.7528735632183908, "eval_loss": 1.7053695917129517, "eval_runtime": 134.6248, "eval_samples_per_second": 1.292, "eval_steps_per_second": 0.163, "step": 3230 }, { "epoch": 12.39, "learning_rate": 3.4942528735632187e-05, "loss": 0.0683, "step": 3235 }, { "epoch": 12.39, "eval_accuracy": 0.7528735632183908, "eval_loss": 1.7123521566390991, "eval_runtime": 133.0527, "eval_samples_per_second": 1.308, "eval_steps_per_second": 0.165, "step": 3235 }, { "epoch": 12.41, "learning_rate": 3.468710089399745e-05, "loss": 0.0834, "step": 3240 }, { "epoch": 12.41, "eval_accuracy": 0.7528735632183908, "eval_loss": 1.7344695329666138, "eval_runtime": 132.9362, "eval_samples_per_second": 1.309, "eval_steps_per_second": 0.165, "step": 3240 }, { "epoch": 12.43, "learning_rate": 3.443167305236271e-05, "loss": 0.0007, "step": 3245 }, { "epoch": 12.43, "eval_accuracy": 0.7471264367816092, "eval_loss": 1.7519522905349731, "eval_runtime": 133.0604, "eval_samples_per_second": 1.308, "eval_steps_per_second": 0.165, "step": 3245 }, { "epoch": 12.45, "learning_rate": 3.417624521072797e-05, "loss": 0.0005, "step": 3250 }, { "epoch": 12.45, "eval_accuracy": 0.7471264367816092, "eval_loss": 1.7614890336990356, "eval_runtime": 133.4235, "eval_samples_per_second": 1.304, "eval_steps_per_second": 0.165, "step": 3250 }, { "epoch": 12.47, "learning_rate": 3.392081736909323e-05, "loss": 0.1469, "step": 3255 }, { "epoch": 12.47, "eval_accuracy": 0.7528735632183908, "eval_loss": 1.7582188844680786, "eval_runtime": 133.2039, "eval_samples_per_second": 1.306, "eval_steps_per_second": 0.165, "step": 3255 }, { "epoch": 12.49, "learning_rate": 3.36653895274585e-05, "loss": 0.0001, "step": 3260 }, { "epoch": 12.49, "eval_accuracy": 0.7471264367816092, "eval_loss": 1.7566941976547241, "eval_runtime": 136.0308, "eval_samples_per_second": 1.279, "eval_steps_per_second": 0.162, "step": 3260 }, { "epoch": 12.51, "learning_rate": 3.340996168582375e-05, "loss": 0.0013, "step": 3265 }, { "epoch": 12.51, "eval_accuracy": 0.7471264367816092, "eval_loss": 1.7564201354980469, "eval_runtime": 131.5808, "eval_samples_per_second": 1.322, "eval_steps_per_second": 0.167, "step": 3265 }, { "epoch": 12.53, "learning_rate": 3.3154533844189017e-05, "loss": 0.0009, "step": 3270 }, { "epoch": 12.53, "eval_accuracy": 0.7413793103448276, "eval_loss": 1.7547907829284668, "eval_runtime": 131.5031, "eval_samples_per_second": 1.323, "eval_steps_per_second": 0.167, "step": 3270 }, { "epoch": 12.55, "learning_rate": 3.289910600255428e-05, "loss": 0.0014, "step": 3275 }, { "epoch": 12.55, "eval_accuracy": 0.7413793103448276, "eval_loss": 1.7478688955307007, "eval_runtime": 131.6966, "eval_samples_per_second": 1.321, "eval_steps_per_second": 0.167, "step": 3275 }, { "epoch": 12.57, "learning_rate": 3.264367816091954e-05, "loss": 0.0358, "step": 3280 }, { "epoch": 12.57, "eval_accuracy": 0.7413793103448276, "eval_loss": 1.7233160734176636, "eval_runtime": 134.66, "eval_samples_per_second": 1.292, "eval_steps_per_second": 0.163, "step": 3280 }, { "epoch": 12.59, "learning_rate": 3.23882503192848e-05, "loss": 0.0002, "step": 3285 }, { "epoch": 12.59, "eval_accuracy": 0.7471264367816092, "eval_loss": 1.6970218420028687, "eval_runtime": 131.3525, "eval_samples_per_second": 1.325, "eval_steps_per_second": 0.167, "step": 3285 }, { "epoch": 12.61, "learning_rate": 3.213282247765006e-05, "loss": 0.0001, "step": 3290 }, { "epoch": 12.61, "eval_accuracy": 0.7528735632183908, "eval_loss": 1.6798888444900513, "eval_runtime": 131.4043, "eval_samples_per_second": 1.324, "eval_steps_per_second": 0.167, "step": 3290 }, { "epoch": 12.62, "learning_rate": 3.1877394636015327e-05, "loss": 0.012, "step": 3295 }, { "epoch": 12.62, "eval_accuracy": 0.7528735632183908, "eval_loss": 1.6489917039871216, "eval_runtime": 133.8875, "eval_samples_per_second": 1.3, "eval_steps_per_second": 0.164, "step": 3295 }, { "epoch": 12.64, "learning_rate": 3.162196679438058e-05, "loss": 0.0055, "step": 3300 }, { "epoch": 12.64, "eval_accuracy": 0.764367816091954, "eval_loss": 1.6284756660461426, "eval_runtime": 134.5307, "eval_samples_per_second": 1.293, "eval_steps_per_second": 0.164, "step": 3300 }, { "epoch": 12.66, "learning_rate": 3.1366538952745847e-05, "loss": 0.0002, "step": 3305 }, { "epoch": 12.66, "eval_accuracy": 0.7586206896551724, "eval_loss": 1.6172682046890259, "eval_runtime": 131.376, "eval_samples_per_second": 1.324, "eval_steps_per_second": 0.167, "step": 3305 }, { "epoch": 12.68, "learning_rate": 3.111111111111111e-05, "loss": 0.0001, "step": 3310 }, { "epoch": 12.68, "eval_accuracy": 0.7471264367816092, "eval_loss": 1.6175791025161743, "eval_runtime": 131.4088, "eval_samples_per_second": 1.324, "eval_steps_per_second": 0.167, "step": 3310 }, { "epoch": 12.7, "learning_rate": 3.085568326947637e-05, "loss": 0.0, "step": 3315 }, { "epoch": 12.7, "eval_accuracy": 0.735632183908046, "eval_loss": 1.6215569972991943, "eval_runtime": 131.6808, "eval_samples_per_second": 1.321, "eval_steps_per_second": 0.167, "step": 3315 }, { "epoch": 12.72, "learning_rate": 3.0600255427841637e-05, "loss": 0.0001, "step": 3320 }, { "epoch": 12.72, "eval_accuracy": 0.735632183908046, "eval_loss": 1.6240766048431396, "eval_runtime": 131.5056, "eval_samples_per_second": 1.323, "eval_steps_per_second": 0.167, "step": 3320 }, { "epoch": 12.74, "learning_rate": 3.0344827586206897e-05, "loss": 0.0009, "step": 3325 }, { "epoch": 12.74, "eval_accuracy": 0.7298850574712644, "eval_loss": 1.6174923181533813, "eval_runtime": 134.7652, "eval_samples_per_second": 1.291, "eval_steps_per_second": 0.163, "step": 3325 }, { "epoch": 12.76, "learning_rate": 3.008939974457216e-05, "loss": 0.0001, "step": 3330 }, { "epoch": 12.76, "eval_accuracy": 0.735632183908046, "eval_loss": 1.5911328792572021, "eval_runtime": 131.6881, "eval_samples_per_second": 1.321, "eval_steps_per_second": 0.167, "step": 3330 }, { "epoch": 12.78, "learning_rate": 2.9833971902937423e-05, "loss": 0.0, "step": 3335 }, { "epoch": 12.78, "eval_accuracy": 0.7241379310344828, "eval_loss": 1.5766935348510742, "eval_runtime": 131.5549, "eval_samples_per_second": 1.323, "eval_steps_per_second": 0.167, "step": 3335 }, { "epoch": 12.8, "learning_rate": 2.9578544061302683e-05, "loss": 0.0003, "step": 3340 }, { "epoch": 12.8, "eval_accuracy": 0.7241379310344828, "eval_loss": 1.5662565231323242, "eval_runtime": 131.8731, "eval_samples_per_second": 1.319, "eval_steps_per_second": 0.167, "step": 3340 }, { "epoch": 12.82, "learning_rate": 2.9323116219667947e-05, "loss": 0.0001, "step": 3345 }, { "epoch": 12.82, "eval_accuracy": 0.7298850574712644, "eval_loss": 1.5604114532470703, "eval_runtime": 131.5616, "eval_samples_per_second": 1.323, "eval_steps_per_second": 0.167, "step": 3345 }, { "epoch": 12.84, "learning_rate": 2.906768837803321e-05, "loss": 0.0002, "step": 3350 }, { "epoch": 12.84, "eval_accuracy": 0.735632183908046, "eval_loss": 1.5551427602767944, "eval_runtime": 131.4326, "eval_samples_per_second": 1.324, "eval_steps_per_second": 0.167, "step": 3350 }, { "epoch": 12.85, "learning_rate": 2.8812260536398467e-05, "loss": 0.0001, "step": 3355 }, { "epoch": 12.85, "eval_accuracy": 0.7413793103448276, "eval_loss": 1.5520435571670532, "eval_runtime": 131.5755, "eval_samples_per_second": 1.322, "eval_steps_per_second": 0.167, "step": 3355 }, { "epoch": 12.87, "learning_rate": 2.855683269476373e-05, "loss": 0.0002, "step": 3360 }, { "epoch": 12.87, "eval_accuracy": 0.7413793103448276, "eval_loss": 1.5491315126419067, "eval_runtime": 131.5638, "eval_samples_per_second": 1.323, "eval_steps_per_second": 0.167, "step": 3360 }, { "epoch": 12.89, "learning_rate": 2.8301404853128993e-05, "loss": 0.0001, "step": 3365 }, { "epoch": 12.89, "eval_accuracy": 0.7413793103448276, "eval_loss": 1.5480037927627563, "eval_runtime": 134.5953, "eval_samples_per_second": 1.293, "eval_steps_per_second": 0.163, "step": 3365 }, { "epoch": 12.91, "learning_rate": 2.8045977011494257e-05, "loss": 0.0003, "step": 3370 }, { "epoch": 12.91, "eval_accuracy": 0.7413793103448276, "eval_loss": 1.5459556579589844, "eval_runtime": 131.726, "eval_samples_per_second": 1.321, "eval_steps_per_second": 0.167, "step": 3370 }, { "epoch": 12.93, "learning_rate": 2.7790549169859513e-05, "loss": 0.0001, "step": 3375 }, { "epoch": 12.93, "eval_accuracy": 0.7413793103448276, "eval_loss": 1.5453726053237915, "eval_runtime": 134.0392, "eval_samples_per_second": 1.298, "eval_steps_per_second": 0.164, "step": 3375 }, { "epoch": 12.95, "learning_rate": 2.7535121328224777e-05, "loss": 0.0001, "step": 3380 }, { "epoch": 12.95, "eval_accuracy": 0.7471264367816092, "eval_loss": 1.5440211296081543, "eval_runtime": 131.4982, "eval_samples_per_second": 1.323, "eval_steps_per_second": 0.167, "step": 3380 }, { "epoch": 12.97, "learning_rate": 2.727969348659004e-05, "loss": 0.0001, "step": 3385 }, { "epoch": 12.97, "eval_accuracy": 0.7471264367816092, "eval_loss": 1.5426923036575317, "eval_runtime": 134.7213, "eval_samples_per_second": 1.292, "eval_steps_per_second": 0.163, "step": 3385 }, { "epoch": 12.99, "learning_rate": 2.70242656449553e-05, "loss": 0.0002, "step": 3390 }, { "epoch": 12.99, "eval_accuracy": 0.7471264367816092, "eval_loss": 1.5425446033477783, "eval_runtime": 131.7157, "eval_samples_per_second": 1.321, "eval_steps_per_second": 0.167, "step": 3390 }, { "epoch": 13.01, "learning_rate": 2.6768837803320563e-05, "loss": 0.0, "step": 3395 }, { "epoch": 13.01, "eval_accuracy": 0.7528735632183908, "eval_loss": 1.5422731637954712, "eval_runtime": 131.664, "eval_samples_per_second": 1.322, "eval_steps_per_second": 0.167, "step": 3395 }, { "epoch": 13.03, "learning_rate": 2.6513409961685827e-05, "loss": 0.0002, "step": 3400 }, { "epoch": 13.03, "eval_accuracy": 0.7528735632183908, "eval_loss": 1.540529489517212, "eval_runtime": 131.4555, "eval_samples_per_second": 1.324, "eval_steps_per_second": 0.167, "step": 3400 }, { "epoch": 13.05, "learning_rate": 2.625798212005109e-05, "loss": 0.0001, "step": 3405 }, { "epoch": 13.05, "eval_accuracy": 0.7528735632183908, "eval_loss": 1.5385899543762207, "eval_runtime": 131.4962, "eval_samples_per_second": 1.323, "eval_steps_per_second": 0.167, "step": 3405 }, { "epoch": 13.07, "learning_rate": 2.6002554278416347e-05, "loss": 0.0001, "step": 3410 }, { "epoch": 13.07, "eval_accuracy": 0.7528735632183908, "eval_loss": 1.5374879837036133, "eval_runtime": 131.5346, "eval_samples_per_second": 1.323, "eval_steps_per_second": 0.167, "step": 3410 }, { "epoch": 13.08, "learning_rate": 2.574712643678161e-05, "loss": 0.0001, "step": 3415 }, { "epoch": 13.08, "eval_accuracy": 0.7528735632183908, "eval_loss": 1.5360500812530518, "eval_runtime": 131.7287, "eval_samples_per_second": 1.321, "eval_steps_per_second": 0.167, "step": 3415 }, { "epoch": 13.1, "learning_rate": 2.5491698595146873e-05, "loss": 0.0001, "step": 3420 }, { "epoch": 13.1, "eval_accuracy": 0.7528735632183908, "eval_loss": 1.5362660884857178, "eval_runtime": 131.6875, "eval_samples_per_second": 1.321, "eval_steps_per_second": 0.167, "step": 3420 }, { "epoch": 13.12, "learning_rate": 2.5236270753512137e-05, "loss": 0.0, "step": 3425 }, { "epoch": 13.12, "eval_accuracy": 0.7528735632183908, "eval_loss": 1.5362850427627563, "eval_runtime": 131.432, "eval_samples_per_second": 1.324, "eval_steps_per_second": 0.167, "step": 3425 }, { "epoch": 13.14, "learning_rate": 2.4980842911877393e-05, "loss": 0.0001, "step": 3430 }, { "epoch": 13.14, "eval_accuracy": 0.7528735632183908, "eval_loss": 1.5356768369674683, "eval_runtime": 131.5867, "eval_samples_per_second": 1.322, "eval_steps_per_second": 0.167, "step": 3430 }, { "epoch": 13.16, "learning_rate": 2.4725415070242657e-05, "loss": 0.0001, "step": 3435 }, { "epoch": 13.16, "eval_accuracy": 0.7528735632183908, "eval_loss": 1.5358167886734009, "eval_runtime": 134.5615, "eval_samples_per_second": 1.293, "eval_steps_per_second": 0.163, "step": 3435 }, { "epoch": 13.18, "learning_rate": 2.446998722860792e-05, "loss": 0.0001, "step": 3440 }, { "epoch": 13.18, "eval_accuracy": 0.7528735632183908, "eval_loss": 1.535969853401184, "eval_runtime": 131.7354, "eval_samples_per_second": 1.321, "eval_steps_per_second": 0.167, "step": 3440 }, { "epoch": 13.2, "learning_rate": 2.4214559386973183e-05, "loss": 0.1681, "step": 3445 }, { "epoch": 13.2, "eval_accuracy": 0.7528735632183908, "eval_loss": 1.5371443033218384, "eval_runtime": 131.3478, "eval_samples_per_second": 1.325, "eval_steps_per_second": 0.167, "step": 3445 }, { "epoch": 13.22, "learning_rate": 2.3959131545338443e-05, "loss": 0.0, "step": 3450 }, { "epoch": 13.22, "eval_accuracy": 0.7471264367816092, "eval_loss": 1.5422464609146118, "eval_runtime": 134.7795, "eval_samples_per_second": 1.291, "eval_steps_per_second": 0.163, "step": 3450 }, { "epoch": 13.24, "learning_rate": 2.3703703703703707e-05, "loss": 0.0001, "step": 3455 }, { "epoch": 13.24, "eval_accuracy": 0.7528735632183908, "eval_loss": 1.546493649482727, "eval_runtime": 131.4931, "eval_samples_per_second": 1.323, "eval_steps_per_second": 0.167, "step": 3455 }, { "epoch": 13.26, "learning_rate": 2.3448275862068967e-05, "loss": 0.0001, "step": 3460 }, { "epoch": 13.26, "eval_accuracy": 0.7586206896551724, "eval_loss": 1.5489312410354614, "eval_runtime": 131.5674, "eval_samples_per_second": 1.323, "eval_steps_per_second": 0.167, "step": 3460 }, { "epoch": 13.28, "learning_rate": 2.319284802043423e-05, "loss": 0.0, "step": 3465 }, { "epoch": 13.28, "eval_accuracy": 0.7586206896551724, "eval_loss": 1.550827145576477, "eval_runtime": 134.4819, "eval_samples_per_second": 1.294, "eval_steps_per_second": 0.164, "step": 3465 }, { "epoch": 13.3, "learning_rate": 2.293742017879949e-05, "loss": 0.0, "step": 3470 }, { "epoch": 13.3, "eval_accuracy": 0.7586206896551724, "eval_loss": 1.5521552562713623, "eval_runtime": 131.4751, "eval_samples_per_second": 1.323, "eval_steps_per_second": 0.167, "step": 3470 }, { "epoch": 13.31, "learning_rate": 2.268199233716475e-05, "loss": 0.0001, "step": 3475 }, { "epoch": 13.31, "eval_accuracy": 0.7586206896551724, "eval_loss": 1.5527838468551636, "eval_runtime": 133.8598, "eval_samples_per_second": 1.3, "eval_steps_per_second": 0.164, "step": 3475 }, { "epoch": 13.33, "learning_rate": 2.2426564495530013e-05, "loss": 0.0012, "step": 3480 }, { "epoch": 13.33, "eval_accuracy": 0.7586206896551724, "eval_loss": 1.551941990852356, "eval_runtime": 133.9177, "eval_samples_per_second": 1.299, "eval_steps_per_second": 0.164, "step": 3480 }, { "epoch": 13.35, "learning_rate": 2.2171136653895273e-05, "loss": 0.116, "step": 3485 }, { "epoch": 13.35, "eval_accuracy": 0.7586206896551724, "eval_loss": 1.5588303804397583, "eval_runtime": 131.58, "eval_samples_per_second": 1.322, "eval_steps_per_second": 0.167, "step": 3485 }, { "epoch": 13.37, "learning_rate": 2.1915708812260537e-05, "loss": 0.0017, "step": 3490 }, { "epoch": 13.37, "eval_accuracy": 0.7586206896551724, "eval_loss": 1.5604760646820068, "eval_runtime": 131.2524, "eval_samples_per_second": 1.326, "eval_steps_per_second": 0.168, "step": 3490 }, { "epoch": 13.39, "learning_rate": 2.16602809706258e-05, "loss": 0.0001, "step": 3495 }, { "epoch": 13.39, "eval_accuracy": 0.7586206896551724, "eval_loss": 1.55997896194458, "eval_runtime": 131.3434, "eval_samples_per_second": 1.325, "eval_steps_per_second": 0.167, "step": 3495 }, { "epoch": 13.41, "learning_rate": 2.1404853128991063e-05, "loss": 0.0, "step": 3500 }, { "epoch": 13.41, "eval_accuracy": 0.7586206896551724, "eval_loss": 1.5588353872299194, "eval_runtime": 135.0074, "eval_samples_per_second": 1.289, "eval_steps_per_second": 0.163, "step": 3500 }, { "epoch": 13.43, "learning_rate": 2.1149425287356323e-05, "loss": 0.0001, "step": 3505 }, { "epoch": 13.43, "eval_accuracy": 0.7586206896551724, "eval_loss": 1.559090256690979, "eval_runtime": 131.5397, "eval_samples_per_second": 1.323, "eval_steps_per_second": 0.167, "step": 3505 }, { "epoch": 13.45, "learning_rate": 2.0893997445721587e-05, "loss": 0.1047, "step": 3510 }, { "epoch": 13.45, "eval_accuracy": 0.7586206896551724, "eval_loss": 1.565811038017273, "eval_runtime": 135.6105, "eval_samples_per_second": 1.283, "eval_steps_per_second": 0.162, "step": 3510 }, { "epoch": 13.47, "learning_rate": 2.0638569604086847e-05, "loss": 0.0023, "step": 3515 }, { "epoch": 13.47, "eval_accuracy": 0.7586206896551724, "eval_loss": 1.568742036819458, "eval_runtime": 131.5718, "eval_samples_per_second": 1.322, "eval_steps_per_second": 0.167, "step": 3515 }, { "epoch": 13.49, "learning_rate": 2.0383141762452107e-05, "loss": 0.0001, "step": 3520 }, { "epoch": 13.49, "eval_accuracy": 0.7586206896551724, "eval_loss": 1.569899320602417, "eval_runtime": 131.5005, "eval_samples_per_second": 1.323, "eval_steps_per_second": 0.167, "step": 3520 }, { "epoch": 13.51, "learning_rate": 2.012771392081737e-05, "loss": 0.0001, "step": 3525 }, { "epoch": 13.51, "eval_accuracy": 0.7586206896551724, "eval_loss": 1.5698680877685547, "eval_runtime": 132.1871, "eval_samples_per_second": 1.316, "eval_steps_per_second": 0.166, "step": 3525 }, { "epoch": 13.52, "learning_rate": 1.987228607918263e-05, "loss": 0.0001, "step": 3530 }, { "epoch": 13.52, "eval_accuracy": 0.7586206896551724, "eval_loss": 1.57002592086792, "eval_runtime": 131.6043, "eval_samples_per_second": 1.322, "eval_steps_per_second": 0.167, "step": 3530 }, { "epoch": 13.54, "learning_rate": 1.9616858237547893e-05, "loss": 0.0001, "step": 3535 }, { "epoch": 13.54, "eval_accuracy": 0.7586206896551724, "eval_loss": 1.57023286819458, "eval_runtime": 131.5771, "eval_samples_per_second": 1.322, "eval_steps_per_second": 0.167, "step": 3535 }, { "epoch": 13.56, "learning_rate": 1.9361430395913153e-05, "loss": 0.0001, "step": 3540 }, { "epoch": 13.56, "eval_accuracy": 0.7586206896551724, "eval_loss": 1.5700526237487793, "eval_runtime": 131.5081, "eval_samples_per_second": 1.323, "eval_steps_per_second": 0.167, "step": 3540 }, { "epoch": 13.58, "learning_rate": 1.9106002554278417e-05, "loss": 0.0001, "step": 3545 }, { "epoch": 13.58, "eval_accuracy": 0.7586206896551724, "eval_loss": 1.568669080734253, "eval_runtime": 131.5846, "eval_samples_per_second": 1.322, "eval_steps_per_second": 0.167, "step": 3545 }, { "epoch": 13.6, "learning_rate": 1.885057471264368e-05, "loss": 0.0001, "step": 3550 }, { "epoch": 13.6, "eval_accuracy": 0.7586206896551724, "eval_loss": 1.5692752599716187, "eval_runtime": 131.4298, "eval_samples_per_second": 1.324, "eval_steps_per_second": 0.167, "step": 3550 }, { "epoch": 13.62, "learning_rate": 1.8595146871008943e-05, "loss": 0.0001, "step": 3555 }, { "epoch": 13.62, "eval_accuracy": 0.7586206896551724, "eval_loss": 1.5690228939056396, "eval_runtime": 135.56, "eval_samples_per_second": 1.284, "eval_steps_per_second": 0.162, "step": 3555 }, { "epoch": 13.64, "learning_rate": 1.8339719029374203e-05, "loss": 0.0001, "step": 3560 }, { "epoch": 13.64, "eval_accuracy": 0.7586206896551724, "eval_loss": 1.5685721635818481, "eval_runtime": 131.7676, "eval_samples_per_second": 1.321, "eval_steps_per_second": 0.167, "step": 3560 }, { "epoch": 13.66, "learning_rate": 1.8084291187739463e-05, "loss": 0.0002, "step": 3565 }, { "epoch": 13.66, "eval_accuracy": 0.7586206896551724, "eval_loss": 1.5687892436981201, "eval_runtime": 131.381, "eval_samples_per_second": 1.324, "eval_steps_per_second": 0.167, "step": 3565 }, { "epoch": 13.68, "learning_rate": 1.7828863346104727e-05, "loss": 0.0001, "step": 3570 }, { "epoch": 13.68, "eval_accuracy": 0.7586206896551724, "eval_loss": 1.569113850593567, "eval_runtime": 131.5094, "eval_samples_per_second": 1.323, "eval_steps_per_second": 0.167, "step": 3570 }, { "epoch": 13.7, "learning_rate": 1.7573435504469987e-05, "loss": 0.0001, "step": 3575 }, { "epoch": 13.7, "eval_accuracy": 0.7586206896551724, "eval_loss": 1.5693120956420898, "eval_runtime": 131.6036, "eval_samples_per_second": 1.322, "eval_steps_per_second": 0.167, "step": 3575 }, { "epoch": 13.72, "learning_rate": 1.731800766283525e-05, "loss": 0.0001, "step": 3580 }, { "epoch": 13.72, "eval_accuracy": 0.7586206896551724, "eval_loss": 1.5689599514007568, "eval_runtime": 135.599, "eval_samples_per_second": 1.283, "eval_steps_per_second": 0.162, "step": 3580 }, { "epoch": 13.74, "learning_rate": 1.706257982120051e-05, "loss": 0.0, "step": 3585 }, { "epoch": 13.74, "eval_accuracy": 0.7586206896551724, "eval_loss": 1.5693317651748657, "eval_runtime": 134.7788, "eval_samples_per_second": 1.291, "eval_steps_per_second": 0.163, "step": 3585 }, { "epoch": 13.75, "learning_rate": 1.6807151979565773e-05, "loss": 0.0631, "step": 3590 }, { "epoch": 13.75, "eval_accuracy": 0.7586206896551724, "eval_loss": 1.5679577589035034, "eval_runtime": 134.5103, "eval_samples_per_second": 1.294, "eval_steps_per_second": 0.164, "step": 3590 }, { "epoch": 13.77, "learning_rate": 1.6551724137931037e-05, "loss": 0.0, "step": 3595 }, { "epoch": 13.77, "eval_accuracy": 0.7586206896551724, "eval_loss": 1.5653505325317383, "eval_runtime": 131.652, "eval_samples_per_second": 1.322, "eval_steps_per_second": 0.167, "step": 3595 }, { "epoch": 13.79, "learning_rate": 1.62962962962963e-05, "loss": 0.0001, "step": 3600 }, { "epoch": 13.79, "eval_accuracy": 0.7586206896551724, "eval_loss": 1.5649654865264893, "eval_runtime": 131.6632, "eval_samples_per_second": 1.322, "eval_steps_per_second": 0.167, "step": 3600 }, { "epoch": 13.81, "learning_rate": 1.604086845466156e-05, "loss": 0.0001, "step": 3605 }, { "epoch": 13.81, "eval_accuracy": 0.764367816091954, "eval_loss": 1.5650653839111328, "eval_runtime": 131.6911, "eval_samples_per_second": 1.321, "eval_steps_per_second": 0.167, "step": 3605 }, { "epoch": 13.83, "learning_rate": 1.578544061302682e-05, "loss": 0.0407, "step": 3610 }, { "epoch": 13.83, "eval_accuracy": 0.7586206896551724, "eval_loss": 1.5672154426574707, "eval_runtime": 131.4084, "eval_samples_per_second": 1.324, "eval_steps_per_second": 0.167, "step": 3610 }, { "epoch": 13.85, "learning_rate": 1.5530012771392083e-05, "loss": 0.0001, "step": 3615 }, { "epoch": 13.85, "eval_accuracy": 0.7586206896551724, "eval_loss": 1.5693352222442627, "eval_runtime": 131.3899, "eval_samples_per_second": 1.324, "eval_steps_per_second": 0.167, "step": 3615 }, { "epoch": 13.87, "learning_rate": 1.5274584929757343e-05, "loss": 0.0, "step": 3620 }, { "epoch": 13.87, "eval_accuracy": 0.7586206896551724, "eval_loss": 1.5711853504180908, "eval_runtime": 131.5375, "eval_samples_per_second": 1.323, "eval_steps_per_second": 0.167, "step": 3620 }, { "epoch": 13.89, "learning_rate": 1.5019157088122607e-05, "loss": 0.0, "step": 3625 }, { "epoch": 13.89, "eval_accuracy": 0.7586206896551724, "eval_loss": 1.572008490562439, "eval_runtime": 135.5757, "eval_samples_per_second": 1.283, "eval_steps_per_second": 0.162, "step": 3625 }, { "epoch": 13.91, "learning_rate": 1.4763729246487867e-05, "loss": 0.0001, "step": 3630 }, { "epoch": 13.91, "eval_accuracy": 0.7586206896551724, "eval_loss": 1.5722723007202148, "eval_runtime": 134.6601, "eval_samples_per_second": 1.292, "eval_steps_per_second": 0.163, "step": 3630 }, { "epoch": 13.93, "learning_rate": 1.450830140485313e-05, "loss": 0.0002, "step": 3635 }, { "epoch": 13.93, "eval_accuracy": 0.7586206896551724, "eval_loss": 1.5724270343780518, "eval_runtime": 131.7012, "eval_samples_per_second": 1.321, "eval_steps_per_second": 0.167, "step": 3635 }, { "epoch": 13.95, "learning_rate": 1.4252873563218392e-05, "loss": 0.0, "step": 3640 }, { "epoch": 13.95, "eval_accuracy": 0.7528735632183908, "eval_loss": 1.5720359086990356, "eval_runtime": 134.6156, "eval_samples_per_second": 1.293, "eval_steps_per_second": 0.163, "step": 3640 }, { "epoch": 13.97, "learning_rate": 1.3997445721583655e-05, "loss": 0.0, "step": 3645 }, { "epoch": 13.97, "eval_accuracy": 0.7528735632183908, "eval_loss": 1.5716608762741089, "eval_runtime": 131.5241, "eval_samples_per_second": 1.323, "eval_steps_per_second": 0.167, "step": 3645 }, { "epoch": 13.98, "learning_rate": 1.3742017879948915e-05, "loss": 0.0947, "step": 3650 }, { "epoch": 13.98, "eval_accuracy": 0.7528735632183908, "eval_loss": 1.5741726160049438, "eval_runtime": 131.7395, "eval_samples_per_second": 1.321, "eval_steps_per_second": 0.167, "step": 3650 }, { "epoch": 14.0, "learning_rate": 1.3486590038314175e-05, "loss": 0.0042, "step": 3655 }, { "epoch": 14.0, "eval_accuracy": 0.7528735632183908, "eval_loss": 1.571437120437622, "eval_runtime": 131.5082, "eval_samples_per_second": 1.323, "eval_steps_per_second": 0.167, "step": 3655 }, { "epoch": 14.02, "learning_rate": 1.3231162196679438e-05, "loss": 0.0791, "step": 3660 }, { "epoch": 14.02, "eval_accuracy": 0.7471264367816092, "eval_loss": 1.5750482082366943, "eval_runtime": 131.5289, "eval_samples_per_second": 1.323, "eval_steps_per_second": 0.167, "step": 3660 }, { "epoch": 14.04, "learning_rate": 1.29757343550447e-05, "loss": 0.0, "step": 3665 }, { "epoch": 14.04, "eval_accuracy": 0.7471264367816092, "eval_loss": 1.575944185256958, "eval_runtime": 131.463, "eval_samples_per_second": 1.324, "eval_steps_per_second": 0.167, "step": 3665 }, { "epoch": 14.06, "learning_rate": 1.2720306513409963e-05, "loss": 0.0001, "step": 3670 }, { "epoch": 14.06, "eval_accuracy": 0.7471264367816092, "eval_loss": 1.57651948928833, "eval_runtime": 131.6655, "eval_samples_per_second": 1.322, "eval_steps_per_second": 0.167, "step": 3670 }, { "epoch": 14.08, "learning_rate": 1.2464878671775223e-05, "loss": 0.0, "step": 3675 }, { "epoch": 14.08, "eval_accuracy": 0.7471264367816092, "eval_loss": 1.5765440464019775, "eval_runtime": 131.5009, "eval_samples_per_second": 1.323, "eval_steps_per_second": 0.167, "step": 3675 }, { "epoch": 14.1, "learning_rate": 1.2209450830140485e-05, "loss": 0.0001, "step": 3680 }, { "epoch": 14.1, "eval_accuracy": 0.7471264367816092, "eval_loss": 1.576804280281067, "eval_runtime": 131.5344, "eval_samples_per_second": 1.323, "eval_steps_per_second": 0.167, "step": 3680 }, { "epoch": 14.12, "learning_rate": 1.1954022988505748e-05, "loss": 0.0001, "step": 3685 }, { "epoch": 14.12, "eval_accuracy": 0.7471264367816092, "eval_loss": 1.5764987468719482, "eval_runtime": 131.7171, "eval_samples_per_second": 1.321, "eval_steps_per_second": 0.167, "step": 3685 }, { "epoch": 14.14, "learning_rate": 1.169859514687101e-05, "loss": 0.0765, "step": 3690 }, { "epoch": 14.14, "eval_accuracy": 0.7528735632183908, "eval_loss": 1.5836353302001953, "eval_runtime": 131.6805, "eval_samples_per_second": 1.321, "eval_steps_per_second": 0.167, "step": 3690 }, { "epoch": 14.16, "learning_rate": 1.1443167305236272e-05, "loss": 0.0, "step": 3695 }, { "epoch": 14.16, "eval_accuracy": 0.7586206896551724, "eval_loss": 1.5884052515029907, "eval_runtime": 131.778, "eval_samples_per_second": 1.32, "eval_steps_per_second": 0.167, "step": 3695 }, { "epoch": 14.18, "learning_rate": 1.1187739463601533e-05, "loss": 0.0, "step": 3700 }, { "epoch": 14.18, "eval_accuracy": 0.7586206896551724, "eval_loss": 1.5915166139602661, "eval_runtime": 131.777, "eval_samples_per_second": 1.32, "eval_steps_per_second": 0.167, "step": 3700 }, { "epoch": 14.2, "learning_rate": 1.0932311621966795e-05, "loss": 0.0001, "step": 3705 }, { "epoch": 14.2, "eval_accuracy": 0.7586206896551724, "eval_loss": 1.5935451984405518, "eval_runtime": 131.5481, "eval_samples_per_second": 1.323, "eval_steps_per_second": 0.167, "step": 3705 }, { "epoch": 14.21, "learning_rate": 1.0676883780332057e-05, "loss": 0.0001, "step": 3710 }, { "epoch": 14.21, "eval_accuracy": 0.7586206896551724, "eval_loss": 1.5938645601272583, "eval_runtime": 131.6509, "eval_samples_per_second": 1.322, "eval_steps_per_second": 0.167, "step": 3710 }, { "epoch": 14.23, "learning_rate": 1.0421455938697318e-05, "loss": 0.0, "step": 3715 }, { "epoch": 14.23, "eval_accuracy": 0.7586206896551724, "eval_loss": 1.5943220853805542, "eval_runtime": 131.574, "eval_samples_per_second": 1.322, "eval_steps_per_second": 0.167, "step": 3715 }, { "epoch": 14.25, "learning_rate": 1.016602809706258e-05, "loss": 0.0647, "step": 3720 }, { "epoch": 14.25, "eval_accuracy": 0.764367816091954, "eval_loss": 1.5929863452911377, "eval_runtime": 131.7035, "eval_samples_per_second": 1.321, "eval_steps_per_second": 0.167, "step": 3720 }, { "epoch": 14.27, "learning_rate": 9.910600255427842e-06, "loss": 0.0003, "step": 3725 }, { "epoch": 14.27, "eval_accuracy": 0.764367816091954, "eval_loss": 1.5920848846435547, "eval_runtime": 131.5269, "eval_samples_per_second": 1.323, "eval_steps_per_second": 0.167, "step": 3725 }, { "epoch": 14.29, "learning_rate": 9.655172413793103e-06, "loss": 0.0001, "step": 3730 }, { "epoch": 14.29, "eval_accuracy": 0.764367816091954, "eval_loss": 1.5917651653289795, "eval_runtime": 131.5915, "eval_samples_per_second": 1.322, "eval_steps_per_second": 0.167, "step": 3730 }, { "epoch": 14.31, "learning_rate": 9.399744572158365e-06, "loss": 0.0001, "step": 3735 }, { "epoch": 14.31, "eval_accuracy": 0.764367816091954, "eval_loss": 1.5915542840957642, "eval_runtime": 131.612, "eval_samples_per_second": 1.322, "eval_steps_per_second": 0.167, "step": 3735 }, { "epoch": 14.33, "learning_rate": 9.144316730523628e-06, "loss": 0.0002, "step": 3740 }, { "epoch": 14.33, "eval_accuracy": 0.764367816091954, "eval_loss": 1.5907422304153442, "eval_runtime": 131.6774, "eval_samples_per_second": 1.321, "eval_steps_per_second": 0.167, "step": 3740 }, { "epoch": 14.35, "learning_rate": 8.88888888888889e-06, "loss": 0.0075, "step": 3745 }, { "epoch": 14.35, "eval_accuracy": 0.764367816091954, "eval_loss": 1.585798978805542, "eval_runtime": 131.6382, "eval_samples_per_second": 1.322, "eval_steps_per_second": 0.167, "step": 3745 }, { "epoch": 14.37, "learning_rate": 8.633461047254152e-06, "loss": 0.0, "step": 3750 }, { "epoch": 14.37, "eval_accuracy": 0.7586206896551724, "eval_loss": 1.5811045169830322, "eval_runtime": 131.3857, "eval_samples_per_second": 1.324, "eval_steps_per_second": 0.167, "step": 3750 }, { "epoch": 14.39, "learning_rate": 8.378033205619413e-06, "loss": 0.0033, "step": 3755 }, { "epoch": 14.39, "eval_accuracy": 0.7586206896551724, "eval_loss": 1.572999119758606, "eval_runtime": 131.4285, "eval_samples_per_second": 1.324, "eval_steps_per_second": 0.167, "step": 3755 }, { "epoch": 14.41, "learning_rate": 8.122605363984675e-06, "loss": 0.0814, "step": 3760 }, { "epoch": 14.41, "eval_accuracy": 0.7586206896551724, "eval_loss": 1.5697554349899292, "eval_runtime": 131.595, "eval_samples_per_second": 1.322, "eval_steps_per_second": 0.167, "step": 3760 }, { "epoch": 14.43, "learning_rate": 7.867177522349937e-06, "loss": 0.0001, "step": 3765 }, { "epoch": 14.43, "eval_accuracy": 0.7586206896551724, "eval_loss": 1.5690526962280273, "eval_runtime": 131.6676, "eval_samples_per_second": 1.322, "eval_steps_per_second": 0.167, "step": 3765 }, { "epoch": 14.44, "learning_rate": 7.611749680715198e-06, "loss": 0.0002, "step": 3770 }, { "epoch": 14.44, "eval_accuracy": 0.7586206896551724, "eval_loss": 1.5678595304489136, "eval_runtime": 131.4813, "eval_samples_per_second": 1.323, "eval_steps_per_second": 0.167, "step": 3770 }, { "epoch": 14.46, "learning_rate": 7.35632183908046e-06, "loss": 0.0002, "step": 3775 }, { "epoch": 14.46, "eval_accuracy": 0.7586206896551724, "eval_loss": 1.5677728652954102, "eval_runtime": 131.4831, "eval_samples_per_second": 1.323, "eval_steps_per_second": 0.167, "step": 3775 }, { "epoch": 14.48, "learning_rate": 7.100893997445722e-06, "loss": 0.0001, "step": 3780 }, { "epoch": 14.48, "eval_accuracy": 0.7586206896551724, "eval_loss": 1.5681380033493042, "eval_runtime": 131.5915, "eval_samples_per_second": 1.322, "eval_steps_per_second": 0.167, "step": 3780 }, { "epoch": 14.5, "learning_rate": 6.845466155810984e-06, "loss": 0.0001, "step": 3785 }, { "epoch": 14.5, "eval_accuracy": 0.7586206896551724, "eval_loss": 1.567604899406433, "eval_runtime": 131.6719, "eval_samples_per_second": 1.321, "eval_steps_per_second": 0.167, "step": 3785 }, { "epoch": 14.52, "learning_rate": 6.590038314176246e-06, "loss": 0.0058, "step": 3790 }, { "epoch": 14.52, "eval_accuracy": 0.7586206896551724, "eval_loss": 1.564001441001892, "eval_runtime": 135.5216, "eval_samples_per_second": 1.284, "eval_steps_per_second": 0.162, "step": 3790 }, { "epoch": 14.54, "learning_rate": 6.3346104725415075e-06, "loss": 0.0002, "step": 3795 }, { "epoch": 14.54, "eval_accuracy": 0.7528735632183908, "eval_loss": 1.5616205930709839, "eval_runtime": 131.5494, "eval_samples_per_second": 1.323, "eval_steps_per_second": 0.167, "step": 3795 }, { "epoch": 14.56, "learning_rate": 6.079182630906769e-06, "loss": 0.0001, "step": 3800 }, { "epoch": 14.56, "eval_accuracy": 0.7471264367816092, "eval_loss": 1.5597946643829346, "eval_runtime": 131.5269, "eval_samples_per_second": 1.323, "eval_steps_per_second": 0.167, "step": 3800 }, { "epoch": 14.58, "learning_rate": 5.823754789272031e-06, "loss": 0.0001, "step": 3805 }, { "epoch": 14.58, "eval_accuracy": 0.7471264367816092, "eval_loss": 1.559468388557434, "eval_runtime": 131.6538, "eval_samples_per_second": 1.322, "eval_steps_per_second": 0.167, "step": 3805 }, { "epoch": 14.6, "learning_rate": 5.568326947637293e-06, "loss": 0.0001, "step": 3810 }, { "epoch": 14.6, "eval_accuracy": 0.7528735632183908, "eval_loss": 1.5583645105361938, "eval_runtime": 131.7004, "eval_samples_per_second": 1.321, "eval_steps_per_second": 0.167, "step": 3810 }, { "epoch": 14.62, "learning_rate": 5.312899106002554e-06, "loss": 0.0001, "step": 3815 }, { "epoch": 14.62, "eval_accuracy": 0.7528735632183908, "eval_loss": 1.5580310821533203, "eval_runtime": 131.4921, "eval_samples_per_second": 1.323, "eval_steps_per_second": 0.167, "step": 3815 }, { "epoch": 14.64, "learning_rate": 5.057471264367817e-06, "loss": 0.0001, "step": 3820 }, { "epoch": 14.64, "eval_accuracy": 0.7528735632183908, "eval_loss": 1.5573028326034546, "eval_runtime": 131.4684, "eval_samples_per_second": 1.324, "eval_steps_per_second": 0.167, "step": 3820 }, { "epoch": 14.66, "learning_rate": 4.802043422733078e-06, "loss": 0.0001, "step": 3825 }, { "epoch": 14.66, "eval_accuracy": 0.7528735632183908, "eval_loss": 1.5569429397583008, "eval_runtime": 131.5291, "eval_samples_per_second": 1.323, "eval_steps_per_second": 0.167, "step": 3825 }, { "epoch": 14.67, "learning_rate": 4.54661558109834e-06, "loss": 0.0001, "step": 3830 }, { "epoch": 14.67, "eval_accuracy": 0.7528735632183908, "eval_loss": 1.5563690662384033, "eval_runtime": 131.4559, "eval_samples_per_second": 1.324, "eval_steps_per_second": 0.167, "step": 3830 }, { "epoch": 14.69, "learning_rate": 4.291187739463602e-06, "loss": 0.0001, "step": 3835 }, { "epoch": 14.69, "eval_accuracy": 0.7528735632183908, "eval_loss": 1.5566602945327759, "eval_runtime": 131.5184, "eval_samples_per_second": 1.323, "eval_steps_per_second": 0.167, "step": 3835 }, { "epoch": 14.71, "learning_rate": 4.035759897828863e-06, "loss": 0.0001, "step": 3840 }, { "epoch": 14.71, "eval_accuracy": 0.7528735632183908, "eval_loss": 1.5568325519561768, "eval_runtime": 131.496, "eval_samples_per_second": 1.323, "eval_steps_per_second": 0.167, "step": 3840 }, { "epoch": 14.73, "learning_rate": 3.7803320561941254e-06, "loss": 0.0205, "step": 3845 }, { "epoch": 14.73, "eval_accuracy": 0.7528735632183908, "eval_loss": 1.557239055633545, "eval_runtime": 131.3743, "eval_samples_per_second": 1.324, "eval_steps_per_second": 0.167, "step": 3845 }, { "epoch": 14.75, "learning_rate": 3.5249042145593875e-06, "loss": 0.0, "step": 3850 }, { "epoch": 14.75, "eval_accuracy": 0.7528735632183908, "eval_loss": 1.5573699474334717, "eval_runtime": 131.5683, "eval_samples_per_second": 1.323, "eval_steps_per_second": 0.167, "step": 3850 }, { "epoch": 14.77, "learning_rate": 3.269476372924649e-06, "loss": 0.0001, "step": 3855 }, { "epoch": 14.77, "eval_accuracy": 0.7528735632183908, "eval_loss": 1.5578587055206299, "eval_runtime": 134.8968, "eval_samples_per_second": 1.29, "eval_steps_per_second": 0.163, "step": 3855 }, { "epoch": 14.79, "learning_rate": 3.014048531289911e-06, "loss": 0.0, "step": 3860 }, { "epoch": 14.79, "eval_accuracy": 0.7528735632183908, "eval_loss": 1.557448148727417, "eval_runtime": 131.5372, "eval_samples_per_second": 1.323, "eval_steps_per_second": 0.167, "step": 3860 }, { "epoch": 14.81, "learning_rate": 2.7586206896551725e-06, "loss": 0.0002, "step": 3865 }, { "epoch": 14.81, "eval_accuracy": 0.7528735632183908, "eval_loss": 1.5573031902313232, "eval_runtime": 134.5697, "eval_samples_per_second": 1.293, "eval_steps_per_second": 0.163, "step": 3865 }, { "epoch": 14.83, "learning_rate": 2.5031928480204346e-06, "loss": 0.0001, "step": 3870 }, { "epoch": 14.83, "eval_accuracy": 0.7528735632183908, "eval_loss": 1.5574105978012085, "eval_runtime": 131.472, "eval_samples_per_second": 1.323, "eval_steps_per_second": 0.167, "step": 3870 }, { "epoch": 14.85, "learning_rate": 2.2477650063856962e-06, "loss": 0.0, "step": 3875 }, { "epoch": 14.85, "eval_accuracy": 0.7528735632183908, "eval_loss": 1.5576403141021729, "eval_runtime": 131.5008, "eval_samples_per_second": 1.323, "eval_steps_per_second": 0.167, "step": 3875 }, { "epoch": 14.87, "learning_rate": 1.992337164750958e-06, "loss": 0.0002, "step": 3880 }, { "epoch": 14.87, "eval_accuracy": 0.7528735632183908, "eval_loss": 1.5580261945724487, "eval_runtime": 131.6634, "eval_samples_per_second": 1.322, "eval_steps_per_second": 0.167, "step": 3880 }, { "epoch": 14.89, "learning_rate": 1.7369093231162196e-06, "loss": 0.0, "step": 3885 }, { "epoch": 14.89, "eval_accuracy": 0.7528735632183908, "eval_loss": 1.5581773519515991, "eval_runtime": 132.4108, "eval_samples_per_second": 1.314, "eval_steps_per_second": 0.166, "step": 3885 }, { "epoch": 14.9, "learning_rate": 1.4814814814814817e-06, "loss": 0.0001, "step": 3890 }, { "epoch": 14.9, "eval_accuracy": 0.7528735632183908, "eval_loss": 1.5580536127090454, "eval_runtime": 131.3966, "eval_samples_per_second": 1.324, "eval_steps_per_second": 0.167, "step": 3890 }, { "epoch": 14.92, "learning_rate": 1.2260536398467433e-06, "loss": 0.0001, "step": 3895 }, { "epoch": 14.92, "eval_accuracy": 0.7528735632183908, "eval_loss": 1.558103322982788, "eval_runtime": 131.4551, "eval_samples_per_second": 1.324, "eval_steps_per_second": 0.167, "step": 3895 }, { "epoch": 14.94, "learning_rate": 9.706257982120052e-07, "loss": 0.0001, "step": 3900 }, { "epoch": 14.94, "eval_accuracy": 0.7528735632183908, "eval_loss": 1.5582367181777954, "eval_runtime": 131.4681, "eval_samples_per_second": 1.324, "eval_steps_per_second": 0.167, "step": 3900 }, { "epoch": 14.96, "learning_rate": 7.15197956577267e-07, "loss": 0.0, "step": 3905 }, { "epoch": 14.96, "eval_accuracy": 0.7528735632183908, "eval_loss": 1.5579785108566284, "eval_runtime": 131.5649, "eval_samples_per_second": 1.323, "eval_steps_per_second": 0.167, "step": 3905 }, { "epoch": 14.98, "learning_rate": 4.5977011494252875e-07, "loss": 0.0969, "step": 3910 }, { "epoch": 14.98, "eval_accuracy": 0.7528735632183908, "eval_loss": 1.5584545135498047, "eval_runtime": 134.4637, "eval_samples_per_second": 1.294, "eval_steps_per_second": 0.164, "step": 3910 }, { "epoch": 15.0, "learning_rate": 2.0434227330779057e-07, "loss": 0.0, "step": 3915 }, { "epoch": 15.0, "eval_accuracy": 0.7528735632183908, "eval_loss": 1.5588274002075195, "eval_runtime": 131.5989, "eval_samples_per_second": 1.322, "eval_steps_per_second": 0.167, "step": 3915 }, { "epoch": 15.0, "step": 3915, "total_flos": 1.592360968692695e+18, "train_loss": 0.45436480764952814, "train_runtime": 152136.0702, "train_samples_per_second": 0.154, "train_steps_per_second": 0.026 } ], "logging_steps": 5, "max_steps": 3915, "num_train_epochs": 15, "save_steps": 5, "total_flos": 1.592360968692695e+18, "trial_name": null, "trial_params": null }