{ "best_metric": 2.7100777626037598, "best_model_checkpoint": "./model_tweets_2020_Q2/checkpoint-192000", "epoch": 19.569471624266146, "eval_steps": 8000, "global_step": 2400000, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.07, "eval_loss": 2.977957248687744, "eval_runtime": 125.5213, "eval_samples_per_second": 822.769, "eval_steps_per_second": 51.426, "step": 8000 }, { "epoch": 0.13, "learning_rate": 9.939131159843243e-06, "loss": 3.1296, "step": 16000 }, { "epoch": 0.13, "eval_loss": 2.894831418991089, "eval_runtime": 126.0129, "eval_samples_per_second": 819.559, "eval_steps_per_second": 51.225, "step": 16000 }, { "epoch": 0.2, "eval_loss": 2.8589611053466797, "eval_runtime": 125.8909, "eval_samples_per_second": 820.353, "eval_steps_per_second": 51.275, "step": 24000 }, { "epoch": 0.26, "learning_rate": 9.872425581589261e-06, "loss": 2.9018, "step": 32000 }, { "epoch": 0.26, "eval_loss": 2.8033480644226074, "eval_runtime": 125.7264, "eval_samples_per_second": 821.427, "eval_steps_per_second": 51.342, "step": 32000 }, { "epoch": 0.33, "eval_loss": 2.7938032150268555, "eval_runtime": 125.7192, "eval_samples_per_second": 821.474, "eval_steps_per_second": 51.345, "step": 40000 }, { "epoch": 0.39, "learning_rate": 9.80572000333528e-06, "loss": 2.8331, "step": 48000 }, { "epoch": 0.39, "eval_loss": 2.7694976329803467, "eval_runtime": 127.0405, "eval_samples_per_second": 812.93, "eval_steps_per_second": 50.811, "step": 48000 }, { "epoch": 0.46, "eval_loss": 2.7614457607269287, "eval_runtime": 125.7185, "eval_samples_per_second": 821.478, "eval_steps_per_second": 51.345, "step": 56000 }, { "epoch": 0.52, "learning_rate": 9.739014425081299e-06, "loss": 2.7723, "step": 64000 }, { "epoch": 0.52, "eval_loss": 2.7416625022888184, "eval_runtime": 126.1624, "eval_samples_per_second": 818.588, "eval_steps_per_second": 51.164, "step": 64000 }, { "epoch": 0.59, "eval_loss": 2.7248806953430176, "eval_runtime": 126.0454, "eval_samples_per_second": 819.348, "eval_steps_per_second": 51.212, "step": 72000 }, { "epoch": 0.65, "learning_rate": 9.672308846827316e-06, "loss": 2.75, "step": 80000 }, { "epoch": 0.65, "eval_loss": 2.7202229499816895, "eval_runtime": 126.948, "eval_samples_per_second": 813.522, "eval_steps_per_second": 50.848, "step": 80000 }, { "epoch": 0.72, "eval_loss": 2.7112038135528564, "eval_runtime": 126.8524, "eval_samples_per_second": 814.135, "eval_steps_per_second": 50.886, "step": 88000 }, { "epoch": 0.78, "learning_rate": 9.605603268573334e-06, "loss": 2.735, "step": 96000 }, { "epoch": 0.78, "eval_loss": 2.7228710651397705, "eval_runtime": 126.981, "eval_samples_per_second": 813.311, "eval_steps_per_second": 50.834, "step": 96000 }, { "epoch": 0.85, "eval_loss": 2.7370951175689697, "eval_runtime": 126.6893, "eval_samples_per_second": 815.183, "eval_steps_per_second": 50.951, "step": 104000 }, { "epoch": 0.91, "learning_rate": 9.538897690319354e-06, "loss": 2.7137, "step": 112000 }, { "epoch": 0.91, "eval_loss": 2.7059037685394287, "eval_runtime": 126.3306, "eval_samples_per_second": 817.498, "eval_steps_per_second": 51.096, "step": 112000 }, { "epoch": 0.98, "eval_loss": 2.7120730876922607, "eval_runtime": 126.2744, "eval_samples_per_second": 817.862, "eval_steps_per_second": 51.119, "step": 120000 }, { "epoch": 1.04, "learning_rate": 9.472192112065373e-06, "loss": 2.7155, "step": 128000 }, { "epoch": 1.04, "eval_loss": 2.7248668670654297, "eval_runtime": 126.8126, "eval_samples_per_second": 814.391, "eval_steps_per_second": 50.902, "step": 128000 }, { "epoch": 1.11, "eval_loss": 2.7130985260009766, "eval_runtime": 126.6262, "eval_samples_per_second": 815.589, "eval_steps_per_second": 50.977, "step": 136000 }, { "epoch": 1.17, "learning_rate": 9.405486533811392e-06, "loss": 2.7152, "step": 144000 }, { "epoch": 1.17, "eval_loss": 2.6999881267547607, "eval_runtime": 126.4279, "eval_samples_per_second": 816.868, "eval_steps_per_second": 51.057, "step": 144000 }, { "epoch": 1.24, "eval_loss": 2.703012704849243, "eval_runtime": 126.2932, "eval_samples_per_second": 817.74, "eval_steps_per_second": 51.111, "step": 152000 }, { "epoch": 1.3, "learning_rate": 9.338780955557409e-06, "loss": 2.7151, "step": 160000 }, { "epoch": 1.3, "eval_loss": 2.721385955810547, "eval_runtime": 126.566, "eval_samples_per_second": 815.977, "eval_steps_per_second": 51.001, "step": 160000 }, { "epoch": 1.37, "eval_loss": 2.707641839981079, "eval_runtime": 126.5896, "eval_samples_per_second": 815.826, "eval_steps_per_second": 50.992, "step": 168000 }, { "epoch": 1.44, "learning_rate": 9.272075377303427e-06, "loss": 2.7166, "step": 176000 }, { "epoch": 1.44, "eval_loss": 2.7106387615203857, "eval_runtime": 126.9356, "eval_samples_per_second": 813.602, "eval_steps_per_second": 50.853, "step": 176000 }, { "epoch": 1.5, "eval_loss": 2.719717025756836, "eval_runtime": 127.5317, "eval_samples_per_second": 809.798, "eval_steps_per_second": 50.615, "step": 184000 }, { "epoch": 1.57, "learning_rate": 9.205369799049446e-06, "loss": 2.7144, "step": 192000 }, { "epoch": 1.57, "eval_loss": 2.7100777626037598, "eval_runtime": 126.3318, "eval_samples_per_second": 817.49, "eval_steps_per_second": 51.096, "step": 192000 }, { "epoch": 1.63, "eval_loss": 2.723472833633423, "eval_runtime": 127.1568, "eval_samples_per_second": 812.186, "eval_steps_per_second": 50.764, "step": 200000 }, { "epoch": 1.7, "learning_rate": 9.138664220795464e-06, "loss": 2.7179, "step": 208000 }, { "epoch": 1.7, "eval_loss": 2.706564426422119, "eval_runtime": 127.089, "eval_samples_per_second": 812.62, "eval_steps_per_second": 50.791, "step": 208000 }, { "epoch": 1.76, "eval_loss": 2.7282984256744385, "eval_runtime": 127.4927, "eval_samples_per_second": 810.047, "eval_steps_per_second": 50.63, "step": 216000 }, { "epoch": 1.83, "learning_rate": 9.071958642541483e-06, "loss": 2.7231, "step": 224000 }, { "epoch": 1.83, "eval_loss": 2.7203216552734375, "eval_runtime": 127.4298, "eval_samples_per_second": 810.446, "eval_steps_per_second": 50.655, "step": 224000 }, { "epoch": 1.89, "eval_loss": 2.711085319519043, "eval_runtime": 126.4739, "eval_samples_per_second": 816.571, "eval_steps_per_second": 51.038, "step": 232000 }, { "epoch": 1.96, "learning_rate": 9.005253064287502e-06, "loss": 2.7284, "step": 240000 }, { "epoch": 1.96, "eval_loss": 2.721714973449707, "eval_runtime": 126.401, "eval_samples_per_second": 817.043, "eval_steps_per_second": 51.068, "step": 240000 }, { "epoch": 2.02, "eval_loss": 2.725090265274048, "eval_runtime": 127.0199, "eval_samples_per_second": 813.061, "eval_steps_per_second": 50.819, "step": 248000 }, { "epoch": 2.09, "learning_rate": 8.93854748603352e-06, "loss": 2.7242, "step": 256000 }, { "epoch": 2.09, "eval_loss": 2.718090057373047, "eval_runtime": 127.9402, "eval_samples_per_second": 807.213, "eval_steps_per_second": 50.453, "step": 256000 }, { "epoch": 2.15, "eval_loss": 2.723750591278076, "eval_runtime": 127.1287, "eval_samples_per_second": 812.366, "eval_steps_per_second": 50.775, "step": 264000 }, { "epoch": 2.22, "learning_rate": 8.871841907779539e-06, "loss": 2.7171, "step": 272000 }, { "epoch": 2.22, "eval_loss": 2.748772144317627, "eval_runtime": 128.6406, "eval_samples_per_second": 802.818, "eval_steps_per_second": 50.179, "step": 272000 }, { "epoch": 2.28, "eval_loss": 2.731541633605957, "eval_runtime": 127.5368, "eval_samples_per_second": 809.766, "eval_steps_per_second": 50.613, "step": 280000 }, { "epoch": 2.35, "learning_rate": 8.805136329525557e-06, "loss": 2.7312, "step": 288000 }, { "epoch": 2.35, "eval_loss": 2.746854305267334, "eval_runtime": 127.6128, "eval_samples_per_second": 809.284, "eval_steps_per_second": 50.583, "step": 288000 }, { "epoch": 2.41, "eval_loss": 2.7363078594207764, "eval_runtime": 127.9259, "eval_samples_per_second": 807.303, "eval_steps_per_second": 50.459, "step": 296000 }, { "epoch": 2.48, "learning_rate": 8.738430751271576e-06, "loss": 2.7386, "step": 304000 }, { "epoch": 2.48, "eval_loss": 2.7398250102996826, "eval_runtime": 127.3013, "eval_samples_per_second": 811.264, "eval_steps_per_second": 50.706, "step": 304000 }, { "epoch": 2.54, "eval_loss": 2.747743844985962, "eval_runtime": 127.9865, "eval_samples_per_second": 806.921, "eval_steps_per_second": 50.435, "step": 312000 }, { "epoch": 2.61, "learning_rate": 8.671725173017595e-06, "loss": 2.7457, "step": 320000 }, { "epoch": 2.61, "eval_loss": 2.753558397293091, "eval_runtime": 128.9208, "eval_samples_per_second": 801.073, "eval_steps_per_second": 50.069, "step": 320000 }, { "epoch": 2.67, "eval_loss": 2.748337984085083, "eval_runtime": 128.758, "eval_samples_per_second": 802.086, "eval_steps_per_second": 50.133, "step": 328000 }, { "epoch": 2.74, "learning_rate": 8.605019594763613e-06, "loss": 2.7496, "step": 336000 }, { "epoch": 2.74, "eval_loss": 2.752856969833374, "eval_runtime": 128.3684, "eval_samples_per_second": 804.521, "eval_steps_per_second": 50.285, "step": 336000 }, { "epoch": 2.8, "eval_loss": 2.749178171157837, "eval_runtime": 129.8422, "eval_samples_per_second": 795.388, "eval_steps_per_second": 49.714, "step": 344000 }, { "epoch": 2.87, "learning_rate": 8.538314016509632e-06, "loss": 2.7521, "step": 352000 }, { "epoch": 2.87, "eval_loss": 2.761200189590454, "eval_runtime": 127.6309, "eval_samples_per_second": 809.169, "eval_steps_per_second": 50.576, "step": 352000 }, { "epoch": 2.94, "eval_loss": 2.7700963020324707, "eval_runtime": 128.3946, "eval_samples_per_second": 804.356, "eval_steps_per_second": 50.275, "step": 360000 }, { "epoch": 3.0, "learning_rate": 8.471608438255649e-06, "loss": 2.7649, "step": 368000 }, { "epoch": 3.0, "eval_loss": 2.7705161571502686, "eval_runtime": 128.8577, "eval_samples_per_second": 801.466, "eval_steps_per_second": 50.094, "step": 368000 }, { "epoch": 3.07, "eval_loss": 2.782761335372925, "eval_runtime": 129.17, "eval_samples_per_second": 799.528, "eval_steps_per_second": 49.973, "step": 376000 }, { "epoch": 3.13, "learning_rate": 8.404902860001667e-06, "loss": 2.7516, "step": 384000 }, { "epoch": 3.13, "eval_loss": 2.7680482864379883, "eval_runtime": 128.9028, "eval_samples_per_second": 801.185, "eval_steps_per_second": 50.077, "step": 384000 }, { "epoch": 3.2, "eval_loss": 2.784294605255127, "eval_runtime": 128.4737, "eval_samples_per_second": 803.861, "eval_steps_per_second": 50.244, "step": 392000 }, { "epoch": 3.26, "learning_rate": 8.338197281747686e-06, "loss": 2.762, "step": 400000 }, { "epoch": 3.26, "eval_loss": 2.7915961742401123, "eval_runtime": 128.2651, "eval_samples_per_second": 805.168, "eval_steps_per_second": 50.325, "step": 400000 }, { "epoch": 3.33, "eval_loss": 2.7691826820373535, "eval_runtime": 128.6705, "eval_samples_per_second": 802.632, "eval_steps_per_second": 50.167, "step": 408000 }, { "epoch": 3.39, "learning_rate": 8.271491703493705e-06, "loss": 2.7789, "step": 416000 }, { "epoch": 3.39, "eval_loss": 2.783369302749634, "eval_runtime": 128.6603, "eval_samples_per_second": 802.695, "eval_steps_per_second": 50.171, "step": 416000 }, { "epoch": 3.46, "eval_loss": 2.7788405418395996, "eval_runtime": 129.7209, "eval_samples_per_second": 796.132, "eval_steps_per_second": 49.761, "step": 424000 }, { "epoch": 3.52, "learning_rate": 8.204786125239725e-06, "loss": 2.7879, "step": 432000 }, { "epoch": 3.52, "eval_loss": 2.803699493408203, "eval_runtime": 128.2575, "eval_samples_per_second": 805.216, "eval_steps_per_second": 50.328, "step": 432000 }, { "epoch": 3.59, "eval_loss": 2.791905403137207, "eval_runtime": 129.4159, "eval_samples_per_second": 798.009, "eval_steps_per_second": 49.878, "step": 440000 }, { "epoch": 3.65, "learning_rate": 8.138080546985743e-06, "loss": 2.7853, "step": 448000 }, { "epoch": 3.65, "eval_loss": 2.8077127933502197, "eval_runtime": 127.9753, "eval_samples_per_second": 806.992, "eval_steps_per_second": 50.439, "step": 448000 }, { "epoch": 3.72, "eval_loss": 2.7903032302856445, "eval_runtime": 128.9005, "eval_samples_per_second": 801.2, "eval_steps_per_second": 50.077, "step": 456000 }, { "epoch": 3.78, "learning_rate": 8.07137496873176e-06, "loss": 2.7976, "step": 464000 }, { "epoch": 3.78, "eval_loss": 2.810896158218384, "eval_runtime": 129.0626, "eval_samples_per_second": 800.193, "eval_steps_per_second": 50.014, "step": 464000 }, { "epoch": 3.85, "eval_loss": 2.795713424682617, "eval_runtime": 128.0638, "eval_samples_per_second": 806.434, "eval_steps_per_second": 50.405, "step": 472000 }, { "epoch": 3.91, "learning_rate": 8.004669390477779e-06, "loss": 2.789, "step": 480000 }, { "epoch": 3.91, "eval_loss": 2.8023178577423096, "eval_runtime": 128.1962, "eval_samples_per_second": 805.601, "eval_steps_per_second": 50.353, "step": 480000 }, { "epoch": 3.98, "eval_loss": 2.8125839233398438, "eval_runtime": 128.7992, "eval_samples_per_second": 801.83, "eval_steps_per_second": 50.117, "step": 488000 }, { "epoch": 4.04, "learning_rate": 7.937963812223798e-06, "loss": 2.8089, "step": 496000 }, { "epoch": 4.04, "eval_loss": 2.815424919128418, "eval_runtime": 128.7985, "eval_samples_per_second": 801.834, "eval_steps_per_second": 50.117, "step": 496000 }, { "epoch": 4.11, "eval_loss": 2.8122923374176025, "eval_runtime": 127.4092, "eval_samples_per_second": 810.577, "eval_steps_per_second": 50.664, "step": 504000 }, { "epoch": 4.17, "learning_rate": 7.871258233969816e-06, "loss": 2.7915, "step": 512000 }, { "epoch": 4.17, "eval_loss": 2.8145976066589355, "eval_runtime": 128.9266, "eval_samples_per_second": 801.037, "eval_steps_per_second": 50.067, "step": 512000 }, { "epoch": 4.24, "eval_loss": 2.8249683380126953, "eval_runtime": 129.1348, "eval_samples_per_second": 799.746, "eval_steps_per_second": 49.987, "step": 520000 }, { "epoch": 4.31, "learning_rate": 7.804552655715835e-06, "loss": 2.8094, "step": 528000 }, { "epoch": 4.31, "eval_loss": 2.820560932159424, "eval_runtime": 129.6096, "eval_samples_per_second": 796.816, "eval_steps_per_second": 49.803, "step": 528000 }, { "epoch": 4.37, "eval_loss": 2.818159341812134, "eval_runtime": 128.5096, "eval_samples_per_second": 803.637, "eval_steps_per_second": 50.23, "step": 536000 }, { "epoch": 4.44, "learning_rate": 7.737847077461853e-06, "loss": 2.8196, "step": 544000 }, { "epoch": 4.44, "eval_loss": 2.8351361751556396, "eval_runtime": 129.1287, "eval_samples_per_second": 799.783, "eval_steps_per_second": 49.989, "step": 544000 }, { "epoch": 4.5, "eval_loss": 2.839430570602417, "eval_runtime": 129.5203, "eval_samples_per_second": 797.365, "eval_steps_per_second": 49.838, "step": 552000 }, { "epoch": 4.57, "learning_rate": 7.671141499207872e-06, "loss": 2.8316, "step": 560000 }, { "epoch": 4.57, "eval_loss": 2.8396623134613037, "eval_runtime": 128.6713, "eval_samples_per_second": 802.627, "eval_steps_per_second": 50.167, "step": 560000 }, { "epoch": 4.63, "eval_loss": 2.8402562141418457, "eval_runtime": 128.654, "eval_samples_per_second": 802.735, "eval_steps_per_second": 50.173, "step": 568000 }, { "epoch": 4.7, "learning_rate": 7.604435920953891e-06, "loss": 2.8444, "step": 576000 }, { "epoch": 4.7, "eval_loss": 2.8350980281829834, "eval_runtime": 129.3424, "eval_samples_per_second": 798.462, "eval_steps_per_second": 49.906, "step": 576000 }, { "epoch": 4.76, "eval_loss": 2.8574254512786865, "eval_runtime": 129.6206, "eval_samples_per_second": 796.748, "eval_steps_per_second": 49.799, "step": 584000 }, { "epoch": 4.83, "learning_rate": 7.537730342699909e-06, "loss": 2.833, "step": 592000 }, { "epoch": 4.83, "eval_loss": 2.86171293258667, "eval_runtime": 129.2684, "eval_samples_per_second": 798.919, "eval_steps_per_second": 49.935, "step": 592000 }, { "epoch": 4.89, "eval_loss": 2.857750654220581, "eval_runtime": 128.5027, "eval_samples_per_second": 803.679, "eval_steps_per_second": 50.232, "step": 600000 }, { "epoch": 4.96, "learning_rate": 7.471024764445928e-06, "loss": 2.839, "step": 608000 }, { "epoch": 4.96, "eval_loss": 2.8577184677124023, "eval_runtime": 128.7081, "eval_samples_per_second": 802.397, "eval_steps_per_second": 50.152, "step": 608000 }, { "epoch": 5.02, "eval_loss": 2.8726649284362793, "eval_runtime": 128.6474, "eval_samples_per_second": 802.776, "eval_steps_per_second": 50.176, "step": 616000 }, { "epoch": 5.09, "learning_rate": 7.4043191861919465e-06, "loss": 2.8427, "step": 624000 }, { "epoch": 5.09, "eval_loss": 2.858550786972046, "eval_runtime": 129.0947, "eval_samples_per_second": 799.994, "eval_steps_per_second": 50.002, "step": 624000 }, { "epoch": 5.15, "eval_loss": 2.880849599838257, "eval_runtime": 128.221, "eval_samples_per_second": 805.445, "eval_steps_per_second": 50.343, "step": 632000 }, { "epoch": 5.22, "learning_rate": 7.337613607937964e-06, "loss": 2.8599, "step": 640000 }, { "epoch": 5.22, "eval_loss": 2.8959789276123047, "eval_runtime": 129.9831, "eval_samples_per_second": 794.527, "eval_steps_per_second": 49.66, "step": 640000 }, { "epoch": 5.28, "eval_loss": 2.8883421421051025, "eval_runtime": 129.4941, "eval_samples_per_second": 797.527, "eval_steps_per_second": 49.848, "step": 648000 }, { "epoch": 5.35, "learning_rate": 7.270908029683983e-06, "loss": 2.8694, "step": 656000 }, { "epoch": 5.35, "eval_loss": 2.8884825706481934, "eval_runtime": 129.3172, "eval_samples_per_second": 798.618, "eval_steps_per_second": 49.916, "step": 656000 }, { "epoch": 5.41, "eval_loss": 2.887291431427002, "eval_runtime": 129.1298, "eval_samples_per_second": 799.777, "eval_steps_per_second": 49.988, "step": 664000 }, { "epoch": 5.48, "learning_rate": 7.2042024514300015e-06, "loss": 2.8626, "step": 672000 }, { "epoch": 5.48, "eval_loss": 2.8929550647735596, "eval_runtime": 129.4886, "eval_samples_per_second": 797.56, "eval_steps_per_second": 49.85, "step": 672000 }, { "epoch": 5.54, "eval_loss": 2.8987772464752197, "eval_runtime": 129.8683, "eval_samples_per_second": 795.229, "eval_steps_per_second": 49.704, "step": 680000 }, { "epoch": 5.61, "learning_rate": 7.13749687317602e-06, "loss": 2.8921, "step": 688000 }, { "epoch": 5.61, "eval_loss": 2.9117259979248047, "eval_runtime": 128.3205, "eval_samples_per_second": 804.821, "eval_steps_per_second": 50.304, "step": 688000 }, { "epoch": 5.68, "eval_loss": 2.912231206893921, "eval_runtime": 128.7871, "eval_samples_per_second": 801.905, "eval_steps_per_second": 50.121, "step": 696000 }, { "epoch": 5.74, "learning_rate": 7.070791294922038e-06, "loss": 2.8884, "step": 704000 }, { "epoch": 5.74, "eval_loss": 2.900118827819824, "eval_runtime": 130.1834, "eval_samples_per_second": 793.304, "eval_steps_per_second": 49.584, "step": 704000 }, { "epoch": 5.81, "eval_loss": 2.9093644618988037, "eval_runtime": 129.4918, "eval_samples_per_second": 797.541, "eval_steps_per_second": 49.849, "step": 712000 }, { "epoch": 5.87, "learning_rate": 7.0040857166680564e-06, "loss": 2.8974, "step": 720000 }, { "epoch": 5.87, "eval_loss": 2.9110264778137207, "eval_runtime": 129.9051, "eval_samples_per_second": 795.003, "eval_steps_per_second": 49.69, "step": 720000 }, { "epoch": 5.94, "eval_loss": 2.9044594764709473, "eval_runtime": 129.2324, "eval_samples_per_second": 799.141, "eval_steps_per_second": 49.949, "step": 728000 }, { "epoch": 6.0, "learning_rate": 6.937380138414076e-06, "loss": 2.903, "step": 736000 }, { "epoch": 6.0, "eval_loss": 2.933678388595581, "eval_runtime": 130.3644, "eval_samples_per_second": 792.202, "eval_steps_per_second": 49.515, "step": 736000 }, { "epoch": 6.07, "eval_loss": 2.931581735610962, "eval_runtime": 128.3976, "eval_samples_per_second": 804.337, "eval_steps_per_second": 50.274, "step": 744000 }, { "epoch": 6.13, "learning_rate": 6.8706745601600945e-06, "loss": 2.9057, "step": 752000 }, { "epoch": 6.13, "eval_loss": 2.944746971130371, "eval_runtime": 128.9912, "eval_samples_per_second": 800.636, "eval_steps_per_second": 50.042, "step": 752000 }, { "epoch": 6.2, "eval_loss": 2.936281681060791, "eval_runtime": 129.9533, "eval_samples_per_second": 794.709, "eval_steps_per_second": 49.672, "step": 760000 }, { "epoch": 6.26, "learning_rate": 6.803968981906113e-06, "loss": 2.9146, "step": 768000 }, { "epoch": 6.26, "eval_loss": 2.943751096725464, "eval_runtime": 129.9494, "eval_samples_per_second": 794.732, "eval_steps_per_second": 49.673, "step": 768000 }, { "epoch": 6.33, "eval_loss": 2.9474806785583496, "eval_runtime": 130.0993, "eval_samples_per_second": 793.817, "eval_steps_per_second": 49.616, "step": 776000 }, { "epoch": 6.39, "learning_rate": 6.737263403652131e-06, "loss": 2.9221, "step": 784000 }, { "epoch": 6.39, "eval_loss": 2.9394171237945557, "eval_runtime": 129.1928, "eval_samples_per_second": 799.387, "eval_steps_per_second": 49.964, "step": 784000 }, { "epoch": 6.46, "eval_loss": 2.937087297439575, "eval_runtime": 129.9118, "eval_samples_per_second": 794.963, "eval_steps_per_second": 49.688, "step": 792000 }, { "epoch": 6.52, "learning_rate": 6.6705578253981495e-06, "loss": 2.9316, "step": 800000 }, { "epoch": 6.52, "eval_loss": 2.949429512023926, "eval_runtime": 129.8602, "eval_samples_per_second": 795.278, "eval_steps_per_second": 49.707, "step": 800000 }, { "epoch": 6.59, "eval_loss": 2.9727399349212646, "eval_runtime": 130.9441, "eval_samples_per_second": 788.695, "eval_steps_per_second": 49.296, "step": 808000 }, { "epoch": 6.65, "learning_rate": 6.603852247144168e-06, "loss": 2.9421, "step": 816000 }, { "epoch": 6.65, "eval_loss": 2.9758830070495605, "eval_runtime": 129.8861, "eval_samples_per_second": 795.12, "eval_steps_per_second": 49.697, "step": 816000 }, { "epoch": 6.72, "eval_loss": 2.966480016708374, "eval_runtime": 129.44, "eval_samples_per_second": 797.86, "eval_steps_per_second": 49.869, "step": 824000 }, { "epoch": 6.78, "learning_rate": 6.537146668890187e-06, "loss": 2.9538, "step": 832000 }, { "epoch": 6.78, "eval_loss": 2.9650251865386963, "eval_runtime": 129.4919, "eval_samples_per_second": 797.54, "eval_steps_per_second": 49.849, "step": 832000 }, { "epoch": 6.85, "eval_loss": 2.976144313812256, "eval_runtime": 129.8294, "eval_samples_per_second": 795.467, "eval_steps_per_second": 49.719, "step": 840000 }, { "epoch": 6.91, "learning_rate": 6.4704410906362044e-06, "loss": 2.9594, "step": 848000 }, { "epoch": 6.91, "eval_loss": 2.990086317062378, "eval_runtime": 129.827, "eval_samples_per_second": 795.482, "eval_steps_per_second": 49.72, "step": 848000 }, { "epoch": 6.98, "eval_loss": 2.973181962966919, "eval_runtime": 131.5126, "eval_samples_per_second": 785.286, "eval_steps_per_second": 49.083, "step": 856000 }, { "epoch": 7.05, "learning_rate": 6.403735512382223e-06, "loss": 2.9564, "step": 864000 }, { "epoch": 7.05, "eval_loss": 2.9896528720855713, "eval_runtime": 129.878, "eval_samples_per_second": 795.169, "eval_steps_per_second": 49.7, "step": 864000 }, { "epoch": 7.11, "eval_loss": 2.980059862136841, "eval_runtime": 129.5351, "eval_samples_per_second": 797.274, "eval_steps_per_second": 49.832, "step": 872000 }, { "epoch": 7.18, "learning_rate": 6.337029934128242e-06, "loss": 2.9561, "step": 880000 }, { "epoch": 7.18, "eval_loss": 2.983869791030884, "eval_runtime": 130.0357, "eval_samples_per_second": 794.205, "eval_steps_per_second": 49.64, "step": 880000 }, { "epoch": 7.24, "eval_loss": 2.9887585639953613, "eval_runtime": 130.015, "eval_samples_per_second": 794.331, "eval_steps_per_second": 49.648, "step": 888000 }, { "epoch": 7.31, "learning_rate": 6.270324355874261e-06, "loss": 2.9669, "step": 896000 }, { "epoch": 7.31, "eval_loss": 2.99999737739563, "eval_runtime": 130.6345, "eval_samples_per_second": 790.564, "eval_steps_per_second": 49.413, "step": 896000 }, { "epoch": 7.37, "eval_loss": 2.9786183834075928, "eval_runtime": 129.9739, "eval_samples_per_second": 794.582, "eval_steps_per_second": 49.664, "step": 904000 }, { "epoch": 7.44, "learning_rate": 6.20361877762028e-06, "loss": 2.9649, "step": 912000 }, { "epoch": 7.44, "eval_loss": 2.994581460952759, "eval_runtime": 131.0156, "eval_samples_per_second": 788.265, "eval_steps_per_second": 49.269, "step": 912000 }, { "epoch": 7.5, "eval_loss": 3.0002031326293945, "eval_runtime": 131.7355, "eval_samples_per_second": 783.957, "eval_steps_per_second": 49.0, "step": 920000 }, { "epoch": 7.57, "learning_rate": 6.1369131993662975e-06, "loss": 2.9665, "step": 928000 }, { "epoch": 7.57, "eval_loss": 2.9960474967956543, "eval_runtime": 131.6559, "eval_samples_per_second": 784.431, "eval_steps_per_second": 49.029, "step": 928000 }, { "epoch": 7.63, "eval_loss": 3.0067989826202393, "eval_runtime": 131.8152, "eval_samples_per_second": 783.483, "eval_steps_per_second": 48.97, "step": 936000 }, { "epoch": 7.7, "learning_rate": 6.070207621112316e-06, "loss": 2.9708, "step": 944000 }, { "epoch": 7.7, "eval_loss": 2.993788242340088, "eval_runtime": 130.3799, "eval_samples_per_second": 792.108, "eval_steps_per_second": 49.509, "step": 944000 }, { "epoch": 7.76, "eval_loss": 3.0126230716705322, "eval_runtime": 130.4447, "eval_samples_per_second": 791.715, "eval_steps_per_second": 49.485, "step": 952000 }, { "epoch": 7.83, "learning_rate": 6.003502042858335e-06, "loss": 2.981, "step": 960000 }, { "epoch": 7.83, "eval_loss": 2.9959194660186768, "eval_runtime": 132.0738, "eval_samples_per_second": 781.949, "eval_steps_per_second": 48.874, "step": 960000 }, { "epoch": 7.89, "eval_loss": 2.995976448059082, "eval_runtime": 130.9412, "eval_samples_per_second": 788.713, "eval_steps_per_second": 49.297, "step": 968000 }, { "epoch": 7.96, "learning_rate": 5.936796464604353e-06, "loss": 2.9805, "step": 976000 }, { "epoch": 7.96, "eval_loss": 2.991947889328003, "eval_runtime": 130.0819, "eval_samples_per_second": 793.923, "eval_steps_per_second": 49.623, "step": 976000 }, { "epoch": 8.02, "eval_loss": 3.0058255195617676, "eval_runtime": 130.7007, "eval_samples_per_second": 790.164, "eval_steps_per_second": 49.388, "step": 984000 }, { "epoch": 8.09, "learning_rate": 5.870090886350371e-06, "loss": 2.9705, "step": 992000 }, { "epoch": 8.09, "eval_loss": 3.0232017040252686, "eval_runtime": 129.9163, "eval_samples_per_second": 794.935, "eval_steps_per_second": 49.686, "step": 992000 }, { "epoch": 8.15, "eval_loss": 3.0046939849853516, "eval_runtime": 130.7903, "eval_samples_per_second": 789.623, "eval_steps_per_second": 49.354, "step": 1000000 }, { "epoch": 8.22, "learning_rate": 5.80338530809639e-06, "loss": 2.9715, "step": 1008000 }, { "epoch": 8.22, "eval_loss": 3.0068600177764893, "eval_runtime": 131.6119, "eval_samples_per_second": 784.693, "eval_steps_per_second": 49.046, "step": 1008000 }, { "epoch": 8.28, "eval_loss": 3.0018742084503174, "eval_runtime": 131.7567, "eval_samples_per_second": 783.831, "eval_steps_per_second": 48.992, "step": 1016000 }, { "epoch": 8.35, "learning_rate": 5.736679729842408e-06, "loss": 2.9695, "step": 1024000 }, { "epoch": 8.35, "eval_loss": 3.021596670150757, "eval_runtime": 131.2334, "eval_samples_per_second": 786.956, "eval_steps_per_second": 49.187, "step": 1024000 }, { "epoch": 8.41, "eval_loss": 3.0219063758850098, "eval_runtime": 131.6228, "eval_samples_per_second": 784.629, "eval_steps_per_second": 49.042, "step": 1032000 }, { "epoch": 8.48, "learning_rate": 5.669974151588427e-06, "loss": 2.9762, "step": 1040000 }, { "epoch": 8.48, "eval_loss": 3.018242597579956, "eval_runtime": 131.898, "eval_samples_per_second": 782.991, "eval_steps_per_second": 48.939, "step": 1040000 }, { "epoch": 8.55, "eval_loss": 3.0332210063934326, "eval_runtime": 132.3771, "eval_samples_per_second": 780.158, "eval_steps_per_second": 48.762, "step": 1048000 }, { "epoch": 8.61, "learning_rate": 5.603268573334446e-06, "loss": 2.9786, "step": 1056000 }, { "epoch": 8.61, "eval_loss": 3.001666307449341, "eval_runtime": 131.4368, "eval_samples_per_second": 785.739, "eval_steps_per_second": 49.111, "step": 1056000 }, { "epoch": 8.68, "eval_loss": 3.0236458778381348, "eval_runtime": 130.9562, "eval_samples_per_second": 788.622, "eval_steps_per_second": 49.291, "step": 1064000 }, { "epoch": 8.74, "learning_rate": 5.536562995080464e-06, "loss": 2.9889, "step": 1072000 }, { "epoch": 8.74, "eval_loss": 3.0273077487945557, "eval_runtime": 131.9047, "eval_samples_per_second": 782.952, "eval_steps_per_second": 48.937, "step": 1072000 }, { "epoch": 8.81, "eval_loss": 3.01967191696167, "eval_runtime": 131.9615, "eval_samples_per_second": 782.615, "eval_steps_per_second": 48.916, "step": 1080000 }, { "epoch": 8.87, "learning_rate": 5.469857416826483e-06, "loss": 2.9842, "step": 1088000 }, { "epoch": 8.87, "eval_loss": 3.037600040435791, "eval_runtime": 131.9507, "eval_samples_per_second": 782.679, "eval_steps_per_second": 48.92, "step": 1088000 }, { "epoch": 8.94, "eval_loss": 3.032285213470459, "eval_runtime": 131.7234, "eval_samples_per_second": 784.029, "eval_steps_per_second": 49.004, "step": 1096000 }, { "epoch": 9.0, "learning_rate": 5.403151838572501e-06, "loss": 2.9912, "step": 1104000 }, { "epoch": 9.0, "eval_loss": 3.031731367111206, "eval_runtime": 131.8868, "eval_samples_per_second": 783.058, "eval_steps_per_second": 48.944, "step": 1104000 }, { "epoch": 9.07, "eval_loss": 3.022475481033325, "eval_runtime": 131.0568, "eval_samples_per_second": 788.017, "eval_steps_per_second": 49.253, "step": 1112000 }, { "epoch": 9.13, "learning_rate": 5.33644626031852e-06, "loss": 2.9919, "step": 1120000 }, { "epoch": 9.13, "eval_loss": 3.036106824874878, "eval_runtime": 132.2182, "eval_samples_per_second": 781.095, "eval_steps_per_second": 48.821, "step": 1120000 }, { "epoch": 9.2, "eval_loss": 3.0432300567626953, "eval_runtime": 131.9088, "eval_samples_per_second": 782.927, "eval_steps_per_second": 48.935, "step": 1128000 }, { "epoch": 9.26, "learning_rate": 5.269740682064538e-06, "loss": 2.9872, "step": 1136000 }, { "epoch": 9.26, "eval_loss": 3.0306613445281982, "eval_runtime": 131.2348, "eval_samples_per_second": 786.948, "eval_steps_per_second": 49.187, "step": 1136000 }, { "epoch": 9.33, "eval_loss": 3.0481879711151123, "eval_runtime": 131.7205, "eval_samples_per_second": 784.046, "eval_steps_per_second": 49.005, "step": 1144000 }, { "epoch": 9.39, "learning_rate": 5.203035103810556e-06, "loss": 2.9823, "step": 1152000 }, { "epoch": 9.39, "eval_loss": 3.035399913787842, "eval_runtime": 131.2188, "eval_samples_per_second": 787.044, "eval_steps_per_second": 49.193, "step": 1152000 }, { "epoch": 9.46, "eval_loss": 3.0419015884399414, "eval_runtime": 131.8024, "eval_samples_per_second": 783.559, "eval_steps_per_second": 48.975, "step": 1160000 }, { "epoch": 9.52, "learning_rate": 5.136329525556575e-06, "loss": 2.9882, "step": 1168000 }, { "epoch": 9.52, "eval_loss": 3.0567431449890137, "eval_runtime": 132.7773, "eval_samples_per_second": 777.806, "eval_steps_per_second": 48.615, "step": 1168000 }, { "epoch": 9.59, "eval_loss": 3.0395400524139404, "eval_runtime": 131.6554, "eval_samples_per_second": 784.434, "eval_steps_per_second": 49.03, "step": 1176000 }, { "epoch": 9.65, "learning_rate": 5.0696239473025935e-06, "loss": 3.0079, "step": 1184000 }, { "epoch": 9.65, "eval_loss": 3.0572261810302734, "eval_runtime": 132.0184, "eval_samples_per_second": 782.278, "eval_steps_per_second": 48.895, "step": 1184000 }, { "epoch": 9.72, "eval_loss": 3.04028058052063, "eval_runtime": 131.8056, "eval_samples_per_second": 783.54, "eval_steps_per_second": 48.974, "step": 1192000 }, { "epoch": 9.78, "learning_rate": 5.002918369048611e-06, "loss": 3.0243, "step": 1200000 }, { "epoch": 9.78, "eval_loss": 3.047227621078491, "eval_runtime": 131.9863, "eval_samples_per_second": 782.467, "eval_steps_per_second": 48.907, "step": 1200000 }, { "epoch": 9.85, "eval_loss": 3.052279472351074, "eval_runtime": 132.2017, "eval_samples_per_second": 781.193, "eval_steps_per_second": 48.827, "step": 1208000 }, { "epoch": 9.92, "learning_rate": 4.936212790794631e-06, "loss": 3.0127, "step": 1216000 }, { "epoch": 9.92, "eval_loss": 3.053439140319824, "eval_runtime": 131.3363, "eval_samples_per_second": 786.34, "eval_steps_per_second": 49.149, "step": 1216000 }, { "epoch": 9.98, "eval_loss": 3.0434141159057617, "eval_runtime": 131.7363, "eval_samples_per_second": 783.952, "eval_steps_per_second": 48.999, "step": 1224000 }, { "epoch": 10.05, "learning_rate": 4.869507212540649e-06, "loss": 3.0106, "step": 1232000 }, { "epoch": 10.05, "eval_loss": 3.0687036514282227, "eval_runtime": 131.4287, "eval_samples_per_second": 785.788, "eval_steps_per_second": 49.114, "step": 1232000 }, { "epoch": 10.11, "eval_loss": 3.0677733421325684, "eval_runtime": 132.6312, "eval_samples_per_second": 778.663, "eval_steps_per_second": 48.669, "step": 1240000 }, { "epoch": 10.18, "learning_rate": 4.802801634286667e-06, "loss": 3.0063, "step": 1248000 }, { "epoch": 10.18, "eval_loss": 3.0652401447296143, "eval_runtime": 132.5035, "eval_samples_per_second": 779.413, "eval_steps_per_second": 48.716, "step": 1248000 }, { "epoch": 10.24, "eval_loss": 3.0768234729766846, "eval_runtime": 131.7104, "eval_samples_per_second": 784.107, "eval_steps_per_second": 49.009, "step": 1256000 }, { "epoch": 10.31, "learning_rate": 4.7360960560326865e-06, "loss": 3.0187, "step": 1264000 }, { "epoch": 10.31, "eval_loss": 3.069179058074951, "eval_runtime": 132.7895, "eval_samples_per_second": 777.735, "eval_steps_per_second": 48.611, "step": 1264000 }, { "epoch": 10.37, "eval_loss": 3.0621213912963867, "eval_runtime": 132.041, "eval_samples_per_second": 782.144, "eval_steps_per_second": 48.886, "step": 1272000 }, { "epoch": 10.44, "learning_rate": 4.669390477778704e-06, "loss": 3.0202, "step": 1280000 }, { "epoch": 10.44, "eval_loss": 3.0663187503814697, "eval_runtime": 132.2635, "eval_samples_per_second": 780.828, "eval_steps_per_second": 48.804, "step": 1280000 }, { "epoch": 10.5, "eval_loss": 3.0537171363830566, "eval_runtime": 132.2536, "eval_samples_per_second": 780.886, "eval_steps_per_second": 48.808, "step": 1288000 }, { "epoch": 10.57, "learning_rate": 4.602684899524723e-06, "loss": 3.0219, "step": 1296000 }, { "epoch": 10.57, "eval_loss": 3.072500705718994, "eval_runtime": 132.0295, "eval_samples_per_second": 782.212, "eval_steps_per_second": 48.891, "step": 1296000 }, { "epoch": 10.63, "eval_loss": 3.0664169788360596, "eval_runtime": 131.9651, "eval_samples_per_second": 782.593, "eval_steps_per_second": 48.914, "step": 1304000 }, { "epoch": 10.7, "learning_rate": 4.5359793212707415e-06, "loss": 3.0232, "step": 1312000 }, { "epoch": 10.7, "eval_loss": 3.0724074840545654, "eval_runtime": 133.2104, "eval_samples_per_second": 775.277, "eval_steps_per_second": 48.457, "step": 1312000 }, { "epoch": 10.76, "eval_loss": 3.0476126670837402, "eval_runtime": 132.7171, "eval_samples_per_second": 778.159, "eval_steps_per_second": 48.637, "step": 1320000 }, { "epoch": 10.83, "learning_rate": 4.46927374301676e-06, "loss": 3.0247, "step": 1328000 }, { "epoch": 10.83, "eval_loss": 3.0729353427886963, "eval_runtime": 132.4018, "eval_samples_per_second": 780.012, "eval_steps_per_second": 48.753, "step": 1328000 }, { "epoch": 10.89, "eval_loss": 3.0645902156829834, "eval_runtime": 133.3334, "eval_samples_per_second": 774.562, "eval_steps_per_second": 48.412, "step": 1336000 }, { "epoch": 10.96, "learning_rate": 4.402568164762779e-06, "loss": 3.0335, "step": 1344000 }, { "epoch": 10.96, "eval_loss": 3.0603559017181396, "eval_runtime": 131.9232, "eval_samples_per_second": 782.842, "eval_steps_per_second": 48.93, "step": 1344000 }, { "epoch": 11.02, "eval_loss": 3.0630509853363037, "eval_runtime": 132.4502, "eval_samples_per_second": 779.727, "eval_steps_per_second": 48.735, "step": 1352000 }, { "epoch": 11.09, "learning_rate": 4.335862586508797e-06, "loss": 3.0182, "step": 1360000 }, { "epoch": 11.09, "eval_loss": 3.0669026374816895, "eval_runtime": 133.3499, "eval_samples_per_second": 774.466, "eval_steps_per_second": 48.406, "step": 1360000 }, { "epoch": 11.15, "eval_loss": 3.0626471042633057, "eval_runtime": 133.0041, "eval_samples_per_second": 776.48, "eval_steps_per_second": 48.532, "step": 1368000 }, { "epoch": 11.22, "learning_rate": 4.269157008254816e-06, "loss": 3.0124, "step": 1376000 }, { "epoch": 11.22, "eval_loss": 3.053469181060791, "eval_runtime": 133.5969, "eval_samples_per_second": 773.034, "eval_steps_per_second": 48.317, "step": 1376000 }, { "epoch": 11.29, "eval_loss": 3.076792001724243, "eval_runtime": 133.0672, "eval_samples_per_second": 776.112, "eval_steps_per_second": 48.509, "step": 1384000 }, { "epoch": 11.35, "learning_rate": 4.202451430000834e-06, "loss": 3.016, "step": 1392000 }, { "epoch": 11.35, "eval_loss": 3.0615081787109375, "eval_runtime": 133.9693, "eval_samples_per_second": 770.886, "eval_steps_per_second": 48.183, "step": 1392000 }, { "epoch": 11.42, "eval_loss": 3.0689148902893066, "eval_runtime": 134.418, "eval_samples_per_second": 768.312, "eval_steps_per_second": 48.022, "step": 1400000 }, { "epoch": 11.48, "learning_rate": 4.135745851746852e-06, "loss": 3.0133, "step": 1408000 }, { "epoch": 11.48, "eval_loss": 3.069943428039551, "eval_runtime": 133.7409, "eval_samples_per_second": 772.202, "eval_steps_per_second": 48.265, "step": 1408000 }, { "epoch": 11.55, "eval_loss": 3.0647213459014893, "eval_runtime": 134.5422, "eval_samples_per_second": 767.603, "eval_steps_per_second": 47.977, "step": 1416000 }, { "epoch": 11.61, "learning_rate": 4.069040273492872e-06, "loss": 3.0227, "step": 1424000 }, { "epoch": 11.61, "eval_loss": 3.0704684257507324, "eval_runtime": 135.8934, "eval_samples_per_second": 759.97, "eval_steps_per_second": 47.5, "step": 1424000 }, { "epoch": 11.68, "eval_loss": 3.0705504417419434, "eval_runtime": 133.4155, "eval_samples_per_second": 774.086, "eval_steps_per_second": 48.383, "step": 1432000 }, { "epoch": 11.74, "learning_rate": 4.0023346952388895e-06, "loss": 3.0267, "step": 1440000 }, { "epoch": 11.74, "eval_loss": 3.069384813308716, "eval_runtime": 133.2021, "eval_samples_per_second": 775.326, "eval_steps_per_second": 48.46, "step": 1440000 }, { "epoch": 11.81, "eval_loss": 3.0720527172088623, "eval_runtime": 133.9349, "eval_samples_per_second": 771.083, "eval_steps_per_second": 48.195, "step": 1448000 }, { "epoch": 11.87, "learning_rate": 3.935629116984908e-06, "loss": 3.021, "step": 1456000 }, { "epoch": 11.87, "eval_loss": 3.068966865539551, "eval_runtime": 132.597, "eval_samples_per_second": 778.864, "eval_steps_per_second": 48.681, "step": 1456000 }, { "epoch": 11.94, "eval_loss": 3.060349702835083, "eval_runtime": 134.1972, "eval_samples_per_second": 769.576, "eval_steps_per_second": 48.101, "step": 1464000 }, { "epoch": 12.0, "learning_rate": 3.868923538730927e-06, "loss": 3.0144, "step": 1472000 }, { "epoch": 12.0, "eval_loss": 3.065760374069214, "eval_runtime": 134.4544, "eval_samples_per_second": 768.104, "eval_steps_per_second": 48.009, "step": 1472000 }, { "epoch": 12.07, "eval_loss": 3.0719916820526123, "eval_runtime": 133.6199, "eval_samples_per_second": 772.902, "eval_steps_per_second": 48.309, "step": 1480000 }, { "epoch": 12.13, "learning_rate": 3.8022179604769453e-06, "loss": 3.0204, "step": 1488000 }, { "epoch": 12.13, "eval_loss": 3.066779851913452, "eval_runtime": 133.3793, "eval_samples_per_second": 774.296, "eval_steps_per_second": 48.396, "step": 1488000 }, { "epoch": 12.2, "eval_loss": 3.0773117542266846, "eval_runtime": 135.2249, "eval_samples_per_second": 763.728, "eval_steps_per_second": 47.735, "step": 1496000 }, { "epoch": 12.26, "learning_rate": 3.735512382222964e-06, "loss": 3.0085, "step": 1504000 }, { "epoch": 12.26, "eval_loss": 3.0847675800323486, "eval_runtime": 133.4406, "eval_samples_per_second": 773.94, "eval_steps_per_second": 48.374, "step": 1504000 }, { "epoch": 12.33, "eval_loss": 3.0567853450775146, "eval_runtime": 136.5184, "eval_samples_per_second": 756.492, "eval_steps_per_second": 47.283, "step": 1512000 }, { "epoch": 12.39, "learning_rate": 3.668806803968982e-06, "loss": 3.0146, "step": 1520000 }, { "epoch": 12.39, "eval_loss": 3.0783281326293945, "eval_runtime": 134.8805, "eval_samples_per_second": 765.678, "eval_steps_per_second": 47.857, "step": 1520000 }, { "epoch": 12.46, "eval_loss": 3.073552370071411, "eval_runtime": 133.8542, "eval_samples_per_second": 771.549, "eval_steps_per_second": 48.224, "step": 1528000 }, { "epoch": 12.52, "learning_rate": 3.6021012257150007e-06, "loss": 3.02, "step": 1536000 }, { "epoch": 12.52, "eval_loss": 3.0533952713012695, "eval_runtime": 133.5934, "eval_samples_per_second": 773.055, "eval_steps_per_second": 48.318, "step": 1536000 }, { "epoch": 12.59, "eval_loss": 3.0684494972229004, "eval_runtime": 133.2901, "eval_samples_per_second": 774.814, "eval_steps_per_second": 48.428, "step": 1544000 }, { "epoch": 12.65, "learning_rate": 3.535395647461019e-06, "loss": 3.0229, "step": 1552000 }, { "epoch": 12.65, "eval_loss": 3.07673978805542, "eval_runtime": 134.115, "eval_samples_per_second": 770.048, "eval_steps_per_second": 48.13, "step": 1552000 }, { "epoch": 12.72, "eval_loss": 3.0568747520446777, "eval_runtime": 134.3484, "eval_samples_per_second": 768.71, "eval_steps_per_second": 48.047, "step": 1560000 }, { "epoch": 12.79, "learning_rate": 3.468690069207038e-06, "loss": 3.0152, "step": 1568000 }, { "epoch": 12.79, "eval_loss": 3.0787863731384277, "eval_runtime": 133.764, "eval_samples_per_second": 772.068, "eval_steps_per_second": 48.257, "step": 1568000 }, { "epoch": 12.85, "eval_loss": 3.066344738006592, "eval_runtime": 133.9216, "eval_samples_per_second": 771.16, "eval_steps_per_second": 48.2, "step": 1576000 }, { "epoch": 12.92, "learning_rate": 3.4019844909530565e-06, "loss": 3.02, "step": 1584000 }, { "epoch": 12.92, "eval_loss": 3.067016839981079, "eval_runtime": 133.9971, "eval_samples_per_second": 770.725, "eval_steps_per_second": 48.173, "step": 1584000 }, { "epoch": 12.98, "eval_loss": 3.0683343410491943, "eval_runtime": 134.2208, "eval_samples_per_second": 769.441, "eval_steps_per_second": 48.092, "step": 1592000 }, { "epoch": 13.05, "learning_rate": 3.3352789126990747e-06, "loss": 3.0128, "step": 1600000 }, { "epoch": 13.05, "eval_loss": 3.071779489517212, "eval_runtime": 134.2033, "eval_samples_per_second": 769.541, "eval_steps_per_second": 48.099, "step": 1600000 }, { "epoch": 13.11, "eval_loss": 3.0846707820892334, "eval_runtime": 134.6625, "eval_samples_per_second": 766.917, "eval_steps_per_second": 47.935, "step": 1608000 }, { "epoch": 13.18, "learning_rate": 3.2685733344450933e-06, "loss": 3.016, "step": 1616000 }, { "epoch": 13.18, "eval_loss": 3.066356897354126, "eval_runtime": 134.4556, "eval_samples_per_second": 768.097, "eval_steps_per_second": 48.008, "step": 1616000 }, { "epoch": 13.24, "eval_loss": 3.0687520503997803, "eval_runtime": 134.3299, "eval_samples_per_second": 768.816, "eval_steps_per_second": 48.053, "step": 1624000 }, { "epoch": 13.31, "learning_rate": 3.2018677561911115e-06, "loss": 3.0007, "step": 1632000 }, { "epoch": 13.31, "eval_loss": 3.0740671157836914, "eval_runtime": 134.4424, "eval_samples_per_second": 768.173, "eval_steps_per_second": 48.013, "step": 1632000 }, { "epoch": 13.37, "eval_loss": 3.0663323402404785, "eval_runtime": 134.2383, "eval_samples_per_second": 769.341, "eval_steps_per_second": 48.086, "step": 1640000 }, { "epoch": 13.44, "learning_rate": 3.1351621779371306e-06, "loss": 3.0241, "step": 1648000 }, { "epoch": 13.44, "eval_loss": 3.0607213973999023, "eval_runtime": 134.0502, "eval_samples_per_second": 770.42, "eval_steps_per_second": 48.154, "step": 1648000 }, { "epoch": 13.5, "eval_loss": 3.0635085105895996, "eval_runtime": 133.9453, "eval_samples_per_second": 771.024, "eval_steps_per_second": 48.191, "step": 1656000 }, { "epoch": 13.57, "learning_rate": 3.0684565996831487e-06, "loss": 3.0103, "step": 1664000 }, { "epoch": 13.57, "eval_loss": 3.0730724334716797, "eval_runtime": 135.0683, "eval_samples_per_second": 764.613, "eval_steps_per_second": 47.791, "step": 1664000 }, { "epoch": 13.63, "eval_loss": 3.0649466514587402, "eval_runtime": 134.138, "eval_samples_per_second": 769.916, "eval_steps_per_second": 48.122, "step": 1672000 }, { "epoch": 13.7, "learning_rate": 3.0017510214291673e-06, "loss": 3.0188, "step": 1680000 }, { "epoch": 13.7, "eval_loss": 3.058675765991211, "eval_runtime": 134.4659, "eval_samples_per_second": 768.039, "eval_steps_per_second": 48.005, "step": 1680000 }, { "epoch": 13.76, "eval_loss": 3.0703861713409424, "eval_runtime": 134.1628, "eval_samples_per_second": 769.773, "eval_steps_per_second": 48.113, "step": 1688000 }, { "epoch": 13.83, "learning_rate": 2.9350454431751855e-06, "loss": 3.0217, "step": 1696000 }, { "epoch": 13.83, "eval_loss": 3.066443920135498, "eval_runtime": 135.8944, "eval_samples_per_second": 759.965, "eval_steps_per_second": 47.5, "step": 1696000 }, { "epoch": 13.89, "eval_loss": 3.0626626014709473, "eval_runtime": 135.45, "eval_samples_per_second": 762.458, "eval_steps_per_second": 47.656, "step": 1704000 }, { "epoch": 13.96, "learning_rate": 2.868339864921204e-06, "loss": 3.0282, "step": 1712000 }, { "epoch": 13.96, "eval_loss": 3.071357488632202, "eval_runtime": 134.3182, "eval_samples_per_second": 768.883, "eval_steps_per_second": 48.058, "step": 1712000 }, { "epoch": 14.02, "eval_loss": 3.0688371658325195, "eval_runtime": 135.2782, "eval_samples_per_second": 763.427, "eval_steps_per_second": 47.716, "step": 1720000 }, { "epoch": 14.09, "learning_rate": 2.801634286667223e-06, "loss": 3.0166, "step": 1728000 }, { "epoch": 14.09, "eval_loss": 3.05212664604187, "eval_runtime": 135.0648, "eval_samples_per_second": 764.633, "eval_steps_per_second": 47.792, "step": 1728000 }, { "epoch": 14.16, "eval_loss": 3.0538179874420166, "eval_runtime": 134.2844, "eval_samples_per_second": 769.076, "eval_steps_per_second": 48.07, "step": 1736000 }, { "epoch": 14.22, "learning_rate": 2.7349287084132413e-06, "loss": 3.0134, "step": 1744000 }, { "epoch": 14.22, "eval_loss": 3.064086437225342, "eval_runtime": 135.4053, "eval_samples_per_second": 762.71, "eval_steps_per_second": 47.672, "step": 1744000 }, { "epoch": 14.29, "eval_loss": 3.063884735107422, "eval_runtime": 134.2537, "eval_samples_per_second": 769.253, "eval_steps_per_second": 48.081, "step": 1752000 }, { "epoch": 14.35, "learning_rate": 2.66822313015926e-06, "loss": 3.0032, "step": 1760000 }, { "epoch": 14.35, "eval_loss": 3.0587823390960693, "eval_runtime": 135.0451, "eval_samples_per_second": 764.745, "eval_steps_per_second": 47.799, "step": 1760000 }, { "epoch": 14.42, "eval_loss": 3.064620018005371, "eval_runtime": 134.9837, "eval_samples_per_second": 765.092, "eval_steps_per_second": 47.821, "step": 1768000 }, { "epoch": 14.48, "learning_rate": 2.601517551905278e-06, "loss": 3.0136, "step": 1776000 }, { "epoch": 14.48, "eval_loss": 3.062889337539673, "eval_runtime": 134.9119, "eval_samples_per_second": 765.499, "eval_steps_per_second": 47.846, "step": 1776000 }, { "epoch": 14.55, "eval_loss": 3.0578110218048096, "eval_runtime": 136.5221, "eval_samples_per_second": 756.471, "eval_steps_per_second": 47.282, "step": 1784000 }, { "epoch": 14.61, "learning_rate": 2.5348119736512967e-06, "loss": 3.0086, "step": 1792000 }, { "epoch": 14.61, "eval_loss": 3.0528934001922607, "eval_runtime": 135.6145, "eval_samples_per_second": 761.534, "eval_steps_per_second": 47.598, "step": 1792000 }, { "epoch": 14.68, "eval_loss": 3.0615251064300537, "eval_runtime": 135.3281, "eval_samples_per_second": 763.145, "eval_steps_per_second": 47.699, "step": 1800000 }, { "epoch": 14.74, "learning_rate": 2.4681063953973154e-06, "loss": 3.019, "step": 1808000 }, { "epoch": 14.74, "eval_loss": 3.0565857887268066, "eval_runtime": 134.9377, "eval_samples_per_second": 765.353, "eval_steps_per_second": 47.837, "step": 1808000 }, { "epoch": 14.81, "eval_loss": 3.0658679008483887, "eval_runtime": 135.2159, "eval_samples_per_second": 763.778, "eval_steps_per_second": 47.738, "step": 1816000 }, { "epoch": 14.87, "learning_rate": 2.4014008171433335e-06, "loss": 3.024, "step": 1824000 }, { "epoch": 14.87, "eval_loss": 3.061464786529541, "eval_runtime": 135.2789, "eval_samples_per_second": 763.423, "eval_steps_per_second": 47.716, "step": 1824000 }, { "epoch": 14.94, "eval_loss": 3.0530033111572266, "eval_runtime": 135.9081, "eval_samples_per_second": 759.889, "eval_steps_per_second": 47.495, "step": 1832000 }, { "epoch": 15.0, "learning_rate": 2.334695238889352e-06, "loss": 3.0089, "step": 1840000 }, { "epoch": 15.0, "eval_loss": 3.0796985626220703, "eval_runtime": 135.2715, "eval_samples_per_second": 763.465, "eval_steps_per_second": 47.719, "step": 1840000 }, { "epoch": 15.07, "eval_loss": 3.0700411796569824, "eval_runtime": 136.6273, "eval_samples_per_second": 755.888, "eval_steps_per_second": 47.245, "step": 1848000 }, { "epoch": 15.13, "learning_rate": 2.2679896606353707e-06, "loss": 3.0174, "step": 1856000 }, { "epoch": 15.13, "eval_loss": 3.0748071670532227, "eval_runtime": 136.44, "eval_samples_per_second": 756.926, "eval_steps_per_second": 47.31, "step": 1856000 }, { "epoch": 15.2, "eval_loss": 3.064267635345459, "eval_runtime": 135.3728, "eval_samples_per_second": 762.894, "eval_steps_per_second": 47.683, "step": 1864000 }, { "epoch": 15.26, "learning_rate": 2.2012840823813894e-06, "loss": 3.0176, "step": 1872000 }, { "epoch": 15.26, "eval_loss": 3.0627517700195312, "eval_runtime": 135.5713, "eval_samples_per_second": 761.776, "eval_steps_per_second": 47.613, "step": 1872000 }, { "epoch": 15.33, "eval_loss": 3.0629563331604004, "eval_runtime": 135.894, "eval_samples_per_second": 759.967, "eval_steps_per_second": 47.5, "step": 1880000 }, { "epoch": 15.39, "learning_rate": 2.134578504127408e-06, "loss": 3.0164, "step": 1888000 }, { "epoch": 15.39, "eval_loss": 3.0721538066864014, "eval_runtime": 135.9329, "eval_samples_per_second": 759.75, "eval_steps_per_second": 47.487, "step": 1888000 }, { "epoch": 15.46, "eval_loss": 3.0744197368621826, "eval_runtime": 135.4506, "eval_samples_per_second": 762.455, "eval_steps_per_second": 47.656, "step": 1896000 }, { "epoch": 15.53, "learning_rate": 2.067872925873426e-06, "loss": 3.0302, "step": 1904000 }, { "epoch": 15.53, "eval_loss": 3.0739452838897705, "eval_runtime": 135.8281, "eval_samples_per_second": 760.336, "eval_steps_per_second": 47.523, "step": 1904000 }, { "epoch": 15.59, "eval_loss": 3.0700225830078125, "eval_runtime": 136.0724, "eval_samples_per_second": 758.971, "eval_steps_per_second": 47.438, "step": 1912000 }, { "epoch": 15.66, "learning_rate": 2.0011673476194448e-06, "loss": 3.0204, "step": 1920000 }, { "epoch": 15.66, "eval_loss": 3.0751476287841797, "eval_runtime": 136.119, "eval_samples_per_second": 758.711, "eval_steps_per_second": 47.422, "step": 1920000 }, { "epoch": 15.72, "eval_loss": 3.0597870349884033, "eval_runtime": 136.3427, "eval_samples_per_second": 757.466, "eval_steps_per_second": 47.344, "step": 1928000 }, { "epoch": 15.79, "learning_rate": 1.9344617693654634e-06, "loss": 3.0147, "step": 1936000 }, { "epoch": 15.79, "eval_loss": 3.0522122383117676, "eval_runtime": 136.0082, "eval_samples_per_second": 759.329, "eval_steps_per_second": 47.46, "step": 1936000 }, { "epoch": 15.85, "eval_loss": 3.065509557723999, "eval_runtime": 136.1421, "eval_samples_per_second": 758.582, "eval_steps_per_second": 47.414, "step": 1944000 }, { "epoch": 15.92, "learning_rate": 1.867756191111482e-06, "loss": 3.0245, "step": 1952000 }, { "epoch": 15.92, "eval_loss": 3.0568597316741943, "eval_runtime": 136.6776, "eval_samples_per_second": 755.61, "eval_steps_per_second": 47.228, "step": 1952000 }, { "epoch": 15.98, "eval_loss": 3.062300205230713, "eval_runtime": 136.0258, "eval_samples_per_second": 759.231, "eval_steps_per_second": 47.454, "step": 1960000 }, { "epoch": 16.05, "learning_rate": 1.8010506128575004e-06, "loss": 3.0069, "step": 1968000 }, { "epoch": 16.05, "eval_loss": 3.059983730316162, "eval_runtime": 136.4638, "eval_samples_per_second": 756.794, "eval_steps_per_second": 47.302, "step": 1968000 }, { "epoch": 16.11, "eval_loss": 3.0638678073883057, "eval_runtime": 137.569, "eval_samples_per_second": 750.714, "eval_steps_per_second": 46.922, "step": 1976000 }, { "epoch": 16.18, "learning_rate": 1.734345034603519e-06, "loss": 3.0068, "step": 1984000 }, { "epoch": 16.18, "eval_loss": 3.077465534210205, "eval_runtime": 136.0507, "eval_samples_per_second": 759.092, "eval_steps_per_second": 47.446, "step": 1984000 }, { "epoch": 16.24, "eval_loss": 3.0668864250183105, "eval_runtime": 136.2552, "eval_samples_per_second": 757.953, "eval_steps_per_second": 47.374, "step": 1992000 }, { "epoch": 16.31, "learning_rate": 1.6676394563495374e-06, "loss": 3.0275, "step": 2000000 }, { "epoch": 16.31, "eval_loss": 3.062725782394409, "eval_runtime": 136.3436, "eval_samples_per_second": 757.461, "eval_steps_per_second": 47.344, "step": 2000000 }, { "epoch": 16.37, "eval_loss": 3.0644514560699463, "eval_runtime": 137.752, "eval_samples_per_second": 749.717, "eval_steps_per_second": 46.86, "step": 2008000 }, { "epoch": 16.44, "learning_rate": 1.6009338780955558e-06, "loss": 3.0164, "step": 2016000 }, { "epoch": 16.44, "eval_loss": 3.0666866302490234, "eval_runtime": 135.9171, "eval_samples_per_second": 759.838, "eval_steps_per_second": 47.492, "step": 2016000 }, { "epoch": 16.5, "eval_loss": 3.048987627029419, "eval_runtime": 136.0156, "eval_samples_per_second": 759.288, "eval_steps_per_second": 47.458, "step": 2024000 }, { "epoch": 16.57, "learning_rate": 1.5342282998415744e-06, "loss": 3.0148, "step": 2032000 }, { "epoch": 16.57, "eval_loss": 3.061800003051758, "eval_runtime": 137.187, "eval_samples_per_second": 752.805, "eval_steps_per_second": 47.053, "step": 2032000 }, { "epoch": 16.63, "eval_loss": 3.0544731616973877, "eval_runtime": 137.5014, "eval_samples_per_second": 751.083, "eval_steps_per_second": 46.945, "step": 2040000 }, { "epoch": 16.7, "learning_rate": 1.4675227215875928e-06, "loss": 3.022, "step": 2048000 }, { "epoch": 16.7, "eval_loss": 3.0651352405548096, "eval_runtime": 137.0124, "eval_samples_per_second": 753.764, "eval_steps_per_second": 47.113, "step": 2048000 }, { "epoch": 16.76, "eval_loss": 3.068650484085083, "eval_runtime": 137.324, "eval_samples_per_second": 752.053, "eval_steps_per_second": 47.006, "step": 2056000 }, { "epoch": 16.83, "learning_rate": 1.4008171433336116e-06, "loss": 3.0235, "step": 2064000 }, { "epoch": 16.83, "eval_loss": 3.0515873432159424, "eval_runtime": 137.8405, "eval_samples_per_second": 749.235, "eval_steps_per_second": 46.829, "step": 2064000 }, { "epoch": 16.89, "eval_loss": 3.0761473178863525, "eval_runtime": 137.435, "eval_samples_per_second": 751.446, "eval_steps_per_second": 46.968, "step": 2072000 }, { "epoch": 16.96, "learning_rate": 1.33411156507963e-06, "loss": 3.0194, "step": 2080000 }, { "epoch": 16.96, "eval_loss": 3.0807414054870605, "eval_runtime": 136.8928, "eval_samples_per_second": 754.423, "eval_steps_per_second": 47.154, "step": 2080000 }, { "epoch": 17.03, "eval_loss": 3.060075283050537, "eval_runtime": 136.6441, "eval_samples_per_second": 755.796, "eval_steps_per_second": 47.24, "step": 2088000 }, { "epoch": 17.09, "learning_rate": 1.2674059868256484e-06, "loss": 3.0142, "step": 2096000 }, { "epoch": 17.09, "eval_loss": 3.0721395015716553, "eval_runtime": 136.5201, "eval_samples_per_second": 756.482, "eval_steps_per_second": 47.282, "step": 2096000 }, { "epoch": 17.16, "eval_loss": 3.0653316974639893, "eval_runtime": 138.2812, "eval_samples_per_second": 746.848, "eval_steps_per_second": 46.68, "step": 2104000 }, { "epoch": 17.22, "learning_rate": 1.2007004085716668e-06, "loss": 3.0183, "step": 2112000 }, { "epoch": 17.22, "eval_loss": 3.061683416366577, "eval_runtime": 136.6654, "eval_samples_per_second": 755.678, "eval_steps_per_second": 47.232, "step": 2112000 }, { "epoch": 17.29, "eval_loss": 3.062178373336792, "eval_runtime": 137.9621, "eval_samples_per_second": 748.575, "eval_steps_per_second": 46.788, "step": 2120000 }, { "epoch": 17.35, "learning_rate": 1.1339948303176854e-06, "loss": 3.0092, "step": 2128000 }, { "epoch": 17.35, "eval_loss": 3.068242311477661, "eval_runtime": 137.4752, "eval_samples_per_second": 751.227, "eval_steps_per_second": 46.954, "step": 2128000 }, { "epoch": 17.42, "eval_loss": 3.073157787322998, "eval_runtime": 136.5003, "eval_samples_per_second": 756.592, "eval_steps_per_second": 47.289, "step": 2136000 }, { "epoch": 17.48, "learning_rate": 1.067289252063704e-06, "loss": 3.0071, "step": 2144000 }, { "epoch": 17.48, "eval_loss": 3.0763022899627686, "eval_runtime": 137.95, "eval_samples_per_second": 748.641, "eval_steps_per_second": 46.792, "step": 2144000 }, { "epoch": 17.55, "eval_loss": 3.0674524307250977, "eval_runtime": 137.3106, "eval_samples_per_second": 752.127, "eval_steps_per_second": 47.01, "step": 2152000 }, { "epoch": 17.61, "learning_rate": 1.0005836738097224e-06, "loss": 3.0272, "step": 2160000 }, { "epoch": 17.61, "eval_loss": 3.0671498775482178, "eval_runtime": 138.0717, "eval_samples_per_second": 747.981, "eval_steps_per_second": 46.751, "step": 2160000 }, { "epoch": 17.68, "eval_loss": 3.062239170074463, "eval_runtime": 138.0499, "eval_samples_per_second": 748.099, "eval_steps_per_second": 46.758, "step": 2168000 }, { "epoch": 17.74, "learning_rate": 9.33878095555741e-07, "loss": 3.0235, "step": 2176000 }, { "epoch": 17.74, "eval_loss": 3.0789263248443604, "eval_runtime": 137.5626, "eval_samples_per_second": 750.749, "eval_steps_per_second": 46.924, "step": 2176000 }, { "epoch": 17.81, "eval_loss": 3.062295436859131, "eval_runtime": 138.8694, "eval_samples_per_second": 743.684, "eval_steps_per_second": 46.483, "step": 2184000 }, { "epoch": 17.87, "learning_rate": 8.671725173017595e-07, "loss": 3.0179, "step": 2192000 }, { "epoch": 17.87, "eval_loss": 3.078376054763794, "eval_runtime": 136.985, "eval_samples_per_second": 753.914, "eval_steps_per_second": 47.122, "step": 2192000 }, { "epoch": 17.94, "eval_loss": 3.062905788421631, "eval_runtime": 137.7472, "eval_samples_per_second": 749.743, "eval_steps_per_second": 46.861, "step": 2200000 }, { "epoch": 18.0, "learning_rate": 8.004669390477779e-07, "loss": 3.0209, "step": 2208000 }, { "epoch": 18.0, "eval_loss": 3.0731070041656494, "eval_runtime": 138.0906, "eval_samples_per_second": 747.878, "eval_steps_per_second": 46.745, "step": 2208000 }, { "epoch": 18.07, "eval_loss": 3.0945563316345215, "eval_runtime": 137.4959, "eval_samples_per_second": 751.113, "eval_steps_per_second": 46.947, "step": 2216000 }, { "epoch": 18.13, "learning_rate": 7.337613607937964e-07, "loss": 3.0237, "step": 2224000 }, { "epoch": 18.13, "eval_loss": 3.065315008163452, "eval_runtime": 138.0159, "eval_samples_per_second": 748.283, "eval_steps_per_second": 46.77, "step": 2224000 }, { "epoch": 18.2, "eval_loss": 3.0589962005615234, "eval_runtime": 137.6387, "eval_samples_per_second": 750.334, "eval_steps_per_second": 46.898, "step": 2232000 }, { "epoch": 18.26, "learning_rate": 6.67055782539815e-07, "loss": 3.0164, "step": 2240000 }, { "epoch": 18.26, "eval_loss": 3.070741891860962, "eval_runtime": 138.4523, "eval_samples_per_second": 745.925, "eval_steps_per_second": 46.623, "step": 2240000 }, { "epoch": 18.33, "eval_loss": 3.0545763969421387, "eval_runtime": 138.1194, "eval_samples_per_second": 747.723, "eval_steps_per_second": 46.735, "step": 2248000 }, { "epoch": 18.4, "learning_rate": 6.003502042858334e-07, "loss": 3.0206, "step": 2256000 }, { "epoch": 18.4, "eval_loss": 3.0741806030273438, "eval_runtime": 138.8634, "eval_samples_per_second": 743.717, "eval_steps_per_second": 46.485, "step": 2256000 }, { "epoch": 18.46, "eval_loss": 3.0793333053588867, "eval_runtime": 138.6181, "eval_samples_per_second": 745.032, "eval_steps_per_second": 46.567, "step": 2264000 }, { "epoch": 18.53, "learning_rate": 5.33644626031852e-07, "loss": 3.0138, "step": 2272000 }, { "epoch": 18.53, "eval_loss": 3.05604887008667, "eval_runtime": 139.1325, "eval_samples_per_second": 742.278, "eval_steps_per_second": 46.395, "step": 2272000 }, { "epoch": 18.59, "eval_loss": 3.086977958679199, "eval_runtime": 137.8163, "eval_samples_per_second": 749.367, "eval_steps_per_second": 46.838, "step": 2280000 }, { "epoch": 18.66, "learning_rate": 4.669390477778705e-07, "loss": 3.0377, "step": 2288000 }, { "epoch": 18.66, "eval_loss": 3.07423996925354, "eval_runtime": 137.4738, "eval_samples_per_second": 751.234, "eval_steps_per_second": 46.954, "step": 2288000 }, { "epoch": 18.72, "eval_loss": 3.0675508975982666, "eval_runtime": 138.0596, "eval_samples_per_second": 748.046, "eval_steps_per_second": 46.755, "step": 2296000 }, { "epoch": 18.79, "learning_rate": 4.0023346952388894e-07, "loss": 3.0227, "step": 2304000 }, { "epoch": 18.79, "eval_loss": 3.06254506111145, "eval_runtime": 139.3504, "eval_samples_per_second": 741.117, "eval_steps_per_second": 46.322, "step": 2304000 }, { "epoch": 18.85, "eval_loss": 3.0736207962036133, "eval_runtime": 139.5433, "eval_samples_per_second": 740.093, "eval_steps_per_second": 46.258, "step": 2312000 }, { "epoch": 18.92, "learning_rate": 3.335278912699075e-07, "loss": 3.0359, "step": 2320000 }, { "epoch": 18.92, "eval_loss": 3.0800607204437256, "eval_runtime": 138.2846, "eval_samples_per_second": 746.829, "eval_steps_per_second": 46.679, "step": 2320000 }, { "epoch": 18.98, "eval_loss": 3.0709972381591797, "eval_runtime": 139.0505, "eval_samples_per_second": 742.716, "eval_steps_per_second": 46.422, "step": 2328000 }, { "epoch": 19.05, "learning_rate": 2.66822313015926e-07, "loss": 3.0248, "step": 2336000 }, { "epoch": 19.05, "eval_loss": 3.069218158721924, "eval_runtime": 138.9779, "eval_samples_per_second": 743.104, "eval_steps_per_second": 46.446, "step": 2336000 }, { "epoch": 19.11, "eval_loss": 3.067660331726074, "eval_runtime": 138.4099, "eval_samples_per_second": 746.154, "eval_steps_per_second": 46.637, "step": 2344000 }, { "epoch": 19.18, "learning_rate": 2.0011673476194447e-07, "loss": 3.0235, "step": 2352000 }, { "epoch": 19.18, "eval_loss": 3.089552879333496, "eval_runtime": 138.573, "eval_samples_per_second": 745.275, "eval_steps_per_second": 46.582, "step": 2352000 }, { "epoch": 19.24, "eval_loss": 3.0777699947357178, "eval_runtime": 140.4362, "eval_samples_per_second": 735.388, "eval_steps_per_second": 45.964, "step": 2360000 }, { "epoch": 19.31, "learning_rate": 1.33411156507963e-07, "loss": 3.0187, "step": 2368000 }, { "epoch": 19.31, "eval_loss": 3.069951295852661, "eval_runtime": 140.0319, "eval_samples_per_second": 737.511, "eval_steps_per_second": 46.097, "step": 2368000 }, { "epoch": 19.37, "eval_loss": 3.0742506980895996, "eval_runtime": 139.3192, "eval_samples_per_second": 741.283, "eval_steps_per_second": 46.332, "step": 2376000 }, { "epoch": 19.44, "learning_rate": 6.67055782539815e-08, "loss": 3.0189, "step": 2384000 }, { "epoch": 19.44, "eval_loss": 3.0780065059661865, "eval_runtime": 138.4114, "eval_samples_per_second": 746.145, "eval_steps_per_second": 46.636, "step": 2384000 }, { "epoch": 19.5, "eval_loss": 3.0866599082946777, "eval_runtime": 138.3665, "eval_samples_per_second": 746.387, "eval_steps_per_second": 46.651, "step": 2392000 }, { "epoch": 19.57, "learning_rate": 0.0, "loss": 3.0184, "step": 2400000 }, { "epoch": 19.57, "eval_loss": 3.079288959503174, "eval_runtime": 138.4519, "eval_samples_per_second": 745.927, "eval_steps_per_second": 46.623, "step": 2400000 }, { "epoch": 19.57, "step": 2400000, "total_flos": 7.178820925216543e+17, "train_loss": 2.9400340771484377, "train_runtime": 198144.865, "train_samples_per_second": 193.798, "train_steps_per_second": 12.112 } ], "logging_steps": 16000, "max_steps": 2400000, "num_train_epochs": 20, "save_steps": 32000, "total_flos": 7.178820925216543e+17, "trial_name": null, "trial_params": null }