{ "best_metric": 2.277308464050293, "best_model_checkpoint": "./model_tweets_2020_Q3_25/checkpoint-1984000", "epoch": 6.762182494400068, "eval_steps": 8000, "global_step": 2400000, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.02, "eval_loss": 2.5705409049987793, "eval_runtime": 340.6483, "eval_samples_per_second": 877.371, "eval_steps_per_second": 54.837, "step": 8000 }, { "epoch": 0.05, "learning_rate": 4.0726666666666665e-07, "loss": 2.7559, "step": 16000 }, { "epoch": 0.05, "eval_loss": 2.4932284355163574, "eval_runtime": 339.9977, "eval_samples_per_second": 879.05, "eval_steps_per_second": 54.942, "step": 16000 }, { "epoch": 0.07, "eval_loss": 2.4515769481658936, "eval_runtime": 337.6978, "eval_samples_per_second": 885.037, "eval_steps_per_second": 55.316, "step": 24000 }, { "epoch": 0.09, "learning_rate": 4.0453333333333336e-07, "loss": 2.5786, "step": 32000 }, { "epoch": 0.09, "eval_loss": 2.4173614978790283, "eval_runtime": 343.3465, "eval_samples_per_second": 870.476, "eval_steps_per_second": 54.406, "step": 32000 }, { "epoch": 0.11, "eval_loss": 2.4070701599121094, "eval_runtime": 349.1432, "eval_samples_per_second": 856.024, "eval_steps_per_second": 53.502, "step": 40000 }, { "epoch": 0.14, "learning_rate": 4.018e-07, "loss": 2.5316, "step": 48000 }, { "epoch": 0.14, "eval_loss": 2.390305757522583, "eval_runtime": 348.7654, "eval_samples_per_second": 856.952, "eval_steps_per_second": 53.56, "step": 48000 }, { "epoch": 0.16, "eval_loss": 2.374408006668091, "eval_runtime": 348.5551, "eval_samples_per_second": 857.468, "eval_steps_per_second": 53.593, "step": 56000 }, { "epoch": 0.18, "learning_rate": 3.9906666666666667e-07, "loss": 2.5006, "step": 64000 }, { "epoch": 0.18, "eval_loss": 2.3650131225585938, "eval_runtime": 346.6068, "eval_samples_per_second": 862.288, "eval_steps_per_second": 53.894, "step": 64000 }, { "epoch": 0.2, "eval_loss": 2.3599953651428223, "eval_runtime": 350.9331, "eval_samples_per_second": 851.658, "eval_steps_per_second": 53.23, "step": 72000 }, { "epoch": 0.23, "learning_rate": 3.963333333333333e-07, "loss": 2.483, "step": 80000 }, { "epoch": 0.23, "eval_loss": 2.354825735092163, "eval_runtime": 352.2074, "eval_samples_per_second": 848.577, "eval_steps_per_second": 53.037, "step": 80000 }, { "epoch": 0.25, "eval_loss": 2.3485031127929688, "eval_runtime": 345.7111, "eval_samples_per_second": 864.522, "eval_steps_per_second": 54.034, "step": 88000 }, { "epoch": 0.27, "learning_rate": 3.936e-07, "loss": 2.4703, "step": 96000 }, { "epoch": 0.27, "eval_loss": 2.3474812507629395, "eval_runtime": 347.9607, "eval_samples_per_second": 858.933, "eval_steps_per_second": 53.684, "step": 96000 }, { "epoch": 0.29, "eval_loss": 2.338416337966919, "eval_runtime": 346.0214, "eval_samples_per_second": 863.747, "eval_steps_per_second": 53.985, "step": 104000 }, { "epoch": 0.32, "learning_rate": 3.908666666666667e-07, "loss": 2.47, "step": 112000 }, { "epoch": 0.32, "eval_loss": 2.3330440521240234, "eval_runtime": 341.3891, "eval_samples_per_second": 875.467, "eval_steps_per_second": 54.718, "step": 112000 }, { "epoch": 0.34, "eval_loss": 2.33542799949646, "eval_runtime": 344.1118, "eval_samples_per_second": 868.54, "eval_steps_per_second": 54.285, "step": 120000 }, { "epoch": 0.36, "learning_rate": 3.8813333333333334e-07, "loss": 2.4601, "step": 128000 }, { "epoch": 0.36, "eval_loss": 2.3343234062194824, "eval_runtime": 343.1112, "eval_samples_per_second": 871.073, "eval_steps_per_second": 54.443, "step": 128000 }, { "epoch": 0.38, "eval_loss": 2.3282012939453125, "eval_runtime": 342.8269, "eval_samples_per_second": 871.796, "eval_steps_per_second": 54.488, "step": 136000 }, { "epoch": 0.41, "learning_rate": 3.854e-07, "loss": 2.4486, "step": 144000 }, { "epoch": 0.41, "eval_loss": 2.3315742015838623, "eval_runtime": 344.5912, "eval_samples_per_second": 867.332, "eval_steps_per_second": 54.209, "step": 144000 }, { "epoch": 0.43, "eval_loss": 2.318028688430786, "eval_runtime": 344.5181, "eval_samples_per_second": 867.516, "eval_steps_per_second": 54.221, "step": 152000 }, { "epoch": 0.45, "learning_rate": 3.8266666666666665e-07, "loss": 2.4536, "step": 160000 }, { "epoch": 0.45, "eval_loss": 2.325669288635254, "eval_runtime": 347.0797, "eval_samples_per_second": 861.113, "eval_steps_per_second": 53.82, "step": 160000 }, { "epoch": 0.47, "eval_loss": 2.322175979614258, "eval_runtime": 347.6851, "eval_samples_per_second": 859.614, "eval_steps_per_second": 53.727, "step": 168000 }, { "epoch": 0.5, "learning_rate": 3.799333333333333e-07, "loss": 2.4523, "step": 176000 }, { "epoch": 0.5, "eval_loss": 2.3208346366882324, "eval_runtime": 349.9433, "eval_samples_per_second": 854.067, "eval_steps_per_second": 53.38, "step": 176000 }, { "epoch": 0.52, "eval_loss": 2.321834087371826, "eval_runtime": 350.0075, "eval_samples_per_second": 853.91, "eval_steps_per_second": 53.37, "step": 184000 }, { "epoch": 0.54, "learning_rate": 3.772e-07, "loss": 2.4489, "step": 192000 }, { "epoch": 0.54, "eval_loss": 2.318420171737671, "eval_runtime": 347.4326, "eval_samples_per_second": 860.239, "eval_steps_per_second": 53.766, "step": 192000 }, { "epoch": 0.56, "eval_loss": 2.3225138187408447, "eval_runtime": 343.2632, "eval_samples_per_second": 870.687, "eval_steps_per_second": 54.419, "step": 200000 }, { "epoch": 0.59, "learning_rate": 3.7446666666666667e-07, "loss": 2.4448, "step": 208000 }, { "epoch": 0.59, "eval_loss": 2.3184542655944824, "eval_runtime": 342.1646, "eval_samples_per_second": 873.483, "eval_steps_per_second": 54.594, "step": 208000 }, { "epoch": 0.61, "eval_loss": 2.3138577938079834, "eval_runtime": 343.0424, "eval_samples_per_second": 871.248, "eval_steps_per_second": 54.454, "step": 216000 }, { "epoch": 0.63, "learning_rate": 3.7173333333333333e-07, "loss": 2.4412, "step": 224000 }, { "epoch": 0.63, "eval_loss": 2.3235347270965576, "eval_runtime": 343.6882, "eval_samples_per_second": 869.611, "eval_steps_per_second": 54.352, "step": 224000 }, { "epoch": 0.65, "eval_loss": 2.3148436546325684, "eval_runtime": 340.8233, "eval_samples_per_second": 876.921, "eval_steps_per_second": 54.808, "step": 232000 }, { "epoch": 0.68, "learning_rate": 3.69e-07, "loss": 2.442, "step": 240000 }, { "epoch": 0.68, "eval_loss": 2.314605236053467, "eval_runtime": 338.4743, "eval_samples_per_second": 883.006, "eval_steps_per_second": 55.189, "step": 240000 }, { "epoch": 0.7, "eval_loss": 2.314452886581421, "eval_runtime": 340.3719, "eval_samples_per_second": 878.084, "eval_steps_per_second": 54.881, "step": 248000 }, { "epoch": 0.72, "learning_rate": 3.6626666666666664e-07, "loss": 2.4408, "step": 256000 }, { "epoch": 0.72, "eval_loss": 2.3082728385925293, "eval_runtime": 339.4224, "eval_samples_per_second": 880.54, "eval_steps_per_second": 55.035, "step": 256000 }, { "epoch": 0.74, "eval_loss": 2.3067545890808105, "eval_runtime": 346.5868, "eval_samples_per_second": 862.338, "eval_steps_per_second": 53.897, "step": 264000 }, { "epoch": 0.77, "learning_rate": 3.6353333333333335e-07, "loss": 2.4336, "step": 272000 }, { "epoch": 0.77, "eval_loss": 2.3104145526885986, "eval_runtime": 347.2315, "eval_samples_per_second": 860.737, "eval_steps_per_second": 53.797, "step": 272000 }, { "epoch": 0.79, "eval_loss": 2.314737319946289, "eval_runtime": 344.3546, "eval_samples_per_second": 867.928, "eval_steps_per_second": 54.246, "step": 280000 }, { "epoch": 0.81, "learning_rate": 3.608e-07, "loss": 2.4394, "step": 288000 }, { "epoch": 0.81, "eval_loss": 2.310542583465576, "eval_runtime": 343.2435, "eval_samples_per_second": 870.737, "eval_steps_per_second": 54.422, "step": 288000 }, { "epoch": 0.83, "eval_loss": 2.313537120819092, "eval_runtime": 345.148, "eval_samples_per_second": 865.933, "eval_steps_per_second": 54.122, "step": 296000 }, { "epoch": 0.86, "learning_rate": 3.5806666666666666e-07, "loss": 2.4363, "step": 304000 }, { "epoch": 0.86, "eval_loss": 2.3056890964508057, "eval_runtime": 345.9753, "eval_samples_per_second": 863.862, "eval_steps_per_second": 53.992, "step": 304000 }, { "epoch": 0.88, "eval_loss": 2.305032730102539, "eval_runtime": 343.0535, "eval_samples_per_second": 871.22, "eval_steps_per_second": 54.452, "step": 312000 }, { "epoch": 0.9, "learning_rate": 3.553333333333333e-07, "loss": 2.4403, "step": 320000 }, { "epoch": 0.9, "eval_loss": 2.3065528869628906, "eval_runtime": 339.0465, "eval_samples_per_second": 881.516, "eval_steps_per_second": 55.096, "step": 320000 }, { "epoch": 0.92, "eval_loss": 2.307610511779785, "eval_runtime": 341.4071, "eval_samples_per_second": 875.421, "eval_steps_per_second": 54.715, "step": 328000 }, { "epoch": 0.95, "learning_rate": 3.5259999999999997e-07, "loss": 2.4409, "step": 336000 }, { "epoch": 0.95, "eval_loss": 2.3025970458984375, "eval_runtime": 342.5635, "eval_samples_per_second": 872.466, "eval_steps_per_second": 54.53, "step": 336000 }, { "epoch": 0.97, "eval_loss": 2.3044984340667725, "eval_runtime": 339.4728, "eval_samples_per_second": 880.409, "eval_steps_per_second": 55.026, "step": 344000 }, { "epoch": 0.99, "learning_rate": 3.498666666666667e-07, "loss": 2.4434, "step": 352000 }, { "epoch": 0.99, "eval_loss": 2.304666519165039, "eval_runtime": 341.314, "eval_samples_per_second": 875.66, "eval_steps_per_second": 54.73, "step": 352000 }, { "epoch": 1.01, "eval_loss": 2.307954788208008, "eval_runtime": 339.3879, "eval_samples_per_second": 880.63, "eval_steps_per_second": 55.04, "step": 360000 }, { "epoch": 1.04, "learning_rate": 3.4713333333333333e-07, "loss": 2.4372, "step": 368000 }, { "epoch": 1.04, "eval_loss": 2.314342737197876, "eval_runtime": 341.0675, "eval_samples_per_second": 876.293, "eval_steps_per_second": 54.769, "step": 368000 }, { "epoch": 1.06, "eval_loss": 2.304889678955078, "eval_runtime": 343.6689, "eval_samples_per_second": 869.66, "eval_steps_per_second": 54.355, "step": 376000 }, { "epoch": 1.08, "learning_rate": 3.444e-07, "loss": 2.4329, "step": 384000 }, { "epoch": 1.08, "eval_loss": 2.3065786361694336, "eval_runtime": 343.1677, "eval_samples_per_second": 870.93, "eval_steps_per_second": 54.434, "step": 384000 }, { "epoch": 1.1, "eval_loss": 2.3049540519714355, "eval_runtime": 345.0271, "eval_samples_per_second": 866.236, "eval_steps_per_second": 54.141, "step": 392000 }, { "epoch": 1.13, "learning_rate": 3.416666666666667e-07, "loss": 2.437, "step": 400000 }, { "epoch": 1.13, "eval_loss": 2.30120587348938, "eval_runtime": 349.6852, "eval_samples_per_second": 854.697, "eval_steps_per_second": 53.419, "step": 400000 }, { "epoch": 1.15, "eval_loss": 2.303290367126465, "eval_runtime": 347.5661, "eval_samples_per_second": 859.908, "eval_steps_per_second": 53.745, "step": 408000 }, { "epoch": 1.17, "learning_rate": 3.3893333333333335e-07, "loss": 2.4378, "step": 416000 }, { "epoch": 1.17, "eval_loss": 2.306403160095215, "eval_runtime": 351.5615, "eval_samples_per_second": 850.136, "eval_steps_per_second": 53.134, "step": 416000 }, { "epoch": 1.19, "eval_loss": 2.29843807220459, "eval_runtime": 344.6834, "eval_samples_per_second": 867.1, "eval_steps_per_second": 54.195, "step": 424000 }, { "epoch": 1.22, "learning_rate": 3.3619999999999995e-07, "loss": 2.4386, "step": 432000 }, { "epoch": 1.22, "eval_loss": 2.3056929111480713, "eval_runtime": 342.527, "eval_samples_per_second": 872.559, "eval_steps_per_second": 54.536, "step": 432000 }, { "epoch": 1.24, "eval_loss": 2.303525447845459, "eval_runtime": 343.5122, "eval_samples_per_second": 870.056, "eval_steps_per_second": 54.379, "step": 440000 }, { "epoch": 1.26, "learning_rate": 3.3346666666666666e-07, "loss": 2.4411, "step": 448000 }, { "epoch": 1.26, "eval_loss": 2.296870231628418, "eval_runtime": 341.4444, "eval_samples_per_second": 875.326, "eval_steps_per_second": 54.709, "step": 448000 }, { "epoch": 1.28, "eval_loss": 2.2930004596710205, "eval_runtime": 340.6318, "eval_samples_per_second": 877.414, "eval_steps_per_second": 54.839, "step": 456000 }, { "epoch": 1.31, "learning_rate": 3.307333333333333e-07, "loss": 2.4466, "step": 464000 }, { "epoch": 1.31, "eval_loss": 2.3004744052886963, "eval_runtime": 339.5984, "eval_samples_per_second": 880.084, "eval_steps_per_second": 55.006, "step": 464000 }, { "epoch": 1.33, "eval_loss": 2.2974722385406494, "eval_runtime": 339.4236, "eval_samples_per_second": 880.537, "eval_steps_per_second": 55.034, "step": 472000 }, { "epoch": 1.35, "learning_rate": 3.28e-07, "loss": 2.4451, "step": 480000 }, { "epoch": 1.35, "eval_loss": 2.304170608520508, "eval_runtime": 342.9256, "eval_samples_per_second": 871.545, "eval_steps_per_second": 54.472, "step": 480000 }, { "epoch": 1.37, "eval_loss": 2.306100845336914, "eval_runtime": 343.2906, "eval_samples_per_second": 870.618, "eval_steps_per_second": 54.415, "step": 488000 }, { "epoch": 1.4, "learning_rate": 3.252666666666667e-07, "loss": 2.4399, "step": 496000 }, { "epoch": 1.4, "eval_loss": 2.2986841201782227, "eval_runtime": 344.9178, "eval_samples_per_second": 866.511, "eval_steps_per_second": 54.158, "step": 496000 }, { "epoch": 1.42, "eval_loss": 2.29666805267334, "eval_runtime": 344.4845, "eval_samples_per_second": 867.601, "eval_steps_per_second": 54.226, "step": 504000 }, { "epoch": 1.44, "learning_rate": 3.2253333333333334e-07, "loss": 2.4397, "step": 512000 }, { "epoch": 1.44, "eval_loss": 2.300994873046875, "eval_runtime": 345.9506, "eval_samples_per_second": 863.924, "eval_steps_per_second": 53.996, "step": 512000 }, { "epoch": 1.47, "eval_loss": 2.3019394874572754, "eval_runtime": 349.661, "eval_samples_per_second": 854.756, "eval_steps_per_second": 53.423, "step": 520000 }, { "epoch": 1.49, "learning_rate": 3.198e-07, "loss": 2.4483, "step": 528000 }, { "epoch": 1.49, "eval_loss": 2.3009159564971924, "eval_runtime": 351.037, "eval_samples_per_second": 851.406, "eval_steps_per_second": 53.214, "step": 528000 }, { "epoch": 1.51, "eval_loss": 2.304800271987915, "eval_runtime": 346.881, "eval_samples_per_second": 861.607, "eval_steps_per_second": 53.851, "step": 536000 }, { "epoch": 1.53, "learning_rate": 3.1706666666666665e-07, "loss": 2.4436, "step": 544000 }, { "epoch": 1.53, "eval_loss": 2.3028886318206787, "eval_runtime": 351.2836, "eval_samples_per_second": 850.808, "eval_steps_per_second": 53.176, "step": 544000 }, { "epoch": 1.56, "eval_loss": 2.302586317062378, "eval_runtime": 346.7542, "eval_samples_per_second": 861.922, "eval_steps_per_second": 53.871, "step": 552000 }, { "epoch": 1.58, "learning_rate": 3.1433333333333336e-07, "loss": 2.4407, "step": 560000 }, { "epoch": 1.58, "eval_loss": 2.30268931388855, "eval_runtime": 342.4357, "eval_samples_per_second": 872.792, "eval_steps_per_second": 54.55, "step": 560000 }, { "epoch": 1.6, "eval_loss": 2.306097984313965, "eval_runtime": 343.1221, "eval_samples_per_second": 871.046, "eval_steps_per_second": 54.441, "step": 568000 }, { "epoch": 1.62, "learning_rate": 3.116e-07, "loss": 2.4364, "step": 576000 }, { "epoch": 1.62, "eval_loss": 2.2972164154052734, "eval_runtime": 340.2397, "eval_samples_per_second": 878.425, "eval_steps_per_second": 54.902, "step": 576000 }, { "epoch": 1.65, "eval_loss": 2.296656847000122, "eval_runtime": 344.7235, "eval_samples_per_second": 866.999, "eval_steps_per_second": 54.188, "step": 584000 }, { "epoch": 1.67, "learning_rate": 3.0886666666666667e-07, "loss": 2.4406, "step": 592000 }, { "epoch": 1.67, "eval_loss": 2.296457052230835, "eval_runtime": 344.6991, "eval_samples_per_second": 867.06, "eval_steps_per_second": 54.192, "step": 592000 }, { "epoch": 1.69, "eval_loss": 2.2965714931488037, "eval_runtime": 347.5835, "eval_samples_per_second": 859.865, "eval_steps_per_second": 53.742, "step": 600000 }, { "epoch": 1.71, "learning_rate": 3.061333333333333e-07, "loss": 2.4393, "step": 608000 }, { "epoch": 1.71, "eval_loss": 2.2982165813446045, "eval_runtime": 346.6362, "eval_samples_per_second": 862.215, "eval_steps_per_second": 53.889, "step": 608000 }, { "epoch": 1.74, "eval_loss": 2.29933762550354, "eval_runtime": 348.196, "eval_samples_per_second": 858.353, "eval_steps_per_second": 53.648, "step": 616000 }, { "epoch": 1.76, "learning_rate": 3.034e-07, "loss": 2.4352, "step": 624000 }, { "epoch": 1.76, "eval_loss": 2.2916171550750732, "eval_runtime": 351.0418, "eval_samples_per_second": 851.394, "eval_steps_per_second": 53.213, "step": 624000 }, { "epoch": 1.78, "eval_loss": 2.2930855751037598, "eval_runtime": 352.9412, "eval_samples_per_second": 846.812, "eval_steps_per_second": 52.927, "step": 632000 }, { "epoch": 1.8, "learning_rate": 3.0066666666666663e-07, "loss": 2.4366, "step": 640000 }, { "epoch": 1.8, "eval_loss": 2.3015854358673096, "eval_runtime": 346.5375, "eval_samples_per_second": 862.461, "eval_steps_per_second": 53.905, "step": 640000 }, { "epoch": 1.83, "eval_loss": 2.2984261512756348, "eval_runtime": 347.0524, "eval_samples_per_second": 861.181, "eval_steps_per_second": 53.825, "step": 648000 }, { "epoch": 1.85, "learning_rate": 2.9793333333333334e-07, "loss": 2.4361, "step": 656000 }, { "epoch": 1.85, "eval_loss": 2.2876782417297363, "eval_runtime": 347.6875, "eval_samples_per_second": 859.608, "eval_steps_per_second": 53.726, "step": 656000 }, { "epoch": 1.87, "eval_loss": 2.2983310222625732, "eval_runtime": 349.3813, "eval_samples_per_second": 855.441, "eval_steps_per_second": 53.466, "step": 664000 }, { "epoch": 1.89, "learning_rate": 2.952e-07, "loss": 2.437, "step": 672000 }, { "epoch": 1.89, "eval_loss": 2.303272008895874, "eval_runtime": 343.49, "eval_samples_per_second": 870.113, "eval_steps_per_second": 54.383, "step": 672000 }, { "epoch": 1.92, "eval_loss": 2.2928237915039062, "eval_runtime": 340.3613, "eval_samples_per_second": 878.111, "eval_steps_per_second": 54.883, "step": 680000 }, { "epoch": 1.94, "learning_rate": 2.9246666666666665e-07, "loss": 2.4488, "step": 688000 }, { "epoch": 1.94, "eval_loss": 2.2953317165374756, "eval_runtime": 344.7555, "eval_samples_per_second": 866.919, "eval_steps_per_second": 54.183, "step": 688000 }, { "epoch": 1.96, "eval_loss": 2.2945399284362793, "eval_runtime": 344.3288, "eval_samples_per_second": 867.993, "eval_steps_per_second": 54.25, "step": 696000 }, { "epoch": 1.98, "learning_rate": 2.897333333333333e-07, "loss": 2.4459, "step": 704000 }, { "epoch": 1.98, "eval_loss": 2.2960965633392334, "eval_runtime": 344.734, "eval_samples_per_second": 866.973, "eval_steps_per_second": 54.187, "step": 704000 }, { "epoch": 2.01, "eval_loss": 2.2899136543273926, "eval_runtime": 351.056, "eval_samples_per_second": 851.36, "eval_steps_per_second": 53.211, "step": 712000 }, { "epoch": 2.03, "learning_rate": 2.8699999999999996e-07, "loss": 2.4334, "step": 720000 }, { "epoch": 2.03, "eval_loss": 2.296393632888794, "eval_runtime": 350.4146, "eval_samples_per_second": 852.918, "eval_steps_per_second": 53.308, "step": 720000 }, { "epoch": 2.05, "eval_loss": 2.289637565612793, "eval_runtime": 352.6868, "eval_samples_per_second": 847.423, "eval_steps_per_second": 52.965, "step": 728000 }, { "epoch": 2.07, "learning_rate": 2.8426666666666667e-07, "loss": 2.4343, "step": 736000 }, { "epoch": 2.07, "eval_loss": 2.2953732013702393, "eval_runtime": 351.9552, "eval_samples_per_second": 849.185, "eval_steps_per_second": 53.075, "step": 736000 }, { "epoch": 2.1, "eval_loss": 2.3004140853881836, "eval_runtime": 349.5709, "eval_samples_per_second": 854.977, "eval_steps_per_second": 53.437, "step": 744000 }, { "epoch": 2.12, "learning_rate": 2.815333333333333e-07, "loss": 2.4345, "step": 752000 }, { "epoch": 2.12, "eval_loss": 2.2892417907714844, "eval_runtime": 346.1285, "eval_samples_per_second": 863.48, "eval_steps_per_second": 53.968, "step": 752000 }, { "epoch": 2.14, "eval_loss": 2.2995636463165283, "eval_runtime": 344.8242, "eval_samples_per_second": 866.746, "eval_steps_per_second": 54.173, "step": 760000 }, { "epoch": 2.16, "learning_rate": 2.7880000000000003e-07, "loss": 2.4386, "step": 768000 }, { "epoch": 2.16, "eval_loss": 2.2885777950286865, "eval_runtime": 347.239, "eval_samples_per_second": 860.718, "eval_steps_per_second": 53.796, "step": 768000 }, { "epoch": 2.19, "eval_loss": 2.297384262084961, "eval_runtime": 343.4517, "eval_samples_per_second": 870.21, "eval_steps_per_second": 54.389, "step": 776000 }, { "epoch": 2.21, "learning_rate": 2.7606666666666664e-07, "loss": 2.434, "step": 784000 }, { "epoch": 2.21, "eval_loss": 2.288207769393921, "eval_runtime": 340.9633, "eval_samples_per_second": 876.561, "eval_steps_per_second": 54.786, "step": 784000 }, { "epoch": 2.23, "eval_loss": 2.2965118885040283, "eval_runtime": 346.5403, "eval_samples_per_second": 862.454, "eval_steps_per_second": 53.904, "step": 792000 }, { "epoch": 2.25, "learning_rate": 2.733333333333333e-07, "loss": 2.4379, "step": 800000 }, { "epoch": 2.25, "eval_loss": 2.2898786067962646, "eval_runtime": 344.2873, "eval_samples_per_second": 868.098, "eval_steps_per_second": 54.257, "step": 800000 }, { "epoch": 2.28, "eval_loss": 2.2937777042388916, "eval_runtime": 348.0922, "eval_samples_per_second": 858.609, "eval_steps_per_second": 53.664, "step": 808000 }, { "epoch": 2.3, "learning_rate": 2.706e-07, "loss": 2.4356, "step": 816000 }, { "epoch": 2.3, "eval_loss": 2.299677848815918, "eval_runtime": 349.122, "eval_samples_per_second": 856.076, "eval_steps_per_second": 53.506, "step": 816000 }, { "epoch": 2.32, "eval_loss": 2.2942495346069336, "eval_runtime": 347.1934, "eval_samples_per_second": 860.831, "eval_steps_per_second": 53.803, "step": 824000 }, { "epoch": 2.34, "learning_rate": 2.6786666666666666e-07, "loss": 2.4399, "step": 832000 }, { "epoch": 2.34, "eval_loss": 2.291579246520996, "eval_runtime": 353.1113, "eval_samples_per_second": 846.405, "eval_steps_per_second": 52.901, "step": 832000 }, { "epoch": 2.37, "eval_loss": 2.293363332748413, "eval_runtime": 349.0076, "eval_samples_per_second": 856.357, "eval_steps_per_second": 53.523, "step": 840000 }, { "epoch": 2.39, "learning_rate": 2.651333333333333e-07, "loss": 2.437, "step": 848000 }, { "epoch": 2.39, "eval_loss": 2.2978403568267822, "eval_runtime": 347.2528, "eval_samples_per_second": 860.684, "eval_steps_per_second": 53.794, "step": 848000 }, { "epoch": 2.41, "eval_loss": 2.28342342376709, "eval_runtime": 349.8061, "eval_samples_per_second": 854.402, "eval_steps_per_second": 53.401, "step": 856000 }, { "epoch": 2.43, "learning_rate": 2.624e-07, "loss": 2.4311, "step": 864000 }, { "epoch": 2.43, "eval_loss": 2.2872231006622314, "eval_runtime": 346.3845, "eval_samples_per_second": 862.842, "eval_steps_per_second": 53.929, "step": 864000 }, { "epoch": 2.46, "eval_loss": 2.292755365371704, "eval_runtime": 349.3227, "eval_samples_per_second": 855.584, "eval_steps_per_second": 53.475, "step": 872000 }, { "epoch": 2.48, "learning_rate": 2.596666666666667e-07, "loss": 2.4453, "step": 880000 }, { "epoch": 2.48, "eval_loss": 2.2888131141662598, "eval_runtime": 343.7493, "eval_samples_per_second": 869.456, "eval_steps_per_second": 54.342, "step": 880000 }, { "epoch": 2.5, "eval_loss": 2.293339490890503, "eval_runtime": 345.1119, "eval_samples_per_second": 866.023, "eval_steps_per_second": 54.127, "step": 888000 }, { "epoch": 2.52, "learning_rate": 2.5693333333333333e-07, "loss": 2.4434, "step": 896000 }, { "epoch": 2.52, "eval_loss": 2.2911081314086914, "eval_runtime": 350.4894, "eval_samples_per_second": 852.736, "eval_steps_per_second": 53.297, "step": 896000 }, { "epoch": 2.55, "eval_loss": 2.292876720428467, "eval_runtime": 348.1116, "eval_samples_per_second": 858.561, "eval_steps_per_second": 53.661, "step": 904000 }, { "epoch": 2.57, "learning_rate": 2.542e-07, "loss": 2.443, "step": 912000 }, { "epoch": 2.57, "eval_loss": 2.2925541400909424, "eval_runtime": 350.1813, "eval_samples_per_second": 853.486, "eval_steps_per_second": 53.344, "step": 912000 }, { "epoch": 2.59, "eval_loss": 2.290844440460205, "eval_runtime": 350.6581, "eval_samples_per_second": 852.326, "eval_steps_per_second": 53.271, "step": 920000 }, { "epoch": 2.61, "learning_rate": 2.5146666666666664e-07, "loss": 2.4361, "step": 928000 }, { "epoch": 2.61, "eval_loss": 2.291395902633667, "eval_runtime": 349.6079, "eval_samples_per_second": 854.886, "eval_steps_per_second": 53.431, "step": 928000 }, { "epoch": 2.64, "eval_loss": 2.2877848148345947, "eval_runtime": 355.0616, "eval_samples_per_second": 841.755, "eval_steps_per_second": 52.611, "step": 936000 }, { "epoch": 2.66, "learning_rate": 2.4873333333333335e-07, "loss": 2.44, "step": 944000 }, { "epoch": 2.66, "eval_loss": 2.2872154712677, "eval_runtime": 349.7428, "eval_samples_per_second": 854.557, "eval_steps_per_second": 53.411, "step": 944000 }, { "epoch": 2.68, "eval_loss": 2.285693407058716, "eval_runtime": 350.4484, "eval_samples_per_second": 852.836, "eval_steps_per_second": 53.303, "step": 952000 }, { "epoch": 2.7, "learning_rate": 2.46e-07, "loss": 2.4447, "step": 960000 }, { "epoch": 2.7, "eval_loss": 2.293245792388916, "eval_runtime": 351.2715, "eval_samples_per_second": 850.838, "eval_steps_per_second": 53.178, "step": 960000 }, { "epoch": 2.73, "eval_loss": 2.2918214797973633, "eval_runtime": 351.5132, "eval_samples_per_second": 850.253, "eval_steps_per_second": 53.142, "step": 968000 }, { "epoch": 2.75, "learning_rate": 2.4326666666666666e-07, "loss": 2.4362, "step": 976000 }, { "epoch": 2.75, "eval_loss": 2.2874860763549805, "eval_runtime": 352.7279, "eval_samples_per_second": 847.324, "eval_steps_per_second": 52.959, "step": 976000 }, { "epoch": 2.77, "eval_loss": 2.289992570877075, "eval_runtime": 344.5561, "eval_samples_per_second": 867.42, "eval_steps_per_second": 54.215, "step": 984000 }, { "epoch": 2.8, "learning_rate": 2.405333333333333e-07, "loss": 2.4457, "step": 992000 }, { "epoch": 2.8, "eval_loss": 2.2913424968719482, "eval_runtime": 345.1921, "eval_samples_per_second": 865.822, "eval_steps_per_second": 54.115, "step": 992000 }, { "epoch": 2.82, "eval_loss": 2.2871413230895996, "eval_runtime": 352.8496, "eval_samples_per_second": 847.032, "eval_steps_per_second": 52.94, "step": 1000000 }, { "epoch": 2.84, "learning_rate": 2.3779999999999997e-07, "loss": 2.4474, "step": 1008000 }, { "epoch": 2.84, "eval_loss": 2.287524700164795, "eval_runtime": 347.2864, "eval_samples_per_second": 860.601, "eval_steps_per_second": 53.788, "step": 1008000 }, { "epoch": 2.86, "eval_loss": 2.2902050018310547, "eval_runtime": 352.1643, "eval_samples_per_second": 848.681, "eval_steps_per_second": 53.043, "step": 1016000 }, { "epoch": 2.89, "learning_rate": 2.3506666666666668e-07, "loss": 2.444, "step": 1024000 }, { "epoch": 2.89, "eval_loss": 2.2878451347351074, "eval_runtime": 350.8228, "eval_samples_per_second": 851.926, "eval_steps_per_second": 53.246, "step": 1024000 }, { "epoch": 2.91, "eval_loss": 2.2856147289276123, "eval_runtime": 351.1127, "eval_samples_per_second": 851.222, "eval_steps_per_second": 53.202, "step": 1032000 }, { "epoch": 2.93, "learning_rate": 2.3233333333333334e-07, "loss": 2.4316, "step": 1040000 }, { "epoch": 2.93, "eval_loss": 2.290835380554199, "eval_runtime": 350.8558, "eval_samples_per_second": 851.846, "eval_steps_per_second": 53.241, "step": 1040000 }, { "epoch": 2.95, "eval_loss": 2.288860559463501, "eval_runtime": 347.8843, "eval_samples_per_second": 859.122, "eval_steps_per_second": 53.696, "step": 1048000 }, { "epoch": 2.98, "learning_rate": 2.2960000000000002e-07, "loss": 2.4388, "step": 1056000 }, { "epoch": 2.98, "eval_loss": 2.2922022342681885, "eval_runtime": 347.615, "eval_samples_per_second": 859.787, "eval_steps_per_second": 53.738, "step": 1056000 }, { "epoch": 3.0, "eval_loss": 2.2867324352264404, "eval_runtime": 344.8012, "eval_samples_per_second": 866.804, "eval_steps_per_second": 54.176, "step": 1064000 }, { "epoch": 3.02, "learning_rate": 2.2686666666666667e-07, "loss": 2.442, "step": 1072000 }, { "epoch": 3.02, "eval_loss": 2.2911500930786133, "eval_runtime": 350.8498, "eval_samples_per_second": 851.86, "eval_steps_per_second": 53.242, "step": 1072000 }, { "epoch": 3.04, "eval_loss": 2.28912615776062, "eval_runtime": 348.7498, "eval_samples_per_second": 856.99, "eval_steps_per_second": 53.563, "step": 1080000 }, { "epoch": 3.07, "learning_rate": 2.2413333333333333e-07, "loss": 2.4388, "step": 1088000 }, { "epoch": 3.07, "eval_loss": 2.2854888439178467, "eval_runtime": 342.8616, "eval_samples_per_second": 871.707, "eval_steps_per_second": 54.483, "step": 1088000 }, { "epoch": 3.09, "eval_loss": 2.2949378490448, "eval_runtime": 347.9027, "eval_samples_per_second": 859.076, "eval_steps_per_second": 53.693, "step": 1096000 }, { "epoch": 3.11, "learning_rate": 2.214e-07, "loss": 2.4296, "step": 1104000 }, { "epoch": 3.11, "eval_loss": 2.2852776050567627, "eval_runtime": 349.7013, "eval_samples_per_second": 854.658, "eval_steps_per_second": 53.417, "step": 1104000 }, { "epoch": 3.13, "eval_loss": 2.2854092121124268, "eval_runtime": 353.0802, "eval_samples_per_second": 846.479, "eval_steps_per_second": 52.906, "step": 1112000 }, { "epoch": 3.16, "learning_rate": 2.1866666666666667e-07, "loss": 2.4411, "step": 1120000 }, { "epoch": 3.16, "eval_loss": 2.2902419567108154, "eval_runtime": 354.848, "eval_samples_per_second": 842.262, "eval_steps_per_second": 52.642, "step": 1120000 }, { "epoch": 3.18, "eval_loss": 2.2901620864868164, "eval_runtime": 350.3589, "eval_samples_per_second": 853.054, "eval_steps_per_second": 53.317, "step": 1128000 }, { "epoch": 3.2, "learning_rate": 2.1593333333333332e-07, "loss": 2.4354, "step": 1136000 }, { "epoch": 3.2, "eval_loss": 2.287290573120117, "eval_runtime": 350.0116, "eval_samples_per_second": 853.9, "eval_steps_per_second": 53.37, "step": 1136000 }, { "epoch": 3.22, "eval_loss": 2.293120861053467, "eval_runtime": 349.3584, "eval_samples_per_second": 855.497, "eval_steps_per_second": 53.469, "step": 1144000 }, { "epoch": 3.25, "learning_rate": 2.132e-07, "loss": 2.4436, "step": 1152000 }, { "epoch": 3.25, "eval_loss": 2.290649175643921, "eval_runtime": 343.8183, "eval_samples_per_second": 869.282, "eval_steps_per_second": 54.331, "step": 1152000 }, { "epoch": 3.27, "eval_loss": 2.2945170402526855, "eval_runtime": 346.9787, "eval_samples_per_second": 861.364, "eval_steps_per_second": 53.836, "step": 1160000 }, { "epoch": 3.29, "learning_rate": 2.1046666666666666e-07, "loss": 2.4372, "step": 1168000 }, { "epoch": 3.29, "eval_loss": 2.289876699447632, "eval_runtime": 345.2265, "eval_samples_per_second": 865.736, "eval_steps_per_second": 54.109, "step": 1168000 }, { "epoch": 3.31, "eval_loss": 2.286865234375, "eval_runtime": 348.5092, "eval_samples_per_second": 857.581, "eval_steps_per_second": 53.6, "step": 1176000 }, { "epoch": 3.34, "learning_rate": 2.0773333333333334e-07, "loss": 2.4327, "step": 1184000 }, { "epoch": 3.34, "eval_loss": 2.2891159057617188, "eval_runtime": 348.9782, "eval_samples_per_second": 856.429, "eval_steps_per_second": 53.528, "step": 1184000 }, { "epoch": 3.36, "eval_loss": 2.293283462524414, "eval_runtime": 348.4433, "eval_samples_per_second": 857.744, "eval_steps_per_second": 53.61, "step": 1192000 }, { "epoch": 3.38, "learning_rate": 2.05e-07, "loss": 2.4387, "step": 1200000 }, { "epoch": 3.38, "eval_loss": 2.284895420074463, "eval_runtime": 356.461, "eval_samples_per_second": 838.451, "eval_steps_per_second": 52.404, "step": 1200000 }, { "epoch": 3.4, "eval_loss": 2.293365716934204, "eval_runtime": 357.7488, "eval_samples_per_second": 835.433, "eval_steps_per_second": 52.215, "step": 1208000 }, { "epoch": 3.43, "learning_rate": 2.0226666666666668e-07, "loss": 2.4433, "step": 1216000 }, { "epoch": 3.43, "eval_loss": 2.287588357925415, "eval_runtime": 357.109, "eval_samples_per_second": 836.929, "eval_steps_per_second": 52.309, "step": 1216000 }, { "epoch": 3.45, "eval_loss": 2.285968065261841, "eval_runtime": 350.8655, "eval_samples_per_second": 851.822, "eval_steps_per_second": 53.24, "step": 1224000 }, { "epoch": 3.47, "learning_rate": 1.9953333333333333e-07, "loss": 2.4396, "step": 1232000 }, { "epoch": 3.47, "eval_loss": 2.289820432662964, "eval_runtime": 349.7393, "eval_samples_per_second": 854.565, "eval_steps_per_second": 53.411, "step": 1232000 }, { "epoch": 3.49, "eval_loss": 2.2829771041870117, "eval_runtime": 350.3367, "eval_samples_per_second": 853.108, "eval_steps_per_second": 53.32, "step": 1240000 }, { "epoch": 3.52, "learning_rate": 1.968e-07, "loss": 2.4332, "step": 1248000 }, { "epoch": 3.52, "eval_loss": 2.2855498790740967, "eval_runtime": 345.6301, "eval_samples_per_second": 864.725, "eval_steps_per_second": 54.046, "step": 1248000 }, { "epoch": 3.54, "eval_loss": 2.292543649673462, "eval_runtime": 349.3325, "eval_samples_per_second": 855.56, "eval_steps_per_second": 53.473, "step": 1256000 }, { "epoch": 3.56, "learning_rate": 1.9406666666666667e-07, "loss": 2.4332, "step": 1264000 }, { "epoch": 3.56, "eval_loss": 2.283198118209839, "eval_runtime": 345.7713, "eval_samples_per_second": 864.372, "eval_steps_per_second": 54.024, "step": 1264000 }, { "epoch": 3.58, "eval_loss": 2.285101890563965, "eval_runtime": 349.5425, "eval_samples_per_second": 855.046, "eval_steps_per_second": 53.441, "step": 1272000 }, { "epoch": 3.61, "learning_rate": 1.9133333333333333e-07, "loss": 2.4307, "step": 1280000 }, { "epoch": 3.61, "eval_loss": 2.291198968887329, "eval_runtime": 351.2814, "eval_samples_per_second": 850.814, "eval_steps_per_second": 53.177, "step": 1280000 }, { "epoch": 3.63, "eval_loss": 2.2924153804779053, "eval_runtime": 355.1993, "eval_samples_per_second": 841.429, "eval_steps_per_second": 52.59, "step": 1288000 }, { "epoch": 3.65, "learning_rate": 1.886e-07, "loss": 2.4432, "step": 1296000 }, { "epoch": 3.65, "eval_loss": 2.2916078567504883, "eval_runtime": 355.8076, "eval_samples_per_second": 839.991, "eval_steps_per_second": 52.5, "step": 1296000 }, { "epoch": 3.67, "eval_loss": 2.289199113845825, "eval_runtime": 356.5105, "eval_samples_per_second": 838.334, "eval_steps_per_second": 52.397, "step": 1304000 }, { "epoch": 3.7, "learning_rate": 1.8586666666666666e-07, "loss": 2.4319, "step": 1312000 }, { "epoch": 3.7, "eval_loss": 2.2908453941345215, "eval_runtime": 354.6936, "eval_samples_per_second": 842.629, "eval_steps_per_second": 52.665, "step": 1312000 }, { "epoch": 3.72, "eval_loss": 2.2897725105285645, "eval_runtime": 353.5637, "eval_samples_per_second": 845.321, "eval_steps_per_second": 52.833, "step": 1320000 }, { "epoch": 3.74, "learning_rate": 1.8313333333333332e-07, "loss": 2.4394, "step": 1328000 }, { "epoch": 3.74, "eval_loss": 2.28599214553833, "eval_runtime": 349.9235, "eval_samples_per_second": 854.115, "eval_steps_per_second": 53.383, "step": 1328000 }, { "epoch": 3.76, "eval_loss": 2.2879199981689453, "eval_runtime": 353.8988, "eval_samples_per_second": 844.521, "eval_steps_per_second": 52.783, "step": 1336000 }, { "epoch": 3.79, "learning_rate": 1.804e-07, "loss": 2.4462, "step": 1344000 }, { "epoch": 3.79, "eval_loss": 2.2865374088287354, "eval_runtime": 349.6059, "eval_samples_per_second": 854.891, "eval_steps_per_second": 53.432, "step": 1344000 }, { "epoch": 3.81, "eval_loss": 2.284397602081299, "eval_runtime": 350.054, "eval_samples_per_second": 853.797, "eval_steps_per_second": 53.363, "step": 1352000 }, { "epoch": 3.83, "learning_rate": 1.7766666666666666e-07, "loss": 2.4373, "step": 1360000 }, { "epoch": 3.83, "eval_loss": 2.293339252471924, "eval_runtime": 349.6245, "eval_samples_per_second": 854.846, "eval_steps_per_second": 53.429, "step": 1360000 }, { "epoch": 3.85, "eval_loss": 2.2876994609832764, "eval_runtime": 349.2234, "eval_samples_per_second": 855.827, "eval_steps_per_second": 53.49, "step": 1368000 }, { "epoch": 3.88, "learning_rate": 1.7493333333333334e-07, "loss": 2.4436, "step": 1376000 }, { "epoch": 3.88, "eval_loss": 2.29374098777771, "eval_runtime": 359.6908, "eval_samples_per_second": 830.922, "eval_steps_per_second": 51.933, "step": 1376000 }, { "epoch": 3.9, "eval_loss": 2.2901968955993652, "eval_runtime": 356.9892, "eval_samples_per_second": 837.21, "eval_steps_per_second": 52.327, "step": 1384000 }, { "epoch": 3.92, "learning_rate": 1.722e-07, "loss": 2.4387, "step": 1392000 }, { "epoch": 3.92, "eval_loss": 2.286952495574951, "eval_runtime": 354.6402, "eval_samples_per_second": 842.756, "eval_steps_per_second": 52.673, "step": 1392000 }, { "epoch": 3.94, "eval_loss": 2.2822632789611816, "eval_runtime": 357.4743, "eval_samples_per_second": 836.074, "eval_steps_per_second": 52.256, "step": 1400000 }, { "epoch": 3.97, "learning_rate": 1.6946666666666668e-07, "loss": 2.4384, "step": 1408000 }, { "epoch": 3.97, "eval_loss": 2.2899303436279297, "eval_runtime": 354.468, "eval_samples_per_second": 843.165, "eval_steps_per_second": 52.699, "step": 1408000 }, { "epoch": 3.99, "eval_loss": 2.286475658416748, "eval_runtime": 351.3521, "eval_samples_per_second": 850.642, "eval_steps_per_second": 53.166, "step": 1416000 }, { "epoch": 4.01, "learning_rate": 1.6673333333333333e-07, "loss": 2.4389, "step": 1424000 }, { "epoch": 4.01, "eval_loss": 2.285567283630371, "eval_runtime": 347.7423, "eval_samples_per_second": 859.473, "eval_steps_per_second": 53.718, "step": 1424000 }, { "epoch": 4.03, "eval_loss": 2.291102170944214, "eval_runtime": 351.9607, "eval_samples_per_second": 849.172, "eval_steps_per_second": 53.074, "step": 1432000 }, { "epoch": 4.06, "learning_rate": 1.64e-07, "loss": 2.4408, "step": 1440000 }, { "epoch": 4.06, "eval_loss": 2.2906458377838135, "eval_runtime": 351.8929, "eval_samples_per_second": 849.335, "eval_steps_per_second": 53.084, "step": 1440000 }, { "epoch": 4.08, "eval_loss": 2.2860496044158936, "eval_runtime": 346.7014, "eval_samples_per_second": 862.053, "eval_steps_per_second": 53.879, "step": 1448000 }, { "epoch": 4.1, "learning_rate": 1.6126666666666667e-07, "loss": 2.4424, "step": 1456000 }, { "epoch": 4.1, "eval_loss": 2.281554937362671, "eval_runtime": 350.8804, "eval_samples_per_second": 851.786, "eval_steps_per_second": 53.238, "step": 1456000 }, { "epoch": 4.12, "eval_loss": 2.2850003242492676, "eval_runtime": 354.4123, "eval_samples_per_second": 843.297, "eval_steps_per_second": 52.707, "step": 1464000 }, { "epoch": 4.15, "learning_rate": 1.5853333333333332e-07, "loss": 2.4446, "step": 1472000 }, { "epoch": 4.15, "eval_loss": 2.2936201095581055, "eval_runtime": 356.785, "eval_samples_per_second": 837.689, "eval_steps_per_second": 52.356, "step": 1472000 }, { "epoch": 4.17, "eval_loss": 2.282886505126953, "eval_runtime": 359.985, "eval_samples_per_second": 830.243, "eval_steps_per_second": 51.891, "step": 1480000 }, { "epoch": 4.19, "learning_rate": 1.558e-07, "loss": 2.4419, "step": 1488000 }, { "epoch": 4.19, "eval_loss": 2.2871294021606445, "eval_runtime": 354.402, "eval_samples_per_second": 843.322, "eval_steps_per_second": 52.709, "step": 1488000 }, { "epoch": 4.22, "eval_loss": 2.289201259613037, "eval_runtime": 355.8187, "eval_samples_per_second": 839.964, "eval_steps_per_second": 52.499, "step": 1496000 }, { "epoch": 4.24, "learning_rate": 1.5306666666666666e-07, "loss": 2.4327, "step": 1504000 }, { "epoch": 4.24, "eval_loss": 2.282169818878174, "eval_runtime": 352.2189, "eval_samples_per_second": 848.549, "eval_steps_per_second": 53.035, "step": 1504000 }, { "epoch": 4.26, "eval_loss": 2.290008544921875, "eval_runtime": 350.005, "eval_samples_per_second": 853.916, "eval_steps_per_second": 53.371, "step": 1512000 }, { "epoch": 4.28, "learning_rate": 1.5033333333333332e-07, "loss": 2.4346, "step": 1520000 }, { "epoch": 4.28, "eval_loss": 2.2906084060668945, "eval_runtime": 349.8157, "eval_samples_per_second": 854.379, "eval_steps_per_second": 53.4, "step": 1520000 }, { "epoch": 4.31, "eval_loss": 2.2836883068084717, "eval_runtime": 348.3899, "eval_samples_per_second": 857.875, "eval_steps_per_second": 53.618, "step": 1528000 }, { "epoch": 4.33, "learning_rate": 1.476e-07, "loss": 2.4342, "step": 1536000 }, { "epoch": 4.33, "eval_loss": 2.284578323364258, "eval_runtime": 354.2741, "eval_samples_per_second": 843.627, "eval_steps_per_second": 52.728, "step": 1536000 }, { "epoch": 4.35, "eval_loss": 2.286259412765503, "eval_runtime": 354.2335, "eval_samples_per_second": 843.723, "eval_steps_per_second": 52.734, "step": 1544000 }, { "epoch": 4.37, "learning_rate": 1.4486666666666665e-07, "loss": 2.4381, "step": 1552000 }, { "epoch": 4.37, "eval_loss": 2.2939820289611816, "eval_runtime": 355.7068, "eval_samples_per_second": 840.228, "eval_steps_per_second": 52.515, "step": 1552000 }, { "epoch": 4.4, "eval_loss": 2.289985179901123, "eval_runtime": 357.8923, "eval_samples_per_second": 835.098, "eval_steps_per_second": 52.194, "step": 1560000 }, { "epoch": 4.42, "learning_rate": 1.4213333333333334e-07, "loss": 2.4445, "step": 1568000 }, { "epoch": 4.42, "eval_loss": 2.288726568222046, "eval_runtime": 357.4652, "eval_samples_per_second": 836.095, "eval_steps_per_second": 52.257, "step": 1568000 }, { "epoch": 4.44, "eval_loss": 2.2901298999786377, "eval_runtime": 359.0951, "eval_samples_per_second": 832.3, "eval_steps_per_second": 52.02, "step": 1576000 }, { "epoch": 4.46, "learning_rate": 1.3940000000000002e-07, "loss": 2.4306, "step": 1584000 }, { "epoch": 4.46, "eval_loss": 2.2832207679748535, "eval_runtime": 355.0248, "eval_samples_per_second": 841.843, "eval_steps_per_second": 52.616, "step": 1584000 }, { "epoch": 4.49, "eval_loss": 2.286226511001587, "eval_runtime": 351.3817, "eval_samples_per_second": 850.571, "eval_steps_per_second": 53.162, "step": 1592000 }, { "epoch": 4.51, "learning_rate": 1.3666666666666665e-07, "loss": 2.4348, "step": 1600000 }, { "epoch": 4.51, "eval_loss": 2.287696123123169, "eval_runtime": 352.6935, "eval_samples_per_second": 847.407, "eval_steps_per_second": 52.964, "step": 1600000 }, { "epoch": 4.53, "eval_loss": 2.283357858657837, "eval_runtime": 355.8598, "eval_samples_per_second": 839.867, "eval_steps_per_second": 52.493, "step": 1608000 }, { "epoch": 4.55, "learning_rate": 1.3393333333333333e-07, "loss": 2.4446, "step": 1616000 }, { "epoch": 4.55, "eval_loss": 2.289189100265503, "eval_runtime": 355.3202, "eval_samples_per_second": 841.143, "eval_steps_per_second": 52.572, "step": 1616000 }, { "epoch": 4.58, "eval_loss": 2.2799973487854004, "eval_runtime": 349.6872, "eval_samples_per_second": 854.692, "eval_steps_per_second": 53.419, "step": 1624000 }, { "epoch": 4.6, "learning_rate": 1.312e-07, "loss": 2.444, "step": 1632000 }, { "epoch": 4.6, "eval_loss": 2.289069652557373, "eval_runtime": 359.7218, "eval_samples_per_second": 830.85, "eval_steps_per_second": 51.929, "step": 1632000 }, { "epoch": 4.62, "eval_loss": 2.283940315246582, "eval_runtime": 359.6356, "eval_samples_per_second": 831.05, "eval_steps_per_second": 51.941, "step": 1640000 }, { "epoch": 4.64, "learning_rate": 1.2846666666666667e-07, "loss": 2.4335, "step": 1648000 }, { "epoch": 4.64, "eval_loss": 2.278726100921631, "eval_runtime": 360.7957, "eval_samples_per_second": 828.377, "eval_steps_per_second": 51.774, "step": 1648000 }, { "epoch": 4.67, "eval_loss": 2.2856054306030273, "eval_runtime": 359.4924, "eval_samples_per_second": 831.381, "eval_steps_per_second": 51.962, "step": 1656000 }, { "epoch": 4.69, "learning_rate": 1.2573333333333332e-07, "loss": 2.4369, "step": 1664000 }, { "epoch": 4.69, "eval_loss": 2.288928985595703, "eval_runtime": 361.8657, "eval_samples_per_second": 825.928, "eval_steps_per_second": 51.621, "step": 1664000 }, { "epoch": 4.71, "eval_loss": 2.2900381088256836, "eval_runtime": 359.8563, "eval_samples_per_second": 830.54, "eval_steps_per_second": 51.91, "step": 1672000 }, { "epoch": 4.73, "learning_rate": 1.23e-07, "loss": 2.4446, "step": 1680000 }, { "epoch": 4.73, "eval_loss": 2.2890822887420654, "eval_runtime": 351.5884, "eval_samples_per_second": 850.071, "eval_steps_per_second": 53.13, "step": 1680000 }, { "epoch": 4.76, "eval_loss": 2.2835495471954346, "eval_runtime": 351.6589, "eval_samples_per_second": 849.9, "eval_steps_per_second": 53.12, "step": 1688000 }, { "epoch": 4.78, "learning_rate": 1.2026666666666666e-07, "loss": 2.4334, "step": 1696000 }, { "epoch": 4.78, "eval_loss": 2.284076690673828, "eval_runtime": 354.655, "eval_samples_per_second": 842.72, "eval_steps_per_second": 52.671, "step": 1696000 }, { "epoch": 4.8, "eval_loss": 2.289497137069702, "eval_runtime": 353.0802, "eval_samples_per_second": 846.479, "eval_steps_per_second": 52.906, "step": 1704000 }, { "epoch": 4.82, "learning_rate": 1.1753333333333334e-07, "loss": 2.4426, "step": 1712000 }, { "epoch": 4.82, "eval_loss": 2.2832465171813965, "eval_runtime": 357.8036, "eval_samples_per_second": 835.305, "eval_steps_per_second": 52.207, "step": 1712000 }, { "epoch": 4.85, "eval_loss": 2.286963939666748, "eval_runtime": 357.5218, "eval_samples_per_second": 835.963, "eval_steps_per_second": 52.249, "step": 1720000 }, { "epoch": 4.87, "learning_rate": 1.1480000000000001e-07, "loss": 2.4434, "step": 1728000 }, { "epoch": 4.87, "eval_loss": 2.2818775177001953, "eval_runtime": 361.0674, "eval_samples_per_second": 827.754, "eval_steps_per_second": 51.735, "step": 1728000 }, { "epoch": 4.89, "eval_loss": 2.2896018028259277, "eval_runtime": 362.7301, "eval_samples_per_second": 823.96, "eval_steps_per_second": 51.498, "step": 1736000 }, { "epoch": 4.91, "learning_rate": 1.1206666666666666e-07, "loss": 2.4382, "step": 1744000 }, { "epoch": 4.91, "eval_loss": 2.2868669033050537, "eval_runtime": 358.283, "eval_samples_per_second": 834.187, "eval_steps_per_second": 52.138, "step": 1744000 }, { "epoch": 4.94, "eval_loss": 2.284435272216797, "eval_runtime": 355.6376, "eval_samples_per_second": 840.392, "eval_steps_per_second": 52.525, "step": 1752000 }, { "epoch": 4.96, "learning_rate": 1.0933333333333333e-07, "loss": 2.4405, "step": 1760000 }, { "epoch": 4.96, "eval_loss": 2.2819952964782715, "eval_runtime": 359.1374, "eval_samples_per_second": 832.202, "eval_steps_per_second": 52.014, "step": 1760000 }, { "epoch": 4.98, "eval_loss": 2.292201519012451, "eval_runtime": 357.4483, "eval_samples_per_second": 836.135, "eval_steps_per_second": 52.259, "step": 1768000 }, { "epoch": 5.0, "learning_rate": 1.066e-07, "loss": 2.4507, "step": 1776000 }, { "epoch": 5.0, "eval_loss": 2.2807607650756836, "eval_runtime": 355.1669, "eval_samples_per_second": 841.506, "eval_steps_per_second": 52.595, "step": 1776000 }, { "epoch": 5.03, "eval_loss": 2.2868053913116455, "eval_runtime": 352.5728, "eval_samples_per_second": 847.697, "eval_steps_per_second": 52.982, "step": 1784000 }, { "epoch": 5.05, "learning_rate": 1.0386666666666667e-07, "loss": 2.4437, "step": 1792000 }, { "epoch": 5.05, "eval_loss": 2.281529664993286, "eval_runtime": 357.7225, "eval_samples_per_second": 835.494, "eval_steps_per_second": 52.219, "step": 1792000 }, { "epoch": 5.07, "eval_loss": 2.2889392375946045, "eval_runtime": 364.19, "eval_samples_per_second": 820.657, "eval_steps_per_second": 51.292, "step": 1800000 }, { "epoch": 5.09, "learning_rate": 1.0113333333333334e-07, "loss": 2.4373, "step": 1808000 }, { "epoch": 5.09, "eval_loss": 2.279672384262085, "eval_runtime": 361.6551, "eval_samples_per_second": 826.409, "eval_steps_per_second": 51.651, "step": 1808000 }, { "epoch": 5.12, "eval_loss": 2.2881906032562256, "eval_runtime": 357.9221, "eval_samples_per_second": 835.028, "eval_steps_per_second": 52.19, "step": 1816000 }, { "epoch": 5.14, "learning_rate": 9.84e-08, "loss": 2.4368, "step": 1824000 }, { "epoch": 5.14, "eval_loss": 2.2879436016082764, "eval_runtime": 359.3013, "eval_samples_per_second": 831.823, "eval_steps_per_second": 51.99, "step": 1824000 }, { "epoch": 5.16, "eval_loss": 2.2829103469848633, "eval_runtime": 358.506, "eval_samples_per_second": 833.668, "eval_steps_per_second": 52.105, "step": 1832000 }, { "epoch": 5.18, "learning_rate": 9.566666666666666e-08, "loss": 2.4398, "step": 1840000 }, { "epoch": 5.18, "eval_loss": 2.2866697311401367, "eval_runtime": 356.6586, "eval_samples_per_second": 837.986, "eval_steps_per_second": 52.375, "step": 1840000 }, { "epoch": 5.21, "eval_loss": 2.282909393310547, "eval_runtime": 351.4508, "eval_samples_per_second": 850.404, "eval_steps_per_second": 53.151, "step": 1848000 }, { "epoch": 5.23, "learning_rate": 9.293333333333333e-08, "loss": 2.4469, "step": 1856000 }, { "epoch": 5.23, "eval_loss": 2.2845985889434814, "eval_runtime": 349.6059, "eval_samples_per_second": 854.891, "eval_steps_per_second": 53.432, "step": 1856000 }, { "epoch": 5.25, "eval_loss": 2.28393292427063, "eval_runtime": 358.3641, "eval_samples_per_second": 833.998, "eval_steps_per_second": 52.126, "step": 1864000 }, { "epoch": 5.27, "learning_rate": 9.02e-08, "loss": 2.4457, "step": 1872000 }, { "epoch": 5.27, "eval_loss": 2.2879955768585205, "eval_runtime": 356.0302, "eval_samples_per_second": 839.465, "eval_steps_per_second": 52.467, "step": 1872000 }, { "epoch": 5.3, "eval_loss": 2.2848622798919678, "eval_runtime": 357.091, "eval_samples_per_second": 836.972, "eval_steps_per_second": 52.312, "step": 1880000 }, { "epoch": 5.32, "learning_rate": 8.746666666666667e-08, "loss": 2.4444, "step": 1888000 }, { "epoch": 5.32, "eval_loss": 2.2838330268859863, "eval_runtime": 361.7799, "eval_samples_per_second": 826.124, "eval_steps_per_second": 51.634, "step": 1888000 }, { "epoch": 5.34, "eval_loss": 2.2800259590148926, "eval_runtime": 359.7779, "eval_samples_per_second": 830.721, "eval_steps_per_second": 51.921, "step": 1896000 }, { "epoch": 5.36, "learning_rate": 8.473333333333334e-08, "loss": 2.437, "step": 1904000 }, { "epoch": 5.36, "eval_loss": 2.291501760482788, "eval_runtime": 361.1309, "eval_samples_per_second": 827.608, "eval_steps_per_second": 51.726, "step": 1904000 }, { "epoch": 5.39, "eval_loss": 2.281313896179199, "eval_runtime": 355.1342, "eval_samples_per_second": 841.583, "eval_steps_per_second": 52.6, "step": 1912000 }, { "epoch": 5.41, "learning_rate": 8.2e-08, "loss": 2.4415, "step": 1920000 }, { "epoch": 5.41, "eval_loss": 2.2893078327178955, "eval_runtime": 356.2368, "eval_samples_per_second": 838.978, "eval_steps_per_second": 52.437, "step": 1920000 }, { "epoch": 5.43, "eval_loss": 2.2847840785980225, "eval_runtime": 355.1336, "eval_samples_per_second": 841.585, "eval_steps_per_second": 52.6, "step": 1928000 }, { "epoch": 5.45, "learning_rate": 7.926666666666666e-08, "loss": 2.4472, "step": 1936000 }, { "epoch": 5.45, "eval_loss": 2.2919681072235107, "eval_runtime": 350.7015, "eval_samples_per_second": 852.22, "eval_steps_per_second": 53.265, "step": 1936000 }, { "epoch": 5.48, "eval_loss": 2.275907516479492, "eval_runtime": 356.1315, "eval_samples_per_second": 839.227, "eval_steps_per_second": 52.453, "step": 1944000 }, { "epoch": 5.5, "learning_rate": 7.653333333333333e-08, "loss": 2.4418, "step": 1952000 }, { "epoch": 5.5, "eval_loss": 2.283698558807373, "eval_runtime": 358.3961, "eval_samples_per_second": 833.924, "eval_steps_per_second": 52.121, "step": 1952000 }, { "epoch": 5.52, "eval_loss": 2.2859556674957275, "eval_runtime": 363.646, "eval_samples_per_second": 821.885, "eval_steps_per_second": 51.369, "step": 1960000 }, { "epoch": 5.54, "learning_rate": 7.38e-08, "loss": 2.4406, "step": 1968000 }, { "epoch": 5.54, "eval_loss": 2.2824556827545166, "eval_runtime": 359.8407, "eval_samples_per_second": 830.576, "eval_steps_per_second": 51.912, "step": 1968000 }, { "epoch": 5.57, "eval_loss": 2.279388666152954, "eval_runtime": 360.1131, "eval_samples_per_second": 829.947, "eval_steps_per_second": 51.873, "step": 1976000 }, { "epoch": 5.59, "learning_rate": 7.106666666666667e-08, "loss": 2.4359, "step": 1984000 }, { "epoch": 5.59, "eval_loss": 2.277308464050293, "eval_runtime": 357.416, "eval_samples_per_second": 836.211, "eval_steps_per_second": 52.264, "step": 1984000 }, { "epoch": 5.61, "eval_loss": 2.287609100341797, "eval_runtime": 354.1483, "eval_samples_per_second": 843.926, "eval_steps_per_second": 52.746, "step": 1992000 }, { "epoch": 5.64, "learning_rate": 6.833333333333332e-08, "loss": 2.4416, "step": 2000000 }, { "epoch": 5.64, "eval_loss": 2.2792654037475586, "eval_runtime": 356.1929, "eval_samples_per_second": 839.082, "eval_steps_per_second": 52.443, "step": 2000000 }, { "epoch": 5.66, "eval_loss": 2.281383514404297, "eval_runtime": 351.3355, "eval_samples_per_second": 850.683, "eval_steps_per_second": 53.169, "step": 2008000 }, { "epoch": 5.68, "learning_rate": 6.56e-08, "loss": 2.4327, "step": 2016000 }, { "epoch": 5.68, "eval_loss": 2.2865149974823, "eval_runtime": 353.5028, "eval_samples_per_second": 845.467, "eval_steps_per_second": 52.843, "step": 2016000 }, { "epoch": 5.7, "eval_loss": 2.290292263031006, "eval_runtime": 361.761, "eval_samples_per_second": 826.167, "eval_steps_per_second": 51.636, "step": 2024000 }, { "epoch": 5.73, "learning_rate": 6.286666666666666e-08, "loss": 2.4395, "step": 2032000 }, { "epoch": 5.73, "eval_loss": 2.2849538326263428, "eval_runtime": 361.6836, "eval_samples_per_second": 826.344, "eval_steps_per_second": 51.647, "step": 2032000 }, { "epoch": 5.75, "eval_loss": 2.283485174179077, "eval_runtime": 364.0666, "eval_samples_per_second": 820.935, "eval_steps_per_second": 51.309, "step": 2040000 }, { "epoch": 5.77, "learning_rate": 6.013333333333333e-08, "loss": 2.4379, "step": 2048000 }, { "epoch": 5.77, "eval_loss": 2.283696413040161, "eval_runtime": 362.2372, "eval_samples_per_second": 825.081, "eval_steps_per_second": 51.568, "step": 2048000 }, { "epoch": 5.79, "eval_loss": 2.2832694053649902, "eval_runtime": 362.6288, "eval_samples_per_second": 824.19, "eval_steps_per_second": 51.513, "step": 2056000 }, { "epoch": 5.82, "learning_rate": 5.7400000000000004e-08, "loss": 2.4471, "step": 2064000 }, { "epoch": 5.82, "eval_loss": 2.285653591156006, "eval_runtime": 357.4355, "eval_samples_per_second": 836.165, "eval_steps_per_second": 52.261, "step": 2064000 }, { "epoch": 5.84, "eval_loss": 2.286252498626709, "eval_runtime": 354.5345, "eval_samples_per_second": 843.007, "eval_steps_per_second": 52.689, "step": 2072000 }, { "epoch": 5.86, "learning_rate": 5.4666666666666666e-08, "loss": 2.4443, "step": 2080000 }, { "epoch": 5.86, "eval_loss": 2.288219690322876, "eval_runtime": 355.482, "eval_samples_per_second": 840.76, "eval_steps_per_second": 52.548, "step": 2080000 }, { "epoch": 5.88, "eval_loss": 2.284872531890869, "eval_runtime": 352.94, "eval_samples_per_second": 846.815, "eval_steps_per_second": 52.927, "step": 2088000 }, { "epoch": 5.91, "learning_rate": 5.1933333333333335e-08, "loss": 2.4406, "step": 2096000 }, { "epoch": 5.91, "eval_loss": 2.288465738296509, "eval_runtime": 356.0, "eval_samples_per_second": 839.536, "eval_steps_per_second": 52.472, "step": 2096000 }, { "epoch": 5.93, "eval_loss": 2.285224437713623, "eval_runtime": 356.2606, "eval_samples_per_second": 838.922, "eval_steps_per_second": 52.434, "step": 2104000 }, { "epoch": 5.95, "learning_rate": 4.92e-08, "loss": 2.4502, "step": 2112000 }, { "epoch": 5.95, "eval_loss": 2.2898335456848145, "eval_runtime": 359.0245, "eval_samples_per_second": 832.464, "eval_steps_per_second": 52.03, "step": 2112000 }, { "epoch": 5.97, "eval_loss": 2.292398691177368, "eval_runtime": 368.3779, "eval_samples_per_second": 811.327, "eval_steps_per_second": 50.709, "step": 2120000 }, { "epoch": 6.0, "learning_rate": 4.6466666666666666e-08, "loss": 2.4356, "step": 2128000 }, { "epoch": 6.0, "eval_loss": 2.288630485534668, "eval_runtime": 363.4772, "eval_samples_per_second": 822.266, "eval_steps_per_second": 51.392, "step": 2128000 }, { "epoch": 6.02, "eval_loss": 2.288259744644165, "eval_runtime": 359.5292, "eval_samples_per_second": 831.295, "eval_steps_per_second": 51.957, "step": 2136000 }, { "epoch": 6.04, "learning_rate": 4.3733333333333335e-08, "loss": 2.4431, "step": 2144000 }, { "epoch": 6.04, "eval_loss": 2.2935080528259277, "eval_runtime": 359.5265, "eval_samples_per_second": 831.302, "eval_steps_per_second": 51.957, "step": 2144000 }, { "epoch": 6.06, "eval_loss": 2.2917544841766357, "eval_runtime": 359.5447, "eval_samples_per_second": 831.26, "eval_steps_per_second": 51.955, "step": 2152000 }, { "epoch": 6.09, "learning_rate": 4.1e-08, "loss": 2.4379, "step": 2160000 }, { "epoch": 6.09, "eval_loss": 2.2824108600616455, "eval_runtime": 355.9664, "eval_samples_per_second": 839.616, "eval_steps_per_second": 52.477, "step": 2160000 }, { "epoch": 6.11, "eval_loss": 2.284963607788086, "eval_runtime": 357.1233, "eval_samples_per_second": 836.896, "eval_steps_per_second": 52.307, "step": 2168000 }, { "epoch": 6.13, "learning_rate": 3.8266666666666665e-08, "loss": 2.4504, "step": 2176000 }, { "epoch": 6.13, "eval_loss": 2.2842228412628174, "eval_runtime": 362.4099, "eval_samples_per_second": 824.688, "eval_steps_per_second": 51.544, "step": 2176000 }, { "epoch": 6.15, "eval_loss": 2.2891006469726562, "eval_runtime": 361.5949, "eval_samples_per_second": 826.546, "eval_steps_per_second": 51.66, "step": 2184000 }, { "epoch": 6.18, "learning_rate": 3.5533333333333334e-08, "loss": 2.4352, "step": 2192000 }, { "epoch": 6.18, "eval_loss": 2.283393144607544, "eval_runtime": 363.5932, "eval_samples_per_second": 822.004, "eval_steps_per_second": 51.376, "step": 2192000 }, { "epoch": 6.2, "eval_loss": 2.287693500518799, "eval_runtime": 361.5904, "eval_samples_per_second": 826.557, "eval_steps_per_second": 51.661, "step": 2200000 }, { "epoch": 6.22, "learning_rate": 3.28e-08, "loss": 2.4385, "step": 2208000 }, { "epoch": 6.22, "eval_loss": 2.2836203575134277, "eval_runtime": 366.14, "eval_samples_per_second": 816.286, "eval_steps_per_second": 51.019, "step": 2208000 }, { "epoch": 6.24, "eval_loss": 2.2922732830047607, "eval_runtime": 362.88, "eval_samples_per_second": 823.619, "eval_steps_per_second": 51.477, "step": 2216000 }, { "epoch": 6.27, "learning_rate": 3.0066666666666665e-08, "loss": 2.4401, "step": 2224000 }, { "epoch": 6.27, "eval_loss": 2.2884254455566406, "eval_runtime": 358.59, "eval_samples_per_second": 833.473, "eval_steps_per_second": 52.093, "step": 2224000 }, { "epoch": 6.29, "eval_loss": 2.287552833557129, "eval_runtime": 356.8763, "eval_samples_per_second": 837.475, "eval_steps_per_second": 52.343, "step": 2232000 }, { "epoch": 6.31, "learning_rate": 2.7333333333333333e-08, "loss": 2.4396, "step": 2240000 }, { "epoch": 6.31, "eval_loss": 2.295525550842285, "eval_runtime": 356.3763, "eval_samples_per_second": 838.65, "eval_steps_per_second": 52.417, "step": 2240000 }, { "epoch": 6.33, "eval_loss": 2.284346103668213, "eval_runtime": 363.4862, "eval_samples_per_second": 822.246, "eval_steps_per_second": 51.391, "step": 2248000 }, { "epoch": 6.36, "learning_rate": 2.46e-08, "loss": 2.4384, "step": 2256000 }, { "epoch": 6.36, "eval_loss": 2.2884328365325928, "eval_runtime": 364.0629, "eval_samples_per_second": 820.943, "eval_steps_per_second": 51.31, "step": 2256000 }, { "epoch": 6.38, "eval_loss": 2.290330648422241, "eval_runtime": 363.2214, "eval_samples_per_second": 822.845, "eval_steps_per_second": 51.429, "step": 2264000 }, { "epoch": 6.4, "learning_rate": 2.1866666666666667e-08, "loss": 2.4365, "step": 2272000 }, { "epoch": 6.4, "eval_loss": 2.284994125366211, "eval_runtime": 367.1967, "eval_samples_per_second": 813.937, "eval_steps_per_second": 50.872, "step": 2272000 }, { "epoch": 6.42, "eval_loss": 2.2877182960510254, "eval_runtime": 366.6261, "eval_samples_per_second": 815.204, "eval_steps_per_second": 50.951, "step": 2280000 }, { "epoch": 6.45, "learning_rate": 1.9133333333333333e-08, "loss": 2.4361, "step": 2288000 }, { "epoch": 6.45, "eval_loss": 2.288691759109497, "eval_runtime": 359.7439, "eval_samples_per_second": 830.799, "eval_steps_per_second": 51.926, "step": 2288000 }, { "epoch": 6.47, "eval_loss": 2.2872354984283447, "eval_runtime": 358.3927, "eval_samples_per_second": 833.932, "eval_steps_per_second": 52.122, "step": 2296000 }, { "epoch": 6.49, "learning_rate": 1.64e-08, "loss": 2.4409, "step": 2304000 }, { "epoch": 6.49, "eval_loss": 2.2850539684295654, "eval_runtime": 359.4604, "eval_samples_per_second": 831.454, "eval_steps_per_second": 51.967, "step": 2304000 }, { "epoch": 6.51, "eval_loss": 2.2846696376800537, "eval_runtime": 359.0236, "eval_samples_per_second": 832.466, "eval_steps_per_second": 52.03, "step": 2312000 }, { "epoch": 6.54, "learning_rate": 1.3666666666666667e-08, "loss": 2.4423, "step": 2320000 }, { "epoch": 6.54, "eval_loss": 2.2844512462615967, "eval_runtime": 355.6441, "eval_samples_per_second": 840.377, "eval_steps_per_second": 52.524, "step": 2320000 }, { "epoch": 6.56, "eval_loss": 2.2848641872406006, "eval_runtime": 361.3595, "eval_samples_per_second": 827.085, "eval_steps_per_second": 51.694, "step": 2328000 }, { "epoch": 6.58, "learning_rate": 1.0933333333333334e-08, "loss": 2.4409, "step": 2336000 }, { "epoch": 6.58, "eval_loss": 2.2865257263183594, "eval_runtime": 365.7466, "eval_samples_per_second": 817.164, "eval_steps_per_second": 51.074, "step": 2336000 }, { "epoch": 6.6, "eval_loss": 2.2856223583221436, "eval_runtime": 371.4825, "eval_samples_per_second": 804.547, "eval_steps_per_second": 50.285, "step": 2344000 }, { "epoch": 6.63, "learning_rate": 8.2e-09, "loss": 2.4468, "step": 2352000 }, { "epoch": 6.63, "eval_loss": 2.2841665744781494, "eval_runtime": 362.5213, "eval_samples_per_second": 824.434, "eval_steps_per_second": 51.528, "step": 2352000 }, { "epoch": 6.65, "eval_loss": 2.2869627475738525, "eval_runtime": 362.9198, "eval_samples_per_second": 823.529, "eval_steps_per_second": 51.471, "step": 2360000 }, { "epoch": 6.67, "learning_rate": 5.466666666666667e-09, "loss": 2.4461, "step": 2368000 }, { "epoch": 6.67, "eval_loss": 2.285756826400757, "eval_runtime": 359.55, "eval_samples_per_second": 831.247, "eval_steps_per_second": 51.954, "step": 2368000 }, { "epoch": 6.69, "eval_loss": 2.2851572036743164, "eval_runtime": 360.1511, "eval_samples_per_second": 829.86, "eval_steps_per_second": 51.867, "step": 2376000 }, { "epoch": 6.72, "learning_rate": 2.7333333333333334e-09, "loss": 2.4469, "step": 2384000 }, { "epoch": 6.72, "eval_loss": 2.287076234817505, "eval_runtime": 357.6933, "eval_samples_per_second": 835.562, "eval_steps_per_second": 52.224, "step": 2384000 }, { "epoch": 6.74, "eval_loss": 2.2894530296325684, "eval_runtime": 358.3621, "eval_samples_per_second": 834.003, "eval_steps_per_second": 52.126, "step": 2392000 }, { "epoch": 6.76, "learning_rate": 0.0, "loss": 2.4413, "step": 2400000 }, { "epoch": 6.76, "eval_loss": 2.2823402881622314, "eval_runtime": 363.9779, "eval_samples_per_second": 821.135, "eval_steps_per_second": 51.322, "step": 2400000 }, { "epoch": 6.76, "step": 2400000, "total_flos": 7.627768836102533e+17, "train_loss": 2.4450033056640623, "train_runtime": 259948.3281, "train_samples_per_second": 147.722, "train_steps_per_second": 9.233 } ], "logging_steps": 16000, "max_steps": 2400000, "num_train_epochs": 7, "save_steps": 32000, "total_flos": 7.627768836102533e+17, "trial_name": null, "trial_params": null }