{ "best_metric": 1.9023408889770508, "best_model_checkpoint": "./model_tweets_2020_Q1_75/checkpoint-2336000", "epoch": 19.569471624266146, "eval_steps": 8000, "global_step": 2400000, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.07, "eval_loss": 2.2396600246429443, "eval_runtime": 113.355, "eval_samples_per_second": 911.075, "eval_steps_per_second": 56.945, "step": 8000 }, { "epoch": 0.13, "learning_rate": 4.0726666666666665e-07, "loss": 2.4342, "step": 16000 }, { "epoch": 0.13, "eval_loss": 2.1510801315307617, "eval_runtime": 111.953, "eval_samples_per_second": 922.486, "eval_steps_per_second": 57.658, "step": 16000 }, { "epoch": 0.2, "eval_loss": 2.1108760833740234, "eval_runtime": 111.7901, "eval_samples_per_second": 923.83, "eval_steps_per_second": 57.742, "step": 24000 }, { "epoch": 0.26, "learning_rate": 4.0453333333333336e-07, "loss": 2.2417, "step": 32000 }, { "epoch": 0.26, "eval_loss": 2.0788779258728027, "eval_runtime": 111.8843, "eval_samples_per_second": 923.051, "eval_steps_per_second": 57.694, "step": 32000 }, { "epoch": 0.33, "eval_loss": 2.065674066543579, "eval_runtime": 111.7346, "eval_samples_per_second": 924.288, "eval_steps_per_second": 57.771, "step": 40000 }, { "epoch": 0.39, "learning_rate": 4.018e-07, "loss": 2.1852, "step": 48000 }, { "epoch": 0.39, "eval_loss": 2.0397331714630127, "eval_runtime": 114.2687, "eval_samples_per_second": 903.791, "eval_steps_per_second": 56.49, "step": 48000 }, { "epoch": 0.46, "eval_loss": 2.0303494930267334, "eval_runtime": 112.0167, "eval_samples_per_second": 921.961, "eval_steps_per_second": 57.625, "step": 56000 }, { "epoch": 0.52, "learning_rate": 3.9906666666666667e-07, "loss": 2.1511, "step": 64000 }, { "epoch": 0.52, "eval_loss": 2.024770975112915, "eval_runtime": 112.2634, "eval_samples_per_second": 919.935, "eval_steps_per_second": 57.499, "step": 64000 }, { "epoch": 0.59, "eval_loss": 2.022064685821533, "eval_runtime": 112.5031, "eval_samples_per_second": 917.975, "eval_steps_per_second": 57.376, "step": 72000 }, { "epoch": 0.65, "learning_rate": 3.963333333333333e-07, "loss": 2.1261, "step": 80000 }, { "epoch": 0.65, "eval_loss": 2.0128211975097656, "eval_runtime": 113.0139, "eval_samples_per_second": 913.826, "eval_steps_per_second": 57.117, "step": 80000 }, { "epoch": 0.72, "eval_loss": 2.0067052841186523, "eval_runtime": 112.3731, "eval_samples_per_second": 919.037, "eval_steps_per_second": 57.443, "step": 88000 }, { "epoch": 0.78, "learning_rate": 3.936e-07, "loss": 2.1179, "step": 96000 }, { "epoch": 0.78, "eval_loss": 2.003864288330078, "eval_runtime": 113.5425, "eval_samples_per_second": 909.571, "eval_steps_per_second": 56.851, "step": 96000 }, { "epoch": 0.85, "eval_loss": 1.9972714185714722, "eval_runtime": 112.6781, "eval_samples_per_second": 916.549, "eval_steps_per_second": 57.287, "step": 104000 }, { "epoch": 0.91, "learning_rate": 3.908666666666667e-07, "loss": 2.1097, "step": 112000 }, { "epoch": 0.91, "eval_loss": 1.98354971408844, "eval_runtime": 112.0904, "eval_samples_per_second": 921.355, "eval_steps_per_second": 57.587, "step": 112000 }, { "epoch": 0.98, "eval_loss": 1.9983153343200684, "eval_runtime": 112.7204, "eval_samples_per_second": 916.205, "eval_steps_per_second": 57.266, "step": 120000 }, { "epoch": 1.04, "learning_rate": 3.8813333333333334e-07, "loss": 2.1031, "step": 128000 }, { "epoch": 1.04, "eval_loss": 1.9898955821990967, "eval_runtime": 114.024, "eval_samples_per_second": 905.731, "eval_steps_per_second": 56.611, "step": 128000 }, { "epoch": 1.11, "eval_loss": 1.9755431413650513, "eval_runtime": 114.5549, "eval_samples_per_second": 901.533, "eval_steps_per_second": 56.349, "step": 136000 }, { "epoch": 1.17, "learning_rate": 3.854e-07, "loss": 2.0977, "step": 144000 }, { "epoch": 1.17, "eval_loss": 1.9855457544326782, "eval_runtime": 113.3525, "eval_samples_per_second": 911.096, "eval_steps_per_second": 56.946, "step": 144000 }, { "epoch": 1.24, "eval_loss": 1.9721323251724243, "eval_runtime": 114.0229, "eval_samples_per_second": 905.739, "eval_steps_per_second": 56.611, "step": 152000 }, { "epoch": 1.3, "learning_rate": 3.8266666666666665e-07, "loss": 2.0892, "step": 160000 }, { "epoch": 1.3, "eval_loss": 1.9813446998596191, "eval_runtime": 113.5565, "eval_samples_per_second": 909.459, "eval_steps_per_second": 56.844, "step": 160000 }, { "epoch": 1.37, "eval_loss": 1.9827616214752197, "eval_runtime": 113.4289, "eval_samples_per_second": 910.482, "eval_steps_per_second": 56.908, "step": 168000 }, { "epoch": 1.44, "learning_rate": 3.799333333333333e-07, "loss": 2.0882, "step": 176000 }, { "epoch": 1.44, "eval_loss": 1.9703537225723267, "eval_runtime": 112.6429, "eval_samples_per_second": 916.835, "eval_steps_per_second": 57.305, "step": 176000 }, { "epoch": 1.5, "eval_loss": 1.9728624820709229, "eval_runtime": 113.7442, "eval_samples_per_second": 907.958, "eval_steps_per_second": 56.75, "step": 184000 }, { "epoch": 1.57, "learning_rate": 3.772e-07, "loss": 2.0884, "step": 192000 }, { "epoch": 1.57, "eval_loss": 1.9721413850784302, "eval_runtime": 113.2577, "eval_samples_per_second": 911.859, "eval_steps_per_second": 56.994, "step": 192000 }, { "epoch": 1.63, "eval_loss": 1.9663499593734741, "eval_runtime": 115.0819, "eval_samples_per_second": 897.404, "eval_steps_per_second": 56.09, "step": 200000 }, { "epoch": 1.7, "learning_rate": 3.7446666666666667e-07, "loss": 2.0814, "step": 208000 }, { "epoch": 1.7, "eval_loss": 1.9612431526184082, "eval_runtime": 113.9384, "eval_samples_per_second": 906.411, "eval_steps_per_second": 56.653, "step": 208000 }, { "epoch": 1.76, "eval_loss": 1.971712350845337, "eval_runtime": 113.4328, "eval_samples_per_second": 910.451, "eval_steps_per_second": 56.906, "step": 216000 }, { "epoch": 1.83, "learning_rate": 3.7173333333333333e-07, "loss": 2.0806, "step": 224000 }, { "epoch": 1.83, "eval_loss": 1.959405541419983, "eval_runtime": 113.1738, "eval_samples_per_second": 912.535, "eval_steps_per_second": 57.036, "step": 224000 }, { "epoch": 1.89, "eval_loss": 1.960507869720459, "eval_runtime": 112.7387, "eval_samples_per_second": 916.056, "eval_steps_per_second": 57.256, "step": 232000 }, { "epoch": 1.96, "learning_rate": 3.69e-07, "loss": 2.0838, "step": 240000 }, { "epoch": 1.96, "eval_loss": 1.9555588960647583, "eval_runtime": 113.0742, "eval_samples_per_second": 913.338, "eval_steps_per_second": 57.086, "step": 240000 }, { "epoch": 2.02, "eval_loss": 1.955155849456787, "eval_runtime": 114.9852, "eval_samples_per_second": 898.159, "eval_steps_per_second": 56.138, "step": 248000 }, { "epoch": 2.09, "learning_rate": 3.6626666666666664e-07, "loss": 2.0711, "step": 256000 }, { "epoch": 2.09, "eval_loss": 1.965279459953308, "eval_runtime": 115.2918, "eval_samples_per_second": 895.771, "eval_steps_per_second": 55.988, "step": 256000 }, { "epoch": 2.15, "eval_loss": 1.9581040143966675, "eval_runtime": 112.9905, "eval_samples_per_second": 914.015, "eval_steps_per_second": 57.129, "step": 264000 }, { "epoch": 2.22, "learning_rate": 3.6353333333333335e-07, "loss": 2.065, "step": 272000 }, { "epoch": 2.22, "eval_loss": 1.9558922052383423, "eval_runtime": 113.2393, "eval_samples_per_second": 912.007, "eval_steps_per_second": 57.003, "step": 272000 }, { "epoch": 2.28, "eval_loss": 1.9615230560302734, "eval_runtime": 113.7321, "eval_samples_per_second": 908.055, "eval_steps_per_second": 56.756, "step": 280000 }, { "epoch": 2.35, "learning_rate": 3.608e-07, "loss": 2.0769, "step": 288000 }, { "epoch": 2.35, "eval_loss": 1.9493736028671265, "eval_runtime": 113.1393, "eval_samples_per_second": 912.813, "eval_steps_per_second": 57.054, "step": 288000 }, { "epoch": 2.41, "eval_loss": 1.9487217664718628, "eval_runtime": 112.3053, "eval_samples_per_second": 919.591, "eval_steps_per_second": 57.477, "step": 296000 }, { "epoch": 2.48, "learning_rate": 3.5806666666666666e-07, "loss": 2.0733, "step": 304000 }, { "epoch": 2.48, "eval_loss": 1.9546173810958862, "eval_runtime": 113.1729, "eval_samples_per_second": 912.542, "eval_steps_per_second": 57.037, "step": 304000 }, { "epoch": 2.54, "eval_loss": 1.9445385932922363, "eval_runtime": 112.3458, "eval_samples_per_second": 919.26, "eval_steps_per_second": 57.457, "step": 312000 }, { "epoch": 2.61, "learning_rate": 3.553333333333333e-07, "loss": 2.0675, "step": 320000 }, { "epoch": 2.61, "eval_loss": 1.9534730911254883, "eval_runtime": 113.0946, "eval_samples_per_second": 913.174, "eval_steps_per_second": 57.076, "step": 320000 }, { "epoch": 2.67, "eval_loss": 1.9580506086349487, "eval_runtime": 112.7099, "eval_samples_per_second": 916.291, "eval_steps_per_second": 57.271, "step": 328000 }, { "epoch": 2.74, "learning_rate": 3.5259999999999997e-07, "loss": 2.0599, "step": 336000 }, { "epoch": 2.74, "eval_loss": 1.9472216367721558, "eval_runtime": 113.0268, "eval_samples_per_second": 913.722, "eval_steps_per_second": 57.11, "step": 336000 }, { "epoch": 2.8, "eval_loss": 1.9545352458953857, "eval_runtime": 112.8522, "eval_samples_per_second": 915.135, "eval_steps_per_second": 57.199, "step": 344000 }, { "epoch": 2.87, "learning_rate": 3.498666666666667e-07, "loss": 2.0675, "step": 352000 }, { "epoch": 2.87, "eval_loss": 1.9551931619644165, "eval_runtime": 112.8344, "eval_samples_per_second": 915.28, "eval_steps_per_second": 57.208, "step": 352000 }, { "epoch": 2.94, "eval_loss": 1.9397163391113281, "eval_runtime": 112.7266, "eval_samples_per_second": 916.155, "eval_steps_per_second": 57.262, "step": 360000 }, { "epoch": 3.0, "learning_rate": 3.4713333333333333e-07, "loss": 2.0711, "step": 368000 }, { "epoch": 3.0, "eval_loss": 1.9475340843200684, "eval_runtime": 113.527, "eval_samples_per_second": 909.695, "eval_steps_per_second": 56.859, "step": 368000 }, { "epoch": 3.07, "eval_loss": 1.9387180805206299, "eval_runtime": 112.9318, "eval_samples_per_second": 914.49, "eval_steps_per_second": 57.158, "step": 376000 }, { "epoch": 3.13, "learning_rate": 3.444e-07, "loss": 2.0663, "step": 384000 }, { "epoch": 3.13, "eval_loss": 1.948356032371521, "eval_runtime": 113.6939, "eval_samples_per_second": 908.36, "eval_steps_per_second": 56.775, "step": 384000 }, { "epoch": 3.2, "eval_loss": 1.942366361618042, "eval_runtime": 112.9142, "eval_samples_per_second": 914.633, "eval_steps_per_second": 57.167, "step": 392000 }, { "epoch": 3.26, "learning_rate": 3.416666666666667e-07, "loss": 2.0628, "step": 400000 }, { "epoch": 3.26, "eval_loss": 1.941139578819275, "eval_runtime": 112.7091, "eval_samples_per_second": 916.297, "eval_steps_per_second": 57.271, "step": 400000 }, { "epoch": 3.33, "eval_loss": 1.940949559211731, "eval_runtime": 112.6381, "eval_samples_per_second": 916.875, "eval_steps_per_second": 57.307, "step": 408000 }, { "epoch": 3.39, "learning_rate": 3.3893333333333335e-07, "loss": 2.0651, "step": 416000 }, { "epoch": 3.39, "eval_loss": 1.9446566104888916, "eval_runtime": 112.949, "eval_samples_per_second": 914.351, "eval_steps_per_second": 57.15, "step": 416000 }, { "epoch": 3.46, "eval_loss": 1.940216064453125, "eval_runtime": 114.0012, "eval_samples_per_second": 905.912, "eval_steps_per_second": 56.622, "step": 424000 }, { "epoch": 3.52, "learning_rate": 3.3619999999999995e-07, "loss": 2.0598, "step": 432000 }, { "epoch": 3.52, "eval_loss": 1.9503767490386963, "eval_runtime": 113.8999, "eval_samples_per_second": 906.717, "eval_steps_per_second": 56.673, "step": 432000 }, { "epoch": 3.59, "eval_loss": 1.9414310455322266, "eval_runtime": 113.4917, "eval_samples_per_second": 909.978, "eval_steps_per_second": 56.876, "step": 440000 }, { "epoch": 3.65, "learning_rate": 3.3346666666666666e-07, "loss": 2.0612, "step": 448000 }, { "epoch": 3.65, "eval_loss": 1.9329679012298584, "eval_runtime": 113.8065, "eval_samples_per_second": 907.462, "eval_steps_per_second": 56.719, "step": 448000 }, { "epoch": 3.72, "eval_loss": 1.942373514175415, "eval_runtime": 112.7208, "eval_samples_per_second": 916.202, "eval_steps_per_second": 57.265, "step": 456000 }, { "epoch": 3.78, "learning_rate": 3.307333333333333e-07, "loss": 2.0653, "step": 464000 }, { "epoch": 3.78, "eval_loss": 1.930959939956665, "eval_runtime": 113.0386, "eval_samples_per_second": 913.626, "eval_steps_per_second": 57.104, "step": 464000 }, { "epoch": 3.85, "eval_loss": 1.9363598823547363, "eval_runtime": 112.8486, "eval_samples_per_second": 915.164, "eval_steps_per_second": 57.201, "step": 472000 }, { "epoch": 3.91, "learning_rate": 3.28e-07, "loss": 2.0585, "step": 480000 }, { "epoch": 3.91, "eval_loss": 1.9507150650024414, "eval_runtime": 114.0147, "eval_samples_per_second": 905.804, "eval_steps_per_second": 56.615, "step": 480000 }, { "epoch": 3.98, "eval_loss": 1.9320358037948608, "eval_runtime": 113.3729, "eval_samples_per_second": 910.932, "eval_steps_per_second": 56.936, "step": 488000 }, { "epoch": 4.04, "learning_rate": 3.252666666666667e-07, "loss": 2.0593, "step": 496000 }, { "epoch": 4.04, "eval_loss": 1.9416472911834717, "eval_runtime": 113.4866, "eval_samples_per_second": 910.02, "eval_steps_per_second": 56.879, "step": 496000 }, { "epoch": 4.11, "eval_loss": 1.934741735458374, "eval_runtime": 112.805, "eval_samples_per_second": 915.518, "eval_steps_per_second": 57.223, "step": 504000 }, { "epoch": 4.17, "learning_rate": 3.2253333333333334e-07, "loss": 2.0671, "step": 512000 }, { "epoch": 4.17, "eval_loss": 1.9390867948532104, "eval_runtime": 112.908, "eval_samples_per_second": 914.683, "eval_steps_per_second": 57.17, "step": 512000 }, { "epoch": 4.24, "eval_loss": 1.9453818798065186, "eval_runtime": 112.778, "eval_samples_per_second": 915.737, "eval_steps_per_second": 57.236, "step": 520000 }, { "epoch": 4.31, "learning_rate": 3.198e-07, "loss": 2.0552, "step": 528000 }, { "epoch": 4.31, "eval_loss": 1.9501063823699951, "eval_runtime": 112.534, "eval_samples_per_second": 917.723, "eval_steps_per_second": 57.36, "step": 528000 }, { "epoch": 4.37, "eval_loss": 1.935518741607666, "eval_runtime": 113.9789, "eval_samples_per_second": 906.089, "eval_steps_per_second": 56.633, "step": 536000 }, { "epoch": 4.44, "learning_rate": 3.1706666666666665e-07, "loss": 2.0626, "step": 544000 }, { "epoch": 4.44, "eval_loss": 1.9239717721939087, "eval_runtime": 112.9585, "eval_samples_per_second": 914.273, "eval_steps_per_second": 57.145, "step": 544000 }, { "epoch": 4.5, "eval_loss": 1.9398826360702515, "eval_runtime": 113.7219, "eval_samples_per_second": 908.137, "eval_steps_per_second": 56.761, "step": 552000 }, { "epoch": 4.57, "learning_rate": 3.1433333333333336e-07, "loss": 2.0592, "step": 560000 }, { "epoch": 4.57, "eval_loss": 1.9360318183898926, "eval_runtime": 113.1836, "eval_samples_per_second": 912.456, "eval_steps_per_second": 57.031, "step": 560000 }, { "epoch": 4.63, "eval_loss": 1.9377766847610474, "eval_runtime": 113.185, "eval_samples_per_second": 912.444, "eval_steps_per_second": 57.031, "step": 568000 }, { "epoch": 4.7, "learning_rate": 3.116e-07, "loss": 2.0584, "step": 576000 }, { "epoch": 4.7, "eval_loss": 1.9293311834335327, "eval_runtime": 113.6435, "eval_samples_per_second": 908.763, "eval_steps_per_second": 56.8, "step": 576000 }, { "epoch": 4.76, "eval_loss": 1.943053126335144, "eval_runtime": 113.0332, "eval_samples_per_second": 913.67, "eval_steps_per_second": 57.107, "step": 584000 }, { "epoch": 4.83, "learning_rate": 3.0886666666666667e-07, "loss": 2.0515, "step": 592000 }, { "epoch": 4.83, "eval_loss": 1.9324830770492554, "eval_runtime": 113.1852, "eval_samples_per_second": 912.443, "eval_steps_per_second": 57.03, "step": 592000 }, { "epoch": 4.89, "eval_loss": 1.9265968799591064, "eval_runtime": 113.5248, "eval_samples_per_second": 909.713, "eval_steps_per_second": 56.86, "step": 600000 }, { "epoch": 4.96, "learning_rate": 3.061333333333333e-07, "loss": 2.0545, "step": 608000 }, { "epoch": 4.96, "eval_loss": 1.921515941619873, "eval_runtime": 113.9086, "eval_samples_per_second": 906.648, "eval_steps_per_second": 56.668, "step": 608000 }, { "epoch": 5.02, "eval_loss": 1.924493670463562, "eval_runtime": 113.1157, "eval_samples_per_second": 913.003, "eval_steps_per_second": 57.065, "step": 616000 }, { "epoch": 5.09, "learning_rate": 3.034e-07, "loss": 2.0525, "step": 624000 }, { "epoch": 5.09, "eval_loss": 1.9372978210449219, "eval_runtime": 113.8901, "eval_samples_per_second": 906.795, "eval_steps_per_second": 56.677, "step": 624000 }, { "epoch": 5.15, "eval_loss": 1.934131145477295, "eval_runtime": 112.8536, "eval_samples_per_second": 915.124, "eval_steps_per_second": 57.198, "step": 632000 }, { "epoch": 5.22, "learning_rate": 3.0066666666666663e-07, "loss": 2.0556, "step": 640000 }, { "epoch": 5.22, "eval_loss": 1.9312899112701416, "eval_runtime": 113.0744, "eval_samples_per_second": 913.336, "eval_steps_per_second": 57.086, "step": 640000 }, { "epoch": 5.28, "eval_loss": 1.922965407371521, "eval_runtime": 113.8801, "eval_samples_per_second": 906.875, "eval_steps_per_second": 56.682, "step": 648000 }, { "epoch": 5.35, "learning_rate": 2.9793333333333334e-07, "loss": 2.0567, "step": 656000 }, { "epoch": 5.35, "eval_loss": 1.930014729499817, "eval_runtime": 112.8937, "eval_samples_per_second": 914.799, "eval_steps_per_second": 57.178, "step": 656000 }, { "epoch": 5.41, "eval_loss": 1.9337064027786255, "eval_runtime": 113.4421, "eval_samples_per_second": 910.376, "eval_steps_per_second": 56.901, "step": 664000 }, { "epoch": 5.48, "learning_rate": 2.952e-07, "loss": 2.0506, "step": 672000 }, { "epoch": 5.48, "eval_loss": 1.9316705465316772, "eval_runtime": 113.3757, "eval_samples_per_second": 910.91, "eval_steps_per_second": 56.935, "step": 672000 }, { "epoch": 5.54, "eval_loss": 1.9275363683700562, "eval_runtime": 113.4405, "eval_samples_per_second": 910.389, "eval_steps_per_second": 56.902, "step": 680000 }, { "epoch": 5.61, "learning_rate": 2.9246666666666665e-07, "loss": 2.0561, "step": 688000 }, { "epoch": 5.61, "eval_loss": 1.9376088380813599, "eval_runtime": 113.0322, "eval_samples_per_second": 913.678, "eval_steps_per_second": 57.108, "step": 688000 }, { "epoch": 5.68, "eval_loss": 1.9461050033569336, "eval_runtime": 113.738, "eval_samples_per_second": 908.008, "eval_steps_per_second": 56.753, "step": 696000 }, { "epoch": 5.74, "learning_rate": 2.897333333333333e-07, "loss": 2.0496, "step": 704000 }, { "epoch": 5.74, "eval_loss": 1.9239321947097778, "eval_runtime": 113.8527, "eval_samples_per_second": 907.093, "eval_steps_per_second": 56.696, "step": 704000 }, { "epoch": 5.81, "eval_loss": 1.9250530004501343, "eval_runtime": 113.6483, "eval_samples_per_second": 908.725, "eval_steps_per_second": 56.798, "step": 712000 }, { "epoch": 5.87, "learning_rate": 2.8699999999999996e-07, "loss": 2.045, "step": 720000 }, { "epoch": 5.87, "eval_loss": 1.9309498071670532, "eval_runtime": 114.1187, "eval_samples_per_second": 904.979, "eval_steps_per_second": 56.564, "step": 720000 }, { "epoch": 5.94, "eval_loss": 1.925881266593933, "eval_runtime": 113.5651, "eval_samples_per_second": 909.391, "eval_steps_per_second": 56.84, "step": 728000 }, { "epoch": 6.0, "learning_rate": 2.8426666666666667e-07, "loss": 2.0512, "step": 736000 }, { "epoch": 6.0, "eval_loss": 1.9236810207366943, "eval_runtime": 113.1127, "eval_samples_per_second": 913.027, "eval_steps_per_second": 57.067, "step": 736000 }, { "epoch": 6.07, "eval_loss": 1.9148136377334595, "eval_runtime": 113.2705, "eval_samples_per_second": 911.756, "eval_steps_per_second": 56.987, "step": 744000 }, { "epoch": 6.13, "learning_rate": 2.815333333333333e-07, "loss": 2.0512, "step": 752000 }, { "epoch": 6.13, "eval_loss": 1.9219812154769897, "eval_runtime": 114.1512, "eval_samples_per_second": 904.721, "eval_steps_per_second": 56.548, "step": 752000 }, { "epoch": 6.2, "eval_loss": 1.9397040605545044, "eval_runtime": 113.2005, "eval_samples_per_second": 912.319, "eval_steps_per_second": 57.023, "step": 760000 }, { "epoch": 6.26, "learning_rate": 2.7880000000000003e-07, "loss": 2.0445, "step": 768000 }, { "epoch": 6.26, "eval_loss": 1.9240758419036865, "eval_runtime": 113.1775, "eval_samples_per_second": 912.505, "eval_steps_per_second": 57.034, "step": 768000 }, { "epoch": 6.33, "eval_loss": 1.9330027103424072, "eval_runtime": 113.0566, "eval_samples_per_second": 913.481, "eval_steps_per_second": 57.095, "step": 776000 }, { "epoch": 6.39, "learning_rate": 2.7606666666666664e-07, "loss": 2.0481, "step": 784000 }, { "epoch": 6.39, "eval_loss": 1.9123960733413696, "eval_runtime": 113.268, "eval_samples_per_second": 911.775, "eval_steps_per_second": 56.989, "step": 784000 }, { "epoch": 6.46, "eval_loss": 1.9267631769180298, "eval_runtime": 113.7712, "eval_samples_per_second": 907.743, "eval_steps_per_second": 56.737, "step": 792000 }, { "epoch": 6.52, "learning_rate": 2.733333333333333e-07, "loss": 2.048, "step": 800000 }, { "epoch": 6.52, "eval_loss": 1.921078085899353, "eval_runtime": 113.8106, "eval_samples_per_second": 907.429, "eval_steps_per_second": 56.717, "step": 800000 }, { "epoch": 6.59, "eval_loss": 1.9279391765594482, "eval_runtime": 113.2864, "eval_samples_per_second": 911.627, "eval_steps_per_second": 56.979, "step": 808000 }, { "epoch": 6.65, "learning_rate": 2.706e-07, "loss": 2.0555, "step": 816000 }, { "epoch": 6.65, "eval_loss": 1.9168628454208374, "eval_runtime": 113.2491, "eval_samples_per_second": 911.928, "eval_steps_per_second": 56.998, "step": 816000 }, { "epoch": 6.72, "eval_loss": 1.922944188117981, "eval_runtime": 113.2414, "eval_samples_per_second": 911.99, "eval_steps_per_second": 57.002, "step": 824000 }, { "epoch": 6.78, "learning_rate": 2.6786666666666666e-07, "loss": 2.052, "step": 832000 }, { "epoch": 6.78, "eval_loss": 1.9253454208374023, "eval_runtime": 114.0468, "eval_samples_per_second": 905.549, "eval_steps_per_second": 56.6, "step": 832000 }, { "epoch": 6.85, "eval_loss": 1.9244375228881836, "eval_runtime": 113.2582, "eval_samples_per_second": 911.855, "eval_steps_per_second": 56.994, "step": 840000 }, { "epoch": 6.91, "learning_rate": 2.651333333333333e-07, "loss": 2.0475, "step": 848000 }, { "epoch": 6.91, "eval_loss": 1.9191728830337524, "eval_runtime": 113.2946, "eval_samples_per_second": 911.561, "eval_steps_per_second": 56.975, "step": 848000 }, { "epoch": 6.98, "eval_loss": 1.9167262315750122, "eval_runtime": 113.3788, "eval_samples_per_second": 910.884, "eval_steps_per_second": 56.933, "step": 856000 }, { "epoch": 7.05, "learning_rate": 2.624e-07, "loss": 2.0521, "step": 864000 }, { "epoch": 7.05, "eval_loss": 1.9202110767364502, "eval_runtime": 113.3134, "eval_samples_per_second": 911.41, "eval_steps_per_second": 56.966, "step": 864000 }, { "epoch": 7.11, "eval_loss": 1.9240491390228271, "eval_runtime": 113.6592, "eval_samples_per_second": 908.638, "eval_steps_per_second": 56.793, "step": 872000 }, { "epoch": 7.18, "learning_rate": 2.596666666666667e-07, "loss": 2.0516, "step": 880000 }, { "epoch": 7.18, "eval_loss": 1.923065423965454, "eval_runtime": 113.5487, "eval_samples_per_second": 909.522, "eval_steps_per_second": 56.848, "step": 880000 }, { "epoch": 7.24, "eval_loss": 1.9245978593826294, "eval_runtime": 114.3166, "eval_samples_per_second": 903.412, "eval_steps_per_second": 56.466, "step": 888000 }, { "epoch": 7.31, "learning_rate": 2.5693333333333333e-07, "loss": 2.0526, "step": 896000 }, { "epoch": 7.31, "eval_loss": 1.9173697233200073, "eval_runtime": 113.4859, "eval_samples_per_second": 910.025, "eval_steps_per_second": 56.879, "step": 896000 }, { "epoch": 7.37, "eval_loss": 1.9256370067596436, "eval_runtime": 114.6588, "eval_samples_per_second": 900.716, "eval_steps_per_second": 56.297, "step": 904000 }, { "epoch": 7.44, "learning_rate": 2.542e-07, "loss": 2.044, "step": 912000 }, { "epoch": 7.44, "eval_loss": 1.9233709573745728, "eval_runtime": 114.8311, "eval_samples_per_second": 899.364, "eval_steps_per_second": 56.213, "step": 912000 }, { "epoch": 7.5, "eval_loss": 1.9208239316940308, "eval_runtime": 114.4555, "eval_samples_per_second": 902.316, "eval_steps_per_second": 56.397, "step": 920000 }, { "epoch": 7.57, "learning_rate": 2.5146666666666664e-07, "loss": 2.0493, "step": 928000 }, { "epoch": 7.57, "eval_loss": 1.9232600927352905, "eval_runtime": 113.2901, "eval_samples_per_second": 911.598, "eval_steps_per_second": 56.978, "step": 928000 }, { "epoch": 7.63, "eval_loss": 1.918021321296692, "eval_runtime": 114.0382, "eval_samples_per_second": 905.617, "eval_steps_per_second": 56.604, "step": 936000 }, { "epoch": 7.7, "learning_rate": 2.4873333333333335e-07, "loss": 2.0535, "step": 944000 }, { "epoch": 7.7, "eval_loss": 1.919961929321289, "eval_runtime": 114.7577, "eval_samples_per_second": 899.94, "eval_steps_per_second": 56.249, "step": 944000 }, { "epoch": 7.76, "eval_loss": 1.9151924848556519, "eval_runtime": 113.434, "eval_samples_per_second": 910.441, "eval_steps_per_second": 56.905, "step": 952000 }, { "epoch": 7.83, "learning_rate": 2.46e-07, "loss": 2.0454, "step": 960000 }, { "epoch": 7.83, "eval_loss": 1.926845669746399, "eval_runtime": 114.0309, "eval_samples_per_second": 905.676, "eval_steps_per_second": 56.607, "step": 960000 }, { "epoch": 7.89, "eval_loss": 1.9206236600875854, "eval_runtime": 113.4283, "eval_samples_per_second": 910.487, "eval_steps_per_second": 56.908, "step": 968000 }, { "epoch": 7.96, "learning_rate": 2.4326666666666666e-07, "loss": 2.0428, "step": 976000 }, { "epoch": 7.96, "eval_loss": 1.9169600009918213, "eval_runtime": 113.0231, "eval_samples_per_second": 913.751, "eval_steps_per_second": 57.112, "step": 976000 }, { "epoch": 8.02, "eval_loss": 1.923983097076416, "eval_runtime": 114.0029, "eval_samples_per_second": 905.898, "eval_steps_per_second": 56.621, "step": 984000 }, { "epoch": 8.09, "learning_rate": 2.405333333333333e-07, "loss": 2.052, "step": 992000 }, { "epoch": 8.09, "eval_loss": 1.9306118488311768, "eval_runtime": 114.2567, "eval_samples_per_second": 903.886, "eval_steps_per_second": 56.496, "step": 992000 }, { "epoch": 8.15, "eval_loss": 1.9191603660583496, "eval_runtime": 114.2092, "eval_samples_per_second": 904.262, "eval_steps_per_second": 56.519, "step": 1000000 }, { "epoch": 8.22, "learning_rate": 2.3779999999999997e-07, "loss": 2.0472, "step": 1008000 }, { "epoch": 8.22, "eval_loss": 1.9313241243362427, "eval_runtime": 114.3737, "eval_samples_per_second": 902.961, "eval_steps_per_second": 56.438, "step": 1008000 }, { "epoch": 8.28, "eval_loss": 1.9238113164901733, "eval_runtime": 114.2747, "eval_samples_per_second": 903.743, "eval_steps_per_second": 56.487, "step": 1016000 }, { "epoch": 8.35, "learning_rate": 2.3506666666666668e-07, "loss": 2.0454, "step": 1024000 }, { "epoch": 8.35, "eval_loss": 1.9162325859069824, "eval_runtime": 114.6251, "eval_samples_per_second": 900.98, "eval_steps_per_second": 56.314, "step": 1024000 }, { "epoch": 8.41, "eval_loss": 1.913014531135559, "eval_runtime": 113.9073, "eval_samples_per_second": 906.658, "eval_steps_per_second": 56.669, "step": 1032000 }, { "epoch": 8.48, "learning_rate": 2.3233333333333334e-07, "loss": 2.0503, "step": 1040000 }, { "epoch": 8.48, "eval_loss": 1.9260133504867554, "eval_runtime": 114.9945, "eval_samples_per_second": 898.086, "eval_steps_per_second": 56.133, "step": 1040000 }, { "epoch": 8.55, "eval_loss": 1.9212397336959839, "eval_runtime": 113.1012, "eval_samples_per_second": 913.12, "eval_steps_per_second": 57.073, "step": 1048000 }, { "epoch": 8.61, "learning_rate": 2.2960000000000002e-07, "loss": 2.0511, "step": 1056000 }, { "epoch": 8.61, "eval_loss": 1.9114716053009033, "eval_runtime": 113.5853, "eval_samples_per_second": 909.229, "eval_steps_per_second": 56.83, "step": 1056000 }, { "epoch": 8.68, "eval_loss": 1.9123215675354004, "eval_runtime": 113.8372, "eval_samples_per_second": 907.217, "eval_steps_per_second": 56.704, "step": 1064000 }, { "epoch": 8.74, "learning_rate": 2.2686666666666667e-07, "loss": 2.049, "step": 1072000 }, { "epoch": 8.74, "eval_loss": 1.9258580207824707, "eval_runtime": 115.4682, "eval_samples_per_second": 894.402, "eval_steps_per_second": 55.903, "step": 1072000 }, { "epoch": 8.81, "eval_loss": 1.932053804397583, "eval_runtime": 113.8053, "eval_samples_per_second": 907.471, "eval_steps_per_second": 56.72, "step": 1080000 }, { "epoch": 8.87, "learning_rate": 2.2413333333333333e-07, "loss": 2.0463, "step": 1088000 }, { "epoch": 8.87, "eval_loss": 1.9148298501968384, "eval_runtime": 114.0323, "eval_samples_per_second": 905.664, "eval_steps_per_second": 56.607, "step": 1088000 }, { "epoch": 8.94, "eval_loss": 1.9144847393035889, "eval_runtime": 113.7624, "eval_samples_per_second": 907.813, "eval_steps_per_second": 56.741, "step": 1096000 }, { "epoch": 9.0, "learning_rate": 2.214e-07, "loss": 2.0494, "step": 1104000 }, { "epoch": 9.0, "eval_loss": 1.9097198247909546, "eval_runtime": 114.8448, "eval_samples_per_second": 899.257, "eval_steps_per_second": 56.206, "step": 1104000 }, { "epoch": 9.07, "eval_loss": 1.9135308265686035, "eval_runtime": 114.2552, "eval_samples_per_second": 903.898, "eval_steps_per_second": 56.496, "step": 1112000 }, { "epoch": 9.13, "learning_rate": 2.1866666666666667e-07, "loss": 2.0467, "step": 1120000 }, { "epoch": 9.13, "eval_loss": 1.9163955450057983, "eval_runtime": 114.3157, "eval_samples_per_second": 903.419, "eval_steps_per_second": 56.466, "step": 1120000 }, { "epoch": 9.2, "eval_loss": 1.9224337339401245, "eval_runtime": 113.5325, "eval_samples_per_second": 909.652, "eval_steps_per_second": 56.856, "step": 1128000 }, { "epoch": 9.26, "learning_rate": 2.1593333333333332e-07, "loss": 2.0483, "step": 1136000 }, { "epoch": 9.26, "eval_loss": 1.9134596586227417, "eval_runtime": 113.9566, "eval_samples_per_second": 906.266, "eval_steps_per_second": 56.644, "step": 1136000 }, { "epoch": 9.33, "eval_loss": 1.919922947883606, "eval_runtime": 114.7403, "eval_samples_per_second": 900.076, "eval_steps_per_second": 56.257, "step": 1144000 }, { "epoch": 9.39, "learning_rate": 2.132e-07, "loss": 2.0437, "step": 1152000 }, { "epoch": 9.39, "eval_loss": 1.9213168621063232, "eval_runtime": 113.9265, "eval_samples_per_second": 906.505, "eval_steps_per_second": 56.659, "step": 1152000 }, { "epoch": 9.46, "eval_loss": 1.9161458015441895, "eval_runtime": 114.3737, "eval_samples_per_second": 902.961, "eval_steps_per_second": 56.438, "step": 1160000 }, { "epoch": 9.52, "learning_rate": 2.1046666666666666e-07, "loss": 2.0526, "step": 1168000 }, { "epoch": 9.52, "eval_loss": 1.9148198366165161, "eval_runtime": 113.75, "eval_samples_per_second": 907.912, "eval_steps_per_second": 56.747, "step": 1168000 }, { "epoch": 9.59, "eval_loss": 1.9182627201080322, "eval_runtime": 114.4421, "eval_samples_per_second": 902.421, "eval_steps_per_second": 56.404, "step": 1176000 }, { "epoch": 9.65, "learning_rate": 2.0773333333333334e-07, "loss": 2.0408, "step": 1184000 }, { "epoch": 9.65, "eval_loss": 1.9078502655029297, "eval_runtime": 116.0253, "eval_samples_per_second": 890.108, "eval_steps_per_second": 55.634, "step": 1184000 }, { "epoch": 9.72, "eval_loss": 1.918637752532959, "eval_runtime": 116.0532, "eval_samples_per_second": 889.893, "eval_steps_per_second": 55.621, "step": 1192000 }, { "epoch": 9.78, "learning_rate": 2.05e-07, "loss": 2.0488, "step": 1200000 }, { "epoch": 9.78, "eval_loss": 1.9140615463256836, "eval_runtime": 114.9098, "eval_samples_per_second": 898.748, "eval_steps_per_second": 56.174, "step": 1200000 }, { "epoch": 9.85, "eval_loss": 1.907893419265747, "eval_runtime": 114.1171, "eval_samples_per_second": 904.991, "eval_steps_per_second": 56.565, "step": 1208000 }, { "epoch": 9.92, "learning_rate": 2.0226666666666668e-07, "loss": 2.0441, "step": 1216000 }, { "epoch": 9.92, "eval_loss": 1.9250520467758179, "eval_runtime": 113.8841, "eval_samples_per_second": 906.843, "eval_steps_per_second": 56.68, "step": 1216000 }, { "epoch": 9.98, "eval_loss": 1.9254791736602783, "eval_runtime": 114.3655, "eval_samples_per_second": 903.026, "eval_steps_per_second": 56.442, "step": 1224000 }, { "epoch": 10.05, "learning_rate": 1.9953333333333333e-07, "loss": 2.0483, "step": 1232000 }, { "epoch": 10.05, "eval_loss": 1.9108103513717651, "eval_runtime": 114.7681, "eval_samples_per_second": 899.858, "eval_steps_per_second": 56.244, "step": 1232000 }, { "epoch": 10.11, "eval_loss": 1.904534935951233, "eval_runtime": 115.2497, "eval_samples_per_second": 896.097, "eval_steps_per_second": 56.009, "step": 1240000 }, { "epoch": 10.18, "learning_rate": 1.968e-07, "loss": 2.0503, "step": 1248000 }, { "epoch": 10.18, "eval_loss": 1.9169738292694092, "eval_runtime": 115.5795, "eval_samples_per_second": 893.541, "eval_steps_per_second": 55.849, "step": 1248000 }, { "epoch": 10.24, "eval_loss": 1.9024699926376343, "eval_runtime": 115.6553, "eval_samples_per_second": 892.955, "eval_steps_per_second": 55.812, "step": 1256000 }, { "epoch": 10.31, "learning_rate": 1.9406666666666667e-07, "loss": 2.0334, "step": 1264000 }, { "epoch": 10.31, "eval_loss": 1.9198503494262695, "eval_runtime": 115.9185, "eval_samples_per_second": 890.927, "eval_steps_per_second": 55.686, "step": 1264000 }, { "epoch": 10.37, "eval_loss": 1.9187484979629517, "eval_runtime": 114.9362, "eval_samples_per_second": 898.542, "eval_steps_per_second": 56.162, "step": 1272000 }, { "epoch": 10.44, "learning_rate": 1.9133333333333333e-07, "loss": 2.0388, "step": 1280000 }, { "epoch": 10.44, "eval_loss": 1.902976393699646, "eval_runtime": 115.6842, "eval_samples_per_second": 892.732, "eval_steps_per_second": 55.798, "step": 1280000 }, { "epoch": 10.5, "eval_loss": 1.9231475591659546, "eval_runtime": 114.4315, "eval_samples_per_second": 902.505, "eval_steps_per_second": 56.409, "step": 1288000 }, { "epoch": 10.57, "learning_rate": 1.886e-07, "loss": 2.0489, "step": 1296000 }, { "epoch": 10.57, "eval_loss": 1.9084066152572632, "eval_runtime": 114.2511, "eval_samples_per_second": 903.93, "eval_steps_per_second": 56.498, "step": 1296000 }, { "epoch": 10.63, "eval_loss": 1.9184343814849854, "eval_runtime": 115.1565, "eval_samples_per_second": 896.823, "eval_steps_per_second": 56.054, "step": 1304000 }, { "epoch": 10.7, "learning_rate": 1.8586666666666666e-07, "loss": 2.0476, "step": 1312000 }, { "epoch": 10.7, "eval_loss": 1.9159677028656006, "eval_runtime": 114.4329, "eval_samples_per_second": 902.494, "eval_steps_per_second": 56.409, "step": 1312000 }, { "epoch": 10.76, "eval_loss": 1.9276108741760254, "eval_runtime": 114.2813, "eval_samples_per_second": 903.691, "eval_steps_per_second": 56.483, "step": 1320000 }, { "epoch": 10.83, "learning_rate": 1.8313333333333332e-07, "loss": 2.037, "step": 1328000 }, { "epoch": 10.83, "eval_loss": 1.9041118621826172, "eval_runtime": 114.8143, "eval_samples_per_second": 899.496, "eval_steps_per_second": 56.221, "step": 1328000 }, { "epoch": 10.89, "eval_loss": 1.9227638244628906, "eval_runtime": 115.0142, "eval_samples_per_second": 897.933, "eval_steps_per_second": 56.124, "step": 1336000 }, { "epoch": 10.96, "learning_rate": 1.804e-07, "loss": 2.0447, "step": 1344000 }, { "epoch": 10.96, "eval_loss": 1.9151026010513306, "eval_runtime": 115.3034, "eval_samples_per_second": 895.68, "eval_steps_per_second": 55.983, "step": 1344000 }, { "epoch": 11.02, "eval_loss": 1.9068875312805176, "eval_runtime": 114.8441, "eval_samples_per_second": 899.263, "eval_steps_per_second": 56.207, "step": 1352000 }, { "epoch": 11.09, "learning_rate": 1.7766666666666666e-07, "loss": 2.039, "step": 1360000 }, { "epoch": 11.09, "eval_loss": 1.9274860620498657, "eval_runtime": 116.0333, "eval_samples_per_second": 890.046, "eval_steps_per_second": 55.631, "step": 1360000 }, { "epoch": 11.15, "eval_loss": 1.9066658020019531, "eval_runtime": 115.792, "eval_samples_per_second": 891.901, "eval_steps_per_second": 55.747, "step": 1368000 }, { "epoch": 11.22, "learning_rate": 1.7493333333333334e-07, "loss": 2.0434, "step": 1376000 }, { "epoch": 11.22, "eval_loss": 1.9086920022964478, "eval_runtime": 115.0319, "eval_samples_per_second": 897.795, "eval_steps_per_second": 56.115, "step": 1376000 }, { "epoch": 11.29, "eval_loss": 1.9041084051132202, "eval_runtime": 115.5247, "eval_samples_per_second": 893.965, "eval_steps_per_second": 55.876, "step": 1384000 }, { "epoch": 11.35, "learning_rate": 1.722e-07, "loss": 2.0501, "step": 1392000 }, { "epoch": 11.35, "eval_loss": 1.9032894372940063, "eval_runtime": 115.2083, "eval_samples_per_second": 896.42, "eval_steps_per_second": 56.029, "step": 1392000 }, { "epoch": 11.42, "eval_loss": 1.9152663946151733, "eval_runtime": 115.0179, "eval_samples_per_second": 897.903, "eval_steps_per_second": 56.122, "step": 1400000 }, { "epoch": 11.48, "learning_rate": 1.6946666666666668e-07, "loss": 2.0455, "step": 1408000 }, { "epoch": 11.48, "eval_loss": 1.9173645973205566, "eval_runtime": 115.462, "eval_samples_per_second": 894.45, "eval_steps_per_second": 55.906, "step": 1408000 }, { "epoch": 11.55, "eval_loss": 1.9174134731292725, "eval_runtime": 114.825, "eval_samples_per_second": 899.412, "eval_steps_per_second": 56.216, "step": 1416000 }, { "epoch": 11.61, "learning_rate": 1.6673333333333333e-07, "loss": 2.0466, "step": 1424000 }, { "epoch": 11.61, "eval_loss": 1.9260660409927368, "eval_runtime": 114.5404, "eval_samples_per_second": 901.647, "eval_steps_per_second": 56.356, "step": 1424000 }, { "epoch": 11.68, "eval_loss": 1.9181084632873535, "eval_runtime": 115.0034, "eval_samples_per_second": 898.017, "eval_steps_per_second": 56.129, "step": 1432000 }, { "epoch": 11.74, "learning_rate": 1.64e-07, "loss": 2.0424, "step": 1440000 }, { "epoch": 11.74, "eval_loss": 1.9141377210617065, "eval_runtime": 114.2837, "eval_samples_per_second": 903.672, "eval_steps_per_second": 56.482, "step": 1440000 }, { "epoch": 11.81, "eval_loss": 1.9004480838775635, "eval_runtime": 114.3666, "eval_samples_per_second": 903.017, "eval_steps_per_second": 56.441, "step": 1448000 }, { "epoch": 11.87, "learning_rate": 1.6126666666666667e-07, "loss": 2.0441, "step": 1456000 }, { "epoch": 11.87, "eval_loss": 1.919699788093567, "eval_runtime": 115.2012, "eval_samples_per_second": 896.475, "eval_steps_per_second": 56.032, "step": 1456000 }, { "epoch": 11.94, "eval_loss": 1.9074804782867432, "eval_runtime": 114.4122, "eval_samples_per_second": 902.658, "eval_steps_per_second": 56.419, "step": 1464000 }, { "epoch": 12.0, "learning_rate": 1.5853333333333332e-07, "loss": 2.04, "step": 1472000 }, { "epoch": 12.0, "eval_loss": 1.9121414422988892, "eval_runtime": 114.3242, "eval_samples_per_second": 903.352, "eval_steps_per_second": 56.462, "step": 1472000 }, { "epoch": 12.07, "eval_loss": 1.9210638999938965, "eval_runtime": 114.213, "eval_samples_per_second": 904.231, "eval_steps_per_second": 56.517, "step": 1480000 }, { "epoch": 12.13, "learning_rate": 1.558e-07, "loss": 2.0375, "step": 1488000 }, { "epoch": 12.13, "eval_loss": 1.9110891819000244, "eval_runtime": 114.5716, "eval_samples_per_second": 901.401, "eval_steps_per_second": 56.34, "step": 1488000 }, { "epoch": 12.2, "eval_loss": 1.9187558889389038, "eval_runtime": 114.4912, "eval_samples_per_second": 902.034, "eval_steps_per_second": 56.38, "step": 1496000 }, { "epoch": 12.26, "learning_rate": 1.5306666666666666e-07, "loss": 2.0482, "step": 1504000 }, { "epoch": 12.26, "eval_loss": 1.9099169969558716, "eval_runtime": 114.3549, "eval_samples_per_second": 903.109, "eval_steps_per_second": 56.447, "step": 1504000 }, { "epoch": 12.33, "eval_loss": 1.9160943031311035, "eval_runtime": 116.1161, "eval_samples_per_second": 889.412, "eval_steps_per_second": 55.591, "step": 1512000 }, { "epoch": 12.39, "learning_rate": 1.5033333333333332e-07, "loss": 2.0432, "step": 1520000 }, { "epoch": 12.39, "eval_loss": 1.9197900295257568, "eval_runtime": 114.3248, "eval_samples_per_second": 903.347, "eval_steps_per_second": 56.462, "step": 1520000 }, { "epoch": 12.46, "eval_loss": 1.9154330492019653, "eval_runtime": 114.7975, "eval_samples_per_second": 899.627, "eval_steps_per_second": 56.229, "step": 1528000 }, { "epoch": 12.52, "learning_rate": 1.476e-07, "loss": 2.0514, "step": 1536000 }, { "epoch": 12.52, "eval_loss": 1.9058637619018555, "eval_runtime": 114.484, "eval_samples_per_second": 902.091, "eval_steps_per_second": 56.383, "step": 1536000 }, { "epoch": 12.59, "eval_loss": 1.920427680015564, "eval_runtime": 114.3098, "eval_samples_per_second": 903.466, "eval_steps_per_second": 56.469, "step": 1544000 }, { "epoch": 12.65, "learning_rate": 1.4486666666666665e-07, "loss": 2.0397, "step": 1552000 }, { "epoch": 12.65, "eval_loss": 1.9054511785507202, "eval_runtime": 114.3602, "eval_samples_per_second": 903.068, "eval_steps_per_second": 56.444, "step": 1552000 }, { "epoch": 12.72, "eval_loss": 1.896202802658081, "eval_runtime": 115.3412, "eval_samples_per_second": 895.387, "eval_steps_per_second": 55.964, "step": 1560000 }, { "epoch": 12.79, "learning_rate": 1.4213333333333334e-07, "loss": 2.0454, "step": 1568000 }, { "epoch": 12.79, "eval_loss": 1.9040275812149048, "eval_runtime": 114.7741, "eval_samples_per_second": 899.811, "eval_steps_per_second": 56.241, "step": 1568000 }, { "epoch": 12.85, "eval_loss": 1.916807770729065, "eval_runtime": 114.6956, "eval_samples_per_second": 900.427, "eval_steps_per_second": 56.279, "step": 1576000 }, { "epoch": 12.92, "learning_rate": 1.3940000000000002e-07, "loss": 2.0391, "step": 1584000 }, { "epoch": 12.92, "eval_loss": 1.9037362337112427, "eval_runtime": 114.764, "eval_samples_per_second": 899.89, "eval_steps_per_second": 56.246, "step": 1584000 }, { "epoch": 12.98, "eval_loss": 1.9186286926269531, "eval_runtime": 114.5005, "eval_samples_per_second": 901.961, "eval_steps_per_second": 56.375, "step": 1592000 }, { "epoch": 13.05, "learning_rate": 1.3666666666666665e-07, "loss": 2.0414, "step": 1600000 }, { "epoch": 13.05, "eval_loss": 1.9122203588485718, "eval_runtime": 114.4898, "eval_samples_per_second": 902.045, "eval_steps_per_second": 56.381, "step": 1600000 }, { "epoch": 13.11, "eval_loss": 1.9115867614746094, "eval_runtime": 115.3456, "eval_samples_per_second": 895.352, "eval_steps_per_second": 55.962, "step": 1608000 }, { "epoch": 13.18, "learning_rate": 1.3393333333333333e-07, "loss": 2.0431, "step": 1616000 }, { "epoch": 13.18, "eval_loss": 1.9056520462036133, "eval_runtime": 114.6382, "eval_samples_per_second": 900.878, "eval_steps_per_second": 56.308, "step": 1616000 }, { "epoch": 13.24, "eval_loss": 1.9115238189697266, "eval_runtime": 114.36, "eval_samples_per_second": 903.07, "eval_steps_per_second": 56.445, "step": 1624000 }, { "epoch": 13.31, "learning_rate": 1.312e-07, "loss": 2.0368, "step": 1632000 }, { "epoch": 13.31, "eval_loss": 1.911974549293518, "eval_runtime": 114.4999, "eval_samples_per_second": 901.966, "eval_steps_per_second": 56.376, "step": 1632000 }, { "epoch": 13.37, "eval_loss": 1.9113932847976685, "eval_runtime": 115.0038, "eval_samples_per_second": 898.014, "eval_steps_per_second": 56.129, "step": 1640000 }, { "epoch": 13.44, "learning_rate": 1.2846666666666667e-07, "loss": 2.0427, "step": 1648000 }, { "epoch": 13.44, "eval_loss": 1.9128488302230835, "eval_runtime": 115.5184, "eval_samples_per_second": 894.013, "eval_steps_per_second": 55.879, "step": 1648000 }, { "epoch": 13.5, "eval_loss": 1.9200862646102905, "eval_runtime": 115.9784, "eval_samples_per_second": 890.467, "eval_steps_per_second": 55.657, "step": 1656000 }, { "epoch": 13.57, "learning_rate": 1.2573333333333332e-07, "loss": 2.0366, "step": 1664000 }, { "epoch": 13.57, "eval_loss": 1.9053164720535278, "eval_runtime": 115.3446, "eval_samples_per_second": 895.361, "eval_steps_per_second": 55.963, "step": 1664000 }, { "epoch": 13.63, "eval_loss": 1.9077204465866089, "eval_runtime": 114.7784, "eval_samples_per_second": 899.777, "eval_steps_per_second": 56.239, "step": 1672000 }, { "epoch": 13.7, "learning_rate": 1.23e-07, "loss": 2.0423, "step": 1680000 }, { "epoch": 13.7, "eval_loss": 1.9155118465423584, "eval_runtime": 114.5734, "eval_samples_per_second": 901.387, "eval_steps_per_second": 56.339, "step": 1680000 }, { "epoch": 13.76, "eval_loss": 1.9025253057479858, "eval_runtime": 115.4889, "eval_samples_per_second": 894.242, "eval_steps_per_second": 55.893, "step": 1688000 }, { "epoch": 13.83, "learning_rate": 1.2026666666666666e-07, "loss": 2.0345, "step": 1696000 }, { "epoch": 13.83, "eval_loss": 1.911736011505127, "eval_runtime": 115.8028, "eval_samples_per_second": 891.818, "eval_steps_per_second": 55.741, "step": 1696000 }, { "epoch": 13.89, "eval_loss": 1.9146357774734497, "eval_runtime": 115.518, "eval_samples_per_second": 894.017, "eval_steps_per_second": 55.879, "step": 1704000 }, { "epoch": 13.96, "learning_rate": 1.1753333333333334e-07, "loss": 2.0523, "step": 1712000 }, { "epoch": 13.96, "eval_loss": 1.9094045162200928, "eval_runtime": 115.0577, "eval_samples_per_second": 897.593, "eval_steps_per_second": 56.102, "step": 1712000 }, { "epoch": 14.02, "eval_loss": 1.9028066396713257, "eval_runtime": 115.3284, "eval_samples_per_second": 895.486, "eval_steps_per_second": 55.971, "step": 1720000 }, { "epoch": 14.09, "learning_rate": 1.1480000000000001e-07, "loss": 2.0405, "step": 1728000 }, { "epoch": 14.09, "eval_loss": 1.9033746719360352, "eval_runtime": 116.3993, "eval_samples_per_second": 887.247, "eval_steps_per_second": 55.456, "step": 1728000 }, { "epoch": 14.16, "eval_loss": 1.903308629989624, "eval_runtime": 115.3224, "eval_samples_per_second": 895.533, "eval_steps_per_second": 55.973, "step": 1736000 }, { "epoch": 14.22, "learning_rate": 1.1206666666666666e-07, "loss": 2.0416, "step": 1744000 }, { "epoch": 14.22, "eval_loss": 1.8958499431610107, "eval_runtime": 115.3629, "eval_samples_per_second": 895.218, "eval_steps_per_second": 55.954, "step": 1744000 }, { "epoch": 14.29, "eval_loss": 1.9071624279022217, "eval_runtime": 114.3411, "eval_samples_per_second": 903.218, "eval_steps_per_second": 56.454, "step": 1752000 }, { "epoch": 14.35, "learning_rate": 1.0933333333333333e-07, "loss": 2.0453, "step": 1760000 }, { "epoch": 14.35, "eval_loss": 1.90669846534729, "eval_runtime": 114.9673, "eval_samples_per_second": 898.299, "eval_steps_per_second": 56.146, "step": 1760000 }, { "epoch": 14.42, "eval_loss": 1.9112778902053833, "eval_runtime": 115.0041, "eval_samples_per_second": 898.012, "eval_steps_per_second": 56.128, "step": 1768000 }, { "epoch": 14.48, "learning_rate": 1.066e-07, "loss": 2.0425, "step": 1776000 }, { "epoch": 14.48, "eval_loss": 1.9103703498840332, "eval_runtime": 115.7959, "eval_samples_per_second": 891.871, "eval_steps_per_second": 55.745, "step": 1776000 }, { "epoch": 14.55, "eval_loss": 1.9110212326049805, "eval_runtime": 115.8835, "eval_samples_per_second": 891.197, "eval_steps_per_second": 55.702, "step": 1784000 }, { "epoch": 14.61, "learning_rate": 1.0386666666666667e-07, "loss": 2.0404, "step": 1792000 }, { "epoch": 14.61, "eval_loss": 1.9037020206451416, "eval_runtime": 115.7942, "eval_samples_per_second": 891.884, "eval_steps_per_second": 55.745, "step": 1792000 }, { "epoch": 14.68, "eval_loss": 1.9003052711486816, "eval_runtime": 115.8783, "eval_samples_per_second": 891.236, "eval_steps_per_second": 55.705, "step": 1800000 }, { "epoch": 14.74, "learning_rate": 1.0113333333333334e-07, "loss": 2.0427, "step": 1808000 }, { "epoch": 14.74, "eval_loss": 1.911608099937439, "eval_runtime": 116.1597, "eval_samples_per_second": 889.078, "eval_steps_per_second": 55.57, "step": 1808000 }, { "epoch": 14.81, "eval_loss": 1.9105613231658936, "eval_runtime": 116.0079, "eval_samples_per_second": 890.241, "eval_steps_per_second": 55.643, "step": 1816000 }, { "epoch": 14.87, "learning_rate": 9.84e-08, "loss": 2.0368, "step": 1824000 }, { "epoch": 14.87, "eval_loss": 1.9095083475112915, "eval_runtime": 116.8018, "eval_samples_per_second": 884.19, "eval_steps_per_second": 55.265, "step": 1824000 }, { "epoch": 14.94, "eval_loss": 1.8979859352111816, "eval_runtime": 114.5417, "eval_samples_per_second": 901.637, "eval_steps_per_second": 56.355, "step": 1832000 }, { "epoch": 15.0, "learning_rate": 9.566666666666666e-08, "loss": 2.0441, "step": 1840000 }, { "epoch": 15.0, "eval_loss": 1.918567419052124, "eval_runtime": 114.9208, "eval_samples_per_second": 898.662, "eval_steps_per_second": 56.169, "step": 1840000 }, { "epoch": 15.07, "eval_loss": 1.903983473777771, "eval_runtime": 114.4082, "eval_samples_per_second": 902.689, "eval_steps_per_second": 56.421, "step": 1848000 }, { "epoch": 15.13, "learning_rate": 9.293333333333333e-08, "loss": 2.0313, "step": 1856000 }, { "epoch": 15.13, "eval_loss": 1.9185600280761719, "eval_runtime": 115.728, "eval_samples_per_second": 892.394, "eval_steps_per_second": 55.777, "step": 1856000 }, { "epoch": 15.2, "eval_loss": 1.9016015529632568, "eval_runtime": 114.5834, "eval_samples_per_second": 901.308, "eval_steps_per_second": 56.335, "step": 1864000 }, { "epoch": 15.26, "learning_rate": 9.02e-08, "loss": 2.0488, "step": 1872000 }, { "epoch": 15.26, "eval_loss": 1.9047600030899048, "eval_runtime": 115.0099, "eval_samples_per_second": 897.966, "eval_steps_per_second": 56.126, "step": 1872000 }, { "epoch": 15.33, "eval_loss": 1.899457335472107, "eval_runtime": 115.8151, "eval_samples_per_second": 891.723, "eval_steps_per_second": 55.735, "step": 1880000 }, { "epoch": 15.39, "learning_rate": 8.746666666666667e-08, "loss": 2.0361, "step": 1888000 }, { "epoch": 15.39, "eval_loss": 1.9119617938995361, "eval_runtime": 116.351, "eval_samples_per_second": 887.616, "eval_steps_per_second": 55.479, "step": 1888000 }, { "epoch": 15.46, "eval_loss": 1.907942295074463, "eval_runtime": 116.3149, "eval_samples_per_second": 887.892, "eval_steps_per_second": 55.496, "step": 1896000 }, { "epoch": 15.53, "learning_rate": 8.473333333333334e-08, "loss": 2.0449, "step": 1904000 }, { "epoch": 15.53, "eval_loss": 1.9109671115875244, "eval_runtime": 114.9775, "eval_samples_per_second": 898.219, "eval_steps_per_second": 56.141, "step": 1904000 }, { "epoch": 15.59, "eval_loss": 1.909091591835022, "eval_runtime": 115.2151, "eval_samples_per_second": 896.367, "eval_steps_per_second": 56.026, "step": 1912000 }, { "epoch": 15.66, "learning_rate": 8.2e-08, "loss": 2.043, "step": 1920000 }, { "epoch": 15.66, "eval_loss": 1.9061814546585083, "eval_runtime": 115.9012, "eval_samples_per_second": 891.06, "eval_steps_per_second": 55.694, "step": 1920000 }, { "epoch": 15.72, "eval_loss": 1.9070407152175903, "eval_runtime": 115.1701, "eval_samples_per_second": 896.717, "eval_steps_per_second": 56.048, "step": 1928000 }, { "epoch": 15.79, "learning_rate": 7.926666666666666e-08, "loss": 2.0414, "step": 1936000 }, { "epoch": 15.79, "eval_loss": 1.913381576538086, "eval_runtime": 115.5442, "eval_samples_per_second": 893.814, "eval_steps_per_second": 55.866, "step": 1936000 }, { "epoch": 15.85, "eval_loss": 1.9079296588897705, "eval_runtime": 115.3858, "eval_samples_per_second": 895.041, "eval_steps_per_second": 55.943, "step": 1944000 }, { "epoch": 15.92, "learning_rate": 7.653333333333333e-08, "loss": 2.0419, "step": 1952000 }, { "epoch": 15.92, "eval_loss": 1.9060734510421753, "eval_runtime": 115.6219, "eval_samples_per_second": 893.213, "eval_steps_per_second": 55.829, "step": 1952000 }, { "epoch": 15.98, "eval_loss": 1.9058138132095337, "eval_runtime": 115.3288, "eval_samples_per_second": 895.483, "eval_steps_per_second": 55.97, "step": 1960000 }, { "epoch": 16.05, "learning_rate": 7.38e-08, "loss": 2.0384, "step": 1968000 }, { "epoch": 16.05, "eval_loss": 1.9113844633102417, "eval_runtime": 115.36, "eval_samples_per_second": 895.241, "eval_steps_per_second": 55.955, "step": 1968000 }, { "epoch": 16.11, "eval_loss": 1.904008150100708, "eval_runtime": 115.3314, "eval_samples_per_second": 895.463, "eval_steps_per_second": 55.969, "step": 1976000 }, { "epoch": 16.18, "learning_rate": 7.106666666666667e-08, "loss": 2.0391, "step": 1984000 }, { "epoch": 16.18, "eval_loss": 1.9094995260238647, "eval_runtime": 116.0038, "eval_samples_per_second": 890.273, "eval_steps_per_second": 55.645, "step": 1984000 }, { "epoch": 16.24, "eval_loss": 1.918182134628296, "eval_runtime": 117.9985, "eval_samples_per_second": 875.223, "eval_steps_per_second": 54.704, "step": 1992000 }, { "epoch": 16.31, "learning_rate": 6.833333333333332e-08, "loss": 2.0405, "step": 2000000 }, { "epoch": 16.31, "eval_loss": 1.9111247062683105, "eval_runtime": 115.7318, "eval_samples_per_second": 892.365, "eval_steps_per_second": 55.776, "step": 2000000 }, { "epoch": 16.37, "eval_loss": 1.9056226015090942, "eval_runtime": 115.3321, "eval_samples_per_second": 895.458, "eval_steps_per_second": 55.969, "step": 2008000 }, { "epoch": 16.44, "learning_rate": 6.56e-08, "loss": 2.0404, "step": 2016000 }, { "epoch": 16.44, "eval_loss": 1.9133949279785156, "eval_runtime": 115.6177, "eval_samples_per_second": 893.245, "eval_steps_per_second": 55.831, "step": 2016000 }, { "epoch": 16.5, "eval_loss": 1.9069831371307373, "eval_runtime": 116.2332, "eval_samples_per_second": 888.516, "eval_steps_per_second": 55.535, "step": 2024000 }, { "epoch": 16.57, "learning_rate": 6.286666666666666e-08, "loss": 2.0414, "step": 2032000 }, { "epoch": 16.57, "eval_loss": 1.9084620475769043, "eval_runtime": 115.922, "eval_samples_per_second": 890.901, "eval_steps_per_second": 55.684, "step": 2032000 }, { "epoch": 16.63, "eval_loss": 1.9063148498535156, "eval_runtime": 116.5212, "eval_samples_per_second": 886.319, "eval_steps_per_second": 55.398, "step": 2040000 }, { "epoch": 16.7, "learning_rate": 6.013333333333333e-08, "loss": 2.0483, "step": 2048000 }, { "epoch": 16.7, "eval_loss": 1.9186962842941284, "eval_runtime": 116.2964, "eval_samples_per_second": 888.032, "eval_steps_per_second": 55.505, "step": 2048000 }, { "epoch": 16.76, "eval_loss": 1.9105137586593628, "eval_runtime": 115.5049, "eval_samples_per_second": 894.118, "eval_steps_per_second": 55.885, "step": 2056000 }, { "epoch": 16.83, "learning_rate": 5.7400000000000004e-08, "loss": 2.0452, "step": 2064000 }, { "epoch": 16.83, "eval_loss": 1.9117952585220337, "eval_runtime": 116.0281, "eval_samples_per_second": 890.086, "eval_steps_per_second": 55.633, "step": 2064000 }, { "epoch": 16.89, "eval_loss": 1.9091888666152954, "eval_runtime": 117.9897, "eval_samples_per_second": 875.288, "eval_steps_per_second": 54.708, "step": 2072000 }, { "epoch": 16.96, "learning_rate": 5.4666666666666666e-08, "loss": 2.0401, "step": 2080000 }, { "epoch": 16.96, "eval_loss": 1.9113515615463257, "eval_runtime": 116.2625, "eval_samples_per_second": 888.291, "eval_steps_per_second": 55.521, "step": 2080000 }, { "epoch": 17.03, "eval_loss": 1.9098221063613892, "eval_runtime": 115.7155, "eval_samples_per_second": 892.491, "eval_steps_per_second": 55.783, "step": 2088000 }, { "epoch": 17.09, "learning_rate": 5.1933333333333335e-08, "loss": 2.0353, "step": 2096000 }, { "epoch": 17.09, "eval_loss": 1.9069087505340576, "eval_runtime": 116.5348, "eval_samples_per_second": 886.216, "eval_steps_per_second": 55.391, "step": 2096000 }, { "epoch": 17.16, "eval_loss": 1.9027125835418701, "eval_runtime": 115.7058, "eval_samples_per_second": 892.566, "eval_steps_per_second": 55.788, "step": 2104000 }, { "epoch": 17.22, "learning_rate": 4.92e-08, "loss": 2.0468, "step": 2112000 }, { "epoch": 17.22, "eval_loss": 1.910232424736023, "eval_runtime": 115.6722, "eval_samples_per_second": 892.825, "eval_steps_per_second": 55.804, "step": 2112000 }, { "epoch": 17.29, "eval_loss": 1.9046436548233032, "eval_runtime": 116.4733, "eval_samples_per_second": 886.684, "eval_steps_per_second": 55.42, "step": 2120000 }, { "epoch": 17.35, "learning_rate": 4.6466666666666666e-08, "loss": 2.0448, "step": 2128000 }, { "epoch": 17.35, "eval_loss": 1.9024384021759033, "eval_runtime": 115.5381, "eval_samples_per_second": 893.861, "eval_steps_per_second": 55.869, "step": 2128000 }, { "epoch": 17.42, "eval_loss": 1.910799264907837, "eval_runtime": 116.2371, "eval_samples_per_second": 888.486, "eval_steps_per_second": 55.533, "step": 2136000 }, { "epoch": 17.48, "learning_rate": 4.3733333333333335e-08, "loss": 2.0435, "step": 2144000 }, { "epoch": 17.48, "eval_loss": 1.9122228622436523, "eval_runtime": 116.6097, "eval_samples_per_second": 885.647, "eval_steps_per_second": 55.356, "step": 2144000 }, { "epoch": 17.55, "eval_loss": 1.9043642282485962, "eval_runtime": 115.6604, "eval_samples_per_second": 892.916, "eval_steps_per_second": 55.81, "step": 2152000 }, { "epoch": 17.61, "learning_rate": 4.1e-08, "loss": 2.0421, "step": 2160000 }, { "epoch": 17.61, "eval_loss": 1.9069358110427856, "eval_runtime": 116.7738, "eval_samples_per_second": 884.402, "eval_steps_per_second": 55.278, "step": 2160000 }, { "epoch": 17.68, "eval_loss": 1.9019508361816406, "eval_runtime": 116.2558, "eval_samples_per_second": 888.342, "eval_steps_per_second": 55.524, "step": 2168000 }, { "epoch": 17.74, "learning_rate": 3.8266666666666665e-08, "loss": 2.0366, "step": 2176000 }, { "epoch": 17.74, "eval_loss": 1.9152798652648926, "eval_runtime": 116.0194, "eval_samples_per_second": 890.153, "eval_steps_per_second": 55.637, "step": 2176000 }, { "epoch": 17.81, "eval_loss": 1.9072139263153076, "eval_runtime": 115.6971, "eval_samples_per_second": 892.633, "eval_steps_per_second": 55.792, "step": 2184000 }, { "epoch": 17.87, "learning_rate": 3.5533333333333334e-08, "loss": 2.034, "step": 2192000 }, { "epoch": 17.87, "eval_loss": 1.9181559085845947, "eval_runtime": 116.0059, "eval_samples_per_second": 890.256, "eval_steps_per_second": 55.644, "step": 2192000 }, { "epoch": 17.94, "eval_loss": 1.9085872173309326, "eval_runtime": 116.4771, "eval_samples_per_second": 886.655, "eval_steps_per_second": 55.419, "step": 2200000 }, { "epoch": 18.0, "learning_rate": 3.28e-08, "loss": 2.0397, "step": 2208000 }, { "epoch": 18.0, "eval_loss": 1.9070638418197632, "eval_runtime": 116.2437, "eval_samples_per_second": 888.435, "eval_steps_per_second": 55.53, "step": 2208000 }, { "epoch": 18.07, "eval_loss": 1.9146629571914673, "eval_runtime": 117.2397, "eval_samples_per_second": 880.887, "eval_steps_per_second": 55.058, "step": 2216000 }, { "epoch": 18.13, "learning_rate": 3.0066666666666665e-08, "loss": 2.0374, "step": 2224000 }, { "epoch": 18.13, "eval_loss": 1.9199682474136353, "eval_runtime": 116.9772, "eval_samples_per_second": 882.865, "eval_steps_per_second": 55.182, "step": 2224000 }, { "epoch": 18.2, "eval_loss": 1.917845368385315, "eval_runtime": 116.926, "eval_samples_per_second": 883.251, "eval_steps_per_second": 55.206, "step": 2232000 }, { "epoch": 18.26, "learning_rate": 2.7333333333333333e-08, "loss": 2.0413, "step": 2240000 }, { "epoch": 18.26, "eval_loss": 1.9089611768722534, "eval_runtime": 116.5053, "eval_samples_per_second": 886.44, "eval_steps_per_second": 55.405, "step": 2240000 }, { "epoch": 18.33, "eval_loss": 1.9036976099014282, "eval_runtime": 115.8055, "eval_samples_per_second": 891.797, "eval_steps_per_second": 55.74, "step": 2248000 }, { "epoch": 18.4, "learning_rate": 2.46e-08, "loss": 2.047, "step": 2256000 }, { "epoch": 18.4, "eval_loss": 1.9126322269439697, "eval_runtime": 116.2519, "eval_samples_per_second": 888.372, "eval_steps_per_second": 55.526, "step": 2256000 }, { "epoch": 18.46, "eval_loss": 1.9117310047149658, "eval_runtime": 116.138, "eval_samples_per_second": 889.244, "eval_steps_per_second": 55.58, "step": 2264000 }, { "epoch": 18.53, "learning_rate": 2.1866666666666667e-08, "loss": 2.0395, "step": 2272000 }, { "epoch": 18.53, "eval_loss": 1.911027431488037, "eval_runtime": 116.0632, "eval_samples_per_second": 889.817, "eval_steps_per_second": 55.616, "step": 2272000 }, { "epoch": 18.59, "eval_loss": 1.9157801866531372, "eval_runtime": 116.0777, "eval_samples_per_second": 889.706, "eval_steps_per_second": 55.609, "step": 2280000 }, { "epoch": 18.66, "learning_rate": 1.9133333333333333e-08, "loss": 2.0447, "step": 2288000 }, { "epoch": 18.66, "eval_loss": 1.9016647338867188, "eval_runtime": 116.0944, "eval_samples_per_second": 889.578, "eval_steps_per_second": 55.601, "step": 2288000 }, { "epoch": 18.72, "eval_loss": 1.9071747064590454, "eval_runtime": 117.2998, "eval_samples_per_second": 880.436, "eval_steps_per_second": 55.03, "step": 2296000 }, { "epoch": 18.79, "learning_rate": 1.64e-08, "loss": 2.0377, "step": 2304000 }, { "epoch": 18.79, "eval_loss": 1.91358482837677, "eval_runtime": 118.15, "eval_samples_per_second": 874.101, "eval_steps_per_second": 54.634, "step": 2304000 }, { "epoch": 18.85, "eval_loss": 1.9084006547927856, "eval_runtime": 118.6409, "eval_samples_per_second": 870.484, "eval_steps_per_second": 54.408, "step": 2312000 }, { "epoch": 18.92, "learning_rate": 1.3666666666666667e-08, "loss": 2.0312, "step": 2320000 }, { "epoch": 18.92, "eval_loss": 1.909172773361206, "eval_runtime": 117.7813, "eval_samples_per_second": 876.837, "eval_steps_per_second": 54.805, "step": 2320000 }, { "epoch": 18.98, "eval_loss": 1.9103314876556396, "eval_runtime": 117.0394, "eval_samples_per_second": 882.395, "eval_steps_per_second": 55.152, "step": 2328000 }, { "epoch": 19.05, "learning_rate": 1.0933333333333334e-08, "loss": 2.0387, "step": 2336000 }, { "epoch": 19.05, "eval_loss": 1.9023408889770508, "eval_runtime": 117.1786, "eval_samples_per_second": 881.347, "eval_steps_per_second": 55.087, "step": 2336000 }, { "epoch": 19.11, "eval_loss": 1.9034806489944458, "eval_runtime": 118.479, "eval_samples_per_second": 871.674, "eval_steps_per_second": 54.482, "step": 2344000 }, { "epoch": 19.18, "learning_rate": 8.2e-09, "loss": 2.0358, "step": 2352000 }, { "epoch": 19.18, "eval_loss": 1.9131251573562622, "eval_runtime": 116.6651, "eval_samples_per_second": 885.226, "eval_steps_per_second": 55.329, "step": 2352000 }, { "epoch": 19.24, "eval_loss": 1.9065865278244019, "eval_runtime": 118.6652, "eval_samples_per_second": 870.306, "eval_steps_per_second": 54.397, "step": 2360000 }, { "epoch": 19.31, "learning_rate": 5.466666666666667e-09, "loss": 2.0402, "step": 2368000 }, { "epoch": 19.31, "eval_loss": 1.9083107709884644, "eval_runtime": 117.2736, "eval_samples_per_second": 880.633, "eval_steps_per_second": 55.042, "step": 2368000 }, { "epoch": 19.37, "eval_loss": 1.9068409204483032, "eval_runtime": 117.4261, "eval_samples_per_second": 879.49, "eval_steps_per_second": 54.971, "step": 2376000 }, { "epoch": 19.44, "learning_rate": 2.7333333333333334e-09, "loss": 2.0319, "step": 2384000 }, { "epoch": 19.44, "eval_loss": 1.9011958837509155, "eval_runtime": 117.8174, "eval_samples_per_second": 876.568, "eval_steps_per_second": 54.788, "step": 2384000 }, { "epoch": 19.5, "eval_loss": 1.927274465560913, "eval_runtime": 118.5235, "eval_samples_per_second": 871.346, "eval_steps_per_second": 54.462, "step": 2392000 }, { "epoch": 19.57, "learning_rate": 0.0, "loss": 2.0436, "step": 2400000 }, { "epoch": 19.57, "eval_loss": 1.905896782875061, "eval_runtime": 116.8523, "eval_samples_per_second": 883.808, "eval_steps_per_second": 55.241, "step": 2400000 }, { "epoch": 19.57, "step": 2400000, "total_flos": 7.485113755399533e+17, "train_loss": 2.0565961393229166, "train_runtime": 185876.2415, "train_samples_per_second": 206.589, "train_steps_per_second": 12.912 } ], "logging_steps": 16000, "max_steps": 2400000, "num_train_epochs": 20, "save_steps": 32000, "total_flos": 7.485113755399533e+17, "trial_name": null, "trial_params": null }