{ "best_metric": 3.23770809173584, "best_model_checkpoint": "./model_tweets_2020_Q1_90/checkpoint-128000", "epoch": 49.171259398881354, "eval_steps": 8000, "global_step": 2400000, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.16, "eval_loss": 3.4494731426239014, "eval_runtime": 46.3964, "eval_samples_per_second": 885.888, "eval_steps_per_second": 55.371, "step": 8000 }, { "epoch": 0.33, "learning_rate": 9.939131159843243e-06, "loss": 3.5684, "step": 16000 }, { "epoch": 0.33, "eval_loss": 3.416630744934082, "eval_runtime": 46.3565, "eval_samples_per_second": 886.65, "eval_steps_per_second": 55.418, "step": 16000 }, { "epoch": 0.49, "eval_loss": 3.3847219944000244, "eval_runtime": 47.2297, "eval_samples_per_second": 870.258, "eval_steps_per_second": 54.394, "step": 24000 }, { "epoch": 0.66, "learning_rate": 9.872425581589261e-06, "loss": 3.3755, "step": 32000 }, { "epoch": 0.66, "eval_loss": 3.3664660453796387, "eval_runtime": 47.0495, "eval_samples_per_second": 873.591, "eval_steps_per_second": 54.602, "step": 32000 }, { "epoch": 0.82, "eval_loss": 3.3654134273529053, "eval_runtime": 46.3932, "eval_samples_per_second": 885.949, "eval_steps_per_second": 55.374, "step": 40000 }, { "epoch": 0.98, "learning_rate": 9.80572000333528e-06, "loss": 3.3533, "step": 48000 }, { "epoch": 0.98, "eval_loss": 3.3654167652130127, "eval_runtime": 46.5322, "eval_samples_per_second": 883.301, "eval_steps_per_second": 55.209, "step": 48000 }, { "epoch": 1.15, "eval_loss": 3.332759380340576, "eval_runtime": 46.4492, "eval_samples_per_second": 884.88, "eval_steps_per_second": 55.308, "step": 56000 }, { "epoch": 1.31, "learning_rate": 9.739014425081299e-06, "loss": 3.3014, "step": 64000 }, { "epoch": 1.31, "eval_loss": 3.3209590911865234, "eval_runtime": 45.8973, "eval_samples_per_second": 895.521, "eval_steps_per_second": 55.973, "step": 64000 }, { "epoch": 1.48, "eval_loss": 3.3491690158843994, "eval_runtime": 46.3252, "eval_samples_per_second": 887.249, "eval_steps_per_second": 55.456, "step": 72000 }, { "epoch": 1.64, "learning_rate": 9.672308846827316e-06, "loss": 3.2888, "step": 80000 }, { "epoch": 1.64, "eval_loss": 3.3213465213775635, "eval_runtime": 45.915, "eval_samples_per_second": 895.177, "eval_steps_per_second": 55.951, "step": 80000 }, { "epoch": 1.8, "eval_loss": 3.2708065509796143, "eval_runtime": 45.9723, "eval_samples_per_second": 894.061, "eval_steps_per_second": 55.882, "step": 88000 }, { "epoch": 1.97, "learning_rate": 9.605603268573334e-06, "loss": 3.2609, "step": 96000 }, { "epoch": 1.97, "eval_loss": 3.290764808654785, "eval_runtime": 46.6916, "eval_samples_per_second": 880.287, "eval_steps_per_second": 55.021, "step": 96000 }, { "epoch": 2.13, "eval_loss": 3.2766778469085693, "eval_runtime": 45.6527, "eval_samples_per_second": 900.318, "eval_steps_per_second": 56.273, "step": 104000 }, { "epoch": 2.29, "learning_rate": 9.538897690319354e-06, "loss": 3.2159, "step": 112000 }, { "epoch": 2.29, "eval_loss": 3.259241819381714, "eval_runtime": 45.9077, "eval_samples_per_second": 895.319, "eval_steps_per_second": 55.96, "step": 112000 }, { "epoch": 2.46, "eval_loss": 3.2411258220672607, "eval_runtime": 46.8974, "eval_samples_per_second": 876.424, "eval_steps_per_second": 54.779, "step": 120000 }, { "epoch": 2.62, "learning_rate": 9.472192112065373e-06, "loss": 3.2167, "step": 128000 }, { "epoch": 2.62, "eval_loss": 3.23770809173584, "eval_runtime": 46.0285, "eval_samples_per_second": 892.969, "eval_steps_per_second": 55.813, "step": 128000 }, { "epoch": 2.79, "eval_loss": 3.2485291957855225, "eval_runtime": 46.313, "eval_samples_per_second": 887.483, "eval_steps_per_second": 55.47, "step": 136000 }, { "epoch": 2.95, "learning_rate": 9.405486533811392e-06, "loss": 3.199, "step": 144000 }, { "epoch": 2.95, "eval_loss": 3.2608513832092285, "eval_runtime": 46.3737, "eval_samples_per_second": 886.322, "eval_steps_per_second": 55.398, "step": 144000 }, { "epoch": 3.11, "eval_loss": 3.2552711963653564, "eval_runtime": 45.7073, "eval_samples_per_second": 899.243, "eval_steps_per_second": 56.205, "step": 152000 }, { "epoch": 3.28, "learning_rate": 9.338780955557409e-06, "loss": 3.1905, "step": 160000 }, { "epoch": 3.28, "eval_loss": 3.2425193786621094, "eval_runtime": 46.3189, "eval_samples_per_second": 887.37, "eval_steps_per_second": 55.463, "step": 160000 }, { "epoch": 3.44, "eval_loss": 3.2421696186065674, "eval_runtime": 46.3489, "eval_samples_per_second": 886.796, "eval_steps_per_second": 55.427, "step": 168000 }, { "epoch": 3.61, "learning_rate": 9.272075377303427e-06, "loss": 3.1822, "step": 176000 }, { "epoch": 3.61, "eval_loss": 3.262392997741699, "eval_runtime": 46.6763, "eval_samples_per_second": 880.575, "eval_steps_per_second": 55.039, "step": 176000 }, { "epoch": 3.77, "eval_loss": 3.2507119178771973, "eval_runtime": 46.8277, "eval_samples_per_second": 877.728, "eval_steps_per_second": 54.861, "step": 184000 }, { "epoch": 3.93, "learning_rate": 9.205369799049446e-06, "loss": 3.1852, "step": 192000 }, { "epoch": 3.93, "eval_loss": 3.2483315467834473, "eval_runtime": 45.7607, "eval_samples_per_second": 898.195, "eval_steps_per_second": 56.14, "step": 192000 }, { "epoch": 4.1, "eval_loss": 3.251424789428711, "eval_runtime": 46.3642, "eval_samples_per_second": 886.503, "eval_steps_per_second": 55.409, "step": 200000 }, { "epoch": 4.26, "learning_rate": 9.138664220795464e-06, "loss": 3.1767, "step": 208000 }, { "epoch": 4.26, "eval_loss": 3.242562770843506, "eval_runtime": 46.886, "eval_samples_per_second": 876.637, "eval_steps_per_second": 54.792, "step": 208000 }, { "epoch": 4.43, "eval_loss": 3.234778642654419, "eval_runtime": 46.4949, "eval_samples_per_second": 884.01, "eval_steps_per_second": 55.253, "step": 216000 }, { "epoch": 4.59, "learning_rate": 9.071958642541483e-06, "loss": 3.1767, "step": 224000 }, { "epoch": 4.59, "eval_loss": 3.2734625339508057, "eval_runtime": 46.0486, "eval_samples_per_second": 892.58, "eval_steps_per_second": 55.789, "step": 224000 }, { "epoch": 4.75, "eval_loss": 3.2471694946289062, "eval_runtime": 46.5054, "eval_samples_per_second": 883.811, "eval_steps_per_second": 55.241, "step": 232000 }, { "epoch": 4.92, "learning_rate": 9.005253064287502e-06, "loss": 3.1973, "step": 240000 }, { "epoch": 4.92, "eval_loss": 3.259644031524658, "eval_runtime": 45.8405, "eval_samples_per_second": 896.631, "eval_steps_per_second": 56.042, "step": 240000 }, { "epoch": 5.08, "eval_loss": 3.2605602741241455, "eval_runtime": 45.4485, "eval_samples_per_second": 904.365, "eval_steps_per_second": 56.526, "step": 248000 }, { "epoch": 5.24, "learning_rate": 8.93854748603352e-06, "loss": 3.1781, "step": 256000 }, { "epoch": 5.24, "eval_loss": 3.281527519226074, "eval_runtime": 46.3336, "eval_samples_per_second": 887.089, "eval_steps_per_second": 55.446, "step": 256000 }, { "epoch": 5.41, "eval_loss": 3.273421049118042, "eval_runtime": 45.4558, "eval_samples_per_second": 904.218, "eval_steps_per_second": 56.516, "step": 264000 }, { "epoch": 5.57, "learning_rate": 8.871841907779539e-06, "loss": 3.1803, "step": 272000 }, { "epoch": 5.57, "eval_loss": 3.2739477157592773, "eval_runtime": 45.6455, "eval_samples_per_second": 900.462, "eval_steps_per_second": 56.282, "step": 272000 }, { "epoch": 5.74, "eval_loss": 3.2712481021881104, "eval_runtime": 46.5973, "eval_samples_per_second": 882.068, "eval_steps_per_second": 55.132, "step": 280000 }, { "epoch": 5.9, "learning_rate": 8.805136329525557e-06, "loss": 3.1989, "step": 288000 }, { "epoch": 5.9, "eval_loss": 3.273439884185791, "eval_runtime": 46.318, "eval_samples_per_second": 887.387, "eval_steps_per_second": 55.464, "step": 288000 }, { "epoch": 6.06, "eval_loss": 3.293893814086914, "eval_runtime": 45.8003, "eval_samples_per_second": 897.418, "eval_steps_per_second": 56.091, "step": 296000 }, { "epoch": 6.23, "learning_rate": 8.738430751271576e-06, "loss": 3.1929, "step": 304000 }, { "epoch": 6.23, "eval_loss": 3.288043737411499, "eval_runtime": 46.6462, "eval_samples_per_second": 881.144, "eval_steps_per_second": 55.074, "step": 304000 }, { "epoch": 6.39, "eval_loss": 3.289358139038086, "eval_runtime": 45.8512, "eval_samples_per_second": 896.422, "eval_steps_per_second": 56.029, "step": 312000 }, { "epoch": 6.56, "learning_rate": 8.671725173017595e-06, "loss": 3.2083, "step": 320000 }, { "epoch": 6.56, "eval_loss": 3.308645725250244, "eval_runtime": 46.2317, "eval_samples_per_second": 889.043, "eval_steps_per_second": 55.568, "step": 320000 }, { "epoch": 6.72, "eval_loss": 3.3066623210906982, "eval_runtime": 46.8669, "eval_samples_per_second": 876.995, "eval_steps_per_second": 54.815, "step": 328000 }, { "epoch": 6.88, "learning_rate": 8.605019594763613e-06, "loss": 3.2013, "step": 336000 }, { "epoch": 6.88, "eval_loss": 3.278655529022217, "eval_runtime": 45.904, "eval_samples_per_second": 895.391, "eval_steps_per_second": 55.965, "step": 336000 }, { "epoch": 7.05, "eval_loss": 3.3152964115142822, "eval_runtime": 46.5312, "eval_samples_per_second": 883.322, "eval_steps_per_second": 55.21, "step": 344000 }, { "epoch": 7.21, "learning_rate": 8.538314016509632e-06, "loss": 3.2111, "step": 352000 }, { "epoch": 7.21, "eval_loss": 3.3246278762817383, "eval_runtime": 46.7247, "eval_samples_per_second": 879.664, "eval_steps_per_second": 54.982, "step": 352000 }, { "epoch": 7.38, "eval_loss": 3.3322579860687256, "eval_runtime": 45.9989, "eval_samples_per_second": 893.543, "eval_steps_per_second": 55.849, "step": 360000 }, { "epoch": 7.54, "learning_rate": 8.471608438255649e-06, "loss": 3.2186, "step": 368000 }, { "epoch": 7.54, "eval_loss": 3.2938337326049805, "eval_runtime": 46.144, "eval_samples_per_second": 890.734, "eval_steps_per_second": 55.674, "step": 368000 }, { "epoch": 7.7, "eval_loss": 3.3499817848205566, "eval_runtime": 45.582, "eval_samples_per_second": 901.717, "eval_steps_per_second": 56.36, "step": 376000 }, { "epoch": 7.87, "learning_rate": 8.404902860001667e-06, "loss": 3.2268, "step": 384000 }, { "epoch": 7.87, "eval_loss": 3.3179759979248047, "eval_runtime": 45.2091, "eval_samples_per_second": 909.153, "eval_steps_per_second": 56.825, "step": 384000 }, { "epoch": 8.03, "eval_loss": 3.3171069622039795, "eval_runtime": 46.0196, "eval_samples_per_second": 893.141, "eval_steps_per_second": 55.824, "step": 392000 }, { "epoch": 8.2, "learning_rate": 8.338197281747686e-06, "loss": 3.233, "step": 400000 }, { "epoch": 8.2, "eval_loss": 3.3461642265319824, "eval_runtime": 45.5487, "eval_samples_per_second": 902.375, "eval_steps_per_second": 56.401, "step": 400000 }, { "epoch": 8.36, "eval_loss": 3.341256618499756, "eval_runtime": 45.4264, "eval_samples_per_second": 904.804, "eval_steps_per_second": 56.553, "step": 408000 }, { "epoch": 8.52, "learning_rate": 8.271491703493705e-06, "loss": 3.2432, "step": 416000 }, { "epoch": 8.52, "eval_loss": 3.328122615814209, "eval_runtime": 45.9787, "eval_samples_per_second": 893.936, "eval_steps_per_second": 55.874, "step": 416000 }, { "epoch": 8.69, "eval_loss": 3.342041492462158, "eval_runtime": 45.4274, "eval_samples_per_second": 904.784, "eval_steps_per_second": 56.552, "step": 424000 }, { "epoch": 8.85, "learning_rate": 8.204786125239725e-06, "loss": 3.2586, "step": 432000 }, { "epoch": 8.85, "eval_loss": 3.3609066009521484, "eval_runtime": 45.3913, "eval_samples_per_second": 905.504, "eval_steps_per_second": 56.597, "step": 432000 }, { "epoch": 9.01, "eval_loss": 3.352691173553467, "eval_runtime": 46.0515, "eval_samples_per_second": 892.522, "eval_steps_per_second": 55.785, "step": 440000 }, { "epoch": 9.18, "learning_rate": 8.138080546985743e-06, "loss": 3.2567, "step": 448000 }, { "epoch": 9.18, "eval_loss": 3.359393358230591, "eval_runtime": 45.57, "eval_samples_per_second": 901.953, "eval_steps_per_second": 56.375, "step": 448000 }, { "epoch": 9.34, "eval_loss": 3.3497443199157715, "eval_runtime": 45.4208, "eval_samples_per_second": 904.915, "eval_steps_per_second": 56.56, "step": 456000 }, { "epoch": 9.51, "learning_rate": 8.07137496873176e-06, "loss": 3.2592, "step": 464000 }, { "epoch": 9.51, "eval_loss": 3.3606550693511963, "eval_runtime": 46.15, "eval_samples_per_second": 890.617, "eval_steps_per_second": 55.666, "step": 464000 }, { "epoch": 9.67, "eval_loss": 3.3839540481567383, "eval_runtime": 45.5702, "eval_samples_per_second": 901.95, "eval_steps_per_second": 56.375, "step": 472000 }, { "epoch": 9.83, "learning_rate": 8.004669390477779e-06, "loss": 3.2793, "step": 480000 }, { "epoch": 9.83, "eval_loss": 3.366785764694214, "eval_runtime": 45.749, "eval_samples_per_second": 898.424, "eval_steps_per_second": 56.154, "step": 480000 }, { "epoch": 10.0, "eval_loss": 3.3609416484832764, "eval_runtime": 47.1383, "eval_samples_per_second": 871.945, "eval_steps_per_second": 54.499, "step": 488000 }, { "epoch": 10.16, "learning_rate": 7.937963812223798e-06, "loss": 3.257, "step": 496000 }, { "epoch": 10.16, "eval_loss": 3.368229389190674, "eval_runtime": 45.5778, "eval_samples_per_second": 901.798, "eval_steps_per_second": 56.365, "step": 496000 }, { "epoch": 10.33, "eval_loss": 3.4005918502807617, "eval_runtime": 46.5843, "eval_samples_per_second": 882.314, "eval_steps_per_second": 55.147, "step": 504000 }, { "epoch": 10.49, "learning_rate": 7.871258233969816e-06, "loss": 3.2656, "step": 512000 }, { "epoch": 10.49, "eval_loss": 3.358835220336914, "eval_runtime": 46.2545, "eval_samples_per_second": 888.605, "eval_steps_per_second": 55.541, "step": 512000 }, { "epoch": 10.65, "eval_loss": 3.379861831665039, "eval_runtime": 45.613, "eval_samples_per_second": 901.103, "eval_steps_per_second": 56.322, "step": 520000 }, { "epoch": 10.82, "learning_rate": 7.804552655715835e-06, "loss": 3.2727, "step": 528000 }, { "epoch": 10.82, "eval_loss": 3.383315086364746, "eval_runtime": 46.0041, "eval_samples_per_second": 893.442, "eval_steps_per_second": 55.843, "step": 528000 }, { "epoch": 10.98, "eval_loss": 3.356590747833252, "eval_runtime": 45.9202, "eval_samples_per_second": 895.074, "eval_steps_per_second": 55.945, "step": 536000 }, { "epoch": 11.15, "learning_rate": 7.737847077461853e-06, "loss": 3.2705, "step": 544000 }, { "epoch": 11.15, "eval_loss": 3.3793959617614746, "eval_runtime": 45.6075, "eval_samples_per_second": 901.211, "eval_steps_per_second": 56.328, "step": 544000 }, { "epoch": 11.31, "eval_loss": 3.3838233947753906, "eval_runtime": 46.1859, "eval_samples_per_second": 889.925, "eval_steps_per_second": 55.623, "step": 552000 }, { "epoch": 11.47, "learning_rate": 7.671141499207872e-06, "loss": 3.2676, "step": 560000 }, { "epoch": 11.47, "eval_loss": 3.3659656047821045, "eval_runtime": 45.7183, "eval_samples_per_second": 899.027, "eval_steps_per_second": 56.192, "step": 560000 }, { "epoch": 11.64, "eval_loss": 3.3937699794769287, "eval_runtime": 45.9326, "eval_samples_per_second": 894.832, "eval_steps_per_second": 55.93, "step": 568000 }, { "epoch": 11.8, "learning_rate": 7.604435920953891e-06, "loss": 3.258, "step": 576000 }, { "epoch": 11.8, "eval_loss": 3.3661420345306396, "eval_runtime": 46.4625, "eval_samples_per_second": 884.627, "eval_steps_per_second": 55.292, "step": 576000 }, { "epoch": 11.97, "eval_loss": 3.3490447998046875, "eval_runtime": 45.8318, "eval_samples_per_second": 896.801, "eval_steps_per_second": 56.053, "step": 584000 }, { "epoch": 12.13, "learning_rate": 7.537730342699909e-06, "loss": 3.2646, "step": 592000 }, { "epoch": 12.13, "eval_loss": 3.3716230392456055, "eval_runtime": 45.6734, "eval_samples_per_second": 899.91, "eval_steps_per_second": 56.247, "step": 592000 }, { "epoch": 12.29, "eval_loss": 3.3877346515655518, "eval_runtime": 46.2161, "eval_samples_per_second": 889.344, "eval_steps_per_second": 55.587, "step": 600000 }, { "epoch": 12.46, "learning_rate": 7.471024764445928e-06, "loss": 3.2578, "step": 608000 }, { "epoch": 12.46, "eval_loss": 3.3930206298828125, "eval_runtime": 45.3985, "eval_samples_per_second": 905.361, "eval_steps_per_second": 56.588, "step": 608000 }, { "epoch": 12.62, "eval_loss": 3.392077922821045, "eval_runtime": 45.1724, "eval_samples_per_second": 909.893, "eval_steps_per_second": 56.871, "step": 616000 }, { "epoch": 12.78, "learning_rate": 7.4043191861919465e-06, "loss": 3.2719, "step": 624000 }, { "epoch": 12.78, "eval_loss": 3.395730495452881, "eval_runtime": 45.8195, "eval_samples_per_second": 897.042, "eval_steps_per_second": 56.068, "step": 624000 }, { "epoch": 12.95, "eval_loss": 3.4196434020996094, "eval_runtime": 45.2614, "eval_samples_per_second": 908.103, "eval_steps_per_second": 56.759, "step": 632000 }, { "epoch": 13.11, "learning_rate": 7.337613607937964e-06, "loss": 3.2828, "step": 640000 }, { "epoch": 13.11, "eval_loss": 3.4077515602111816, "eval_runtime": 45.5674, "eval_samples_per_second": 902.004, "eval_steps_per_second": 56.378, "step": 640000 }, { "epoch": 13.28, "eval_loss": 3.4202864170074463, "eval_runtime": 46.3249, "eval_samples_per_second": 887.255, "eval_steps_per_second": 55.456, "step": 648000 }, { "epoch": 13.44, "learning_rate": 7.270908029683983e-06, "loss": 3.2805, "step": 656000 }, { "epoch": 13.44, "eval_loss": 3.3899548053741455, "eval_runtime": 46.1588, "eval_samples_per_second": 890.448, "eval_steps_per_second": 55.656, "step": 656000 }, { "epoch": 13.6, "eval_loss": 3.4037835597991943, "eval_runtime": 46.9454, "eval_samples_per_second": 875.527, "eval_steps_per_second": 54.723, "step": 664000 }, { "epoch": 13.77, "learning_rate": 7.2042024514300015e-06, "loss": 3.2975, "step": 672000 }, { "epoch": 13.77, "eval_loss": 3.405585765838623, "eval_runtime": 46.2706, "eval_samples_per_second": 888.297, "eval_steps_per_second": 55.521, "step": 672000 }, { "epoch": 13.93, "eval_loss": 3.428373336791992, "eval_runtime": 45.9889, "eval_samples_per_second": 893.738, "eval_steps_per_second": 55.861, "step": 680000 }, { "epoch": 14.1, "learning_rate": 7.13749687317602e-06, "loss": 3.2965, "step": 688000 }, { "epoch": 14.1, "eval_loss": 3.41803240776062, "eval_runtime": 46.9126, "eval_samples_per_second": 876.14, "eval_steps_per_second": 54.761, "step": 688000 }, { "epoch": 14.26, "eval_loss": 3.419599771499634, "eval_runtime": 46.1796, "eval_samples_per_second": 890.047, "eval_steps_per_second": 55.631, "step": 696000 }, { "epoch": 14.42, "learning_rate": 7.070791294922038e-06, "loss": 3.3069, "step": 704000 }, { "epoch": 14.42, "eval_loss": 3.425711154937744, "eval_runtime": 46.2298, "eval_samples_per_second": 889.08, "eval_steps_per_second": 55.57, "step": 704000 }, { "epoch": 14.59, "eval_loss": 3.4299447536468506, "eval_runtime": 46.768, "eval_samples_per_second": 878.85, "eval_steps_per_second": 54.931, "step": 712000 }, { "epoch": 14.75, "learning_rate": 7.0040857166680564e-06, "loss": 3.3152, "step": 720000 }, { "epoch": 14.75, "eval_loss": 3.4787514209747314, "eval_runtime": 46.0913, "eval_samples_per_second": 891.752, "eval_steps_per_second": 55.737, "step": 720000 }, { "epoch": 14.92, "eval_loss": 3.4424662590026855, "eval_runtime": 46.3411, "eval_samples_per_second": 886.945, "eval_steps_per_second": 55.437, "step": 728000 }, { "epoch": 15.08, "learning_rate": 6.937380138414076e-06, "loss": 3.3125, "step": 736000 }, { "epoch": 15.08, "eval_loss": 3.430126667022705, "eval_runtime": 46.9882, "eval_samples_per_second": 874.73, "eval_steps_per_second": 54.673, "step": 736000 }, { "epoch": 15.24, "eval_loss": 3.4440979957580566, "eval_runtime": 46.1825, "eval_samples_per_second": 889.99, "eval_steps_per_second": 55.627, "step": 744000 }, { "epoch": 15.41, "learning_rate": 6.8706745601600945e-06, "loss": 3.3174, "step": 752000 }, { "epoch": 15.41, "eval_loss": 3.4396116733551025, "eval_runtime": 46.2686, "eval_samples_per_second": 888.334, "eval_steps_per_second": 55.524, "step": 752000 }, { "epoch": 15.57, "eval_loss": 3.463931083679199, "eval_runtime": 46.7798, "eval_samples_per_second": 878.627, "eval_steps_per_second": 54.917, "step": 760000 }, { "epoch": 15.73, "learning_rate": 6.803968981906113e-06, "loss": 3.3242, "step": 768000 }, { "epoch": 15.73, "eval_loss": 3.4523837566375732, "eval_runtime": 45.7867, "eval_samples_per_second": 897.685, "eval_steps_per_second": 56.108, "step": 768000 }, { "epoch": 15.9, "eval_loss": 3.455958366394043, "eval_runtime": 45.3124, "eval_samples_per_second": 907.08, "eval_steps_per_second": 56.695, "step": 776000 }, { "epoch": 16.06, "learning_rate": 6.737263403652131e-06, "loss": 3.3385, "step": 784000 }, { "epoch": 16.06, "eval_loss": 3.4779999256134033, "eval_runtime": 46.0072, "eval_samples_per_second": 893.383, "eval_steps_per_second": 55.839, "step": 784000 }, { "epoch": 16.23, "eval_loss": 3.4773714542388916, "eval_runtime": 45.131, "eval_samples_per_second": 910.727, "eval_steps_per_second": 56.923, "step": 792000 }, { "epoch": 16.39, "learning_rate": 6.6705578253981495e-06, "loss": 3.3371, "step": 800000 }, { "epoch": 16.39, "eval_loss": 3.47719669342041, "eval_runtime": 45.6308, "eval_samples_per_second": 900.751, "eval_steps_per_second": 56.3, "step": 800000 }, { "epoch": 16.55, "eval_loss": 3.4955241680145264, "eval_runtime": 46.0477, "eval_samples_per_second": 892.597, "eval_steps_per_second": 55.79, "step": 808000 }, { "epoch": 16.72, "learning_rate": 6.603852247144168e-06, "loss": 3.3633, "step": 816000 }, { "epoch": 16.72, "eval_loss": 3.486057996749878, "eval_runtime": 44.9231, "eval_samples_per_second": 914.941, "eval_steps_per_second": 57.187, "step": 816000 }, { "epoch": 16.88, "eval_loss": 3.506316661834717, "eval_runtime": 45.7078, "eval_samples_per_second": 899.234, "eval_steps_per_second": 56.205, "step": 824000 }, { "epoch": 17.05, "learning_rate": 6.537146668890187e-06, "loss": 3.3678, "step": 832000 }, { "epoch": 17.05, "eval_loss": 3.50439190864563, "eval_runtime": 45.0245, "eval_samples_per_second": 912.882, "eval_steps_per_second": 57.058, "step": 832000 }, { "epoch": 17.21, "eval_loss": 3.520247220993042, "eval_runtime": 45.2071, "eval_samples_per_second": 909.193, "eval_steps_per_second": 56.827, "step": 840000 }, { "epoch": 17.37, "learning_rate": 6.4704410906362044e-06, "loss": 3.3634, "step": 848000 }, { "epoch": 17.37, "eval_loss": 3.4941418170928955, "eval_runtime": 46.4208, "eval_samples_per_second": 885.423, "eval_steps_per_second": 55.342, "step": 848000 }, { "epoch": 17.54, "eval_loss": 3.522303819656372, "eval_runtime": 46.164, "eval_samples_per_second": 890.347, "eval_steps_per_second": 55.649, "step": 856000 }, { "epoch": 17.7, "learning_rate": 6.403735512382223e-06, "loss": 3.3797, "step": 864000 }, { "epoch": 17.7, "eval_loss": 3.502774715423584, "eval_runtime": 45.8285, "eval_samples_per_second": 896.865, "eval_steps_per_second": 56.057, "step": 864000 }, { "epoch": 17.87, "eval_loss": 3.526393175125122, "eval_runtime": 46.6422, "eval_samples_per_second": 881.219, "eval_steps_per_second": 55.079, "step": 872000 }, { "epoch": 18.03, "learning_rate": 6.337029934128242e-06, "loss": 3.3802, "step": 880000 }, { "epoch": 18.03, "eval_loss": 3.531257152557373, "eval_runtime": 46.217, "eval_samples_per_second": 889.327, "eval_steps_per_second": 55.586, "step": 880000 }, { "epoch": 18.19, "eval_loss": 3.496319055557251, "eval_runtime": 45.9803, "eval_samples_per_second": 893.904, "eval_steps_per_second": 55.872, "step": 888000 }, { "epoch": 18.36, "learning_rate": 6.270324355874261e-06, "loss": 3.357, "step": 896000 }, { "epoch": 18.36, "eval_loss": 3.5171141624450684, "eval_runtime": 47.1622, "eval_samples_per_second": 871.504, "eval_steps_per_second": 54.472, "step": 896000 }, { "epoch": 18.52, "eval_loss": 3.530701160430908, "eval_runtime": 46.113, "eval_samples_per_second": 891.332, "eval_steps_per_second": 55.711, "step": 904000 }, { "epoch": 18.69, "learning_rate": 6.20361877762028e-06, "loss": 3.3866, "step": 912000 }, { "epoch": 18.69, "eval_loss": 3.5221967697143555, "eval_runtime": 46.035, "eval_samples_per_second": 892.843, "eval_steps_per_second": 55.805, "step": 912000 }, { "epoch": 18.85, "eval_loss": 3.5319056510925293, "eval_runtime": 46.8446, "eval_samples_per_second": 877.412, "eval_steps_per_second": 54.841, "step": 920000 }, { "epoch": 19.01, "learning_rate": 6.1369131993662975e-06, "loss": 3.3818, "step": 928000 }, { "epoch": 19.01, "eval_loss": 3.532552480697632, "eval_runtime": 46.3901, "eval_samples_per_second": 886.007, "eval_steps_per_second": 55.378, "step": 928000 }, { "epoch": 19.18, "eval_loss": 3.5116307735443115, "eval_runtime": 45.2931, "eval_samples_per_second": 907.466, "eval_steps_per_second": 56.719, "step": 936000 }, { "epoch": 19.34, "learning_rate": 6.070207621112316e-06, "loss": 3.3754, "step": 944000 }, { "epoch": 19.34, "eval_loss": 3.5228991508483887, "eval_runtime": 47.0715, "eval_samples_per_second": 873.183, "eval_steps_per_second": 54.577, "step": 944000 }, { "epoch": 19.5, "eval_loss": 3.538318634033203, "eval_runtime": 45.9256, "eval_samples_per_second": 894.97, "eval_steps_per_second": 55.938, "step": 952000 }, { "epoch": 19.67, "learning_rate": 6.003502042858335e-06, "loss": 3.3893, "step": 960000 }, { "epoch": 19.67, "eval_loss": 3.544513463973999, "eval_runtime": 46.8245, "eval_samples_per_second": 877.788, "eval_steps_per_second": 54.864, "step": 960000 }, { "epoch": 19.83, "eval_loss": 3.5230634212493896, "eval_runtime": 47.3348, "eval_samples_per_second": 868.325, "eval_steps_per_second": 54.273, "step": 968000 }, { "epoch": 20.0, "learning_rate": 5.936796464604353e-06, "loss": 3.3899, "step": 976000 }, { "epoch": 20.0, "eval_loss": 3.531026840209961, "eval_runtime": 45.7886, "eval_samples_per_second": 897.647, "eval_steps_per_second": 56.106, "step": 976000 }, { "epoch": 20.16, "eval_loss": 3.53287935256958, "eval_runtime": 46.7771, "eval_samples_per_second": 878.677, "eval_steps_per_second": 54.92, "step": 984000 }, { "epoch": 20.32, "learning_rate": 5.870090886350371e-06, "loss": 3.3918, "step": 992000 }, { "epoch": 20.32, "eval_loss": 3.5158653259277344, "eval_runtime": 46.2173, "eval_samples_per_second": 889.32, "eval_steps_per_second": 55.585, "step": 992000 }, { "epoch": 20.49, "eval_loss": 3.562788486480713, "eval_runtime": 45.7474, "eval_samples_per_second": 898.456, "eval_steps_per_second": 56.156, "step": 1000000 }, { "epoch": 20.65, "learning_rate": 5.80338530809639e-06, "loss": 3.3786, "step": 1008000 }, { "epoch": 20.65, "eval_loss": 3.5290534496307373, "eval_runtime": 46.4581, "eval_samples_per_second": 884.711, "eval_steps_per_second": 55.297, "step": 1008000 }, { "epoch": 20.82, "eval_loss": 3.5163111686706543, "eval_runtime": 45.899, "eval_samples_per_second": 895.487, "eval_steps_per_second": 55.971, "step": 1016000 }, { "epoch": 20.98, "learning_rate": 5.736679729842408e-06, "loss": 3.3862, "step": 1024000 }, { "epoch": 20.98, "eval_loss": 3.531219959259033, "eval_runtime": 45.4959, "eval_samples_per_second": 903.423, "eval_steps_per_second": 56.467, "step": 1024000 }, { "epoch": 21.14, "eval_loss": 3.514033317565918, "eval_runtime": 46.6408, "eval_samples_per_second": 881.245, "eval_steps_per_second": 55.08, "step": 1032000 }, { "epoch": 21.31, "learning_rate": 5.669974151588427e-06, "loss": 3.3855, "step": 1040000 }, { "epoch": 21.31, "eval_loss": 3.5617153644561768, "eval_runtime": 45.7071, "eval_samples_per_second": 899.248, "eval_steps_per_second": 56.206, "step": 1040000 }, { "epoch": 21.47, "eval_loss": 3.5374927520751953, "eval_runtime": 45.668, "eval_samples_per_second": 900.018, "eval_steps_per_second": 56.254, "step": 1048000 }, { "epoch": 21.64, "learning_rate": 5.603268573334446e-06, "loss": 3.3872, "step": 1056000 }, { "epoch": 21.64, "eval_loss": 3.532823085784912, "eval_runtime": 46.5514, "eval_samples_per_second": 882.938, "eval_steps_per_second": 55.186, "step": 1056000 }, { "epoch": 21.8, "eval_loss": 3.561626434326172, "eval_runtime": 45.9586, "eval_samples_per_second": 894.327, "eval_steps_per_second": 55.898, "step": 1064000 }, { "epoch": 21.96, "learning_rate": 5.536562995080464e-06, "loss": 3.3931, "step": 1072000 }, { "epoch": 21.96, "eval_loss": 3.5647873878479004, "eval_runtime": 46.8936, "eval_samples_per_second": 876.495, "eval_steps_per_second": 54.784, "step": 1072000 }, { "epoch": 22.13, "eval_loss": 3.544335126876831, "eval_runtime": 46.3686, "eval_samples_per_second": 886.419, "eval_steps_per_second": 55.404, "step": 1080000 }, { "epoch": 22.29, "learning_rate": 5.469857416826483e-06, "loss": 3.3708, "step": 1088000 }, { "epoch": 22.29, "eval_loss": 3.5400941371917725, "eval_runtime": 45.8359, "eval_samples_per_second": 896.72, "eval_steps_per_second": 56.048, "step": 1088000 }, { "epoch": 22.45, "eval_loss": 3.55292010307312, "eval_runtime": 46.8082, "eval_samples_per_second": 878.095, "eval_steps_per_second": 54.884, "step": 1096000 }, { "epoch": 22.62, "learning_rate": 5.403151838572501e-06, "loss": 3.4099, "step": 1104000 }, { "epoch": 22.62, "eval_loss": 3.533414602279663, "eval_runtime": 46.1107, "eval_samples_per_second": 891.377, "eval_steps_per_second": 55.714, "step": 1104000 }, { "epoch": 22.78, "eval_loss": 3.5325212478637695, "eval_runtime": 46.1351, "eval_samples_per_second": 890.905, "eval_steps_per_second": 55.684, "step": 1112000 }, { "epoch": 22.95, "learning_rate": 5.33644626031852e-06, "loss": 3.4027, "step": 1120000 }, { "epoch": 22.95, "eval_loss": 3.5818660259246826, "eval_runtime": 46.7428, "eval_samples_per_second": 879.323, "eval_steps_per_second": 54.96, "step": 1120000 }, { "epoch": 23.11, "eval_loss": 3.5470829010009766, "eval_runtime": 46.1344, "eval_samples_per_second": 890.92, "eval_steps_per_second": 55.685, "step": 1128000 }, { "epoch": 23.27, "learning_rate": 5.269740682064538e-06, "loss": 3.4035, "step": 1136000 }, { "epoch": 23.27, "eval_loss": 3.548552989959717, "eval_runtime": 46.1071, "eval_samples_per_second": 891.446, "eval_steps_per_second": 55.718, "step": 1136000 }, { "epoch": 23.44, "eval_loss": 3.5470151901245117, "eval_runtime": 46.849, "eval_samples_per_second": 877.33, "eval_steps_per_second": 54.836, "step": 1144000 }, { "epoch": 23.6, "learning_rate": 5.203035103810556e-06, "loss": 3.3964, "step": 1152000 }, { "epoch": 23.6, "eval_loss": 3.572176694869995, "eval_runtime": 46.3661, "eval_samples_per_second": 886.467, "eval_steps_per_second": 55.407, "step": 1152000 }, { "epoch": 23.77, "eval_loss": 3.55098295211792, "eval_runtime": 46.1812, "eval_samples_per_second": 890.015, "eval_steps_per_second": 55.629, "step": 1160000 }, { "epoch": 23.93, "learning_rate": 5.136329525556575e-06, "loss": 3.4115, "step": 1168000 }, { "epoch": 23.93, "eval_loss": 3.561007499694824, "eval_runtime": 47.5429, "eval_samples_per_second": 864.525, "eval_steps_per_second": 54.035, "step": 1168000 }, { "epoch": 24.09, "eval_loss": 3.5757482051849365, "eval_runtime": 46.3962, "eval_samples_per_second": 885.891, "eval_steps_per_second": 55.371, "step": 1176000 }, { "epoch": 24.26, "learning_rate": 5.0696239473025935e-06, "loss": 3.4173, "step": 1184000 }, { "epoch": 24.26, "eval_loss": 3.554094076156616, "eval_runtime": 45.5708, "eval_samples_per_second": 901.936, "eval_steps_per_second": 56.374, "step": 1184000 }, { "epoch": 24.42, "eval_loss": 3.577660083770752, "eval_runtime": 47.0565, "eval_samples_per_second": 873.461, "eval_steps_per_second": 54.594, "step": 1192000 }, { "epoch": 24.59, "learning_rate": 5.002918369048611e-06, "loss": 3.4169, "step": 1200000 }, { "epoch": 24.59, "eval_loss": 3.5637948513031006, "eval_runtime": 47.0711, "eval_samples_per_second": 873.19, "eval_steps_per_second": 54.577, "step": 1200000 }, { "epoch": 24.75, "eval_loss": 3.5462896823883057, "eval_runtime": 46.8215, "eval_samples_per_second": 877.845, "eval_steps_per_second": 54.868, "step": 1208000 }, { "epoch": 24.91, "learning_rate": 4.936212790794631e-06, "loss": 3.4031, "step": 1216000 }, { "epoch": 24.91, "eval_loss": 3.5299670696258545, "eval_runtime": 46.9742, "eval_samples_per_second": 874.99, "eval_steps_per_second": 54.69, "step": 1216000 }, { "epoch": 25.08, "eval_loss": 3.558427333831787, "eval_runtime": 46.1322, "eval_samples_per_second": 890.961, "eval_steps_per_second": 55.688, "step": 1224000 }, { "epoch": 25.24, "learning_rate": 4.869507212540649e-06, "loss": 3.4094, "step": 1232000 }, { "epoch": 25.24, "eval_loss": 3.568174123764038, "eval_runtime": 46.3049, "eval_samples_per_second": 887.638, "eval_steps_per_second": 55.48, "step": 1232000 }, { "epoch": 25.41, "eval_loss": 3.555844783782959, "eval_runtime": 46.0676, "eval_samples_per_second": 892.211, "eval_steps_per_second": 55.766, "step": 1240000 }, { "epoch": 25.57, "learning_rate": 4.802801634286667e-06, "loss": 3.4116, "step": 1248000 }, { "epoch": 25.57, "eval_loss": 3.5629091262817383, "eval_runtime": 45.5765, "eval_samples_per_second": 901.825, "eval_steps_per_second": 56.367, "step": 1248000 }, { "epoch": 25.73, "eval_loss": 3.5490224361419678, "eval_runtime": 46.4409, "eval_samples_per_second": 885.039, "eval_steps_per_second": 55.318, "step": 1256000 }, { "epoch": 25.9, "learning_rate": 4.7360960560326865e-06, "loss": 3.4199, "step": 1264000 }, { "epoch": 25.9, "eval_loss": 3.567878484725952, "eval_runtime": 46.1595, "eval_samples_per_second": 890.434, "eval_steps_per_second": 55.655, "step": 1264000 }, { "epoch": 26.06, "eval_loss": 3.5885465145111084, "eval_runtime": 45.9316, "eval_samples_per_second": 894.853, "eval_steps_per_second": 55.931, "step": 1272000 }, { "epoch": 26.22, "learning_rate": 4.669390477778704e-06, "loss": 3.412, "step": 1280000 }, { "epoch": 26.22, "eval_loss": 3.5578629970550537, "eval_runtime": 46.4337, "eval_samples_per_second": 885.176, "eval_steps_per_second": 55.326, "step": 1280000 }, { "epoch": 26.39, "eval_loss": 3.5465352535247803, "eval_runtime": 45.7517, "eval_samples_per_second": 898.371, "eval_steps_per_second": 56.151, "step": 1288000 }, { "epoch": 26.55, "learning_rate": 4.602684899524723e-06, "loss": 3.4123, "step": 1296000 }, { "epoch": 26.55, "eval_loss": 3.572610855102539, "eval_runtime": 45.5426, "eval_samples_per_second": 902.496, "eval_steps_per_second": 56.409, "step": 1296000 }, { "epoch": 26.72, "eval_loss": 3.577484130859375, "eval_runtime": 46.4204, "eval_samples_per_second": 885.431, "eval_steps_per_second": 55.342, "step": 1304000 }, { "epoch": 26.88, "learning_rate": 4.5359793212707415e-06, "loss": 3.4132, "step": 1312000 }, { "epoch": 26.88, "eval_loss": 3.5477850437164307, "eval_runtime": 45.6512, "eval_samples_per_second": 900.348, "eval_steps_per_second": 56.275, "step": 1312000 }, { "epoch": 27.04, "eval_loss": 3.5588574409484863, "eval_runtime": 46.0446, "eval_samples_per_second": 892.657, "eval_steps_per_second": 55.794, "step": 1320000 }, { "epoch": 27.21, "learning_rate": 4.46927374301676e-06, "loss": 3.4161, "step": 1328000 }, { "epoch": 27.21, "eval_loss": 3.56620717048645, "eval_runtime": 46.4839, "eval_samples_per_second": 884.22, "eval_steps_per_second": 55.266, "step": 1328000 }, { "epoch": 27.37, "eval_loss": 3.589487075805664, "eval_runtime": 46.3966, "eval_samples_per_second": 885.884, "eval_steps_per_second": 55.37, "step": 1336000 }, { "epoch": 27.54, "learning_rate": 4.402568164762779e-06, "loss": 3.4097, "step": 1344000 }, { "epoch": 27.54, "eval_loss": 3.5940632820129395, "eval_runtime": 46.4364, "eval_samples_per_second": 885.125, "eval_steps_per_second": 55.323, "step": 1344000 }, { "epoch": 27.7, "eval_loss": 3.5912110805511475, "eval_runtime": 45.9687, "eval_samples_per_second": 894.131, "eval_steps_per_second": 55.886, "step": 1352000 }, { "epoch": 27.86, "learning_rate": 4.335862586508797e-06, "loss": 3.415, "step": 1360000 }, { "epoch": 27.86, "eval_loss": 3.565756320953369, "eval_runtime": 45.7621, "eval_samples_per_second": 898.168, "eval_steps_per_second": 56.138, "step": 1360000 }, { "epoch": 28.03, "eval_loss": 3.5553781986236572, "eval_runtime": 46.2903, "eval_samples_per_second": 887.919, "eval_steps_per_second": 55.498, "step": 1368000 }, { "epoch": 28.19, "learning_rate": 4.269157008254816e-06, "loss": 3.4193, "step": 1376000 }, { "epoch": 28.19, "eval_loss": 3.589851140975952, "eval_runtime": 45.8411, "eval_samples_per_second": 896.618, "eval_steps_per_second": 56.041, "step": 1376000 }, { "epoch": 28.36, "eval_loss": 3.5652260780334473, "eval_runtime": 45.5538, "eval_samples_per_second": 902.275, "eval_steps_per_second": 56.395, "step": 1384000 }, { "epoch": 28.52, "learning_rate": 4.202451430000834e-06, "loss": 3.4136, "step": 1392000 }, { "epoch": 28.52, "eval_loss": 3.5832390785217285, "eval_runtime": 46.575, "eval_samples_per_second": 882.491, "eval_steps_per_second": 55.158, "step": 1392000 }, { "epoch": 28.68, "eval_loss": 3.5885210037231445, "eval_runtime": 45.9659, "eval_samples_per_second": 894.184, "eval_steps_per_second": 55.889, "step": 1400000 }, { "epoch": 28.85, "learning_rate": 4.135745851746852e-06, "loss": 3.4294, "step": 1408000 }, { "epoch": 28.85, "eval_loss": 3.583249807357788, "eval_runtime": 45.7927, "eval_samples_per_second": 897.568, "eval_steps_per_second": 56.101, "step": 1408000 }, { "epoch": 29.01, "eval_loss": 3.6025209426879883, "eval_runtime": 46.362, "eval_samples_per_second": 886.546, "eval_steps_per_second": 55.412, "step": 1416000 }, { "epoch": 29.17, "learning_rate": 4.069040273492872e-06, "loss": 3.4243, "step": 1424000 }, { "epoch": 29.17, "eval_loss": 3.6040360927581787, "eval_runtime": 45.7855, "eval_samples_per_second": 897.708, "eval_steps_per_second": 56.11, "step": 1424000 }, { "epoch": 29.34, "eval_loss": 3.5890395641326904, "eval_runtime": 46.5109, "eval_samples_per_second": 883.707, "eval_steps_per_second": 55.234, "step": 1432000 }, { "epoch": 29.5, "learning_rate": 4.0023346952388895e-06, "loss": 3.4427, "step": 1440000 }, { "epoch": 29.5, "eval_loss": 3.58347749710083, "eval_runtime": 46.2896, "eval_samples_per_second": 887.931, "eval_steps_per_second": 55.498, "step": 1440000 }, { "epoch": 29.67, "eval_loss": 3.6185286045074463, "eval_runtime": 46.4189, "eval_samples_per_second": 885.459, "eval_steps_per_second": 55.344, "step": 1448000 }, { "epoch": 29.83, "learning_rate": 3.935629116984908e-06, "loss": 3.4293, "step": 1456000 }, { "epoch": 29.83, "eval_loss": 3.6028919219970703, "eval_runtime": 46.7251, "eval_samples_per_second": 879.656, "eval_steps_per_second": 54.981, "step": 1456000 }, { "epoch": 29.99, "eval_loss": 3.616161823272705, "eval_runtime": 45.7265, "eval_samples_per_second": 898.865, "eval_steps_per_second": 56.182, "step": 1464000 }, { "epoch": 30.16, "learning_rate": 3.868923538730927e-06, "loss": 3.4363, "step": 1472000 }, { "epoch": 30.16, "eval_loss": 3.6257941722869873, "eval_runtime": 45.6532, "eval_samples_per_second": 900.308, "eval_steps_per_second": 56.272, "step": 1472000 }, { "epoch": 30.32, "eval_loss": 3.6038014888763428, "eval_runtime": 46.717, "eval_samples_per_second": 879.808, "eval_steps_per_second": 54.991, "step": 1480000 }, { "epoch": 30.49, "learning_rate": 3.8022179604769453e-06, "loss": 3.4532, "step": 1488000 }, { "epoch": 30.49, "eval_loss": 3.6039483547210693, "eval_runtime": 45.742, "eval_samples_per_second": 898.562, "eval_steps_per_second": 56.163, "step": 1488000 }, { "epoch": 30.65, "eval_loss": 3.605367422103882, "eval_runtime": 45.7078, "eval_samples_per_second": 899.234, "eval_steps_per_second": 56.205, "step": 1496000 }, { "epoch": 30.81, "learning_rate": 3.735512382222964e-06, "loss": 3.4401, "step": 1504000 }, { "epoch": 30.81, "eval_loss": 3.6269376277923584, "eval_runtime": 46.6124, "eval_samples_per_second": 881.783, "eval_steps_per_second": 55.114, "step": 1504000 }, { "epoch": 30.98, "eval_loss": 3.600417137145996, "eval_runtime": 47.0146, "eval_samples_per_second": 874.239, "eval_steps_per_second": 54.643, "step": 1512000 }, { "epoch": 31.14, "learning_rate": 3.668806803968982e-06, "loss": 3.4491, "step": 1520000 }, { "epoch": 31.14, "eval_loss": 3.6095597743988037, "eval_runtime": 47.1653, "eval_samples_per_second": 871.446, "eval_steps_per_second": 54.468, "step": 1520000 }, { "epoch": 31.31, "eval_loss": 3.6216766834259033, "eval_runtime": 48.343, "eval_samples_per_second": 850.216, "eval_steps_per_second": 53.141, "step": 1528000 }, { "epoch": 31.47, "learning_rate": 3.6021012257150007e-06, "loss": 3.4438, "step": 1536000 }, { "epoch": 31.47, "eval_loss": 3.6081080436706543, "eval_runtime": 47.4804, "eval_samples_per_second": 865.663, "eval_steps_per_second": 54.107, "step": 1536000 }, { "epoch": 31.63, "eval_loss": 3.6190168857574463, "eval_runtime": 48.3587, "eval_samples_per_second": 849.941, "eval_steps_per_second": 53.124, "step": 1544000 }, { "epoch": 31.8, "learning_rate": 3.535395647461019e-06, "loss": 3.4337, "step": 1552000 }, { "epoch": 31.8, "eval_loss": 3.611992835998535, "eval_runtime": 47.5342, "eval_samples_per_second": 864.683, "eval_steps_per_second": 54.045, "step": 1552000 }, { "epoch": 31.96, "eval_loss": 3.586127996444702, "eval_runtime": 46.8726, "eval_samples_per_second": 876.888, "eval_steps_per_second": 54.808, "step": 1560000 }, { "epoch": 32.13, "learning_rate": 3.468690069207038e-06, "loss": 3.4475, "step": 1568000 }, { "epoch": 32.13, "eval_loss": 3.620932102203369, "eval_runtime": 48.2654, "eval_samples_per_second": 851.582, "eval_steps_per_second": 53.226, "step": 1568000 }, { "epoch": 32.29, "eval_loss": 3.6301937103271484, "eval_runtime": 47.2416, "eval_samples_per_second": 870.039, "eval_steps_per_second": 54.38, "step": 1576000 }, { "epoch": 32.45, "learning_rate": 3.4019844909530565e-06, "loss": 3.4406, "step": 1584000 }, { "epoch": 32.45, "eval_loss": 3.6052932739257812, "eval_runtime": 46.0861, "eval_samples_per_second": 891.852, "eval_steps_per_second": 55.743, "step": 1584000 }, { "epoch": 32.62, "eval_loss": 3.593369960784912, "eval_runtime": 49.6475, "eval_samples_per_second": 827.876, "eval_steps_per_second": 51.745, "step": 1592000 }, { "epoch": 32.78, "learning_rate": 3.3352789126990747e-06, "loss": 3.4392, "step": 1600000 }, { "epoch": 32.78, "eval_loss": 3.594203472137451, "eval_runtime": 47.8907, "eval_samples_per_second": 858.246, "eval_steps_per_second": 53.643, "step": 1600000 }, { "epoch": 32.94, "eval_loss": 3.601329803466797, "eval_runtime": 46.6549, "eval_samples_per_second": 880.98, "eval_steps_per_second": 55.064, "step": 1608000 }, { "epoch": 33.11, "learning_rate": 3.2685733344450933e-06, "loss": 3.4514, "step": 1616000 }, { "epoch": 33.11, "eval_loss": 3.6505630016326904, "eval_runtime": 47.3453, "eval_samples_per_second": 868.132, "eval_steps_per_second": 54.261, "step": 1616000 }, { "epoch": 33.27, "eval_loss": 3.604905128479004, "eval_runtime": 47.3478, "eval_samples_per_second": 868.087, "eval_steps_per_second": 54.258, "step": 1624000 }, { "epoch": 33.44, "learning_rate": 3.2018677561911115e-06, "loss": 3.4406, "step": 1632000 }, { "epoch": 33.44, "eval_loss": 3.6285159587860107, "eval_runtime": 45.2665, "eval_samples_per_second": 908.001, "eval_steps_per_second": 56.753, "step": 1632000 }, { "epoch": 33.6, "eval_loss": 3.6107122898101807, "eval_runtime": 47.0075, "eval_samples_per_second": 874.372, "eval_steps_per_second": 54.651, "step": 1640000 }, { "epoch": 33.76, "learning_rate": 3.1351621779371306e-06, "loss": 3.4522, "step": 1648000 }, { "epoch": 33.76, "eval_loss": 3.6080775260925293, "eval_runtime": 46.384, "eval_samples_per_second": 886.124, "eval_steps_per_second": 55.385, "step": 1648000 }, { "epoch": 33.93, "eval_loss": 3.6121394634246826, "eval_runtime": 47.5808, "eval_samples_per_second": 863.836, "eval_steps_per_second": 53.992, "step": 1656000 }, { "epoch": 34.09, "learning_rate": 3.0684565996831487e-06, "loss": 3.4592, "step": 1664000 }, { "epoch": 34.09, "eval_loss": 3.639568567276001, "eval_runtime": 47.4907, "eval_samples_per_second": 865.474, "eval_steps_per_second": 54.095, "step": 1664000 }, { "epoch": 34.26, "eval_loss": 3.628408432006836, "eval_runtime": 45.8805, "eval_samples_per_second": 895.849, "eval_steps_per_second": 55.993, "step": 1672000 }, { "epoch": 34.42, "learning_rate": 3.0017510214291673e-06, "loss": 3.4587, "step": 1680000 }, { "epoch": 34.42, "eval_loss": 3.619464635848999, "eval_runtime": 46.7813, "eval_samples_per_second": 878.599, "eval_steps_per_second": 54.915, "step": 1680000 }, { "epoch": 34.58, "eval_loss": 3.6168148517608643, "eval_runtime": 46.0408, "eval_samples_per_second": 892.731, "eval_steps_per_second": 55.798, "step": 1688000 }, { "epoch": 34.75, "learning_rate": 2.9350454431751855e-06, "loss": 3.4589, "step": 1696000 }, { "epoch": 34.75, "eval_loss": 3.631527900695801, "eval_runtime": 45.9831, "eval_samples_per_second": 893.85, "eval_steps_per_second": 55.868, "step": 1696000 }, { "epoch": 34.91, "eval_loss": 3.6044745445251465, "eval_runtime": 46.5293, "eval_samples_per_second": 883.356, "eval_steps_per_second": 55.212, "step": 1704000 }, { "epoch": 35.08, "learning_rate": 2.868339864921204e-06, "loss": 3.4703, "step": 1712000 }, { "epoch": 35.08, "eval_loss": 3.6251227855682373, "eval_runtime": 45.5912, "eval_samples_per_second": 901.533, "eval_steps_per_second": 56.349, "step": 1712000 }, { "epoch": 35.24, "eval_loss": 3.6251931190490723, "eval_runtime": 45.7404, "eval_samples_per_second": 898.593, "eval_steps_per_second": 56.165, "step": 1720000 }, { "epoch": 35.4, "learning_rate": 2.801634286667223e-06, "loss": 3.4565, "step": 1728000 }, { "epoch": 35.4, "eval_loss": 3.62538743019104, "eval_runtime": 46.4207, "eval_samples_per_second": 885.423, "eval_steps_per_second": 55.342, "step": 1728000 }, { "epoch": 35.57, "eval_loss": 3.6544113159179688, "eval_runtime": 45.7864, "eval_samples_per_second": 897.691, "eval_steps_per_second": 56.108, "step": 1736000 }, { "epoch": 35.73, "learning_rate": 2.7349287084132413e-06, "loss": 3.4634, "step": 1744000 }, { "epoch": 35.73, "eval_loss": 3.629049062728882, "eval_runtime": 46.556, "eval_samples_per_second": 882.85, "eval_steps_per_second": 55.181, "step": 1744000 }, { "epoch": 35.9, "eval_loss": 3.612429618835449, "eval_runtime": 46.5059, "eval_samples_per_second": 883.802, "eval_steps_per_second": 55.24, "step": 1752000 }, { "epoch": 36.06, "learning_rate": 2.66822313015926e-06, "loss": 3.4625, "step": 1760000 }, { "epoch": 36.06, "eval_loss": 3.6262378692626953, "eval_runtime": 45.8554, "eval_samples_per_second": 896.34, "eval_steps_per_second": 56.024, "step": 1760000 }, { "epoch": 36.22, "eval_loss": 3.6317975521087646, "eval_runtime": 46.7318, "eval_samples_per_second": 879.529, "eval_steps_per_second": 54.973, "step": 1768000 }, { "epoch": 36.39, "learning_rate": 2.601517551905278e-06, "loss": 3.457, "step": 1776000 }, { "epoch": 36.39, "eval_loss": 3.640812397003174, "eval_runtime": 45.9688, "eval_samples_per_second": 894.129, "eval_steps_per_second": 55.886, "step": 1776000 }, { "epoch": 36.55, "eval_loss": 3.6433026790618896, "eval_runtime": 45.8154, "eval_samples_per_second": 897.122, "eval_steps_per_second": 56.073, "step": 1784000 }, { "epoch": 36.71, "learning_rate": 2.5348119736512967e-06, "loss": 3.4618, "step": 1792000 }, { "epoch": 36.71, "eval_loss": 3.627612352371216, "eval_runtime": 46.6149, "eval_samples_per_second": 881.735, "eval_steps_per_second": 55.111, "step": 1792000 }, { "epoch": 36.88, "eval_loss": 3.631366014480591, "eval_runtime": 46.0925, "eval_samples_per_second": 891.729, "eval_steps_per_second": 55.736, "step": 1800000 }, { "epoch": 37.04, "learning_rate": 2.4681063953973154e-06, "loss": 3.4611, "step": 1808000 }, { "epoch": 37.04, "eval_loss": 3.6415860652923584, "eval_runtime": 46.287, "eval_samples_per_second": 887.982, "eval_steps_per_second": 55.502, "step": 1808000 }, { "epoch": 37.21, "eval_loss": 3.665800094604492, "eval_runtime": 46.839, "eval_samples_per_second": 877.517, "eval_steps_per_second": 54.847, "step": 1816000 }, { "epoch": 37.37, "learning_rate": 2.4014008171433335e-06, "loss": 3.4651, "step": 1824000 }, { "epoch": 37.37, "eval_loss": 3.638195037841797, "eval_runtime": 46.0815, "eval_samples_per_second": 891.942, "eval_steps_per_second": 55.749, "step": 1824000 }, { "epoch": 37.53, "eval_loss": 3.656243085861206, "eval_runtime": 45.3257, "eval_samples_per_second": 906.815, "eval_steps_per_second": 56.679, "step": 1832000 }, { "epoch": 37.7, "learning_rate": 2.334695238889352e-06, "loss": 3.4625, "step": 1840000 }, { "epoch": 37.7, "eval_loss": 3.6376214027404785, "eval_runtime": 47.1734, "eval_samples_per_second": 871.296, "eval_steps_per_second": 54.459, "step": 1840000 }, { "epoch": 37.86, "eval_loss": 3.651963710784912, "eval_runtime": 46.059, "eval_samples_per_second": 892.377, "eval_steps_per_second": 55.776, "step": 1848000 }, { "epoch": 38.03, "learning_rate": 2.2679896606353707e-06, "loss": 3.4561, "step": 1856000 }, { "epoch": 38.03, "eval_loss": 3.6300716400146484, "eval_runtime": 46.8158, "eval_samples_per_second": 877.951, "eval_steps_per_second": 54.875, "step": 1856000 }, { "epoch": 38.19, "eval_loss": 3.619462728500366, "eval_runtime": 45.8596, "eval_samples_per_second": 896.258, "eval_steps_per_second": 56.019, "step": 1864000 }, { "epoch": 38.35, "learning_rate": 2.2012840823813894e-06, "loss": 3.4655, "step": 1872000 }, { "epoch": 38.35, "eval_loss": 3.6279447078704834, "eval_runtime": 46.2215, "eval_samples_per_second": 889.241, "eval_steps_per_second": 55.58, "step": 1872000 }, { "epoch": 38.52, "eval_loss": 3.636460542678833, "eval_runtime": 46.7533, "eval_samples_per_second": 879.125, "eval_steps_per_second": 54.948, "step": 1880000 }, { "epoch": 38.68, "learning_rate": 2.134578504127408e-06, "loss": 3.4637, "step": 1888000 }, { "epoch": 38.68, "eval_loss": 3.638620138168335, "eval_runtime": 46.2177, "eval_samples_per_second": 889.313, "eval_steps_per_second": 55.585, "step": 1888000 }, { "epoch": 38.85, "eval_loss": 3.643373489379883, "eval_runtime": 45.9947, "eval_samples_per_second": 893.624, "eval_steps_per_second": 55.854, "step": 1896000 }, { "epoch": 39.01, "learning_rate": 2.067872925873426e-06, "loss": 3.458, "step": 1904000 }, { "epoch": 39.01, "eval_loss": 3.65189266204834, "eval_runtime": 46.7003, "eval_samples_per_second": 880.122, "eval_steps_per_second": 55.01, "step": 1904000 }, { "epoch": 39.17, "eval_loss": 3.6438076496124268, "eval_runtime": 46.3785, "eval_samples_per_second": 886.229, "eval_steps_per_second": 55.392, "step": 1912000 }, { "epoch": 39.34, "learning_rate": 2.0011673476194448e-06, "loss": 3.4523, "step": 1920000 }, { "epoch": 39.34, "eval_loss": 3.640777349472046, "eval_runtime": 46.701, "eval_samples_per_second": 880.109, "eval_steps_per_second": 55.01, "step": 1920000 }, { "epoch": 39.5, "eval_loss": 3.6513171195983887, "eval_runtime": 46.884, "eval_samples_per_second": 876.675, "eval_steps_per_second": 54.795, "step": 1928000 }, { "epoch": 39.66, "learning_rate": 1.9344617693654634e-06, "loss": 3.4743, "step": 1936000 }, { "epoch": 39.66, "eval_loss": 3.6177797317504883, "eval_runtime": 46.0686, "eval_samples_per_second": 892.192, "eval_steps_per_second": 55.765, "step": 1936000 }, { "epoch": 39.83, "eval_loss": 3.6398518085479736, "eval_runtime": 46.8575, "eval_samples_per_second": 877.171, "eval_steps_per_second": 54.826, "step": 1944000 }, { "epoch": 39.99, "learning_rate": 1.867756191111482e-06, "loss": 3.4626, "step": 1952000 }, { "epoch": 39.99, "eval_loss": 3.624283790588379, "eval_runtime": 46.1682, "eval_samples_per_second": 890.266, "eval_steps_per_second": 55.644, "step": 1952000 }, { "epoch": 40.16, "eval_loss": 3.6325714588165283, "eval_runtime": 45.9837, "eval_samples_per_second": 893.838, "eval_steps_per_second": 55.868, "step": 1960000 }, { "epoch": 40.32, "learning_rate": 1.8010506128575004e-06, "loss": 3.4692, "step": 1968000 }, { "epoch": 40.32, "eval_loss": 3.6723103523254395, "eval_runtime": 46.8787, "eval_samples_per_second": 876.773, "eval_steps_per_second": 54.801, "step": 1968000 }, { "epoch": 40.48, "eval_loss": 3.6456410884857178, "eval_runtime": 46.0442, "eval_samples_per_second": 892.664, "eval_steps_per_second": 55.794, "step": 1976000 }, { "epoch": 40.65, "learning_rate": 1.734345034603519e-06, "loss": 3.4765, "step": 1984000 }, { "epoch": 40.65, "eval_loss": 3.6437156200408936, "eval_runtime": 45.2826, "eval_samples_per_second": 907.678, "eval_steps_per_second": 56.733, "step": 1984000 }, { "epoch": 40.81, "eval_loss": 3.647704839706421, "eval_runtime": 46.8981, "eval_samples_per_second": 876.41, "eval_steps_per_second": 54.778, "step": 1992000 }, { "epoch": 40.98, "learning_rate": 1.6676394563495374e-06, "loss": 3.4747, "step": 2000000 }, { "epoch": 40.98, "eval_loss": 3.638388156890869, "eval_runtime": 46.0328, "eval_samples_per_second": 892.886, "eval_steps_per_second": 55.808, "step": 2000000 }, { "epoch": 41.14, "eval_loss": 3.6370368003845215, "eval_runtime": 46.7372, "eval_samples_per_second": 879.427, "eval_steps_per_second": 54.967, "step": 2008000 }, { "epoch": 41.3, "learning_rate": 1.6009338780955558e-06, "loss": 3.4683, "step": 2016000 }, { "epoch": 41.3, "eval_loss": 3.662468433380127, "eval_runtime": 46.61, "eval_samples_per_second": 881.828, "eval_steps_per_second": 55.117, "step": 2016000 }, { "epoch": 41.47, "eval_loss": 3.6453213691711426, "eval_runtime": 45.8611, "eval_samples_per_second": 896.229, "eval_steps_per_second": 56.017, "step": 2024000 }, { "epoch": 41.63, "learning_rate": 1.5342282998415744e-06, "loss": 3.4599, "step": 2032000 }, { "epoch": 41.63, "eval_loss": 3.64886212348938, "eval_runtime": 46.762, "eval_samples_per_second": 878.962, "eval_steps_per_second": 54.938, "step": 2032000 }, { "epoch": 41.8, "eval_loss": 3.6310884952545166, "eval_runtime": 46.4576, "eval_samples_per_second": 884.72, "eval_steps_per_second": 55.298, "step": 2040000 }, { "epoch": 41.96, "learning_rate": 1.4675227215875928e-06, "loss": 3.4713, "step": 2048000 }, { "epoch": 41.96, "eval_loss": 3.619154691696167, "eval_runtime": 45.9184, "eval_samples_per_second": 895.109, "eval_steps_per_second": 55.947, "step": 2048000 }, { "epoch": 42.12, "eval_loss": 3.651060104370117, "eval_runtime": 47.0032, "eval_samples_per_second": 874.451, "eval_steps_per_second": 54.656, "step": 2056000 }, { "epoch": 42.29, "learning_rate": 1.4008171433336116e-06, "loss": 3.4677, "step": 2064000 }, { "epoch": 42.29, "eval_loss": 3.6425869464874268, "eval_runtime": 46.3503, "eval_samples_per_second": 886.769, "eval_steps_per_second": 55.426, "step": 2064000 }, { "epoch": 42.45, "eval_loss": 3.6362836360931396, "eval_runtime": 46.2845, "eval_samples_per_second": 888.029, "eval_steps_per_second": 55.505, "step": 2072000 }, { "epoch": 42.62, "learning_rate": 1.33411156507963e-06, "loss": 3.4689, "step": 2080000 }, { "epoch": 42.62, "eval_loss": 3.6378438472747803, "eval_runtime": 47.0132, "eval_samples_per_second": 874.265, "eval_steps_per_second": 54.644, "step": 2080000 }, { "epoch": 42.78, "eval_loss": 3.6450445652008057, "eval_runtime": 46.1055, "eval_samples_per_second": 891.478, "eval_steps_per_second": 55.72, "step": 2088000 }, { "epoch": 42.94, "learning_rate": 1.2674059868256484e-06, "loss": 3.4598, "step": 2096000 }, { "epoch": 42.94, "eval_loss": 3.64805006980896, "eval_runtime": 46.8684, "eval_samples_per_second": 876.967, "eval_steps_per_second": 54.813, "step": 2096000 }, { "epoch": 43.11, "eval_loss": 3.6675028800964355, "eval_runtime": 46.4765, "eval_samples_per_second": 884.36, "eval_steps_per_second": 55.275, "step": 2104000 }, { "epoch": 43.27, "learning_rate": 1.2007004085716668e-06, "loss": 3.4487, "step": 2112000 }, { "epoch": 43.27, "eval_loss": 3.6557657718658447, "eval_runtime": 46.0356, "eval_samples_per_second": 892.83, "eval_steps_per_second": 55.805, "step": 2112000 }, { "epoch": 43.43, "eval_loss": 3.6451427936553955, "eval_runtime": 47.3121, "eval_samples_per_second": 868.741, "eval_steps_per_second": 54.299, "step": 2120000 }, { "epoch": 43.6, "learning_rate": 1.1339948303176854e-06, "loss": 3.4555, "step": 2128000 }, { "epoch": 43.6, "eval_loss": 3.643132448196411, "eval_runtime": 46.2499, "eval_samples_per_second": 888.694, "eval_steps_per_second": 55.546, "step": 2128000 }, { "epoch": 43.76, "eval_loss": 3.6470389366149902, "eval_runtime": 45.8331, "eval_samples_per_second": 896.776, "eval_steps_per_second": 56.051, "step": 2136000 }, { "epoch": 43.93, "learning_rate": 1.067289252063704e-06, "loss": 3.4727, "step": 2144000 }, { "epoch": 43.93, "eval_loss": 3.6265406608581543, "eval_runtime": 47.1162, "eval_samples_per_second": 872.353, "eval_steps_per_second": 54.525, "step": 2144000 }, { "epoch": 44.09, "eval_loss": 3.6335132122039795, "eval_runtime": 45.9499, "eval_samples_per_second": 894.497, "eval_steps_per_second": 55.909, "step": 2152000 }, { "epoch": 44.25, "learning_rate": 1.0005836738097224e-06, "loss": 3.4626, "step": 2160000 }, { "epoch": 44.25, "eval_loss": 3.639557123184204, "eval_runtime": 46.75, "eval_samples_per_second": 879.187, "eval_steps_per_second": 54.952, "step": 2160000 }, { "epoch": 44.42, "eval_loss": 3.653687000274658, "eval_runtime": 47.165, "eval_samples_per_second": 871.452, "eval_steps_per_second": 54.468, "step": 2168000 }, { "epoch": 44.58, "learning_rate": 9.33878095555741e-07, "loss": 3.4724, "step": 2176000 }, { "epoch": 44.58, "eval_loss": 3.61678409576416, "eval_runtime": 46.2585, "eval_samples_per_second": 888.528, "eval_steps_per_second": 55.536, "step": 2176000 }, { "epoch": 44.75, "eval_loss": 3.644352674484253, "eval_runtime": 47.0469, "eval_samples_per_second": 873.64, "eval_steps_per_second": 54.605, "step": 2184000 }, { "epoch": 44.91, "learning_rate": 8.671725173017595e-07, "loss": 3.4545, "step": 2192000 }, { "epoch": 44.91, "eval_loss": 3.6440114974975586, "eval_runtime": 46.2426, "eval_samples_per_second": 888.835, "eval_steps_per_second": 55.555, "step": 2192000 }, { "epoch": 45.07, "eval_loss": 3.6327061653137207, "eval_runtime": 46.09, "eval_samples_per_second": 891.776, "eval_steps_per_second": 55.739, "step": 2200000 }, { "epoch": 45.24, "learning_rate": 8.004669390477779e-07, "loss": 3.461, "step": 2208000 }, { "epoch": 45.24, "eval_loss": 3.6362533569335938, "eval_runtime": 47.1445, "eval_samples_per_second": 871.831, "eval_steps_per_second": 54.492, "step": 2208000 }, { "epoch": 45.4, "eval_loss": 3.653747081756592, "eval_runtime": 46.2235, "eval_samples_per_second": 889.202, "eval_steps_per_second": 55.578, "step": 2216000 }, { "epoch": 45.57, "learning_rate": 7.337613607937964e-07, "loss": 3.4702, "step": 2224000 }, { "epoch": 45.57, "eval_loss": 3.6123247146606445, "eval_runtime": 46.168, "eval_samples_per_second": 890.27, "eval_steps_per_second": 55.645, "step": 2224000 }, { "epoch": 45.73, "eval_loss": 3.6554455757141113, "eval_runtime": 47.1193, "eval_samples_per_second": 872.296, "eval_steps_per_second": 54.521, "step": 2232000 }, { "epoch": 45.89, "learning_rate": 6.67055782539815e-07, "loss": 3.4565, "step": 2240000 }, { "epoch": 45.89, "eval_loss": 3.6522979736328125, "eval_runtime": 46.1449, "eval_samples_per_second": 890.716, "eval_steps_per_second": 55.672, "step": 2240000 }, { "epoch": 46.06, "eval_loss": 3.6339659690856934, "eval_runtime": 47.2579, "eval_samples_per_second": 869.739, "eval_steps_per_second": 54.361, "step": 2248000 }, { "epoch": 46.22, "learning_rate": 6.003502042858334e-07, "loss": 3.4517, "step": 2256000 }, { "epoch": 46.22, "eval_loss": 3.6459498405456543, "eval_runtime": 46.9038, "eval_samples_per_second": 876.305, "eval_steps_per_second": 54.772, "step": 2256000 }, { "epoch": 46.38, "eval_loss": 3.656141996383667, "eval_runtime": 46.3654, "eval_samples_per_second": 886.48, "eval_steps_per_second": 55.408, "step": 2264000 }, { "epoch": 46.55, "learning_rate": 5.33644626031852e-07, "loss": 3.4631, "step": 2272000 }, { "epoch": 46.55, "eval_loss": 3.6547927856445312, "eval_runtime": 47.1154, "eval_samples_per_second": 872.368, "eval_steps_per_second": 54.526, "step": 2272000 }, { "epoch": 46.71, "eval_loss": 3.6228716373443604, "eval_runtime": 46.2908, "eval_samples_per_second": 887.908, "eval_steps_per_second": 55.497, "step": 2280000 }, { "epoch": 46.88, "learning_rate": 4.669390477778705e-07, "loss": 3.4518, "step": 2288000 }, { "epoch": 46.88, "eval_loss": 3.6350128650665283, "eval_runtime": 46.3584, "eval_samples_per_second": 886.613, "eval_steps_per_second": 55.416, "step": 2288000 }, { "epoch": 47.04, "eval_loss": 3.6483192443847656, "eval_runtime": 47.24, "eval_samples_per_second": 870.067, "eval_steps_per_second": 54.382, "step": 2296000 }, { "epoch": 47.2, "learning_rate": 4.0023346952388894e-07, "loss": 3.4592, "step": 2304000 }, { "epoch": 47.2, "eval_loss": 3.6263089179992676, "eval_runtime": 47.0185, "eval_samples_per_second": 874.166, "eval_steps_per_second": 54.638, "step": 2304000 }, { "epoch": 47.37, "eval_loss": 3.6339097023010254, "eval_runtime": 46.0199, "eval_samples_per_second": 893.135, "eval_steps_per_second": 55.824, "step": 2312000 }, { "epoch": 47.53, "learning_rate": 3.335278912699075e-07, "loss": 3.4569, "step": 2320000 }, { "epoch": 47.53, "eval_loss": 3.659444808959961, "eval_runtime": 47.1636, "eval_samples_per_second": 871.477, "eval_steps_per_second": 54.47, "step": 2320000 }, { "epoch": 47.7, "eval_loss": 3.638535737991333, "eval_runtime": 46.1693, "eval_samples_per_second": 890.246, "eval_steps_per_second": 55.643, "step": 2328000 }, { "epoch": 47.86, "learning_rate": 2.66822313015926e-07, "loss": 3.4524, "step": 2336000 }, { "epoch": 47.86, "eval_loss": 3.6434078216552734, "eval_runtime": 47.0318, "eval_samples_per_second": 873.919, "eval_steps_per_second": 54.623, "step": 2336000 }, { "epoch": 48.02, "eval_loss": 3.650230646133423, "eval_runtime": 46.5514, "eval_samples_per_second": 882.938, "eval_steps_per_second": 55.186, "step": 2344000 }, { "epoch": 48.19, "learning_rate": 2.0011673476194447e-07, "loss": 3.4644, "step": 2352000 }, { "epoch": 48.19, "eval_loss": 3.617619276046753, "eval_runtime": 46.2116, "eval_samples_per_second": 889.43, "eval_steps_per_second": 55.592, "step": 2352000 }, { "epoch": 48.35, "eval_loss": 3.6293184757232666, "eval_runtime": 47.399, "eval_samples_per_second": 867.15, "eval_steps_per_second": 54.199, "step": 2360000 }, { "epoch": 48.52, "learning_rate": 1.33411156507963e-07, "loss": 3.4586, "step": 2368000 }, { "epoch": 48.52, "eval_loss": 3.630380392074585, "eval_runtime": 46.3912, "eval_samples_per_second": 885.987, "eval_steps_per_second": 55.377, "step": 2368000 }, { "epoch": 48.68, "eval_loss": 3.6343326568603516, "eval_runtime": 46.2144, "eval_samples_per_second": 889.376, "eval_steps_per_second": 55.589, "step": 2376000 }, { "epoch": 48.84, "learning_rate": 6.67055782539815e-08, "loss": 3.4439, "step": 2384000 }, { "epoch": 48.84, "eval_loss": 3.6090333461761475, "eval_runtime": 47.3482, "eval_samples_per_second": 868.08, "eval_steps_per_second": 54.258, "step": 2384000 }, { "epoch": 49.01, "eval_loss": 3.6414153575897217, "eval_runtime": 46.5994, "eval_samples_per_second": 882.029, "eval_steps_per_second": 55.13, "step": 2392000 }, { "epoch": 49.17, "learning_rate": 0.0, "loss": 3.4474, "step": 2400000 }, { "epoch": 49.17, "eval_loss": 3.620838165283203, "eval_runtime": 46.9825, "eval_samples_per_second": 874.835, "eval_steps_per_second": 54.68, "step": 2400000 }, { "epoch": 49.17, "step": 2400000, "total_flos": 6.906141294629226e+17, "train_loss": 3.376089767252604, "train_runtime": 158003.2062, "train_samples_per_second": 243.033, "train_steps_per_second": 15.19 } ], "logging_steps": 16000, "max_steps": 2400000, "num_train_epochs": 50, "save_steps": 32000, "total_flos": 6.906141294629226e+17, "trial_name": null, "trial_params": null }