{ "best_metric": 1.8842545747756958, "best_model_checkpoint": "./model_tweets_2020_Q1_90/checkpoint-1440000", "epoch": 49.171259398881354, "eval_steps": 8000, "global_step": 2400000, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.16, "eval_loss": 2.2375540733337402, "eval_runtime": 45.4292, "eval_samples_per_second": 904.749, "eval_steps_per_second": 56.55, "step": 8000 }, { "epoch": 0.33, "learning_rate": 4.0726666666666665e-07, "loss": 2.4394, "step": 16000 }, { "epoch": 0.33, "eval_loss": 2.155681610107422, "eval_runtime": 44.9417, "eval_samples_per_second": 914.562, "eval_steps_per_second": 57.163, "step": 16000 }, { "epoch": 0.49, "eval_loss": 2.0965232849121094, "eval_runtime": 45.1589, "eval_samples_per_second": 910.165, "eval_steps_per_second": 56.888, "step": 24000 }, { "epoch": 0.66, "learning_rate": 4.0453333333333336e-07, "loss": 2.2403, "step": 32000 }, { "epoch": 0.66, "eval_loss": 2.063685894012451, "eval_runtime": 45.4617, "eval_samples_per_second": 904.101, "eval_steps_per_second": 56.509, "step": 32000 }, { "epoch": 0.82, "eval_loss": 2.062025308609009, "eval_runtime": 46.1664, "eval_samples_per_second": 890.301, "eval_steps_per_second": 55.647, "step": 40000 }, { "epoch": 0.98, "learning_rate": 4.018e-07, "loss": 2.1859, "step": 48000 }, { "epoch": 0.98, "eval_loss": 2.0426833629608154, "eval_runtime": 44.881, "eval_samples_per_second": 915.8, "eval_steps_per_second": 57.24, "step": 48000 }, { "epoch": 1.15, "eval_loss": 2.044032335281372, "eval_runtime": 45.0876, "eval_samples_per_second": 911.602, "eval_steps_per_second": 56.978, "step": 56000 }, { "epoch": 1.31, "learning_rate": 3.9906666666666667e-07, "loss": 2.1472, "step": 64000 }, { "epoch": 1.31, "eval_loss": 2.017690420150757, "eval_runtime": 45.0, "eval_samples_per_second": 913.377, "eval_steps_per_second": 57.089, "step": 64000 }, { "epoch": 1.48, "eval_loss": 1.9980425834655762, "eval_runtime": 44.953, "eval_samples_per_second": 914.333, "eval_steps_per_second": 57.149, "step": 72000 }, { "epoch": 1.64, "learning_rate": 3.963333333333333e-07, "loss": 2.1334, "step": 80000 }, { "epoch": 1.64, "eval_loss": 2.0021114349365234, "eval_runtime": 45.3203, "eval_samples_per_second": 906.923, "eval_steps_per_second": 56.685, "step": 80000 }, { "epoch": 1.8, "eval_loss": 1.9963111877441406, "eval_runtime": 44.8632, "eval_samples_per_second": 916.163, "eval_steps_per_second": 57.263, "step": 88000 }, { "epoch": 1.97, "learning_rate": 3.936e-07, "loss": 2.1271, "step": 96000 }, { "epoch": 1.97, "eval_loss": 1.9917570352554321, "eval_runtime": 45.4669, "eval_samples_per_second": 903.999, "eval_steps_per_second": 56.503, "step": 96000 }, { "epoch": 2.13, "eval_loss": 1.988930344581604, "eval_runtime": 45.0716, "eval_samples_per_second": 911.927, "eval_steps_per_second": 56.998, "step": 104000 }, { "epoch": 2.29, "learning_rate": 3.908666666666667e-07, "loss": 2.1065, "step": 112000 }, { "epoch": 2.29, "eval_loss": 1.9688993692398071, "eval_runtime": 44.5304, "eval_samples_per_second": 923.009, "eval_steps_per_second": 57.691, "step": 112000 }, { "epoch": 2.46, "eval_loss": 1.991935133934021, "eval_runtime": 45.9679, "eval_samples_per_second": 894.146, "eval_steps_per_second": 55.887, "step": 120000 }, { "epoch": 2.62, "learning_rate": 3.8813333333333334e-07, "loss": 2.105, "step": 128000 }, { "epoch": 2.62, "eval_loss": 1.9705941677093506, "eval_runtime": 45.4865, "eval_samples_per_second": 903.609, "eval_steps_per_second": 56.478, "step": 128000 }, { "epoch": 2.79, "eval_loss": 1.9724736213684082, "eval_runtime": 44.9361, "eval_samples_per_second": 914.676, "eval_steps_per_second": 57.17, "step": 136000 }, { "epoch": 2.95, "learning_rate": 3.854e-07, "loss": 2.1033, "step": 144000 }, { "epoch": 2.95, "eval_loss": 2.000924825668335, "eval_runtime": 45.1849, "eval_samples_per_second": 909.641, "eval_steps_per_second": 56.855, "step": 144000 }, { "epoch": 3.11, "eval_loss": 1.9660991430282593, "eval_runtime": 45.122, "eval_samples_per_second": 910.909, "eval_steps_per_second": 56.935, "step": 152000 }, { "epoch": 3.28, "learning_rate": 3.8266666666666665e-07, "loss": 2.0934, "step": 160000 }, { "epoch": 3.28, "eval_loss": 1.964065432548523, "eval_runtime": 45.5781, "eval_samples_per_second": 901.792, "eval_steps_per_second": 56.365, "step": 160000 }, { "epoch": 3.44, "eval_loss": 1.973306655883789, "eval_runtime": 45.0146, "eval_samples_per_second": 913.081, "eval_steps_per_second": 57.07, "step": 168000 }, { "epoch": 3.61, "learning_rate": 3.799333333333333e-07, "loss": 2.0899, "step": 176000 }, { "epoch": 3.61, "eval_loss": 1.9746649265289307, "eval_runtime": 45.1942, "eval_samples_per_second": 909.454, "eval_steps_per_second": 56.844, "step": 176000 }, { "epoch": 3.77, "eval_loss": 1.9442167282104492, "eval_runtime": 44.9807, "eval_samples_per_second": 913.769, "eval_steps_per_second": 57.113, "step": 184000 }, { "epoch": 3.93, "learning_rate": 3.772e-07, "loss": 2.0903, "step": 192000 }, { "epoch": 3.93, "eval_loss": 1.9585589170455933, "eval_runtime": 45.3512, "eval_samples_per_second": 906.304, "eval_steps_per_second": 56.647, "step": 192000 }, { "epoch": 4.1, "eval_loss": 1.9585614204406738, "eval_runtime": 44.9754, "eval_samples_per_second": 913.877, "eval_steps_per_second": 57.12, "step": 200000 }, { "epoch": 4.26, "learning_rate": 3.7446666666666667e-07, "loss": 2.0842, "step": 208000 }, { "epoch": 4.26, "eval_loss": 1.9401671886444092, "eval_runtime": 45.4042, "eval_samples_per_second": 905.247, "eval_steps_per_second": 56.581, "step": 208000 }, { "epoch": 4.43, "eval_loss": 1.9483397006988525, "eval_runtime": 45.1835, "eval_samples_per_second": 909.668, "eval_steps_per_second": 56.857, "step": 216000 }, { "epoch": 4.59, "learning_rate": 3.7173333333333333e-07, "loss": 2.0761, "step": 224000 }, { "epoch": 4.59, "eval_loss": 1.9532350301742554, "eval_runtime": 45.6838, "eval_samples_per_second": 899.706, "eval_steps_per_second": 56.234, "step": 224000 }, { "epoch": 4.75, "eval_loss": 1.945618748664856, "eval_runtime": 44.9815, "eval_samples_per_second": 913.754, "eval_steps_per_second": 57.112, "step": 232000 }, { "epoch": 4.92, "learning_rate": 3.69e-07, "loss": 2.0799, "step": 240000 }, { "epoch": 4.92, "eval_loss": 1.9322354793548584, "eval_runtime": 45.7219, "eval_samples_per_second": 898.957, "eval_steps_per_second": 56.188, "step": 240000 }, { "epoch": 5.08, "eval_loss": 1.9460111856460571, "eval_runtime": 44.9207, "eval_samples_per_second": 914.991, "eval_steps_per_second": 57.19, "step": 248000 }, { "epoch": 5.24, "learning_rate": 3.6626666666666664e-07, "loss": 2.0704, "step": 256000 }, { "epoch": 5.24, "eval_loss": 1.9477649927139282, "eval_runtime": 45.4934, "eval_samples_per_second": 903.472, "eval_steps_per_second": 56.47, "step": 256000 }, { "epoch": 5.41, "eval_loss": 1.943530559539795, "eval_runtime": 45.5332, "eval_samples_per_second": 902.683, "eval_steps_per_second": 56.42, "step": 264000 }, { "epoch": 5.57, "learning_rate": 3.6353333333333335e-07, "loss": 2.0727, "step": 272000 }, { "epoch": 5.57, "eval_loss": 1.9356465339660645, "eval_runtime": 45.2443, "eval_samples_per_second": 908.445, "eval_steps_per_second": 56.781, "step": 272000 }, { "epoch": 5.74, "eval_loss": 1.9543371200561523, "eval_runtime": 44.9666, "eval_samples_per_second": 914.056, "eval_steps_per_second": 57.131, "step": 280000 }, { "epoch": 5.9, "learning_rate": 3.608e-07, "loss": 2.073, "step": 288000 }, { "epoch": 5.9, "eval_loss": 1.9541795253753662, "eval_runtime": 45.5545, "eval_samples_per_second": 902.26, "eval_steps_per_second": 56.394, "step": 288000 }, { "epoch": 6.06, "eval_loss": 1.9503259658813477, "eval_runtime": 45.0063, "eval_samples_per_second": 913.25, "eval_steps_per_second": 57.081, "step": 296000 }, { "epoch": 6.23, "learning_rate": 3.5806666666666666e-07, "loss": 2.0647, "step": 304000 }, { "epoch": 6.23, "eval_loss": 1.9437284469604492, "eval_runtime": 45.0801, "eval_samples_per_second": 911.755, "eval_steps_per_second": 56.987, "step": 304000 }, { "epoch": 6.39, "eval_loss": 1.9450260400772095, "eval_runtime": 45.0125, "eval_samples_per_second": 913.125, "eval_steps_per_second": 57.073, "step": 312000 }, { "epoch": 6.56, "learning_rate": 3.553333333333333e-07, "loss": 2.0668, "step": 320000 }, { "epoch": 6.56, "eval_loss": 1.9220808744430542, "eval_runtime": 45.7281, "eval_samples_per_second": 898.834, "eval_steps_per_second": 56.18, "step": 320000 }, { "epoch": 6.72, "eval_loss": 1.927699089050293, "eval_runtime": 45.3402, "eval_samples_per_second": 906.525, "eval_steps_per_second": 56.661, "step": 328000 }, { "epoch": 6.88, "learning_rate": 3.5259999999999997e-07, "loss": 2.0695, "step": 336000 }, { "epoch": 6.88, "eval_loss": 1.935697317123413, "eval_runtime": 45.1353, "eval_samples_per_second": 910.64, "eval_steps_per_second": 56.918, "step": 336000 }, { "epoch": 7.05, "eval_loss": 1.924439549446106, "eval_runtime": 45.0671, "eval_samples_per_second": 912.017, "eval_steps_per_second": 57.004, "step": 344000 }, { "epoch": 7.21, "learning_rate": 3.498666666666667e-07, "loss": 2.0652, "step": 352000 }, { "epoch": 7.21, "eval_loss": 1.938655138015747, "eval_runtime": 45.305, "eval_samples_per_second": 907.228, "eval_steps_per_second": 56.704, "step": 352000 }, { "epoch": 7.38, "eval_loss": 1.9354636669158936, "eval_runtime": 45.6689, "eval_samples_per_second": 900.0, "eval_steps_per_second": 56.253, "step": 360000 }, { "epoch": 7.54, "learning_rate": 3.4713333333333333e-07, "loss": 2.0607, "step": 368000 }, { "epoch": 7.54, "eval_loss": 1.9389930963516235, "eval_runtime": 45.2501, "eval_samples_per_second": 908.33, "eval_steps_per_second": 56.773, "step": 368000 }, { "epoch": 7.7, "eval_loss": 1.9358594417572021, "eval_runtime": 45.5644, "eval_samples_per_second": 902.064, "eval_steps_per_second": 56.382, "step": 376000 }, { "epoch": 7.87, "learning_rate": 3.444e-07, "loss": 2.0707, "step": 384000 }, { "epoch": 7.87, "eval_loss": 1.9393250942230225, "eval_runtime": 45.0739, "eval_samples_per_second": 911.881, "eval_steps_per_second": 56.995, "step": 384000 }, { "epoch": 8.03, "eval_loss": 1.9217476844787598, "eval_runtime": 45.0711, "eval_samples_per_second": 911.938, "eval_steps_per_second": 56.999, "step": 392000 }, { "epoch": 8.2, "learning_rate": 3.416666666666667e-07, "loss": 2.0621, "step": 400000 }, { "epoch": 8.2, "eval_loss": 1.9284311532974243, "eval_runtime": 45.6293, "eval_samples_per_second": 900.781, "eval_steps_per_second": 56.302, "step": 400000 }, { "epoch": 8.36, "eval_loss": 1.9383472204208374, "eval_runtime": 46.4801, "eval_samples_per_second": 884.292, "eval_steps_per_second": 55.271, "step": 408000 }, { "epoch": 8.52, "learning_rate": 3.3893333333333335e-07, "loss": 2.0643, "step": 416000 }, { "epoch": 8.52, "eval_loss": 1.922420859336853, "eval_runtime": 45.4784, "eval_samples_per_second": 903.77, "eval_steps_per_second": 56.488, "step": 416000 }, { "epoch": 8.69, "eval_loss": 1.928429365158081, "eval_runtime": 45.9472, "eval_samples_per_second": 894.549, "eval_steps_per_second": 55.912, "step": 424000 }, { "epoch": 8.85, "learning_rate": 3.3619999999999995e-07, "loss": 2.0498, "step": 432000 }, { "epoch": 8.85, "eval_loss": 1.9399133920669556, "eval_runtime": 46.3003, "eval_samples_per_second": 887.726, "eval_steps_per_second": 55.486, "step": 432000 }, { "epoch": 9.01, "eval_loss": 1.9427006244659424, "eval_runtime": 46.6439, "eval_samples_per_second": 881.187, "eval_steps_per_second": 55.077, "step": 440000 }, { "epoch": 9.18, "learning_rate": 3.3346666666666666e-07, "loss": 2.0689, "step": 448000 }, { "epoch": 9.18, "eval_loss": 1.9351186752319336, "eval_runtime": 45.2002, "eval_samples_per_second": 909.333, "eval_steps_per_second": 56.836, "step": 448000 }, { "epoch": 9.34, "eval_loss": 1.9312766790390015, "eval_runtime": 47.0788, "eval_samples_per_second": 873.047, "eval_steps_per_second": 54.568, "step": 456000 }, { "epoch": 9.51, "learning_rate": 3.307333333333333e-07, "loss": 2.0511, "step": 464000 }, { "epoch": 9.51, "eval_loss": 1.9392344951629639, "eval_runtime": 47.5719, "eval_samples_per_second": 863.997, "eval_steps_per_second": 54.002, "step": 464000 }, { "epoch": 9.67, "eval_loss": 1.9278026819229126, "eval_runtime": 48.6246, "eval_samples_per_second": 845.292, "eval_steps_per_second": 52.833, "step": 472000 }, { "epoch": 9.83, "learning_rate": 3.28e-07, "loss": 2.0664, "step": 480000 }, { "epoch": 9.83, "eval_loss": 1.9384809732437134, "eval_runtime": 48.0322, "eval_samples_per_second": 855.717, "eval_steps_per_second": 53.485, "step": 480000 }, { "epoch": 10.0, "eval_loss": 1.9471704959869385, "eval_runtime": 46.9, "eval_samples_per_second": 876.375, "eval_steps_per_second": 54.776, "step": 488000 }, { "epoch": 10.16, "learning_rate": 3.252666666666667e-07, "loss": 2.0565, "step": 496000 }, { "epoch": 10.16, "eval_loss": 1.937743902206421, "eval_runtime": 48.1967, "eval_samples_per_second": 852.796, "eval_steps_per_second": 53.302, "step": 496000 }, { "epoch": 10.33, "eval_loss": 1.9480650424957275, "eval_runtime": 47.3213, "eval_samples_per_second": 868.572, "eval_steps_per_second": 54.288, "step": 504000 }, { "epoch": 10.49, "learning_rate": 3.2253333333333334e-07, "loss": 2.0566, "step": 512000 }, { "epoch": 10.49, "eval_loss": 1.9453697204589844, "eval_runtime": 47.4611, "eval_samples_per_second": 866.014, "eval_steps_per_second": 54.128, "step": 512000 }, { "epoch": 10.65, "eval_loss": 1.9244284629821777, "eval_runtime": 48.2751, "eval_samples_per_second": 851.412, "eval_steps_per_second": 53.216, "step": 520000 }, { "epoch": 10.82, "learning_rate": 3.198e-07, "loss": 2.0523, "step": 528000 }, { "epoch": 10.82, "eval_loss": 1.935764193534851, "eval_runtime": 47.3047, "eval_samples_per_second": 868.877, "eval_steps_per_second": 54.307, "step": 528000 }, { "epoch": 10.98, "eval_loss": 1.9176356792449951, "eval_runtime": 46.998, "eval_samples_per_second": 874.549, "eval_steps_per_second": 54.662, "step": 536000 }, { "epoch": 11.15, "learning_rate": 3.1706666666666665e-07, "loss": 2.0554, "step": 544000 }, { "epoch": 11.15, "eval_loss": 1.9284305572509766, "eval_runtime": 47.0847, "eval_samples_per_second": 872.937, "eval_steps_per_second": 54.561, "step": 544000 }, { "epoch": 11.31, "eval_loss": 1.9287078380584717, "eval_runtime": 47.0031, "eval_samples_per_second": 874.453, "eval_steps_per_second": 54.656, "step": 552000 }, { "epoch": 11.47, "learning_rate": 3.1433333333333336e-07, "loss": 2.0485, "step": 560000 }, { "epoch": 11.47, "eval_loss": 1.923736810684204, "eval_runtime": 45.756, "eval_samples_per_second": 898.286, "eval_steps_per_second": 56.146, "step": 560000 }, { "epoch": 11.64, "eval_loss": 1.920929193496704, "eval_runtime": 48.761, "eval_samples_per_second": 842.927, "eval_steps_per_second": 52.686, "step": 568000 }, { "epoch": 11.8, "learning_rate": 3.116e-07, "loss": 2.0485, "step": 576000 }, { "epoch": 11.8, "eval_loss": 1.9261997938156128, "eval_runtime": 47.8227, "eval_samples_per_second": 859.467, "eval_steps_per_second": 53.719, "step": 576000 }, { "epoch": 11.97, "eval_loss": 1.9207805395126343, "eval_runtime": 47.2192, "eval_samples_per_second": 870.452, "eval_steps_per_second": 54.406, "step": 584000 }, { "epoch": 12.13, "learning_rate": 3.0886666666666667e-07, "loss": 2.0542, "step": 592000 }, { "epoch": 12.13, "eval_loss": 1.9320107698440552, "eval_runtime": 47.8604, "eval_samples_per_second": 858.789, "eval_steps_per_second": 53.677, "step": 592000 }, { "epoch": 12.29, "eval_loss": 1.9076824188232422, "eval_runtime": 48.2616, "eval_samples_per_second": 851.651, "eval_steps_per_second": 53.231, "step": 600000 }, { "epoch": 12.46, "learning_rate": 3.061333333333333e-07, "loss": 2.0527, "step": 608000 }, { "epoch": 12.46, "eval_loss": 1.9248530864715576, "eval_runtime": 46.9547, "eval_samples_per_second": 875.353, "eval_steps_per_second": 54.712, "step": 608000 }, { "epoch": 12.62, "eval_loss": 1.919195532798767, "eval_runtime": 47.8559, "eval_samples_per_second": 858.87, "eval_steps_per_second": 53.682, "step": 616000 }, { "epoch": 12.78, "learning_rate": 3.034e-07, "loss": 2.0606, "step": 624000 }, { "epoch": 12.78, "eval_loss": 1.9151537418365479, "eval_runtime": 47.7757, "eval_samples_per_second": 860.311, "eval_steps_per_second": 53.772, "step": 624000 }, { "epoch": 12.95, "eval_loss": 1.9194060564041138, "eval_runtime": 46.9693, "eval_samples_per_second": 875.082, "eval_steps_per_second": 54.695, "step": 632000 }, { "epoch": 13.11, "learning_rate": 3.0066666666666663e-07, "loss": 2.0542, "step": 640000 }, { "epoch": 13.11, "eval_loss": 1.919755458831787, "eval_runtime": 48.4882, "eval_samples_per_second": 847.671, "eval_steps_per_second": 52.982, "step": 640000 }, { "epoch": 13.28, "eval_loss": 1.9134645462036133, "eval_runtime": 49.7276, "eval_samples_per_second": 826.544, "eval_steps_per_second": 51.662, "step": 648000 }, { "epoch": 13.44, "learning_rate": 2.9793333333333334e-07, "loss": 2.0593, "step": 656000 }, { "epoch": 13.44, "eval_loss": 1.919248342514038, "eval_runtime": 46.444, "eval_samples_per_second": 884.98, "eval_steps_per_second": 55.314, "step": 656000 }, { "epoch": 13.6, "eval_loss": 1.9256792068481445, "eval_runtime": 46.3375, "eval_samples_per_second": 887.013, "eval_steps_per_second": 55.441, "step": 664000 }, { "epoch": 13.77, "learning_rate": 2.952e-07, "loss": 2.0467, "step": 672000 }, { "epoch": 13.77, "eval_loss": 1.9134962558746338, "eval_runtime": 48.3494, "eval_samples_per_second": 850.104, "eval_steps_per_second": 53.134, "step": 672000 }, { "epoch": 13.93, "eval_loss": 1.8994532823562622, "eval_runtime": 46.5628, "eval_samples_per_second": 882.721, "eval_steps_per_second": 55.173, "step": 680000 }, { "epoch": 14.1, "learning_rate": 2.9246666666666665e-07, "loss": 2.0535, "step": 688000 }, { "epoch": 14.1, "eval_loss": 1.9305521249771118, "eval_runtime": 46.8843, "eval_samples_per_second": 876.669, "eval_steps_per_second": 54.795, "step": 688000 }, { "epoch": 14.26, "eval_loss": 1.927839994430542, "eval_runtime": 48.051, "eval_samples_per_second": 855.384, "eval_steps_per_second": 53.464, "step": 696000 }, { "epoch": 14.42, "learning_rate": 2.897333333333333e-07, "loss": 2.0559, "step": 704000 }, { "epoch": 14.42, "eval_loss": 1.9136629104614258, "eval_runtime": 47.4852, "eval_samples_per_second": 865.574, "eval_steps_per_second": 54.101, "step": 704000 }, { "epoch": 14.59, "eval_loss": 1.9164540767669678, "eval_runtime": 46.9282, "eval_samples_per_second": 875.849, "eval_steps_per_second": 54.743, "step": 712000 }, { "epoch": 14.75, "learning_rate": 2.8699999999999996e-07, "loss": 2.0544, "step": 720000 }, { "epoch": 14.75, "eval_loss": 1.921919345855713, "eval_runtime": 50.0641, "eval_samples_per_second": 820.987, "eval_steps_per_second": 51.314, "step": 720000 }, { "epoch": 14.92, "eval_loss": 1.9200448989868164, "eval_runtime": 50.8477, "eval_samples_per_second": 808.336, "eval_steps_per_second": 50.523, "step": 728000 }, { "epoch": 15.08, "learning_rate": 2.8426666666666667e-07, "loss": 2.0493, "step": 736000 }, { "epoch": 15.08, "eval_loss": 1.924161434173584, "eval_runtime": 46.3633, "eval_samples_per_second": 886.519, "eval_steps_per_second": 55.41, "step": 736000 }, { "epoch": 15.24, "eval_loss": 1.9263921976089478, "eval_runtime": 48.7265, "eval_samples_per_second": 843.525, "eval_steps_per_second": 52.723, "step": 744000 }, { "epoch": 15.41, "learning_rate": 2.815333333333333e-07, "loss": 2.0538, "step": 752000 }, { "epoch": 15.41, "eval_loss": 1.9333122968673706, "eval_runtime": 48.0632, "eval_samples_per_second": 855.165, "eval_steps_per_second": 53.45, "step": 752000 }, { "epoch": 15.57, "eval_loss": 1.912643551826477, "eval_runtime": 46.2548, "eval_samples_per_second": 888.599, "eval_steps_per_second": 55.54, "step": 760000 }, { "epoch": 15.73, "learning_rate": 2.7880000000000003e-07, "loss": 2.0457, "step": 768000 }, { "epoch": 15.73, "eval_loss": 1.9022926092147827, "eval_runtime": 48.0637, "eval_samples_per_second": 855.157, "eval_steps_per_second": 53.45, "step": 768000 }, { "epoch": 15.9, "eval_loss": 1.9153952598571777, "eval_runtime": 47.4167, "eval_samples_per_second": 866.825, "eval_steps_per_second": 54.179, "step": 776000 }, { "epoch": 16.06, "learning_rate": 2.7606666666666664e-07, "loss": 2.0436, "step": 784000 }, { "epoch": 16.06, "eval_loss": 1.894668698310852, "eval_runtime": 47.3621, "eval_samples_per_second": 867.824, "eval_steps_per_second": 54.242, "step": 784000 }, { "epoch": 16.23, "eval_loss": 1.9197973012924194, "eval_runtime": 48.7318, "eval_samples_per_second": 843.432, "eval_steps_per_second": 52.717, "step": 792000 }, { "epoch": 16.39, "learning_rate": 2.733333333333333e-07, "loss": 2.0527, "step": 800000 }, { "epoch": 16.39, "eval_loss": 1.912539005279541, "eval_runtime": 47.1232, "eval_samples_per_second": 872.224, "eval_steps_per_second": 54.517, "step": 800000 }, { "epoch": 16.55, "eval_loss": 1.9129201173782349, "eval_runtime": 46.5511, "eval_samples_per_second": 882.944, "eval_steps_per_second": 55.187, "step": 808000 }, { "epoch": 16.72, "learning_rate": 2.706e-07, "loss": 2.0484, "step": 816000 }, { "epoch": 16.72, "eval_loss": 1.930238127708435, "eval_runtime": 48.6118, "eval_samples_per_second": 845.514, "eval_steps_per_second": 52.847, "step": 816000 }, { "epoch": 16.88, "eval_loss": 1.915164589881897, "eval_runtime": 47.3129, "eval_samples_per_second": 868.727, "eval_steps_per_second": 54.298, "step": 824000 }, { "epoch": 17.05, "learning_rate": 2.6786666666666666e-07, "loss": 2.0535, "step": 832000 }, { "epoch": 17.05, "eval_loss": 1.9222667217254639, "eval_runtime": 48.8011, "eval_samples_per_second": 842.235, "eval_steps_per_second": 52.642, "step": 832000 }, { "epoch": 17.21, "eval_loss": 1.9195363521575928, "eval_runtime": 47.9497, "eval_samples_per_second": 857.19, "eval_steps_per_second": 53.577, "step": 840000 }, { "epoch": 17.37, "learning_rate": 2.651333333333333e-07, "loss": 2.0516, "step": 848000 }, { "epoch": 17.37, "eval_loss": 1.9071532487869263, "eval_runtime": 46.9027, "eval_samples_per_second": 876.325, "eval_steps_per_second": 54.773, "step": 848000 }, { "epoch": 17.54, "eval_loss": 1.9210063219070435, "eval_runtime": 48.1848, "eval_samples_per_second": 853.007, "eval_steps_per_second": 53.316, "step": 856000 }, { "epoch": 17.7, "learning_rate": 2.624e-07, "loss": 2.0546, "step": 864000 }, { "epoch": 17.7, "eval_loss": 1.9014463424682617, "eval_runtime": 49.087, "eval_samples_per_second": 837.329, "eval_steps_per_second": 52.336, "step": 864000 }, { "epoch": 17.87, "eval_loss": 1.9129884243011475, "eval_runtime": 46.451, "eval_samples_per_second": 884.847, "eval_steps_per_second": 55.306, "step": 872000 }, { "epoch": 18.03, "learning_rate": 2.596666666666667e-07, "loss": 2.0439, "step": 880000 }, { "epoch": 18.03, "eval_loss": 1.9064290523529053, "eval_runtime": 48.2272, "eval_samples_per_second": 852.258, "eval_steps_per_second": 53.269, "step": 880000 }, { "epoch": 18.19, "eval_loss": 1.9166321754455566, "eval_runtime": 48.6696, "eval_samples_per_second": 844.511, "eval_steps_per_second": 52.785, "step": 888000 }, { "epoch": 18.36, "learning_rate": 2.5693333333333333e-07, "loss": 2.0391, "step": 896000 }, { "epoch": 18.36, "eval_loss": 1.930993676185608, "eval_runtime": 47.1469, "eval_samples_per_second": 871.786, "eval_steps_per_second": 54.489, "step": 896000 }, { "epoch": 18.52, "eval_loss": 1.9064345359802246, "eval_runtime": 45.9523, "eval_samples_per_second": 894.45, "eval_steps_per_second": 55.906, "step": 904000 }, { "epoch": 18.69, "learning_rate": 2.542e-07, "loss": 2.0568, "step": 912000 }, { "epoch": 18.69, "eval_loss": 1.9106640815734863, "eval_runtime": 47.4421, "eval_samples_per_second": 866.361, "eval_steps_per_second": 54.15, "step": 912000 }, { "epoch": 18.85, "eval_loss": 1.931686520576477, "eval_runtime": 47.8842, "eval_samples_per_second": 858.363, "eval_steps_per_second": 53.65, "step": 920000 }, { "epoch": 19.01, "learning_rate": 2.5146666666666664e-07, "loss": 2.047, "step": 928000 }, { "epoch": 19.01, "eval_loss": 1.9234881401062012, "eval_runtime": 47.1905, "eval_samples_per_second": 870.981, "eval_steps_per_second": 54.439, "step": 928000 }, { "epoch": 19.18, "eval_loss": 1.917264699935913, "eval_runtime": 48.2222, "eval_samples_per_second": 852.346, "eval_steps_per_second": 53.274, "step": 936000 }, { "epoch": 19.34, "learning_rate": 2.4873333333333335e-07, "loss": 2.0431, "step": 944000 }, { "epoch": 19.34, "eval_loss": 1.8946489095687866, "eval_runtime": 48.6788, "eval_samples_per_second": 844.352, "eval_steps_per_second": 52.775, "step": 944000 }, { "epoch": 19.5, "eval_loss": 1.9247366189956665, "eval_runtime": 47.3908, "eval_samples_per_second": 867.3, "eval_steps_per_second": 54.209, "step": 952000 }, { "epoch": 19.67, "learning_rate": 2.46e-07, "loss": 2.0444, "step": 960000 }, { "epoch": 19.67, "eval_loss": 1.9105546474456787, "eval_runtime": 47.3389, "eval_samples_per_second": 868.249, "eval_steps_per_second": 54.268, "step": 960000 }, { "epoch": 19.83, "eval_loss": 1.9022789001464844, "eval_runtime": 48.8131, "eval_samples_per_second": 842.027, "eval_steps_per_second": 52.629, "step": 968000 }, { "epoch": 20.0, "learning_rate": 2.4326666666666666e-07, "loss": 2.0465, "step": 976000 }, { "epoch": 20.0, "eval_loss": 1.9196343421936035, "eval_runtime": 47.9588, "eval_samples_per_second": 857.027, "eval_steps_per_second": 53.567, "step": 976000 }, { "epoch": 20.16, "eval_loss": 1.9111627340316772, "eval_runtime": 48.5638, "eval_samples_per_second": 846.35, "eval_steps_per_second": 52.899, "step": 984000 }, { "epoch": 20.32, "learning_rate": 2.405333333333333e-07, "loss": 2.0454, "step": 992000 }, { "epoch": 20.32, "eval_loss": 1.9045841693878174, "eval_runtime": 47.3858, "eval_samples_per_second": 867.39, "eval_steps_per_second": 54.215, "step": 992000 }, { "epoch": 20.49, "eval_loss": 1.9031916856765747, "eval_runtime": 47.9829, "eval_samples_per_second": 856.596, "eval_steps_per_second": 53.54, "step": 1000000 }, { "epoch": 20.65, "learning_rate": 2.3779999999999997e-07, "loss": 2.04, "step": 1008000 }, { "epoch": 20.65, "eval_loss": 1.9129735231399536, "eval_runtime": 48.8696, "eval_samples_per_second": 841.055, "eval_steps_per_second": 52.568, "step": 1008000 }, { "epoch": 20.82, "eval_loss": 1.922319769859314, "eval_runtime": 48.2743, "eval_samples_per_second": 851.427, "eval_steps_per_second": 53.217, "step": 1016000 }, { "epoch": 20.98, "learning_rate": 2.3506666666666668e-07, "loss": 2.0406, "step": 1024000 }, { "epoch": 20.98, "eval_loss": 1.9261207580566406, "eval_runtime": 48.256, "eval_samples_per_second": 851.749, "eval_steps_per_second": 53.237, "step": 1024000 }, { "epoch": 21.14, "eval_loss": 1.9014304876327515, "eval_runtime": 47.3123, "eval_samples_per_second": 868.737, "eval_steps_per_second": 54.299, "step": 1032000 }, { "epoch": 21.31, "learning_rate": 2.3233333333333334e-07, "loss": 2.0401, "step": 1040000 }, { "epoch": 21.31, "eval_loss": 1.905246615409851, "eval_runtime": 47.017, "eval_samples_per_second": 874.195, "eval_steps_per_second": 54.64, "step": 1040000 }, { "epoch": 21.47, "eval_loss": 1.9004502296447754, "eval_runtime": 47.6152, "eval_samples_per_second": 863.213, "eval_steps_per_second": 53.953, "step": 1048000 }, { "epoch": 21.64, "learning_rate": 2.2960000000000002e-07, "loss": 2.044, "step": 1056000 }, { "epoch": 21.64, "eval_loss": 1.9044198989868164, "eval_runtime": 48.1051, "eval_samples_per_second": 854.421, "eval_steps_per_second": 53.404, "step": 1056000 }, { "epoch": 21.8, "eval_loss": 1.9169893264770508, "eval_runtime": 49.4919, "eval_samples_per_second": 830.479, "eval_steps_per_second": 51.907, "step": 1064000 }, { "epoch": 21.96, "learning_rate": 2.2686666666666667e-07, "loss": 2.0401, "step": 1072000 }, { "epoch": 21.96, "eval_loss": 1.9103343486785889, "eval_runtime": 47.4547, "eval_samples_per_second": 866.132, "eval_steps_per_second": 54.136, "step": 1072000 }, { "epoch": 22.13, "eval_loss": 1.8970900774002075, "eval_runtime": 46.9831, "eval_samples_per_second": 874.825, "eval_steps_per_second": 54.679, "step": 1080000 }, { "epoch": 22.29, "learning_rate": 2.2413333333333333e-07, "loss": 2.0458, "step": 1088000 }, { "epoch": 22.29, "eval_loss": 1.9256750345230103, "eval_runtime": 49.8527, "eval_samples_per_second": 824.47, "eval_steps_per_second": 51.532, "step": 1088000 }, { "epoch": 22.45, "eval_loss": 1.9029202461242676, "eval_runtime": 48.7208, "eval_samples_per_second": 843.623, "eval_steps_per_second": 52.729, "step": 1096000 }, { "epoch": 22.62, "learning_rate": 2.214e-07, "loss": 2.0414, "step": 1104000 }, { "epoch": 22.62, "eval_loss": 1.915024995803833, "eval_runtime": 47.4381, "eval_samples_per_second": 866.434, "eval_steps_per_second": 54.155, "step": 1104000 }, { "epoch": 22.78, "eval_loss": 1.9123761653900146, "eval_runtime": 48.794, "eval_samples_per_second": 842.358, "eval_steps_per_second": 52.65, "step": 1112000 }, { "epoch": 22.95, "learning_rate": 2.1866666666666667e-07, "loss": 2.0419, "step": 1120000 }, { "epoch": 22.95, "eval_loss": 1.9029767513275146, "eval_runtime": 46.4756, "eval_samples_per_second": 884.378, "eval_steps_per_second": 55.276, "step": 1120000 }, { "epoch": 23.11, "eval_loss": 1.9144717454910278, "eval_runtime": 47.2249, "eval_samples_per_second": 870.345, "eval_steps_per_second": 54.399, "step": 1128000 }, { "epoch": 23.27, "learning_rate": 2.1593333333333332e-07, "loss": 2.0415, "step": 1136000 }, { "epoch": 23.27, "eval_loss": 1.9132155179977417, "eval_runtime": 49.2316, "eval_samples_per_second": 834.869, "eval_steps_per_second": 52.182, "step": 1136000 }, { "epoch": 23.44, "eval_loss": 1.9053661823272705, "eval_runtime": 47.1942, "eval_samples_per_second": 870.913, "eval_steps_per_second": 54.435, "step": 1144000 }, { "epoch": 23.6, "learning_rate": 2.132e-07, "loss": 2.0394, "step": 1152000 }, { "epoch": 23.6, "eval_loss": 1.915486216545105, "eval_runtime": 47.1108, "eval_samples_per_second": 872.454, "eval_steps_per_second": 54.531, "step": 1152000 }, { "epoch": 23.77, "eval_loss": 1.9147080183029175, "eval_runtime": 48.0401, "eval_samples_per_second": 855.578, "eval_steps_per_second": 53.476, "step": 1160000 }, { "epoch": 23.93, "learning_rate": 2.1046666666666666e-07, "loss": 2.0414, "step": 1168000 }, { "epoch": 23.93, "eval_loss": 1.9130446910858154, "eval_runtime": 47.5065, "eval_samples_per_second": 865.187, "eval_steps_per_second": 54.077, "step": 1168000 }, { "epoch": 24.09, "eval_loss": 1.9002370834350586, "eval_runtime": 46.3893, "eval_samples_per_second": 886.023, "eval_steps_per_second": 55.379, "step": 1176000 }, { "epoch": 24.26, "learning_rate": 2.0773333333333334e-07, "loss": 2.036, "step": 1184000 }, { "epoch": 24.26, "eval_loss": 1.899147391319275, "eval_runtime": 49.4488, "eval_samples_per_second": 831.204, "eval_steps_per_second": 51.953, "step": 1184000 }, { "epoch": 24.42, "eval_loss": 1.9202589988708496, "eval_runtime": 47.2347, "eval_samples_per_second": 870.165, "eval_steps_per_second": 54.388, "step": 1192000 }, { "epoch": 24.59, "learning_rate": 2.05e-07, "loss": 2.0393, "step": 1200000 }, { "epoch": 24.59, "eval_loss": 1.9327338933944702, "eval_runtime": 47.2461, "eval_samples_per_second": 869.956, "eval_steps_per_second": 54.375, "step": 1200000 }, { "epoch": 24.75, "eval_loss": 1.9098608493804932, "eval_runtime": 49.039, "eval_samples_per_second": 838.15, "eval_steps_per_second": 52.387, "step": 1208000 }, { "epoch": 24.91, "learning_rate": 2.0226666666666668e-07, "loss": 2.0375, "step": 1216000 }, { "epoch": 24.91, "eval_loss": 1.9097236394882202, "eval_runtime": 46.2815, "eval_samples_per_second": 888.088, "eval_steps_per_second": 55.508, "step": 1216000 }, { "epoch": 25.08, "eval_loss": 1.900553584098816, "eval_runtime": 47.0809, "eval_samples_per_second": 873.008, "eval_steps_per_second": 54.566, "step": 1224000 }, { "epoch": 25.24, "learning_rate": 1.9953333333333333e-07, "loss": 2.0384, "step": 1232000 }, { "epoch": 25.24, "eval_loss": 1.9062752723693848, "eval_runtime": 48.6722, "eval_samples_per_second": 844.466, "eval_steps_per_second": 52.782, "step": 1232000 }, { "epoch": 25.41, "eval_loss": 1.9056702852249146, "eval_runtime": 46.3936, "eval_samples_per_second": 885.941, "eval_steps_per_second": 55.374, "step": 1240000 }, { "epoch": 25.57, "learning_rate": 1.968e-07, "loss": 2.0392, "step": 1248000 }, { "epoch": 25.57, "eval_loss": 1.9037301540374756, "eval_runtime": 47.9627, "eval_samples_per_second": 856.958, "eval_steps_per_second": 53.563, "step": 1248000 }, { "epoch": 25.73, "eval_loss": 1.9013148546218872, "eval_runtime": 47.5129, "eval_samples_per_second": 865.07, "eval_steps_per_second": 54.07, "step": 1256000 }, { "epoch": 25.9, "learning_rate": 1.9406666666666667e-07, "loss": 2.0503, "step": 1264000 }, { "epoch": 25.9, "eval_loss": 1.903662085533142, "eval_runtime": 47.6911, "eval_samples_per_second": 861.838, "eval_steps_per_second": 53.867, "step": 1264000 }, { "epoch": 26.06, "eval_loss": 1.904166579246521, "eval_runtime": 51.6224, "eval_samples_per_second": 796.204, "eval_steps_per_second": 49.765, "step": 1272000 }, { "epoch": 26.22, "learning_rate": 1.9133333333333333e-07, "loss": 2.0418, "step": 1280000 }, { "epoch": 26.22, "eval_loss": 1.896562933921814, "eval_runtime": 47.6826, "eval_samples_per_second": 861.992, "eval_steps_per_second": 53.877, "step": 1280000 }, { "epoch": 26.39, "eval_loss": 1.9186903238296509, "eval_runtime": 49.1139, "eval_samples_per_second": 836.872, "eval_steps_per_second": 52.307, "step": 1288000 }, { "epoch": 26.55, "learning_rate": 1.886e-07, "loss": 2.0416, "step": 1296000 }, { "epoch": 26.55, "eval_loss": 1.9097530841827393, "eval_runtime": 48.6689, "eval_samples_per_second": 844.523, "eval_steps_per_second": 52.785, "step": 1296000 }, { "epoch": 26.72, "eval_loss": 1.9153447151184082, "eval_runtime": 49.0115, "eval_samples_per_second": 838.62, "eval_steps_per_second": 52.416, "step": 1304000 }, { "epoch": 26.88, "learning_rate": 1.8586666666666666e-07, "loss": 2.0396, "step": 1312000 }, { "epoch": 26.88, "eval_loss": 1.9163771867752075, "eval_runtime": 48.0083, "eval_samples_per_second": 856.144, "eval_steps_per_second": 53.512, "step": 1312000 }, { "epoch": 27.04, "eval_loss": 1.886674404144287, "eval_runtime": 48.4626, "eval_samples_per_second": 848.117, "eval_steps_per_second": 53.01, "step": 1320000 }, { "epoch": 27.21, "learning_rate": 1.8313333333333332e-07, "loss": 2.0397, "step": 1328000 }, { "epoch": 27.21, "eval_loss": 1.8968819379806519, "eval_runtime": 48.9216, "eval_samples_per_second": 840.161, "eval_steps_per_second": 52.513, "step": 1328000 }, { "epoch": 27.37, "eval_loss": 1.915518879890442, "eval_runtime": 46.9702, "eval_samples_per_second": 875.066, "eval_steps_per_second": 54.694, "step": 1336000 }, { "epoch": 27.54, "learning_rate": 1.804e-07, "loss": 2.0442, "step": 1344000 }, { "epoch": 27.54, "eval_loss": 1.9004348516464233, "eval_runtime": 47.3437, "eval_samples_per_second": 868.163, "eval_steps_per_second": 54.263, "step": 1344000 }, { "epoch": 27.7, "eval_loss": 1.9026644229888916, "eval_runtime": 47.3596, "eval_samples_per_second": 867.87, "eval_steps_per_second": 54.245, "step": 1352000 }, { "epoch": 27.86, "learning_rate": 1.7766666666666666e-07, "loss": 2.0332, "step": 1360000 }, { "epoch": 27.86, "eval_loss": 1.9095487594604492, "eval_runtime": 48.6586, "eval_samples_per_second": 844.702, "eval_steps_per_second": 52.796, "step": 1360000 }, { "epoch": 28.03, "eval_loss": 1.9133695363998413, "eval_runtime": 46.4591, "eval_samples_per_second": 884.693, "eval_steps_per_second": 55.296, "step": 1368000 }, { "epoch": 28.19, "learning_rate": 1.7493333333333334e-07, "loss": 2.0398, "step": 1376000 }, { "epoch": 28.19, "eval_loss": 1.9082748889923096, "eval_runtime": 48.6075, "eval_samples_per_second": 845.589, "eval_steps_per_second": 52.852, "step": 1376000 }, { "epoch": 28.36, "eval_loss": 1.9041306972503662, "eval_runtime": 47.5314, "eval_samples_per_second": 864.733, "eval_steps_per_second": 54.048, "step": 1384000 }, { "epoch": 28.52, "learning_rate": 1.722e-07, "loss": 2.0387, "step": 1392000 }, { "epoch": 28.52, "eval_loss": 1.8979713916778564, "eval_runtime": 47.145, "eval_samples_per_second": 871.821, "eval_steps_per_second": 54.491, "step": 1392000 }, { "epoch": 28.68, "eval_loss": 1.9209250211715698, "eval_runtime": 48.0076, "eval_samples_per_second": 856.156, "eval_steps_per_second": 53.512, "step": 1400000 }, { "epoch": 28.85, "learning_rate": 1.6946666666666668e-07, "loss": 2.0378, "step": 1408000 }, { "epoch": 28.85, "eval_loss": 1.896246314048767, "eval_runtime": 47.2348, "eval_samples_per_second": 870.163, "eval_steps_per_second": 54.388, "step": 1408000 }, { "epoch": 29.01, "eval_loss": 1.8981308937072754, "eval_runtime": 46.2446, "eval_samples_per_second": 888.796, "eval_steps_per_second": 55.552, "step": 1416000 }, { "epoch": 29.17, "learning_rate": 1.6673333333333333e-07, "loss": 2.0359, "step": 1424000 }, { "epoch": 29.17, "eval_loss": 1.9077860116958618, "eval_runtime": 47.7404, "eval_samples_per_second": 860.949, "eval_steps_per_second": 53.812, "step": 1424000 }, { "epoch": 29.34, "eval_loss": 1.8961683511734009, "eval_runtime": 48.4809, "eval_samples_per_second": 847.798, "eval_steps_per_second": 52.99, "step": 1432000 }, { "epoch": 29.5, "learning_rate": 1.64e-07, "loss": 2.0357, "step": 1440000 }, { "epoch": 29.5, "eval_loss": 1.8842545747756958, "eval_runtime": 46.6243, "eval_samples_per_second": 881.558, "eval_steps_per_second": 55.1, "step": 1440000 }, { "epoch": 29.67, "eval_loss": 1.9157154560089111, "eval_runtime": 47.0518, "eval_samples_per_second": 873.548, "eval_steps_per_second": 54.599, "step": 1448000 }, { "epoch": 29.83, "learning_rate": 1.6126666666666667e-07, "loss": 2.0367, "step": 1456000 }, { "epoch": 29.83, "eval_loss": 1.9277722835540771, "eval_runtime": 48.4635, "eval_samples_per_second": 848.102, "eval_steps_per_second": 53.009, "step": 1456000 }, { "epoch": 29.99, "eval_loss": 1.900920033454895, "eval_runtime": 48.4837, "eval_samples_per_second": 847.75, "eval_steps_per_second": 52.987, "step": 1464000 }, { "epoch": 30.16, "learning_rate": 1.5853333333333332e-07, "loss": 2.0442, "step": 1472000 }, { "epoch": 30.16, "eval_loss": 1.8968621492385864, "eval_runtime": 47.5534, "eval_samples_per_second": 864.334, "eval_steps_per_second": 54.024, "step": 1472000 }, { "epoch": 30.32, "eval_loss": 1.908553123474121, "eval_runtime": 49.61, "eval_samples_per_second": 828.502, "eval_steps_per_second": 51.784, "step": 1480000 }, { "epoch": 30.49, "learning_rate": 1.558e-07, "loss": 2.0401, "step": 1488000 }, { "epoch": 30.49, "eval_loss": 1.9059042930603027, "eval_runtime": 47.0678, "eval_samples_per_second": 873.252, "eval_steps_per_second": 54.581, "step": 1488000 }, { "epoch": 30.65, "eval_loss": 1.8996624946594238, "eval_runtime": 46.6534, "eval_samples_per_second": 881.007, "eval_steps_per_second": 55.066, "step": 1496000 }, { "epoch": 30.81, "learning_rate": 1.5306666666666666e-07, "loss": 2.0293, "step": 1504000 }, { "epoch": 30.81, "eval_loss": 1.9013988971710205, "eval_runtime": 48.769, "eval_samples_per_second": 842.79, "eval_steps_per_second": 52.677, "step": 1504000 }, { "epoch": 30.98, "eval_loss": 1.8971112966537476, "eval_runtime": 46.4191, "eval_samples_per_second": 885.455, "eval_steps_per_second": 55.344, "step": 1512000 }, { "epoch": 31.14, "learning_rate": 1.5033333333333332e-07, "loss": 2.035, "step": 1520000 }, { "epoch": 31.14, "eval_loss": 1.911407470703125, "eval_runtime": 48.2406, "eval_samples_per_second": 852.021, "eval_steps_per_second": 53.254, "step": 1520000 }, { "epoch": 31.31, "eval_loss": 1.9107669591903687, "eval_runtime": 48.5327, "eval_samples_per_second": 846.893, "eval_steps_per_second": 52.933, "step": 1528000 }, { "epoch": 31.47, "learning_rate": 1.476e-07, "loss": 2.0389, "step": 1536000 }, { "epoch": 31.47, "eval_loss": 1.897054672241211, "eval_runtime": 47.3694, "eval_samples_per_second": 867.69, "eval_steps_per_second": 54.233, "step": 1536000 }, { "epoch": 31.63, "eval_loss": 1.9081885814666748, "eval_runtime": 47.7941, "eval_samples_per_second": 859.98, "eval_steps_per_second": 53.751, "step": 1544000 }, { "epoch": 31.8, "learning_rate": 1.4486666666666665e-07, "loss": 2.0346, "step": 1552000 }, { "epoch": 31.8, "eval_loss": 1.9208694696426392, "eval_runtime": 47.5276, "eval_samples_per_second": 864.802, "eval_steps_per_second": 54.053, "step": 1552000 }, { "epoch": 31.96, "eval_loss": 1.9018235206604004, "eval_runtime": 48.2326, "eval_samples_per_second": 852.162, "eval_steps_per_second": 53.263, "step": 1560000 }, { "epoch": 32.13, "learning_rate": 1.4213333333333334e-07, "loss": 2.0428, "step": 1568000 }, { "epoch": 32.13, "eval_loss": 1.8987597227096558, "eval_runtime": 48.8575, "eval_samples_per_second": 841.263, "eval_steps_per_second": 52.581, "step": 1568000 }, { "epoch": 32.29, "eval_loss": 1.9089306592941284, "eval_runtime": 48.142, "eval_samples_per_second": 853.766, "eval_steps_per_second": 53.363, "step": 1576000 }, { "epoch": 32.45, "learning_rate": 1.3940000000000002e-07, "loss": 2.0286, "step": 1584000 }, { "epoch": 32.45, "eval_loss": 1.8982652425765991, "eval_runtime": 47.0839, "eval_samples_per_second": 872.953, "eval_steps_per_second": 54.562, "step": 1584000 }, { "epoch": 32.62, "eval_loss": 1.9029755592346191, "eval_runtime": 47.4849, "eval_samples_per_second": 865.581, "eval_steps_per_second": 54.101, "step": 1592000 }, { "epoch": 32.78, "learning_rate": 1.3666666666666665e-07, "loss": 2.037, "step": 1600000 }, { "epoch": 32.78, "eval_loss": 1.9041838645935059, "eval_runtime": 47.6583, "eval_samples_per_second": 862.431, "eval_steps_per_second": 53.905, "step": 1600000 }, { "epoch": 32.94, "eval_loss": 1.9021574258804321, "eval_runtime": 47.636, "eval_samples_per_second": 862.836, "eval_steps_per_second": 53.93, "step": 1608000 }, { "epoch": 33.11, "learning_rate": 1.3393333333333333e-07, "loss": 2.0348, "step": 1616000 }, { "epoch": 33.11, "eval_loss": 1.8987573385238647, "eval_runtime": 48.3458, "eval_samples_per_second": 850.167, "eval_steps_per_second": 53.138, "step": 1616000 }, { "epoch": 33.27, "eval_loss": 1.9159427881240845, "eval_runtime": 48.9467, "eval_samples_per_second": 839.73, "eval_steps_per_second": 52.486, "step": 1624000 }, { "epoch": 33.44, "learning_rate": 1.312e-07, "loss": 2.042, "step": 1632000 }, { "epoch": 33.44, "eval_loss": 1.8933523893356323, "eval_runtime": 48.1661, "eval_samples_per_second": 853.339, "eval_steps_per_second": 53.336, "step": 1632000 }, { "epoch": 33.6, "eval_loss": 1.890829086303711, "eval_runtime": 47.8234, "eval_samples_per_second": 859.454, "eval_steps_per_second": 53.719, "step": 1640000 }, { "epoch": 33.76, "learning_rate": 1.2846666666666667e-07, "loss": 2.0426, "step": 1648000 }, { "epoch": 33.76, "eval_loss": 1.88780677318573, "eval_runtime": 48.4427, "eval_samples_per_second": 848.466, "eval_steps_per_second": 53.032, "step": 1648000 }, { "epoch": 33.93, "eval_loss": 1.8881806135177612, "eval_runtime": 48.0347, "eval_samples_per_second": 855.674, "eval_steps_per_second": 53.482, "step": 1656000 }, { "epoch": 34.09, "learning_rate": 1.2573333333333332e-07, "loss": 2.0293, "step": 1664000 }, { "epoch": 34.09, "eval_loss": 1.9031046628952026, "eval_runtime": 48.0682, "eval_samples_per_second": 855.077, "eval_steps_per_second": 53.445, "step": 1664000 }, { "epoch": 34.26, "eval_loss": 1.9005811214447021, "eval_runtime": 48.7309, "eval_samples_per_second": 843.448, "eval_steps_per_second": 52.718, "step": 1672000 }, { "epoch": 34.42, "learning_rate": 1.23e-07, "loss": 2.0401, "step": 1680000 }, { "epoch": 34.42, "eval_loss": 1.9066253900527954, "eval_runtime": 52.1229, "eval_samples_per_second": 788.56, "eval_steps_per_second": 49.287, "step": 1680000 }, { "epoch": 34.58, "eval_loss": 1.8970048427581787, "eval_runtime": 48.6499, "eval_samples_per_second": 844.853, "eval_steps_per_second": 52.806, "step": 1688000 }, { "epoch": 34.75, "learning_rate": 1.2026666666666666e-07, "loss": 2.0315, "step": 1696000 }, { "epoch": 34.75, "eval_loss": 1.895332932472229, "eval_runtime": 47.3522, "eval_samples_per_second": 868.006, "eval_steps_per_second": 54.253, "step": 1696000 }, { "epoch": 34.91, "eval_loss": 1.9029524326324463, "eval_runtime": 48.1027, "eval_samples_per_second": 854.464, "eval_steps_per_second": 53.407, "step": 1704000 }, { "epoch": 35.08, "learning_rate": 1.1753333333333334e-07, "loss": 2.0393, "step": 1712000 }, { "epoch": 35.08, "eval_loss": 1.908921480178833, "eval_runtime": 49.5777, "eval_samples_per_second": 829.043, "eval_steps_per_second": 51.818, "step": 1712000 }, { "epoch": 35.24, "eval_loss": 1.9037113189697266, "eval_runtime": 46.6983, "eval_samples_per_second": 880.16, "eval_steps_per_second": 55.013, "step": 1720000 }, { "epoch": 35.4, "learning_rate": 1.1480000000000001e-07, "loss": 2.0422, "step": 1728000 }, { "epoch": 35.4, "eval_loss": 1.8991347551345825, "eval_runtime": 50.2373, "eval_samples_per_second": 818.157, "eval_steps_per_second": 51.137, "step": 1728000 }, { "epoch": 35.57, "eval_loss": 1.8825979232788086, "eval_runtime": 48.4051, "eval_samples_per_second": 849.125, "eval_steps_per_second": 53.073, "step": 1736000 }, { "epoch": 35.73, "learning_rate": 1.1206666666666666e-07, "loss": 2.0307, "step": 1744000 }, { "epoch": 35.73, "eval_loss": 1.9027307033538818, "eval_runtime": 48.1077, "eval_samples_per_second": 854.375, "eval_steps_per_second": 53.401, "step": 1744000 }, { "epoch": 35.9, "eval_loss": 1.9172565937042236, "eval_runtime": 52.2685, "eval_samples_per_second": 786.363, "eval_steps_per_second": 49.15, "step": 1752000 }, { "epoch": 36.06, "learning_rate": 1.0933333333333333e-07, "loss": 2.0414, "step": 1760000 }, { "epoch": 36.06, "eval_loss": 1.9065029621124268, "eval_runtime": 46.9419, "eval_samples_per_second": 875.593, "eval_steps_per_second": 54.727, "step": 1760000 }, { "epoch": 36.22, "eval_loss": 1.8984663486480713, "eval_runtime": 46.6191, "eval_samples_per_second": 881.656, "eval_steps_per_second": 55.106, "step": 1768000 }, { "epoch": 36.39, "learning_rate": 1.066e-07, "loss": 2.0393, "step": 1776000 }, { "epoch": 36.39, "eval_loss": 1.884873628616333, "eval_runtime": 48.406, "eval_samples_per_second": 849.11, "eval_steps_per_second": 53.072, "step": 1776000 }, { "epoch": 36.55, "eval_loss": 1.8944772481918335, "eval_runtime": 47.82, "eval_samples_per_second": 859.514, "eval_steps_per_second": 53.722, "step": 1784000 }, { "epoch": 36.71, "learning_rate": 1.0386666666666667e-07, "loss": 2.0302, "step": 1792000 }, { "epoch": 36.71, "eval_loss": 1.8962441682815552, "eval_runtime": 47.8175, "eval_samples_per_second": 859.56, "eval_steps_per_second": 53.725, "step": 1792000 }, { "epoch": 36.88, "eval_loss": 1.901509165763855, "eval_runtime": 48.8399, "eval_samples_per_second": 841.566, "eval_steps_per_second": 52.6, "step": 1800000 }, { "epoch": 37.04, "learning_rate": 1.0113333333333334e-07, "loss": 2.0344, "step": 1808000 }, { "epoch": 37.04, "eval_loss": 1.8923826217651367, "eval_runtime": 48.2438, "eval_samples_per_second": 851.964, "eval_steps_per_second": 53.25, "step": 1808000 }, { "epoch": 37.21, "eval_loss": 1.8804473876953125, "eval_runtime": 46.9418, "eval_samples_per_second": 875.595, "eval_steps_per_second": 54.727, "step": 1816000 }, { "epoch": 37.37, "learning_rate": 9.84e-08, "loss": 2.0303, "step": 1824000 }, { "epoch": 37.37, "eval_loss": 1.894727349281311, "eval_runtime": 49.8927, "eval_samples_per_second": 823.808, "eval_steps_per_second": 51.491, "step": 1824000 }, { "epoch": 37.53, "eval_loss": 1.891680121421814, "eval_runtime": 47.928, "eval_samples_per_second": 857.579, "eval_steps_per_second": 53.601, "step": 1832000 }, { "epoch": 37.7, "learning_rate": 9.566666666666666e-08, "loss": 2.0318, "step": 1840000 }, { "epoch": 37.7, "eval_loss": 1.8992102146148682, "eval_runtime": 46.9913, "eval_samples_per_second": 874.673, "eval_steps_per_second": 54.67, "step": 1840000 }, { "epoch": 37.86, "eval_loss": 1.901772379875183, "eval_runtime": 48.5091, "eval_samples_per_second": 847.306, "eval_steps_per_second": 52.959, "step": 1848000 }, { "epoch": 38.03, "learning_rate": 9.293333333333333e-08, "loss": 2.039, "step": 1856000 }, { "epoch": 38.03, "eval_loss": 1.8903098106384277, "eval_runtime": 49.839, "eval_samples_per_second": 824.695, "eval_steps_per_second": 51.546, "step": 1856000 }, { "epoch": 38.19, "eval_loss": 1.9088859558105469, "eval_runtime": 49.3732, "eval_samples_per_second": 832.477, "eval_steps_per_second": 52.032, "step": 1864000 }, { "epoch": 38.35, "learning_rate": 9.02e-08, "loss": 2.0319, "step": 1872000 }, { "epoch": 38.35, "eval_loss": 1.9092743396759033, "eval_runtime": 47.4891, "eval_samples_per_second": 865.505, "eval_steps_per_second": 54.097, "step": 1872000 }, { "epoch": 38.52, "eval_loss": 1.895071268081665, "eval_runtime": 50.6752, "eval_samples_per_second": 811.087, "eval_steps_per_second": 50.695, "step": 1880000 }, { "epoch": 38.68, "learning_rate": 8.746666666666667e-08, "loss": 2.0359, "step": 1888000 }, { "epoch": 38.68, "eval_loss": 1.8970472812652588, "eval_runtime": 47.92, "eval_samples_per_second": 857.721, "eval_steps_per_second": 53.61, "step": 1888000 }, { "epoch": 38.85, "eval_loss": 1.89948308467865, "eval_runtime": 49.1024, "eval_samples_per_second": 837.067, "eval_steps_per_second": 52.319, "step": 1896000 }, { "epoch": 39.01, "learning_rate": 8.473333333333334e-08, "loss": 2.0353, "step": 1904000 }, { "epoch": 39.01, "eval_loss": 1.8887970447540283, "eval_runtime": 49.8221, "eval_samples_per_second": 824.975, "eval_steps_per_second": 51.563, "step": 1904000 }, { "epoch": 39.17, "eval_loss": 1.914867877960205, "eval_runtime": 47.1415, "eval_samples_per_second": 871.885, "eval_steps_per_second": 54.495, "step": 1912000 }, { "epoch": 39.34, "learning_rate": 8.2e-08, "loss": 2.0343, "step": 1920000 }, { "epoch": 39.34, "eval_loss": 1.8881230354309082, "eval_runtime": 48.2273, "eval_samples_per_second": 852.256, "eval_steps_per_second": 53.269, "step": 1920000 }, { "epoch": 39.5, "eval_loss": 1.8935189247131348, "eval_runtime": 49.8823, "eval_samples_per_second": 823.979, "eval_steps_per_second": 51.501, "step": 1928000 }, { "epoch": 39.66, "learning_rate": 7.926666666666666e-08, "loss": 2.0395, "step": 1936000 }, { "epoch": 39.66, "eval_loss": 1.8938074111938477, "eval_runtime": 49.2266, "eval_samples_per_second": 834.956, "eval_steps_per_second": 52.187, "step": 1936000 }, { "epoch": 39.83, "eval_loss": 1.8929249048233032, "eval_runtime": 47.6917, "eval_samples_per_second": 861.827, "eval_steps_per_second": 53.867, "step": 1944000 }, { "epoch": 39.99, "learning_rate": 7.653333333333333e-08, "loss": 2.0316, "step": 1952000 }, { "epoch": 39.99, "eval_loss": 1.9186030626296997, "eval_runtime": 49.8739, "eval_samples_per_second": 824.118, "eval_steps_per_second": 51.51, "step": 1952000 }, { "epoch": 40.16, "eval_loss": 1.9189401865005493, "eval_runtime": 50.0489, "eval_samples_per_second": 821.238, "eval_steps_per_second": 51.33, "step": 1960000 }, { "epoch": 40.32, "learning_rate": 7.38e-08, "loss": 2.0302, "step": 1968000 }, { "epoch": 40.32, "eval_loss": 1.9124609231948853, "eval_runtime": 52.3286, "eval_samples_per_second": 785.459, "eval_steps_per_second": 49.094, "step": 1968000 }, { "epoch": 40.48, "eval_loss": 1.9077569246292114, "eval_runtime": 48.6653, "eval_samples_per_second": 844.585, "eval_steps_per_second": 52.789, "step": 1976000 }, { "epoch": 40.65, "learning_rate": 7.106666666666667e-08, "loss": 2.0355, "step": 1984000 }, { "epoch": 40.65, "eval_loss": 1.8975083827972412, "eval_runtime": 48.5464, "eval_samples_per_second": 846.654, "eval_steps_per_second": 52.918, "step": 1984000 }, { "epoch": 40.81, "eval_loss": 1.892892837524414, "eval_runtime": 49.649, "eval_samples_per_second": 827.852, "eval_steps_per_second": 51.743, "step": 1992000 }, { "epoch": 40.98, "learning_rate": 6.833333333333332e-08, "loss": 2.0332, "step": 2000000 }, { "epoch": 40.98, "eval_loss": 1.8898950815200806, "eval_runtime": 48.8626, "eval_samples_per_second": 841.175, "eval_steps_per_second": 52.576, "step": 2000000 }, { "epoch": 41.14, "eval_loss": 1.9043201208114624, "eval_runtime": 49.3883, "eval_samples_per_second": 832.221, "eval_steps_per_second": 52.016, "step": 2008000 }, { "epoch": 41.3, "learning_rate": 6.56e-08, "loss": 2.0327, "step": 2016000 }, { "epoch": 41.3, "eval_loss": 1.9086461067199707, "eval_runtime": 48.958, "eval_samples_per_second": 839.535, "eval_steps_per_second": 52.474, "step": 2016000 }, { "epoch": 41.47, "eval_loss": 1.894409418106079, "eval_runtime": 50.95, "eval_samples_per_second": 806.713, "eval_steps_per_second": 50.422, "step": 2024000 }, { "epoch": 41.63, "learning_rate": 6.286666666666666e-08, "loss": 2.0414, "step": 2032000 }, { "epoch": 41.63, "eval_loss": 1.9029189348220825, "eval_runtime": 48.6407, "eval_samples_per_second": 845.013, "eval_steps_per_second": 52.816, "step": 2032000 }, { "epoch": 41.8, "eval_loss": 1.8990041017532349, "eval_runtime": 48.9601, "eval_samples_per_second": 839.5, "eval_steps_per_second": 52.471, "step": 2040000 }, { "epoch": 41.96, "learning_rate": 6.013333333333333e-08, "loss": 2.0327, "step": 2048000 }, { "epoch": 41.96, "eval_loss": 1.9174869060516357, "eval_runtime": 47.4446, "eval_samples_per_second": 866.315, "eval_steps_per_second": 54.147, "step": 2048000 }, { "epoch": 42.12, "eval_loss": 1.88877534866333, "eval_runtime": 48.6939, "eval_samples_per_second": 844.09, "eval_steps_per_second": 52.758, "step": 2056000 }, { "epoch": 42.29, "learning_rate": 5.7400000000000004e-08, "loss": 2.0428, "step": 2064000 }, { "epoch": 42.29, "eval_loss": 1.8971478939056396, "eval_runtime": 47.9266, "eval_samples_per_second": 857.603, "eval_steps_per_second": 53.603, "step": 2064000 }, { "epoch": 42.45, "eval_loss": 1.90713369846344, "eval_runtime": 49.0676, "eval_samples_per_second": 837.661, "eval_steps_per_second": 52.356, "step": 2072000 }, { "epoch": 42.62, "learning_rate": 5.4666666666666666e-08, "loss": 2.0337, "step": 2080000 }, { "epoch": 42.62, "eval_loss": 1.8967806100845337, "eval_runtime": 50.5362, "eval_samples_per_second": 813.318, "eval_steps_per_second": 50.835, "step": 2080000 }, { "epoch": 42.78, "eval_loss": 1.9059593677520752, "eval_runtime": 48.8076, "eval_samples_per_second": 842.122, "eval_steps_per_second": 52.635, "step": 2088000 }, { "epoch": 42.94, "learning_rate": 5.1933333333333335e-08, "loss": 2.0394, "step": 2096000 }, { "epoch": 42.94, "eval_loss": 1.902763843536377, "eval_runtime": 48.599, "eval_samples_per_second": 845.737, "eval_steps_per_second": 52.861, "step": 2096000 }, { "epoch": 43.11, "eval_loss": 1.8916599750518799, "eval_runtime": 50.6067, "eval_samples_per_second": 812.185, "eval_steps_per_second": 50.764, "step": 2104000 }, { "epoch": 43.27, "learning_rate": 4.92e-08, "loss": 2.0314, "step": 2112000 }, { "epoch": 43.27, "eval_loss": 1.8868290185928345, "eval_runtime": 50.3637, "eval_samples_per_second": 816.103, "eval_steps_per_second": 51.009, "step": 2112000 }, { "epoch": 43.43, "eval_loss": 1.9086658954620361, "eval_runtime": 47.3253, "eval_samples_per_second": 868.499, "eval_steps_per_second": 54.284, "step": 2120000 }, { "epoch": 43.6, "learning_rate": 4.6466666666666666e-08, "loss": 2.0368, "step": 2128000 }, { "epoch": 43.6, "eval_loss": 1.8951292037963867, "eval_runtime": 48.361, "eval_samples_per_second": 849.9, "eval_steps_per_second": 53.121, "step": 2128000 }, { "epoch": 43.76, "eval_loss": 1.893837809562683, "eval_runtime": 47.3135, "eval_samples_per_second": 868.717, "eval_steps_per_second": 54.297, "step": 2136000 }, { "epoch": 43.93, "learning_rate": 4.3733333333333335e-08, "loss": 2.0298, "step": 2144000 }, { "epoch": 43.93, "eval_loss": 1.8874099254608154, "eval_runtime": 47.3514, "eval_samples_per_second": 868.022, "eval_steps_per_second": 54.254, "step": 2144000 }, { "epoch": 44.09, "eval_loss": 1.90646493434906, "eval_runtime": 49.5581, "eval_samples_per_second": 829.37, "eval_steps_per_second": 51.838, "step": 2152000 }, { "epoch": 44.25, "learning_rate": 4.1e-08, "loss": 2.0353, "step": 2160000 }, { "epoch": 44.25, "eval_loss": 1.9097111225128174, "eval_runtime": 46.9959, "eval_samples_per_second": 874.587, "eval_steps_per_second": 54.664, "step": 2160000 }, { "epoch": 44.42, "eval_loss": 1.8985484838485718, "eval_runtime": 48.6536, "eval_samples_per_second": 844.788, "eval_steps_per_second": 52.802, "step": 2168000 }, { "epoch": 44.58, "learning_rate": 3.8266666666666665e-08, "loss": 2.0324, "step": 2176000 }, { "epoch": 44.58, "eval_loss": 1.9160007238388062, "eval_runtime": 49.8464, "eval_samples_per_second": 824.574, "eval_steps_per_second": 51.538, "step": 2176000 }, { "epoch": 44.75, "eval_loss": 1.9059538841247559, "eval_runtime": 49.9186, "eval_samples_per_second": 823.38, "eval_steps_per_second": 51.464, "step": 2184000 }, { "epoch": 44.91, "learning_rate": 3.5533333333333334e-08, "loss": 2.0316, "step": 2192000 }, { "epoch": 44.91, "eval_loss": 1.8911948204040527, "eval_runtime": 47.2346, "eval_samples_per_second": 870.167, "eval_steps_per_second": 54.388, "step": 2192000 }, { "epoch": 45.07, "eval_loss": 1.9013826847076416, "eval_runtime": 47.9929, "eval_samples_per_second": 856.419, "eval_steps_per_second": 53.529, "step": 2200000 }, { "epoch": 45.24, "learning_rate": 3.28e-08, "loss": 2.0322, "step": 2208000 }, { "epoch": 45.24, "eval_loss": 1.903083324432373, "eval_runtime": 47.6274, "eval_samples_per_second": 862.991, "eval_steps_per_second": 53.94, "step": 2208000 }, { "epoch": 45.4, "eval_loss": 1.9086343050003052, "eval_runtime": 47.838, "eval_samples_per_second": 859.191, "eval_steps_per_second": 53.702, "step": 2216000 }, { "epoch": 45.57, "learning_rate": 3.0066666666666665e-08, "loss": 2.035, "step": 2224000 }, { "epoch": 45.57, "eval_loss": 1.9147051572799683, "eval_runtime": 47.9056, "eval_samples_per_second": 857.978, "eval_steps_per_second": 53.626, "step": 2224000 }, { "epoch": 45.73, "eval_loss": 1.900397777557373, "eval_runtime": 48.7638, "eval_samples_per_second": 842.879, "eval_steps_per_second": 52.682, "step": 2232000 }, { "epoch": 45.89, "learning_rate": 2.7333333333333333e-08, "loss": 2.0431, "step": 2240000 }, { "epoch": 45.89, "eval_loss": 1.9027053117752075, "eval_runtime": 47.5248, "eval_samples_per_second": 864.854, "eval_steps_per_second": 54.056, "step": 2240000 }, { "epoch": 46.06, "eval_loss": 1.891597032546997, "eval_runtime": 48.2801, "eval_samples_per_second": 851.323, "eval_steps_per_second": 53.21, "step": 2248000 }, { "epoch": 46.22, "learning_rate": 2.46e-08, "loss": 2.0347, "step": 2256000 }, { "epoch": 46.22, "eval_loss": 1.9048635959625244, "eval_runtime": 48.7746, "eval_samples_per_second": 842.693, "eval_steps_per_second": 52.671, "step": 2256000 }, { "epoch": 46.38, "eval_loss": 1.8972582817077637, "eval_runtime": 48.3007, "eval_samples_per_second": 850.96, "eval_steps_per_second": 53.188, "step": 2264000 }, { "epoch": 46.55, "learning_rate": 2.1866666666666667e-08, "loss": 2.0353, "step": 2272000 }, { "epoch": 46.55, "eval_loss": 1.8931912183761597, "eval_runtime": 49.2437, "eval_samples_per_second": 834.665, "eval_steps_per_second": 52.169, "step": 2272000 }, { "epoch": 46.71, "eval_loss": 1.912061333656311, "eval_runtime": 48.3695, "eval_samples_per_second": 849.75, "eval_steps_per_second": 53.112, "step": 2280000 }, { "epoch": 46.88, "learning_rate": 1.9133333333333333e-08, "loss": 2.0309, "step": 2288000 }, { "epoch": 46.88, "eval_loss": 1.8972575664520264, "eval_runtime": 48.673, "eval_samples_per_second": 844.452, "eval_steps_per_second": 52.781, "step": 2288000 }, { "epoch": 47.04, "eval_loss": 1.8985596895217896, "eval_runtime": 50.0454, "eval_samples_per_second": 821.294, "eval_steps_per_second": 51.333, "step": 2296000 }, { "epoch": 47.2, "learning_rate": 1.64e-08, "loss": 2.0359, "step": 2304000 }, { "epoch": 47.2, "eval_loss": 1.9202120304107666, "eval_runtime": 49.0939, "eval_samples_per_second": 837.212, "eval_steps_per_second": 52.328, "step": 2304000 }, { "epoch": 47.37, "eval_loss": 1.8778069019317627, "eval_runtime": 48.2597, "eval_samples_per_second": 851.683, "eval_steps_per_second": 53.233, "step": 2312000 }, { "epoch": 47.53, "learning_rate": 1.3666666666666667e-08, "loss": 2.037, "step": 2320000 }, { "epoch": 47.53, "eval_loss": 1.9136707782745361, "eval_runtime": 48.2191, "eval_samples_per_second": 852.401, "eval_steps_per_second": 53.278, "step": 2320000 }, { "epoch": 47.7, "eval_loss": 1.9095231294631958, "eval_runtime": 49.9332, "eval_samples_per_second": 823.14, "eval_steps_per_second": 51.449, "step": 2328000 }, { "epoch": 47.86, "learning_rate": 1.0933333333333334e-08, "loss": 2.0306, "step": 2336000 }, { "epoch": 47.86, "eval_loss": 1.903990626335144, "eval_runtime": 47.1892, "eval_samples_per_second": 871.005, "eval_steps_per_second": 54.44, "step": 2336000 }, { "epoch": 48.02, "eval_loss": 1.8931529521942139, "eval_runtime": 47.2904, "eval_samples_per_second": 869.141, "eval_steps_per_second": 54.324, "step": 2344000 }, { "epoch": 48.19, "learning_rate": 8.2e-09, "loss": 2.0342, "step": 2352000 }, { "epoch": 48.19, "eval_loss": 1.9151569604873657, "eval_runtime": 50.819, "eval_samples_per_second": 808.791, "eval_steps_per_second": 50.552, "step": 2352000 }, { "epoch": 48.35, "eval_loss": 1.9058945178985596, "eval_runtime": 47.9047, "eval_samples_per_second": 857.995, "eval_steps_per_second": 53.627, "step": 2360000 }, { "epoch": 48.52, "learning_rate": 5.466666666666667e-09, "loss": 2.0457, "step": 2368000 }, { "epoch": 48.52, "eval_loss": 1.8844729661941528, "eval_runtime": 47.9335, "eval_samples_per_second": 857.479, "eval_steps_per_second": 53.595, "step": 2368000 }, { "epoch": 48.68, "eval_loss": 1.903971791267395, "eval_runtime": 49.7448, "eval_samples_per_second": 826.257, "eval_steps_per_second": 51.644, "step": 2376000 }, { "epoch": 48.84, "learning_rate": 2.7333333333333334e-09, "loss": 2.0349, "step": 2384000 }, { "epoch": 48.84, "eval_loss": 1.9000164270401, "eval_runtime": 48.2699, "eval_samples_per_second": 851.504, "eval_steps_per_second": 53.222, "step": 2384000 }, { "epoch": 49.01, "eval_loss": 1.905893087387085, "eval_runtime": 48.9997, "eval_samples_per_second": 838.822, "eval_steps_per_second": 52.429, "step": 2392000 }, { "epoch": 49.17, "learning_rate": 0.0, "loss": 2.0322, "step": 2400000 }, { "epoch": 49.17, "eval_loss": 1.8925877809524536, "eval_runtime": 49.4417, "eval_samples_per_second": 831.323, "eval_steps_per_second": 51.96, "step": 2400000 }, { "epoch": 49.17, "step": 2400000, "total_flos": 7.48474678802484e+17, "train_loss": 2.0531104736328123, "train_runtime": 167230.1572, "train_samples_per_second": 229.624, "train_steps_per_second": 14.351 } ], "logging_steps": 16000, "max_steps": 2400000, "num_train_epochs": 50, "save_steps": 32000, "total_flos": 7.48474678802484e+17, "trial_name": null, "trial_params": null }