{ "best_metric": 2.555936574935913, "best_model_checkpoint": "./model_tweets_2020_Q2_50/checkpoint-160000", "epoch": 10.105263157894736, "eval_steps": 8000, "global_step": 2400000, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.03, "eval_loss": 2.664475917816162, "eval_runtime": 220.2299, "eval_samples_per_second": 908.142, "eval_steps_per_second": 56.759, "step": 8000 }, { "epoch": 0.07, "learning_rate": 9.939131159843243e-06, "loss": 2.8656, "step": 16000 }, { "epoch": 0.07, "eval_loss": 2.6464931964874268, "eval_runtime": 221.9882, "eval_samples_per_second": 900.949, "eval_steps_per_second": 56.309, "step": 16000 }, { "epoch": 0.1, "eval_loss": 2.6185896396636963, "eval_runtime": 220.2227, "eval_samples_per_second": 908.172, "eval_steps_per_second": 56.761, "step": 24000 }, { "epoch": 0.13, "learning_rate": 9.872425581589261e-06, "loss": 2.7946, "step": 32000 }, { "epoch": 0.13, "eval_loss": 2.6234936714172363, "eval_runtime": 220.6677, "eval_samples_per_second": 906.34, "eval_steps_per_second": 56.646, "step": 32000 }, { "epoch": 0.17, "eval_loss": 2.615138292312622, "eval_runtime": 221.7491, "eval_samples_per_second": 901.92, "eval_steps_per_second": 56.37, "step": 40000 }, { "epoch": 0.2, "learning_rate": 9.80572000333528e-06, "loss": 2.7911, "step": 48000 }, { "epoch": 0.2, "eval_loss": 2.6128268241882324, "eval_runtime": 221.3475, "eval_samples_per_second": 903.557, "eval_steps_per_second": 56.472, "step": 48000 }, { "epoch": 0.24, "eval_loss": 2.6009654998779297, "eval_runtime": 221.2039, "eval_samples_per_second": 904.143, "eval_steps_per_second": 56.509, "step": 56000 }, { "epoch": 0.27, "learning_rate": 9.739014425081299e-06, "loss": 2.7898, "step": 64000 }, { "epoch": 0.27, "eval_loss": 2.614436149597168, "eval_runtime": 221.9246, "eval_samples_per_second": 901.207, "eval_steps_per_second": 56.325, "step": 64000 }, { "epoch": 0.3, "eval_loss": 2.597571611404419, "eval_runtime": 222.8659, "eval_samples_per_second": 897.401, "eval_steps_per_second": 56.088, "step": 72000 }, { "epoch": 0.34, "learning_rate": 9.672308846827316e-06, "loss": 2.7791, "step": 80000 }, { "epoch": 0.34, "eval_loss": 2.6006274223327637, "eval_runtime": 221.9145, "eval_samples_per_second": 901.248, "eval_steps_per_second": 56.328, "step": 80000 }, { "epoch": 0.37, "eval_loss": 2.5888915061950684, "eval_runtime": 223.9796, "eval_samples_per_second": 892.938, "eval_steps_per_second": 55.809, "step": 88000 }, { "epoch": 0.4, "learning_rate": 9.605603268573334e-06, "loss": 2.7776, "step": 96000 }, { "epoch": 0.4, "eval_loss": 2.5888023376464844, "eval_runtime": 221.625, "eval_samples_per_second": 902.425, "eval_steps_per_second": 56.402, "step": 96000 }, { "epoch": 0.44, "eval_loss": 2.584191083908081, "eval_runtime": 222.5849, "eval_samples_per_second": 898.533, "eval_steps_per_second": 56.158, "step": 104000 }, { "epoch": 0.47, "learning_rate": 9.538897690319354e-06, "loss": 2.7702, "step": 112000 }, { "epoch": 0.47, "eval_loss": 2.5760483741760254, "eval_runtime": 222.9149, "eval_samples_per_second": 897.203, "eval_steps_per_second": 56.075, "step": 112000 }, { "epoch": 0.51, "eval_loss": 2.5719943046569824, "eval_runtime": 220.2346, "eval_samples_per_second": 908.123, "eval_steps_per_second": 56.758, "step": 120000 }, { "epoch": 0.54, "learning_rate": 9.472192112065373e-06, "loss": 2.7661, "step": 128000 }, { "epoch": 0.54, "eval_loss": 2.5709779262542725, "eval_runtime": 221.9288, "eval_samples_per_second": 901.19, "eval_steps_per_second": 56.324, "step": 128000 }, { "epoch": 0.57, "eval_loss": 2.56732177734375, "eval_runtime": 222.0037, "eval_samples_per_second": 900.886, "eval_steps_per_second": 56.305, "step": 136000 }, { "epoch": 0.61, "learning_rate": 9.405486533811392e-06, "loss": 2.7609, "step": 144000 }, { "epoch": 0.61, "eval_loss": 2.5692689418792725, "eval_runtime": 221.4862, "eval_samples_per_second": 902.991, "eval_steps_per_second": 56.437, "step": 144000 }, { "epoch": 0.64, "eval_loss": 2.5623199939727783, "eval_runtime": 222.634, "eval_samples_per_second": 898.335, "eval_steps_per_second": 56.146, "step": 152000 }, { "epoch": 0.67, "learning_rate": 9.338780955557409e-06, "loss": 2.7557, "step": 160000 }, { "epoch": 0.67, "eval_loss": 2.555936574935913, "eval_runtime": 222.9827, "eval_samples_per_second": 896.93, "eval_steps_per_second": 56.058, "step": 160000 }, { "epoch": 0.71, "eval_loss": 2.564979314804077, "eval_runtime": 223.4268, "eval_samples_per_second": 895.148, "eval_steps_per_second": 55.947, "step": 168000 }, { "epoch": 0.74, "learning_rate": 9.272075377303427e-06, "loss": 2.7584, "step": 176000 }, { "epoch": 0.74, "eval_loss": 2.558361053466797, "eval_runtime": 223.2855, "eval_samples_per_second": 895.714, "eval_steps_per_second": 55.982, "step": 176000 }, { "epoch": 0.77, "eval_loss": 2.5590734481811523, "eval_runtime": 224.766, "eval_samples_per_second": 889.814, "eval_steps_per_second": 55.613, "step": 184000 }, { "epoch": 0.81, "learning_rate": 9.205369799049446e-06, "loss": 2.7619, "step": 192000 }, { "epoch": 0.81, "eval_loss": 2.5597450733184814, "eval_runtime": 223.3408, "eval_samples_per_second": 895.492, "eval_steps_per_second": 55.968, "step": 192000 }, { "epoch": 0.84, "eval_loss": 2.564985990524292, "eval_runtime": 222.3245, "eval_samples_per_second": 899.586, "eval_steps_per_second": 56.224, "step": 200000 }, { "epoch": 0.88, "learning_rate": 9.138664220795464e-06, "loss": 2.7678, "step": 208000 }, { "epoch": 0.88, "eval_loss": 2.572838306427002, "eval_runtime": 222.9177, "eval_samples_per_second": 897.192, "eval_steps_per_second": 56.074, "step": 208000 }, { "epoch": 0.91, "eval_loss": 2.571180820465088, "eval_runtime": 222.4106, "eval_samples_per_second": 899.238, "eval_steps_per_second": 56.202, "step": 216000 }, { "epoch": 0.94, "learning_rate": 9.071958642541483e-06, "loss": 2.7735, "step": 224000 }, { "epoch": 0.94, "eval_loss": 2.5728507041931152, "eval_runtime": 221.0881, "eval_samples_per_second": 904.617, "eval_steps_per_second": 56.539, "step": 224000 }, { "epoch": 0.98, "eval_loss": 2.5754916667938232, "eval_runtime": 224.4187, "eval_samples_per_second": 891.191, "eval_steps_per_second": 55.699, "step": 232000 }, { "epoch": 1.01, "learning_rate": 9.005253064287502e-06, "loss": 2.777, "step": 240000 }, { "epoch": 1.01, "eval_loss": 2.571467876434326, "eval_runtime": 223.3627, "eval_samples_per_second": 895.405, "eval_steps_per_second": 55.963, "step": 240000 }, { "epoch": 1.04, "eval_loss": 2.5747482776641846, "eval_runtime": 223.2929, "eval_samples_per_second": 895.685, "eval_steps_per_second": 55.98, "step": 248000 }, { "epoch": 1.08, "learning_rate": 8.93854748603352e-06, "loss": 2.7692, "step": 256000 }, { "epoch": 1.08, "eval_loss": 2.5781774520874023, "eval_runtime": 225.2908, "eval_samples_per_second": 887.742, "eval_steps_per_second": 55.484, "step": 256000 }, { "epoch": 1.11, "eval_loss": 2.58413028717041, "eval_runtime": 223.7883, "eval_samples_per_second": 893.702, "eval_steps_per_second": 55.856, "step": 264000 }, { "epoch": 1.15, "learning_rate": 8.871841907779539e-06, "loss": 2.7826, "step": 272000 }, { "epoch": 1.15, "eval_loss": 2.573080539703369, "eval_runtime": 222.4765, "eval_samples_per_second": 898.971, "eval_steps_per_second": 56.186, "step": 272000 }, { "epoch": 1.18, "eval_loss": 2.5836124420166016, "eval_runtime": 222.4727, "eval_samples_per_second": 898.987, "eval_steps_per_second": 56.187, "step": 280000 }, { "epoch": 1.21, "learning_rate": 8.805136329525557e-06, "loss": 2.7845, "step": 288000 }, { "epoch": 1.21, "eval_loss": 2.5840952396392822, "eval_runtime": 223.8774, "eval_samples_per_second": 893.346, "eval_steps_per_second": 55.834, "step": 288000 }, { "epoch": 1.25, "eval_loss": 2.5810587406158447, "eval_runtime": 224.0492, "eval_samples_per_second": 892.661, "eval_steps_per_second": 55.791, "step": 296000 }, { "epoch": 1.28, "learning_rate": 8.738430751271576e-06, "loss": 2.7909, "step": 304000 }, { "epoch": 1.28, "eval_loss": 2.592771530151367, "eval_runtime": 224.5453, "eval_samples_per_second": 890.689, "eval_steps_per_second": 55.668, "step": 304000 }, { "epoch": 1.31, "eval_loss": 2.597700834274292, "eval_runtime": 222.8877, "eval_samples_per_second": 897.313, "eval_steps_per_second": 56.082, "step": 312000 }, { "epoch": 1.35, "learning_rate": 8.671725173017595e-06, "loss": 2.7993, "step": 320000 }, { "epoch": 1.35, "eval_loss": 2.6025121212005615, "eval_runtime": 223.4062, "eval_samples_per_second": 895.23, "eval_steps_per_second": 55.952, "step": 320000 }, { "epoch": 1.38, "eval_loss": 2.6072068214416504, "eval_runtime": 222.1596, "eval_samples_per_second": 900.254, "eval_steps_per_second": 56.266, "step": 328000 }, { "epoch": 1.41, "learning_rate": 8.605019594763613e-06, "loss": 2.8107, "step": 336000 }, { "epoch": 1.41, "eval_loss": 2.6110291481018066, "eval_runtime": 221.489, "eval_samples_per_second": 902.979, "eval_steps_per_second": 56.436, "step": 336000 }, { "epoch": 1.45, "eval_loss": 2.6020007133483887, "eval_runtime": 221.5356, "eval_samples_per_second": 902.79, "eval_steps_per_second": 56.424, "step": 344000 }, { "epoch": 1.48, "learning_rate": 8.538314016509632e-06, "loss": 2.8102, "step": 352000 }, { "epoch": 1.48, "eval_loss": 2.606468677520752, "eval_runtime": 221.9142, "eval_samples_per_second": 901.249, "eval_steps_per_second": 56.328, "step": 352000 }, { "epoch": 1.52, "eval_loss": 2.620694637298584, "eval_runtime": 223.9159, "eval_samples_per_second": 893.193, "eval_steps_per_second": 55.825, "step": 360000 }, { "epoch": 1.55, "learning_rate": 8.471608438255649e-06, "loss": 2.8247, "step": 368000 }, { "epoch": 1.55, "eval_loss": 2.6191916465759277, "eval_runtime": 224.0761, "eval_samples_per_second": 892.554, "eval_steps_per_second": 55.785, "step": 368000 }, { "epoch": 1.58, "eval_loss": 2.6223857402801514, "eval_runtime": 224.2988, "eval_samples_per_second": 891.668, "eval_steps_per_second": 55.729, "step": 376000 }, { "epoch": 1.62, "learning_rate": 8.404902860001667e-06, "loss": 2.8271, "step": 384000 }, { "epoch": 1.62, "eval_loss": 2.6205480098724365, "eval_runtime": 224.6631, "eval_samples_per_second": 890.222, "eval_steps_per_second": 55.639, "step": 384000 }, { "epoch": 1.65, "eval_loss": 2.62916898727417, "eval_runtime": 221.5526, "eval_samples_per_second": 902.72, "eval_steps_per_second": 56.42, "step": 392000 }, { "epoch": 1.68, "learning_rate": 8.338197281747686e-06, "loss": 2.8415, "step": 400000 }, { "epoch": 1.68, "eval_loss": 2.6347849369049072, "eval_runtime": 222.2459, "eval_samples_per_second": 899.904, "eval_steps_per_second": 56.244, "step": 400000 }, { "epoch": 1.72, "eval_loss": 2.6518216133117676, "eval_runtime": 222.7257, "eval_samples_per_second": 897.966, "eval_steps_per_second": 56.123, "step": 408000 }, { "epoch": 1.75, "learning_rate": 8.271491703493705e-06, "loss": 2.842, "step": 416000 }, { "epoch": 1.75, "eval_loss": 2.6465137004852295, "eval_runtime": 221.6934, "eval_samples_per_second": 902.147, "eval_steps_per_second": 56.384, "step": 416000 }, { "epoch": 1.79, "eval_loss": 2.6434342861175537, "eval_runtime": 222.3028, "eval_samples_per_second": 899.674, "eval_steps_per_second": 56.23, "step": 424000 }, { "epoch": 1.82, "learning_rate": 8.204786125239725e-06, "loss": 2.8431, "step": 432000 }, { "epoch": 1.82, "eval_loss": 2.641423225402832, "eval_runtime": 222.8849, "eval_samples_per_second": 897.324, "eval_steps_per_second": 56.083, "step": 432000 }, { "epoch": 1.85, "eval_loss": 2.6531593799591064, "eval_runtime": 223.101, "eval_samples_per_second": 896.455, "eval_steps_per_second": 56.028, "step": 440000 }, { "epoch": 1.89, "learning_rate": 8.138080546985743e-06, "loss": 2.8599, "step": 448000 }, { "epoch": 1.89, "eval_loss": 2.6645281314849854, "eval_runtime": 222.7835, "eval_samples_per_second": 897.732, "eval_steps_per_second": 56.108, "step": 448000 }, { "epoch": 1.92, "eval_loss": 2.6651265621185303, "eval_runtime": 222.493, "eval_samples_per_second": 898.905, "eval_steps_per_second": 56.182, "step": 456000 }, { "epoch": 1.95, "learning_rate": 8.07137496873176e-06, "loss": 2.8567, "step": 464000 }, { "epoch": 1.95, "eval_loss": 2.6693992614746094, "eval_runtime": 221.9941, "eval_samples_per_second": 900.925, "eval_steps_per_second": 56.308, "step": 464000 }, { "epoch": 1.99, "eval_loss": 2.66097354888916, "eval_runtime": 222.2278, "eval_samples_per_second": 899.977, "eval_steps_per_second": 56.249, "step": 472000 }, { "epoch": 2.02, "learning_rate": 8.004669390477779e-06, "loss": 2.8682, "step": 480000 }, { "epoch": 2.02, "eval_loss": 2.687664747238159, "eval_runtime": 222.351, "eval_samples_per_second": 899.479, "eval_steps_per_second": 56.217, "step": 480000 }, { "epoch": 2.05, "eval_loss": 2.6723899841308594, "eval_runtime": 224.037, "eval_samples_per_second": 892.71, "eval_steps_per_second": 55.794, "step": 488000 }, { "epoch": 2.09, "learning_rate": 7.937963812223798e-06, "loss": 2.8693, "step": 496000 }, { "epoch": 2.09, "eval_loss": 2.683910608291626, "eval_runtime": 223.5884, "eval_samples_per_second": 894.501, "eval_steps_per_second": 55.906, "step": 496000 }, { "epoch": 2.12, "eval_loss": 2.692282199859619, "eval_runtime": 222.8054, "eval_samples_per_second": 897.644, "eval_steps_per_second": 56.103, "step": 504000 }, { "epoch": 2.16, "learning_rate": 7.871258233969816e-06, "loss": 2.8881, "step": 512000 }, { "epoch": 2.16, "eval_loss": 2.696408987045288, "eval_runtime": 223.0143, "eval_samples_per_second": 896.803, "eval_steps_per_second": 56.05, "step": 512000 }, { "epoch": 2.19, "eval_loss": 2.698155403137207, "eval_runtime": 223.8418, "eval_samples_per_second": 893.488, "eval_steps_per_second": 55.843, "step": 520000 }, { "epoch": 2.22, "learning_rate": 7.804552655715835e-06, "loss": 2.8874, "step": 528000 }, { "epoch": 2.22, "eval_loss": 2.6960911750793457, "eval_runtime": 224.8442, "eval_samples_per_second": 889.505, "eval_steps_per_second": 55.594, "step": 528000 }, { "epoch": 2.26, "eval_loss": 2.6883530616760254, "eval_runtime": 223.4198, "eval_samples_per_second": 895.176, "eval_steps_per_second": 55.948, "step": 536000 }, { "epoch": 2.29, "learning_rate": 7.737847077461853e-06, "loss": 2.8899, "step": 544000 }, { "epoch": 2.29, "eval_loss": 2.7055277824401855, "eval_runtime": 222.7527, "eval_samples_per_second": 897.857, "eval_steps_per_second": 56.116, "step": 544000 }, { "epoch": 2.32, "eval_loss": 2.6987791061401367, "eval_runtime": 226.517, "eval_samples_per_second": 882.936, "eval_steps_per_second": 55.183, "step": 552000 }, { "epoch": 2.36, "learning_rate": 7.671141499207872e-06, "loss": 2.8966, "step": 560000 }, { "epoch": 2.36, "eval_loss": 2.7103066444396973, "eval_runtime": 226.9023, "eval_samples_per_second": 881.437, "eval_steps_per_second": 55.09, "step": 560000 }, { "epoch": 2.39, "eval_loss": 2.709984302520752, "eval_runtime": 226.5608, "eval_samples_per_second": 882.765, "eval_steps_per_second": 55.173, "step": 568000 }, { "epoch": 2.43, "learning_rate": 7.604435920953891e-06, "loss": 2.9, "step": 576000 }, { "epoch": 2.43, "eval_loss": 2.716878652572632, "eval_runtime": 227.2343, "eval_samples_per_second": 880.149, "eval_steps_per_second": 55.009, "step": 576000 }, { "epoch": 2.46, "eval_loss": 2.718041181564331, "eval_runtime": 224.0002, "eval_samples_per_second": 892.856, "eval_steps_per_second": 55.804, "step": 584000 }, { "epoch": 2.49, "learning_rate": 7.537730342699909e-06, "loss": 2.9237, "step": 592000 }, { "epoch": 2.49, "eval_loss": 2.7270028591156006, "eval_runtime": 223.2886, "eval_samples_per_second": 895.702, "eval_steps_per_second": 55.981, "step": 592000 }, { "epoch": 2.53, "eval_loss": 2.726536989212036, "eval_runtime": 222.779, "eval_samples_per_second": 897.751, "eval_steps_per_second": 56.109, "step": 600000 }, { "epoch": 2.56, "learning_rate": 7.471024764445928e-06, "loss": 2.9236, "step": 608000 }, { "epoch": 2.56, "eval_loss": 2.732328176498413, "eval_runtime": 223.8713, "eval_samples_per_second": 893.37, "eval_steps_per_second": 55.836, "step": 608000 }, { "epoch": 2.59, "eval_loss": 2.73500394821167, "eval_runtime": 225.493, "eval_samples_per_second": 886.945, "eval_steps_per_second": 55.434, "step": 616000 }, { "epoch": 2.63, "learning_rate": 7.4043191861919465e-06, "loss": 2.9276, "step": 624000 }, { "epoch": 2.63, "eval_loss": 2.7333498001098633, "eval_runtime": 224.8806, "eval_samples_per_second": 889.361, "eval_steps_per_second": 55.585, "step": 624000 }, { "epoch": 2.66, "eval_loss": 2.734511613845825, "eval_runtime": 225.2251, "eval_samples_per_second": 888.001, "eval_steps_per_second": 55.5, "step": 632000 }, { "epoch": 2.69, "learning_rate": 7.337613607937964e-06, "loss": 2.9252, "step": 640000 }, { "epoch": 2.69, "eval_loss": 2.749704360961914, "eval_runtime": 225.1054, "eval_samples_per_second": 888.473, "eval_steps_per_second": 55.53, "step": 640000 }, { "epoch": 2.73, "eval_loss": 2.74284029006958, "eval_runtime": 224.0229, "eval_samples_per_second": 892.766, "eval_steps_per_second": 55.798, "step": 648000 }, { "epoch": 2.76, "learning_rate": 7.270908029683983e-06, "loss": 2.9364, "step": 656000 }, { "epoch": 2.76, "eval_loss": 2.7391881942749023, "eval_runtime": 224.6028, "eval_samples_per_second": 890.461, "eval_steps_per_second": 55.654, "step": 656000 }, { "epoch": 2.8, "eval_loss": 2.750549077987671, "eval_runtime": 223.6418, "eval_samples_per_second": 894.287, "eval_steps_per_second": 55.893, "step": 664000 }, { "epoch": 2.83, "learning_rate": 7.2042024514300015e-06, "loss": 2.9366, "step": 672000 }, { "epoch": 2.83, "eval_loss": 2.7392961978912354, "eval_runtime": 223.5241, "eval_samples_per_second": 894.758, "eval_steps_per_second": 55.922, "step": 672000 }, { "epoch": 2.86, "eval_loss": 2.7371537685394287, "eval_runtime": 223.9923, "eval_samples_per_second": 892.888, "eval_steps_per_second": 55.805, "step": 680000 }, { "epoch": 2.9, "learning_rate": 7.13749687317602e-06, "loss": 2.9437, "step": 688000 }, { "epoch": 2.9, "eval_loss": 2.7450687885284424, "eval_runtime": 223.0769, "eval_samples_per_second": 896.552, "eval_steps_per_second": 56.034, "step": 688000 }, { "epoch": 2.93, "eval_loss": 2.748831033706665, "eval_runtime": 222.9228, "eval_samples_per_second": 897.172, "eval_steps_per_second": 56.073, "step": 696000 }, { "epoch": 2.96, "learning_rate": 7.070791294922038e-06, "loss": 2.9483, "step": 704000 }, { "epoch": 2.96, "eval_loss": 2.7586183547973633, "eval_runtime": 223.3142, "eval_samples_per_second": 895.599, "eval_steps_per_second": 55.975, "step": 704000 }, { "epoch": 3.0, "eval_loss": 2.7612552642822266, "eval_runtime": 222.226, "eval_samples_per_second": 899.985, "eval_steps_per_second": 56.249, "step": 712000 }, { "epoch": 3.03, "learning_rate": 7.0040857166680564e-06, "loss": 2.9588, "step": 720000 }, { "epoch": 3.03, "eval_loss": 2.76190447807312, "eval_runtime": 222.4583, "eval_samples_per_second": 899.045, "eval_steps_per_second": 56.19, "step": 720000 }, { "epoch": 3.07, "eval_loss": 2.7680461406707764, "eval_runtime": 221.9857, "eval_samples_per_second": 900.959, "eval_steps_per_second": 56.31, "step": 728000 }, { "epoch": 3.1, "learning_rate": 6.937380138414076e-06, "loss": 2.9422, "step": 736000 }, { "epoch": 3.1, "eval_loss": 2.754580020904541, "eval_runtime": 221.9355, "eval_samples_per_second": 901.163, "eval_steps_per_second": 56.323, "step": 736000 }, { "epoch": 3.13, "eval_loss": 2.762883424758911, "eval_runtime": 221.6295, "eval_samples_per_second": 902.407, "eval_steps_per_second": 56.4, "step": 744000 }, { "epoch": 3.17, "learning_rate": 6.8706745601600945e-06, "loss": 2.965, "step": 752000 }, { "epoch": 3.17, "eval_loss": 2.759537696838379, "eval_runtime": 221.2415, "eval_samples_per_second": 903.99, "eval_steps_per_second": 56.499, "step": 752000 }, { "epoch": 3.2, "eval_loss": 2.776278018951416, "eval_runtime": 221.0108, "eval_samples_per_second": 904.933, "eval_steps_per_second": 56.558, "step": 760000 }, { "epoch": 3.23, "learning_rate": 6.803968981906113e-06, "loss": 2.959, "step": 768000 }, { "epoch": 3.23, "eval_loss": 2.7738993167877197, "eval_runtime": 221.3449, "eval_samples_per_second": 903.567, "eval_steps_per_second": 56.473, "step": 768000 }, { "epoch": 3.27, "eval_loss": 2.7838892936706543, "eval_runtime": 223.8916, "eval_samples_per_second": 893.29, "eval_steps_per_second": 55.831, "step": 776000 }, { "epoch": 3.3, "learning_rate": 6.737263403652131e-06, "loss": 2.9604, "step": 784000 }, { "epoch": 3.3, "eval_loss": 2.7680771350860596, "eval_runtime": 223.8457, "eval_samples_per_second": 893.473, "eval_steps_per_second": 55.842, "step": 784000 }, { "epoch": 3.33, "eval_loss": 2.7816002368927, "eval_runtime": 224.025, "eval_samples_per_second": 892.757, "eval_steps_per_second": 55.797, "step": 792000 }, { "epoch": 3.37, "learning_rate": 6.6705578253981495e-06, "loss": 2.9638, "step": 800000 }, { "epoch": 3.37, "eval_loss": 2.7812399864196777, "eval_runtime": 224.4231, "eval_samples_per_second": 891.174, "eval_steps_per_second": 55.698, "step": 800000 }, { "epoch": 3.4, "eval_loss": 2.7845778465270996, "eval_runtime": 223.1998, "eval_samples_per_second": 896.058, "eval_steps_per_second": 56.004, "step": 808000 }, { "epoch": 3.44, "learning_rate": 6.603852247144168e-06, "loss": 2.9704, "step": 816000 }, { "epoch": 3.44, "eval_loss": 2.7766318321228027, "eval_runtime": 222.3046, "eval_samples_per_second": 899.667, "eval_steps_per_second": 56.229, "step": 816000 }, { "epoch": 3.47, "eval_loss": 2.786909580230713, "eval_runtime": 221.8638, "eval_samples_per_second": 901.454, "eval_steps_per_second": 56.341, "step": 824000 }, { "epoch": 3.5, "learning_rate": 6.537146668890187e-06, "loss": 2.9684, "step": 832000 }, { "epoch": 3.5, "eval_loss": 2.7741353511810303, "eval_runtime": 222.0395, "eval_samples_per_second": 900.741, "eval_steps_per_second": 56.296, "step": 832000 }, { "epoch": 3.54, "eval_loss": 2.773477077484131, "eval_runtime": 225.8502, "eval_samples_per_second": 885.543, "eval_steps_per_second": 55.346, "step": 840000 }, { "epoch": 3.57, "learning_rate": 6.4704410906362044e-06, "loss": 2.9723, "step": 848000 }, { "epoch": 3.57, "eval_loss": 2.7700908184051514, "eval_runtime": 226.2294, "eval_samples_per_second": 884.058, "eval_steps_per_second": 55.254, "step": 848000 }, { "epoch": 3.6, "eval_loss": 2.7779886722564697, "eval_runtime": 224.2673, "eval_samples_per_second": 891.793, "eval_steps_per_second": 55.737, "step": 856000 }, { "epoch": 3.64, "learning_rate": 6.403735512382223e-06, "loss": 2.9734, "step": 864000 }, { "epoch": 3.64, "eval_loss": 2.7833447456359863, "eval_runtime": 223.9605, "eval_samples_per_second": 893.014, "eval_steps_per_second": 55.813, "step": 864000 }, { "epoch": 3.67, "eval_loss": 2.790961503982544, "eval_runtime": 223.0622, "eval_samples_per_second": 896.611, "eval_steps_per_second": 56.038, "step": 872000 }, { "epoch": 3.71, "learning_rate": 6.337029934128242e-06, "loss": 2.9806, "step": 880000 }, { "epoch": 3.71, "eval_loss": 2.794116258621216, "eval_runtime": 222.8246, "eval_samples_per_second": 897.567, "eval_steps_per_second": 56.098, "step": 880000 }, { "epoch": 3.74, "eval_loss": 2.7997074127197266, "eval_runtime": 223.2842, "eval_samples_per_second": 895.719, "eval_steps_per_second": 55.982, "step": 888000 }, { "epoch": 3.77, "learning_rate": 6.270324355874261e-06, "loss": 2.9808, "step": 896000 }, { "epoch": 3.77, "eval_loss": 2.802687406539917, "eval_runtime": 223.8034, "eval_samples_per_second": 893.641, "eval_steps_per_second": 55.853, "step": 896000 }, { "epoch": 3.81, "eval_loss": 2.797201156616211, "eval_runtime": 221.8286, "eval_samples_per_second": 901.597, "eval_steps_per_second": 56.35, "step": 904000 }, { "epoch": 3.84, "learning_rate": 6.20361877762028e-06, "loss": 3.0008, "step": 912000 }, { "epoch": 3.84, "eval_loss": 2.8025898933410645, "eval_runtime": 222.2117, "eval_samples_per_second": 900.042, "eval_steps_per_second": 56.253, "step": 912000 }, { "epoch": 3.87, "eval_loss": 2.7974584102630615, "eval_runtime": 222.1337, "eval_samples_per_second": 900.358, "eval_steps_per_second": 56.272, "step": 920000 }, { "epoch": 3.91, "learning_rate": 6.1369131993662975e-06, "loss": 2.9934, "step": 928000 }, { "epoch": 3.91, "eval_loss": 2.797086000442505, "eval_runtime": 221.5435, "eval_samples_per_second": 902.757, "eval_steps_per_second": 56.422, "step": 928000 }, { "epoch": 3.94, "eval_loss": 2.8030388355255127, "eval_runtime": 226.5332, "eval_samples_per_second": 882.873, "eval_steps_per_second": 55.18, "step": 936000 }, { "epoch": 3.97, "learning_rate": 6.070207621112316e-06, "loss": 2.9927, "step": 944000 }, { "epoch": 3.97, "eval_loss": 2.8082187175750732, "eval_runtime": 224.5948, "eval_samples_per_second": 890.492, "eval_steps_per_second": 55.656, "step": 944000 }, { "epoch": 4.01, "eval_loss": 2.820798397064209, "eval_runtime": 224.7429, "eval_samples_per_second": 889.906, "eval_steps_per_second": 55.619, "step": 952000 }, { "epoch": 4.04, "learning_rate": 6.003502042858335e-06, "loss": 3.0013, "step": 960000 }, { "epoch": 4.04, "eval_loss": 2.8129076957702637, "eval_runtime": 224.0828, "eval_samples_per_second": 892.527, "eval_steps_per_second": 55.783, "step": 960000 }, { "epoch": 4.08, "eval_loss": 2.823551893234253, "eval_runtime": 222.6379, "eval_samples_per_second": 898.32, "eval_steps_per_second": 56.145, "step": 968000 }, { "epoch": 4.11, "learning_rate": 5.936796464604353e-06, "loss": 2.9996, "step": 976000 }, { "epoch": 4.11, "eval_loss": 2.8225581645965576, "eval_runtime": 223.2923, "eval_samples_per_second": 895.687, "eval_steps_per_second": 55.98, "step": 976000 }, { "epoch": 4.14, "eval_loss": 2.827303409576416, "eval_runtime": 223.5156, "eval_samples_per_second": 894.792, "eval_steps_per_second": 55.925, "step": 984000 }, { "epoch": 4.18, "learning_rate": 5.870090886350371e-06, "loss": 3.0125, "step": 992000 }, { "epoch": 4.18, "eval_loss": 2.8161449432373047, "eval_runtime": 222.4898, "eval_samples_per_second": 898.917, "eval_steps_per_second": 56.182, "step": 992000 }, { "epoch": 4.21, "eval_loss": 2.8249175548553467, "eval_runtime": 224.1746, "eval_samples_per_second": 892.162, "eval_steps_per_second": 55.76, "step": 1000000 }, { "epoch": 4.24, "learning_rate": 5.80338530809639e-06, "loss": 3.0086, "step": 1008000 }, { "epoch": 4.24, "eval_loss": 2.832012414932251, "eval_runtime": 224.9255, "eval_samples_per_second": 889.184, "eval_steps_per_second": 55.574, "step": 1008000 }, { "epoch": 4.28, "eval_loss": 2.831321954727173, "eval_runtime": 225.1137, "eval_samples_per_second": 888.44, "eval_steps_per_second": 55.528, "step": 1016000 }, { "epoch": 4.31, "learning_rate": 5.736679729842408e-06, "loss": 3.0077, "step": 1024000 }, { "epoch": 4.31, "eval_loss": 2.83213210105896, "eval_runtime": 224.7924, "eval_samples_per_second": 889.71, "eval_steps_per_second": 55.607, "step": 1024000 }, { "epoch": 4.35, "eval_loss": 2.833178758621216, "eval_runtime": 225.4632, "eval_samples_per_second": 887.063, "eval_steps_per_second": 55.441, "step": 1032000 }, { "epoch": 4.38, "learning_rate": 5.669974151588427e-06, "loss": 3.0186, "step": 1040000 }, { "epoch": 4.38, "eval_loss": 2.8288471698760986, "eval_runtime": 225.9333, "eval_samples_per_second": 885.217, "eval_steps_per_second": 55.326, "step": 1040000 }, { "epoch": 4.41, "eval_loss": 2.839233160018921, "eval_runtime": 225.2383, "eval_samples_per_second": 887.949, "eval_steps_per_second": 55.497, "step": 1048000 }, { "epoch": 4.45, "learning_rate": 5.603268573334446e-06, "loss": 3.0311, "step": 1056000 }, { "epoch": 4.45, "eval_loss": 2.824310302734375, "eval_runtime": 223.8873, "eval_samples_per_second": 893.307, "eval_steps_per_second": 55.832, "step": 1056000 }, { "epoch": 4.48, "eval_loss": 2.852445602416992, "eval_runtime": 226.2506, "eval_samples_per_second": 883.976, "eval_steps_per_second": 55.248, "step": 1064000 }, { "epoch": 4.51, "learning_rate": 5.536562995080464e-06, "loss": 3.0199, "step": 1072000 }, { "epoch": 4.51, "eval_loss": 2.834698438644409, "eval_runtime": 224.6576, "eval_samples_per_second": 890.244, "eval_steps_per_second": 55.64, "step": 1072000 }, { "epoch": 4.55, "eval_loss": 2.8437862396240234, "eval_runtime": 224.6897, "eval_samples_per_second": 890.116, "eval_steps_per_second": 55.632, "step": 1080000 }, { "epoch": 4.58, "learning_rate": 5.469857416826483e-06, "loss": 3.0198, "step": 1088000 }, { "epoch": 4.58, "eval_loss": 2.8415181636810303, "eval_runtime": 223.938, "eval_samples_per_second": 893.104, "eval_steps_per_second": 55.819, "step": 1088000 }, { "epoch": 4.61, "eval_loss": 2.84600567817688, "eval_runtime": 222.512, "eval_samples_per_second": 898.828, "eval_steps_per_second": 56.177, "step": 1096000 }, { "epoch": 4.65, "learning_rate": 5.403151838572501e-06, "loss": 3.0279, "step": 1104000 }, { "epoch": 4.65, "eval_loss": 2.855103015899658, "eval_runtime": 224.3844, "eval_samples_per_second": 891.328, "eval_steps_per_second": 55.708, "step": 1104000 }, { "epoch": 4.68, "eval_loss": 2.8528149127960205, "eval_runtime": 222.2925, "eval_samples_per_second": 899.715, "eval_steps_per_second": 56.232, "step": 1112000 }, { "epoch": 4.72, "learning_rate": 5.33644626031852e-06, "loss": 3.0319, "step": 1120000 }, { "epoch": 4.72, "eval_loss": 2.8601133823394775, "eval_runtime": 225.9192, "eval_samples_per_second": 885.272, "eval_steps_per_second": 55.33, "step": 1120000 }, { "epoch": 4.75, "eval_loss": 2.8543853759765625, "eval_runtime": 228.4752, "eval_samples_per_second": 875.369, "eval_steps_per_second": 54.711, "step": 1128000 }, { "epoch": 4.78, "learning_rate": 5.269740682064538e-06, "loss": 3.0371, "step": 1136000 }, { "epoch": 4.78, "eval_loss": 2.855318069458008, "eval_runtime": 229.1947, "eval_samples_per_second": 872.621, "eval_steps_per_second": 54.539, "step": 1136000 }, { "epoch": 4.82, "eval_loss": 2.8596949577331543, "eval_runtime": 228.9063, "eval_samples_per_second": 873.72, "eval_steps_per_second": 54.607, "step": 1144000 }, { "epoch": 4.85, "learning_rate": 5.203035103810556e-06, "loss": 3.038, "step": 1152000 }, { "epoch": 4.85, "eval_loss": 2.865326166152954, "eval_runtime": 228.6229, "eval_samples_per_second": 874.803, "eval_steps_per_second": 54.675, "step": 1152000 }, { "epoch": 4.88, "eval_loss": 2.856044292449951, "eval_runtime": 224.6889, "eval_samples_per_second": 890.12, "eval_steps_per_second": 55.632, "step": 1160000 }, { "epoch": 4.92, "learning_rate": 5.136329525556575e-06, "loss": 3.0318, "step": 1168000 }, { "epoch": 4.92, "eval_loss": 2.860161542892456, "eval_runtime": 223.8596, "eval_samples_per_second": 893.417, "eval_steps_per_second": 55.839, "step": 1168000 }, { "epoch": 4.95, "eval_loss": 2.8483996391296387, "eval_runtime": 223.3074, "eval_samples_per_second": 895.627, "eval_steps_per_second": 55.977, "step": 1176000 }, { "epoch": 4.99, "learning_rate": 5.0696239473025935e-06, "loss": 3.0449, "step": 1184000 }, { "epoch": 4.99, "eval_loss": 2.861185073852539, "eval_runtime": 223.8763, "eval_samples_per_second": 893.35, "eval_steps_per_second": 55.834, "step": 1184000 }, { "epoch": 5.02, "eval_loss": 2.8597800731658936, "eval_runtime": 223.5703, "eval_samples_per_second": 894.573, "eval_steps_per_second": 55.911, "step": 1192000 }, { "epoch": 5.05, "learning_rate": 5.002918369048611e-06, "loss": 3.0384, "step": 1200000 }, { "epoch": 5.05, "eval_loss": 2.8580985069274902, "eval_runtime": 223.5118, "eval_samples_per_second": 894.807, "eval_steps_per_second": 55.925, "step": 1200000 }, { "epoch": 5.09, "eval_loss": 2.8481242656707764, "eval_runtime": 222.9723, "eval_samples_per_second": 896.972, "eval_steps_per_second": 56.061, "step": 1208000 }, { "epoch": 5.12, "learning_rate": 4.936212790794631e-06, "loss": 3.0243, "step": 1216000 }, { "epoch": 5.12, "eval_loss": 2.845810651779175, "eval_runtime": 223.4767, "eval_samples_per_second": 894.948, "eval_steps_per_second": 55.934, "step": 1216000 }, { "epoch": 5.15, "eval_loss": 2.849405527114868, "eval_runtime": 224.1558, "eval_samples_per_second": 892.237, "eval_steps_per_second": 55.765, "step": 1224000 }, { "epoch": 5.19, "learning_rate": 4.869507212540649e-06, "loss": 3.0345, "step": 1232000 }, { "epoch": 5.19, "eval_loss": 2.854433536529541, "eval_runtime": 223.6399, "eval_samples_per_second": 894.295, "eval_steps_per_second": 55.893, "step": 1232000 }, { "epoch": 5.22, "eval_loss": 2.8487536907196045, "eval_runtime": 223.5008, "eval_samples_per_second": 894.851, "eval_steps_per_second": 55.928, "step": 1240000 }, { "epoch": 5.25, "learning_rate": 4.802801634286667e-06, "loss": 3.0251, "step": 1248000 }, { "epoch": 5.25, "eval_loss": 2.845292568206787, "eval_runtime": 224.0031, "eval_samples_per_second": 892.845, "eval_steps_per_second": 55.803, "step": 1248000 }, { "epoch": 5.29, "eval_loss": 2.8464181423187256, "eval_runtime": 225.2034, "eval_samples_per_second": 888.086, "eval_steps_per_second": 55.505, "step": 1256000 }, { "epoch": 5.32, "learning_rate": 4.7360960560326865e-06, "loss": 3.0234, "step": 1264000 }, { "epoch": 5.32, "eval_loss": 2.848585367202759, "eval_runtime": 223.6745, "eval_samples_per_second": 894.156, "eval_steps_per_second": 55.885, "step": 1264000 }, { "epoch": 5.36, "eval_loss": 2.8435869216918945, "eval_runtime": 223.3913, "eval_samples_per_second": 895.29, "eval_steps_per_second": 55.956, "step": 1272000 }, { "epoch": 5.39, "learning_rate": 4.669390477778704e-06, "loss": 3.0205, "step": 1280000 }, { "epoch": 5.39, "eval_loss": 2.8476340770721436, "eval_runtime": 223.5929, "eval_samples_per_second": 894.483, "eval_steps_per_second": 55.905, "step": 1280000 }, { "epoch": 5.42, "eval_loss": 2.8326635360717773, "eval_runtime": 224.8548, "eval_samples_per_second": 889.463, "eval_steps_per_second": 55.591, "step": 1288000 }, { "epoch": 5.46, "learning_rate": 4.602684899524723e-06, "loss": 3.0228, "step": 1296000 }, { "epoch": 5.46, "eval_loss": 2.8452436923980713, "eval_runtime": 223.4053, "eval_samples_per_second": 895.234, "eval_steps_per_second": 55.952, "step": 1296000 }, { "epoch": 5.49, "eval_loss": 2.837240695953369, "eval_runtime": 225.2471, "eval_samples_per_second": 887.914, "eval_steps_per_second": 55.495, "step": 1304000 }, { "epoch": 5.52, "learning_rate": 4.5359793212707415e-06, "loss": 3.0063, "step": 1312000 }, { "epoch": 5.52, "eval_loss": 2.830629348754883, "eval_runtime": 224.2293, "eval_samples_per_second": 891.944, "eval_steps_per_second": 55.746, "step": 1312000 }, { "epoch": 5.56, "eval_loss": 2.841139078140259, "eval_runtime": 226.2486, "eval_samples_per_second": 883.983, "eval_steps_per_second": 55.249, "step": 1320000 }, { "epoch": 5.59, "learning_rate": 4.46927374301676e-06, "loss": 3.0068, "step": 1328000 }, { "epoch": 5.59, "eval_loss": 2.827270030975342, "eval_runtime": 226.3948, "eval_samples_per_second": 883.413, "eval_steps_per_second": 55.213, "step": 1328000 }, { "epoch": 5.63, "eval_loss": 2.834273338317871, "eval_runtime": 226.3338, "eval_samples_per_second": 883.651, "eval_steps_per_second": 55.228, "step": 1336000 }, { "epoch": 5.66, "learning_rate": 4.402568164762779e-06, "loss": 3.0109, "step": 1344000 }, { "epoch": 5.66, "eval_loss": 2.8328187465667725, "eval_runtime": 225.4917, "eval_samples_per_second": 886.95, "eval_steps_per_second": 55.434, "step": 1344000 }, { "epoch": 5.69, "eval_loss": 2.843144655227661, "eval_runtime": 224.1529, "eval_samples_per_second": 892.248, "eval_steps_per_second": 55.766, "step": 1352000 }, { "epoch": 5.73, "learning_rate": 4.335862586508797e-06, "loss": 3.0068, "step": 1360000 }, { "epoch": 5.73, "eval_loss": 2.8331680297851562, "eval_runtime": 224.2839, "eval_samples_per_second": 891.727, "eval_steps_per_second": 55.733, "step": 1360000 }, { "epoch": 5.76, "eval_loss": 2.827512502670288, "eval_runtime": 223.7519, "eval_samples_per_second": 893.847, "eval_steps_per_second": 55.865, "step": 1368000 }, { "epoch": 5.79, "learning_rate": 4.269157008254816e-06, "loss": 3.002, "step": 1376000 }, { "epoch": 5.79, "eval_loss": 2.8313817977905273, "eval_runtime": 224.094, "eval_samples_per_second": 892.483, "eval_steps_per_second": 55.78, "step": 1376000 }, { "epoch": 5.83, "eval_loss": 2.8324134349823, "eval_runtime": 226.0373, "eval_samples_per_second": 884.81, "eval_steps_per_second": 55.301, "step": 1384000 }, { "epoch": 5.86, "learning_rate": 4.202451430000834e-06, "loss": 3.0037, "step": 1392000 }, { "epoch": 5.86, "eval_loss": 2.839409351348877, "eval_runtime": 223.5509, "eval_samples_per_second": 894.651, "eval_steps_per_second": 55.916, "step": 1392000 }, { "epoch": 5.89, "eval_loss": 2.8337831497192383, "eval_runtime": 223.4898, "eval_samples_per_second": 894.895, "eval_steps_per_second": 55.931, "step": 1400000 }, { "epoch": 5.93, "learning_rate": 4.135745851746852e-06, "loss": 3.0086, "step": 1408000 }, { "epoch": 5.93, "eval_loss": 2.8447976112365723, "eval_runtime": 223.3032, "eval_samples_per_second": 895.643, "eval_steps_per_second": 55.978, "step": 1408000 }, { "epoch": 5.96, "eval_loss": 2.8326330184936523, "eval_runtime": 225.1553, "eval_samples_per_second": 888.276, "eval_steps_per_second": 55.517, "step": 1416000 }, { "epoch": 6.0, "learning_rate": 4.069040273492872e-06, "loss": 2.9977, "step": 1424000 }, { "epoch": 6.0, "eval_loss": 2.8310978412628174, "eval_runtime": 224.0959, "eval_samples_per_second": 892.475, "eval_steps_per_second": 55.78, "step": 1424000 }, { "epoch": 6.03, "eval_loss": 2.8410427570343018, "eval_runtime": 223.4994, "eval_samples_per_second": 894.857, "eval_steps_per_second": 55.929, "step": 1432000 }, { "epoch": 6.06, "learning_rate": 4.0023346952388895e-06, "loss": 2.9984, "step": 1440000 }, { "epoch": 6.06, "eval_loss": 2.8358559608459473, "eval_runtime": 223.5492, "eval_samples_per_second": 894.658, "eval_steps_per_second": 55.916, "step": 1440000 }, { "epoch": 6.1, "eval_loss": 2.839256763458252, "eval_runtime": 228.1513, "eval_samples_per_second": 876.611, "eval_steps_per_second": 54.788, "step": 1448000 }, { "epoch": 6.13, "learning_rate": 3.935629116984908e-06, "loss": 3.0095, "step": 1456000 }, { "epoch": 6.13, "eval_loss": 2.838825225830078, "eval_runtime": 226.4727, "eval_samples_per_second": 883.109, "eval_steps_per_second": 55.194, "step": 1456000 }, { "epoch": 6.16, "eval_loss": 2.844802141189575, "eval_runtime": 226.1587, "eval_samples_per_second": 884.335, "eval_steps_per_second": 55.271, "step": 1464000 }, { "epoch": 6.2, "learning_rate": 3.868923538730927e-06, "loss": 3.0051, "step": 1472000 }, { "epoch": 6.2, "eval_loss": 2.8472418785095215, "eval_runtime": 228.0091, "eval_samples_per_second": 877.158, "eval_steps_per_second": 54.822, "step": 1472000 }, { "epoch": 6.23, "eval_loss": 2.842092514038086, "eval_runtime": 224.1053, "eval_samples_per_second": 892.438, "eval_steps_per_second": 55.777, "step": 1480000 }, { "epoch": 6.27, "learning_rate": 3.8022179604769453e-06, "loss": 3.0142, "step": 1488000 }, { "epoch": 6.27, "eval_loss": 2.842365264892578, "eval_runtime": 223.8921, "eval_samples_per_second": 893.287, "eval_steps_per_second": 55.83, "step": 1488000 }, { "epoch": 6.3, "eval_loss": 2.847733974456787, "eval_runtime": 223.983, "eval_samples_per_second": 892.925, "eval_steps_per_second": 55.808, "step": 1496000 }, { "epoch": 6.33, "learning_rate": 3.735512382222964e-06, "loss": 3.0149, "step": 1504000 }, { "epoch": 6.33, "eval_loss": 2.842820405960083, "eval_runtime": 224.541, "eval_samples_per_second": 890.706, "eval_steps_per_second": 55.669, "step": 1504000 }, { "epoch": 6.37, "eval_loss": 2.8529434204101562, "eval_runtime": 229.172, "eval_samples_per_second": 872.707, "eval_steps_per_second": 54.544, "step": 1512000 }, { "epoch": 6.4, "learning_rate": 3.668806803968982e-06, "loss": 3.0147, "step": 1520000 }, { "epoch": 6.4, "eval_loss": 2.854137420654297, "eval_runtime": 228.0077, "eval_samples_per_second": 877.163, "eval_steps_per_second": 54.823, "step": 1520000 }, { "epoch": 6.43, "eval_loss": 2.8518521785736084, "eval_runtime": 227.8943, "eval_samples_per_second": 877.6, "eval_steps_per_second": 54.85, "step": 1528000 }, { "epoch": 6.47, "learning_rate": 3.6021012257150007e-06, "loss": 3.0205, "step": 1536000 }, { "epoch": 6.47, "eval_loss": 2.852667808532715, "eval_runtime": 227.5511, "eval_samples_per_second": 878.924, "eval_steps_per_second": 54.933, "step": 1536000 }, { "epoch": 6.5, "eval_loss": 2.8470675945281982, "eval_runtime": 223.3626, "eval_samples_per_second": 895.405, "eval_steps_per_second": 55.963, "step": 1544000 }, { "epoch": 6.53, "learning_rate": 3.535395647461019e-06, "loss": 3.029, "step": 1552000 }, { "epoch": 6.53, "eval_loss": 2.8583133220672607, "eval_runtime": 224.8105, "eval_samples_per_second": 889.638, "eval_steps_per_second": 55.602, "step": 1552000 }, { "epoch": 6.57, "eval_loss": 2.84967303276062, "eval_runtime": 223.9635, "eval_samples_per_second": 893.002, "eval_steps_per_second": 55.813, "step": 1560000 }, { "epoch": 6.6, "learning_rate": 3.468690069207038e-06, "loss": 3.024, "step": 1568000 }, { "epoch": 6.6, "eval_loss": 2.865325689315796, "eval_runtime": 223.4985, "eval_samples_per_second": 894.86, "eval_steps_per_second": 55.929, "step": 1568000 }, { "epoch": 6.64, "eval_loss": 2.855334997177124, "eval_runtime": 225.852, "eval_samples_per_second": 885.536, "eval_steps_per_second": 55.346, "step": 1576000 }, { "epoch": 6.67, "learning_rate": 3.4019844909530565e-06, "loss": 3.0371, "step": 1584000 }, { "epoch": 6.67, "eval_loss": 2.865299940109253, "eval_runtime": 224.5284, "eval_samples_per_second": 890.756, "eval_steps_per_second": 55.672, "step": 1584000 }, { "epoch": 6.7, "eval_loss": 2.860386848449707, "eval_runtime": 223.6209, "eval_samples_per_second": 894.371, "eval_steps_per_second": 55.898, "step": 1592000 }, { "epoch": 6.74, "learning_rate": 3.3352789126990747e-06, "loss": 3.0319, "step": 1600000 }, { "epoch": 6.74, "eval_loss": 2.862384080886841, "eval_runtime": 223.9592, "eval_samples_per_second": 893.02, "eval_steps_per_second": 55.814, "step": 1600000 }, { "epoch": 6.77, "eval_loss": 2.8657121658325195, "eval_runtime": 226.5681, "eval_samples_per_second": 882.737, "eval_steps_per_second": 55.171, "step": 1608000 }, { "epoch": 6.8, "learning_rate": 3.2685733344450933e-06, "loss": 3.0369, "step": 1616000 }, { "epoch": 6.8, "eval_loss": 2.861598491668701, "eval_runtime": 224.0599, "eval_samples_per_second": 892.618, "eval_steps_per_second": 55.789, "step": 1616000 }, { "epoch": 6.84, "eval_loss": 2.8666746616363525, "eval_runtime": 224.0122, "eval_samples_per_second": 892.808, "eval_steps_per_second": 55.801, "step": 1624000 }, { "epoch": 6.87, "learning_rate": 3.2018677561911115e-06, "loss": 3.0357, "step": 1632000 }, { "epoch": 6.87, "eval_loss": 2.86602783203125, "eval_runtime": 223.9311, "eval_samples_per_second": 893.132, "eval_steps_per_second": 55.821, "step": 1632000 }, { "epoch": 6.91, "eval_loss": 2.868190050125122, "eval_runtime": 224.6108, "eval_samples_per_second": 890.429, "eval_steps_per_second": 55.652, "step": 1640000 }, { "epoch": 6.94, "learning_rate": 3.1351621779371306e-06, "loss": 3.0342, "step": 1648000 }, { "epoch": 6.94, "eval_loss": 2.867553472518921, "eval_runtime": 224.9283, "eval_samples_per_second": 889.172, "eval_steps_per_second": 55.573, "step": 1648000 }, { "epoch": 6.97, "eval_loss": 2.881544589996338, "eval_runtime": 225.5949, "eval_samples_per_second": 886.545, "eval_steps_per_second": 55.409, "step": 1656000 }, { "epoch": 7.01, "learning_rate": 3.0684565996831487e-06, "loss": 3.0375, "step": 1664000 }, { "epoch": 7.01, "eval_loss": 2.8667211532592773, "eval_runtime": 224.8671, "eval_samples_per_second": 889.414, "eval_steps_per_second": 55.588, "step": 1664000 }, { "epoch": 7.04, "eval_loss": 2.8734593391418457, "eval_runtime": 226.9159, "eval_samples_per_second": 881.384, "eval_steps_per_second": 55.086, "step": 1672000 }, { "epoch": 7.07, "learning_rate": 3.0017510214291673e-06, "loss": 3.0419, "step": 1680000 }, { "epoch": 7.07, "eval_loss": 2.8788318634033203, "eval_runtime": 226.3899, "eval_samples_per_second": 883.432, "eval_steps_per_second": 55.214, "step": 1680000 }, { "epoch": 7.11, "eval_loss": 2.8766632080078125, "eval_runtime": 225.7385, "eval_samples_per_second": 885.981, "eval_steps_per_second": 55.374, "step": 1688000 }, { "epoch": 7.14, "learning_rate": 2.9350454431751855e-06, "loss": 3.0403, "step": 1696000 }, { "epoch": 7.14, "eval_loss": 2.8811895847320557, "eval_runtime": 225.9242, "eval_samples_per_second": 885.253, "eval_steps_per_second": 55.328, "step": 1696000 }, { "epoch": 7.17, "eval_loss": 2.879542827606201, "eval_runtime": 225.7881, "eval_samples_per_second": 885.786, "eval_steps_per_second": 55.362, "step": 1704000 }, { "epoch": 7.21, "learning_rate": 2.868339864921204e-06, "loss": 3.0482, "step": 1712000 }, { "epoch": 7.21, "eval_loss": 2.88046932220459, "eval_runtime": 225.2755, "eval_samples_per_second": 887.802, "eval_steps_per_second": 55.488, "step": 1712000 }, { "epoch": 7.24, "eval_loss": 2.8794021606445312, "eval_runtime": 226.8559, "eval_samples_per_second": 881.617, "eval_steps_per_second": 55.101, "step": 1720000 }, { "epoch": 7.28, "learning_rate": 2.801634286667223e-06, "loss": 3.0533, "step": 1728000 }, { "epoch": 7.28, "eval_loss": 2.8787782192230225, "eval_runtime": 225.0025, "eval_samples_per_second": 888.879, "eval_steps_per_second": 55.555, "step": 1728000 }, { "epoch": 7.31, "eval_loss": 2.884382724761963, "eval_runtime": 225.9472, "eval_samples_per_second": 885.163, "eval_steps_per_second": 55.323, "step": 1736000 }, { "epoch": 7.34, "learning_rate": 2.7349287084132413e-06, "loss": 3.0453, "step": 1744000 }, { "epoch": 7.34, "eval_loss": 2.870943069458008, "eval_runtime": 225.043, "eval_samples_per_second": 888.719, "eval_steps_per_second": 55.545, "step": 1744000 }, { "epoch": 7.38, "eval_loss": 2.8835349082946777, "eval_runtime": 225.0959, "eval_samples_per_second": 888.51, "eval_steps_per_second": 55.532, "step": 1752000 }, { "epoch": 7.41, "learning_rate": 2.66822313015926e-06, "loss": 3.0562, "step": 1760000 }, { "epoch": 7.41, "eval_loss": 2.8891103267669678, "eval_runtime": 224.5495, "eval_samples_per_second": 890.672, "eval_steps_per_second": 55.667, "step": 1760000 }, { "epoch": 7.44, "eval_loss": 2.8902649879455566, "eval_runtime": 225.1215, "eval_samples_per_second": 888.409, "eval_steps_per_second": 55.526, "step": 1768000 }, { "epoch": 7.48, "learning_rate": 2.601517551905278e-06, "loss": 3.0617, "step": 1776000 }, { "epoch": 7.48, "eval_loss": 2.884901762008667, "eval_runtime": 225.0729, "eval_samples_per_second": 888.601, "eval_steps_per_second": 55.538, "step": 1776000 }, { "epoch": 7.51, "eval_loss": 2.8766396045684814, "eval_runtime": 224.6011, "eval_samples_per_second": 890.468, "eval_steps_per_second": 55.654, "step": 1784000 }, { "epoch": 7.55, "learning_rate": 2.5348119736512967e-06, "loss": 3.0539, "step": 1792000 }, { "epoch": 7.55, "eval_loss": 2.8871917724609375, "eval_runtime": 224.4825, "eval_samples_per_second": 890.938, "eval_steps_per_second": 55.684, "step": 1792000 }, { "epoch": 7.58, "eval_loss": 2.898136615753174, "eval_runtime": 225.2799, "eval_samples_per_second": 887.784, "eval_steps_per_second": 55.487, "step": 1800000 }, { "epoch": 7.61, "learning_rate": 2.4681063953973154e-06, "loss": 3.0561, "step": 1808000 }, { "epoch": 7.61, "eval_loss": 2.886209487915039, "eval_runtime": 226.1006, "eval_samples_per_second": 884.562, "eval_steps_per_second": 55.285, "step": 1808000 }, { "epoch": 7.65, "eval_loss": 2.8940441608428955, "eval_runtime": 227.2765, "eval_samples_per_second": 879.986, "eval_steps_per_second": 54.999, "step": 1816000 }, { "epoch": 7.68, "learning_rate": 2.4014008171433335e-06, "loss": 3.0529, "step": 1824000 }, { "epoch": 7.68, "eval_loss": 2.887427568435669, "eval_runtime": 225.5383, "eval_samples_per_second": 886.767, "eval_steps_per_second": 55.423, "step": 1824000 }, { "epoch": 7.71, "eval_loss": 2.883918046951294, "eval_runtime": 226.0624, "eval_samples_per_second": 884.711, "eval_steps_per_second": 55.294, "step": 1832000 }, { "epoch": 7.75, "learning_rate": 2.334695238889352e-06, "loss": 3.0484, "step": 1840000 }, { "epoch": 7.75, "eval_loss": 2.883819103240967, "eval_runtime": 225.1615, "eval_samples_per_second": 888.251, "eval_steps_per_second": 55.516, "step": 1840000 }, { "epoch": 7.78, "eval_loss": 2.8856074810028076, "eval_runtime": 226.1802, "eval_samples_per_second": 884.251, "eval_steps_per_second": 55.266, "step": 1848000 }, { "epoch": 7.81, "learning_rate": 2.2679896606353707e-06, "loss": 3.0562, "step": 1856000 }, { "epoch": 7.81, "eval_loss": 2.8983583450317383, "eval_runtime": 227.809, "eval_samples_per_second": 877.929, "eval_steps_per_second": 54.871, "step": 1856000 }, { "epoch": 7.85, "eval_loss": 2.884408473968506, "eval_runtime": 228.3309, "eval_samples_per_second": 875.922, "eval_steps_per_second": 54.745, "step": 1864000 }, { "epoch": 7.88, "learning_rate": 2.2012840823813894e-06, "loss": 3.0578, "step": 1872000 }, { "epoch": 7.88, "eval_loss": 2.8873543739318848, "eval_runtime": 226.4275, "eval_samples_per_second": 883.285, "eval_steps_per_second": 55.205, "step": 1872000 }, { "epoch": 7.92, "eval_loss": 2.8886616230010986, "eval_runtime": 226.5836, "eval_samples_per_second": 882.676, "eval_steps_per_second": 55.167, "step": 1880000 }, { "epoch": 7.95, "learning_rate": 2.134578504127408e-06, "loss": 3.0553, "step": 1888000 }, { "epoch": 7.95, "eval_loss": 2.879803419113159, "eval_runtime": 228.4861, "eval_samples_per_second": 875.327, "eval_steps_per_second": 54.708, "step": 1888000 }, { "epoch": 7.98, "eval_loss": 2.8788740634918213, "eval_runtime": 228.0679, "eval_samples_per_second": 876.932, "eval_steps_per_second": 54.808, "step": 1896000 }, { "epoch": 8.02, "learning_rate": 2.067872925873426e-06, "loss": 3.0623, "step": 1904000 }, { "epoch": 8.02, "eval_loss": 2.8968303203582764, "eval_runtime": 229.4287, "eval_samples_per_second": 871.731, "eval_steps_per_second": 54.483, "step": 1904000 }, { "epoch": 8.05, "eval_loss": 2.8834211826324463, "eval_runtime": 227.0353, "eval_samples_per_second": 880.92, "eval_steps_per_second": 55.058, "step": 1912000 }, { "epoch": 8.08, "learning_rate": 2.0011673476194448e-06, "loss": 3.0652, "step": 1920000 }, { "epoch": 8.08, "eval_loss": 2.8902077674865723, "eval_runtime": 227.3091, "eval_samples_per_second": 879.859, "eval_steps_per_second": 54.991, "step": 1920000 }, { "epoch": 8.12, "eval_loss": 2.8821847438812256, "eval_runtime": 226.7104, "eval_samples_per_second": 882.183, "eval_steps_per_second": 55.136, "step": 1928000 }, { "epoch": 8.15, "learning_rate": 1.9344617693654634e-06, "loss": 3.0487, "step": 1936000 }, { "epoch": 8.15, "eval_loss": 2.8844268321990967, "eval_runtime": 227.086, "eval_samples_per_second": 880.724, "eval_steps_per_second": 55.045, "step": 1936000 }, { "epoch": 8.19, "eval_loss": 2.890925407409668, "eval_runtime": 227.5282, "eval_samples_per_second": 879.012, "eval_steps_per_second": 54.938, "step": 1944000 }, { "epoch": 8.22, "learning_rate": 1.867756191111482e-06, "loss": 3.0546, "step": 1952000 }, { "epoch": 8.22, "eval_loss": 2.8915293216705322, "eval_runtime": 226.2178, "eval_samples_per_second": 884.104, "eval_steps_per_second": 55.256, "step": 1952000 }, { "epoch": 8.25, "eval_loss": 2.8869712352752686, "eval_runtime": 234.1736, "eval_samples_per_second": 854.067, "eval_steps_per_second": 53.379, "step": 1960000 }, { "epoch": 8.29, "learning_rate": 1.8010506128575004e-06, "loss": 3.0524, "step": 1968000 }, { "epoch": 8.29, "eval_loss": 2.882768154144287, "eval_runtime": 232.3633, "eval_samples_per_second": 860.721, "eval_steps_per_second": 53.795, "step": 1968000 }, { "epoch": 8.32, "eval_loss": 2.878105401992798, "eval_runtime": 232.7641, "eval_samples_per_second": 859.239, "eval_steps_per_second": 53.702, "step": 1976000 }, { "epoch": 8.35, "learning_rate": 1.734345034603519e-06, "loss": 3.0491, "step": 1984000 }, { "epoch": 8.35, "eval_loss": 2.894814968109131, "eval_runtime": 235.0584, "eval_samples_per_second": 850.852, "eval_steps_per_second": 53.178, "step": 1984000 }, { "epoch": 8.39, "eval_loss": 2.8903963565826416, "eval_runtime": 227.9139, "eval_samples_per_second": 877.524, "eval_steps_per_second": 54.845, "step": 1992000 }, { "epoch": 8.42, "learning_rate": 1.6676394563495374e-06, "loss": 3.0534, "step": 2000000 }, { "epoch": 8.42, "eval_loss": 2.8839056491851807, "eval_runtime": 229.0151, "eval_samples_per_second": 873.305, "eval_steps_per_second": 54.582, "step": 2000000 }, { "epoch": 8.45, "eval_loss": 2.891777276992798, "eval_runtime": 227.2746, "eval_samples_per_second": 879.993, "eval_steps_per_second": 55.0, "step": 2008000 }, { "epoch": 8.49, "learning_rate": 1.6009338780955558e-06, "loss": 3.0547, "step": 2016000 }, { "epoch": 8.49, "eval_loss": 2.8738794326782227, "eval_runtime": 227.7519, "eval_samples_per_second": 878.149, "eval_steps_per_second": 54.884, "step": 2016000 }, { "epoch": 8.52, "eval_loss": 2.868389129638672, "eval_runtime": 228.3511, "eval_samples_per_second": 875.844, "eval_steps_per_second": 54.74, "step": 2024000 }, { "epoch": 8.56, "learning_rate": 1.5342282998415744e-06, "loss": 3.0544, "step": 2032000 }, { "epoch": 8.56, "eval_loss": 2.8739755153656006, "eval_runtime": 229.9365, "eval_samples_per_second": 869.806, "eval_steps_per_second": 54.363, "step": 2032000 }, { "epoch": 8.59, "eval_loss": 2.8784215450286865, "eval_runtime": 228.6391, "eval_samples_per_second": 874.741, "eval_steps_per_second": 54.671, "step": 2040000 }, { "epoch": 8.62, "learning_rate": 1.4675227215875928e-06, "loss": 3.0448, "step": 2048000 }, { "epoch": 8.62, "eval_loss": 2.8758127689361572, "eval_runtime": 229.161, "eval_samples_per_second": 872.749, "eval_steps_per_second": 54.547, "step": 2048000 }, { "epoch": 8.66, "eval_loss": 2.880105972290039, "eval_runtime": 230.876, "eval_samples_per_second": 866.266, "eval_steps_per_second": 54.142, "step": 2056000 }, { "epoch": 8.69, "learning_rate": 1.4008171433336116e-06, "loss": 3.0499, "step": 2064000 }, { "epoch": 8.69, "eval_loss": 2.8793435096740723, "eval_runtime": 229.0938, "eval_samples_per_second": 873.005, "eval_steps_per_second": 54.563, "step": 2064000 }, { "epoch": 8.72, "eval_loss": 2.8707237243652344, "eval_runtime": 228.2778, "eval_samples_per_second": 876.126, "eval_steps_per_second": 54.758, "step": 2072000 }, { "epoch": 8.76, "learning_rate": 1.33411156507963e-06, "loss": 3.0368, "step": 2080000 }, { "epoch": 8.76, "eval_loss": 2.872204065322876, "eval_runtime": 229.9264, "eval_samples_per_second": 869.844, "eval_steps_per_second": 54.365, "step": 2080000 }, { "epoch": 8.79, "eval_loss": 2.875173807144165, "eval_runtime": 229.2278, "eval_samples_per_second": 872.495, "eval_steps_per_second": 54.531, "step": 2088000 }, { "epoch": 8.83, "learning_rate": 1.2674059868256484e-06, "loss": 3.0548, "step": 2096000 }, { "epoch": 8.83, "eval_loss": 2.8879776000976562, "eval_runtime": 228.5322, "eval_samples_per_second": 875.15, "eval_steps_per_second": 54.697, "step": 2096000 }, { "epoch": 8.86, "eval_loss": 2.87813663482666, "eval_runtime": 228.0244, "eval_samples_per_second": 877.099, "eval_steps_per_second": 54.819, "step": 2104000 }, { "epoch": 8.89, "learning_rate": 1.2007004085716668e-06, "loss": 3.0457, "step": 2112000 }, { "epoch": 8.89, "eval_loss": 2.882504463195801, "eval_runtime": 228.7296, "eval_samples_per_second": 874.395, "eval_steps_per_second": 54.65, "step": 2112000 }, { "epoch": 8.93, "eval_loss": 2.8827481269836426, "eval_runtime": 231.2951, "eval_samples_per_second": 864.696, "eval_steps_per_second": 54.044, "step": 2120000 }, { "epoch": 8.96, "learning_rate": 1.1339948303176854e-06, "loss": 3.0377, "step": 2128000 }, { "epoch": 8.96, "eval_loss": 2.880984306335449, "eval_runtime": 231.2589, "eval_samples_per_second": 864.832, "eval_steps_per_second": 54.052, "step": 2128000 }, { "epoch": 8.99, "eval_loss": 2.872668981552124, "eval_runtime": 231.1403, "eval_samples_per_second": 865.275, "eval_steps_per_second": 54.08, "step": 2136000 }, { "epoch": 9.03, "learning_rate": 1.067289252063704e-06, "loss": 3.0341, "step": 2144000 }, { "epoch": 9.03, "eval_loss": 2.8749947547912598, "eval_runtime": 232.6534, "eval_samples_per_second": 859.648, "eval_steps_per_second": 53.728, "step": 2144000 }, { "epoch": 9.06, "eval_loss": 2.8637659549713135, "eval_runtime": 232.1804, "eval_samples_per_second": 861.399, "eval_steps_per_second": 53.837, "step": 2152000 }, { "epoch": 9.09, "learning_rate": 1.0005836738097224e-06, "loss": 3.0275, "step": 2160000 }, { "epoch": 9.09, "eval_loss": 2.8689756393432617, "eval_runtime": 233.8191, "eval_samples_per_second": 855.362, "eval_steps_per_second": 53.46, "step": 2160000 }, { "epoch": 9.13, "eval_loss": 2.866030693054199, "eval_runtime": 231.4154, "eval_samples_per_second": 864.247, "eval_steps_per_second": 54.015, "step": 2168000 }, { "epoch": 9.16, "learning_rate": 9.33878095555741e-07, "loss": 3.0413, "step": 2176000 }, { "epoch": 9.16, "eval_loss": 2.8578262329101562, "eval_runtime": 233.839, "eval_samples_per_second": 855.289, "eval_steps_per_second": 53.456, "step": 2176000 }, { "epoch": 9.2, "eval_loss": 2.8692455291748047, "eval_runtime": 236.8158, "eval_samples_per_second": 844.538, "eval_steps_per_second": 52.784, "step": 2184000 }, { "epoch": 9.23, "learning_rate": 8.671725173017595e-07, "loss": 3.0272, "step": 2192000 }, { "epoch": 9.23, "eval_loss": 2.8701837062835693, "eval_runtime": 235.4116, "eval_samples_per_second": 849.576, "eval_steps_per_second": 53.098, "step": 2192000 }, { "epoch": 9.26, "eval_loss": 2.870734453201294, "eval_runtime": 236.6161, "eval_samples_per_second": 845.251, "eval_steps_per_second": 52.828, "step": 2200000 }, { "epoch": 9.3, "learning_rate": 8.004669390477779e-07, "loss": 3.034, "step": 2208000 }, { "epoch": 9.3, "eval_loss": 2.866581916809082, "eval_runtime": 233.8393, "eval_samples_per_second": 855.288, "eval_steps_per_second": 53.456, "step": 2208000 }, { "epoch": 9.33, "eval_loss": 2.873441219329834, "eval_runtime": 229.9559, "eval_samples_per_second": 869.732, "eval_steps_per_second": 54.358, "step": 2216000 }, { "epoch": 9.36, "learning_rate": 7.337613607937964e-07, "loss": 3.0346, "step": 2224000 }, { "epoch": 9.36, "eval_loss": 2.8685038089752197, "eval_runtime": 229.1295, "eval_samples_per_second": 872.869, "eval_steps_per_second": 54.554, "step": 2224000 }, { "epoch": 9.4, "eval_loss": 2.867513656616211, "eval_runtime": 228.6384, "eval_samples_per_second": 874.744, "eval_steps_per_second": 54.671, "step": 2232000 }, { "epoch": 9.43, "learning_rate": 6.67055782539815e-07, "loss": 3.0234, "step": 2240000 }, { "epoch": 9.43, "eval_loss": 2.866205930709839, "eval_runtime": 228.8031, "eval_samples_per_second": 874.114, "eval_steps_per_second": 54.632, "step": 2240000 }, { "epoch": 9.47, "eval_loss": 2.8670058250427246, "eval_runtime": 230.0362, "eval_samples_per_second": 869.428, "eval_steps_per_second": 54.339, "step": 2248000 }, { "epoch": 9.5, "learning_rate": 6.003502042858334e-07, "loss": 3.0256, "step": 2256000 }, { "epoch": 9.5, "eval_loss": 2.8764402866363525, "eval_runtime": 228.8894, "eval_samples_per_second": 873.784, "eval_steps_per_second": 54.612, "step": 2256000 }, { "epoch": 9.53, "eval_loss": 2.8664441108703613, "eval_runtime": 228.7947, "eval_samples_per_second": 874.146, "eval_steps_per_second": 54.634, "step": 2264000 }, { "epoch": 9.57, "learning_rate": 5.33644626031852e-07, "loss": 3.0232, "step": 2272000 }, { "epoch": 9.57, "eval_loss": 2.8624887466430664, "eval_runtime": 229.0315, "eval_samples_per_second": 873.242, "eval_steps_per_second": 54.578, "step": 2272000 }, { "epoch": 9.6, "eval_loss": 2.8646833896636963, "eval_runtime": 229.8068, "eval_samples_per_second": 870.296, "eval_steps_per_second": 54.394, "step": 2280000 }, { "epoch": 9.63, "learning_rate": 4.669390477778705e-07, "loss": 3.0309, "step": 2288000 }, { "epoch": 9.63, "eval_loss": 2.8561413288116455, "eval_runtime": 229.8225, "eval_samples_per_second": 870.237, "eval_steps_per_second": 54.39, "step": 2288000 }, { "epoch": 9.67, "eval_loss": 2.8657453060150146, "eval_runtime": 230.8107, "eval_samples_per_second": 866.511, "eval_steps_per_second": 54.157, "step": 2296000 }, { "epoch": 9.7, "learning_rate": 4.0023346952388894e-07, "loss": 3.0254, "step": 2304000 }, { "epoch": 9.7, "eval_loss": 2.8666698932647705, "eval_runtime": 230.9054, "eval_samples_per_second": 866.156, "eval_steps_per_second": 54.135, "step": 2304000 }, { "epoch": 9.73, "eval_loss": 2.861841917037964, "eval_runtime": 233.616, "eval_samples_per_second": 856.106, "eval_steps_per_second": 53.507, "step": 2312000 }, { "epoch": 9.77, "learning_rate": 3.335278912699075e-07, "loss": 3.0198, "step": 2320000 }, { "epoch": 9.77, "eval_loss": 2.8649652004241943, "eval_runtime": 232.9095, "eval_samples_per_second": 858.702, "eval_steps_per_second": 53.669, "step": 2320000 }, { "epoch": 9.8, "eval_loss": 2.8629865646362305, "eval_runtime": 233.276, "eval_samples_per_second": 857.353, "eval_steps_per_second": 53.585, "step": 2328000 }, { "epoch": 9.84, "learning_rate": 2.66822313015926e-07, "loss": 3.0109, "step": 2336000 }, { "epoch": 9.84, "eval_loss": 2.8533174991607666, "eval_runtime": 232.7296, "eval_samples_per_second": 859.366, "eval_steps_per_second": 53.71, "step": 2336000 }, { "epoch": 9.87, "eval_loss": 2.8656232357025146, "eval_runtime": 230.1435, "eval_samples_per_second": 869.023, "eval_steps_per_second": 54.314, "step": 2344000 }, { "epoch": 9.9, "learning_rate": 2.0011673476194447e-07, "loss": 3.0316, "step": 2352000 }, { "epoch": 9.9, "eval_loss": 2.8606715202331543, "eval_runtime": 229.4357, "eval_samples_per_second": 871.704, "eval_steps_per_second": 54.482, "step": 2352000 }, { "epoch": 9.94, "eval_loss": 2.8572158813476562, "eval_runtime": 229.9275, "eval_samples_per_second": 869.839, "eval_steps_per_second": 54.365, "step": 2360000 }, { "epoch": 9.97, "learning_rate": 1.33411156507963e-07, "loss": 3.0225, "step": 2368000 }, { "epoch": 9.97, "eval_loss": 2.8617327213287354, "eval_runtime": 229.6061, "eval_samples_per_second": 871.057, "eval_steps_per_second": 54.441, "step": 2368000 }, { "epoch": 10.0, "eval_loss": 2.8604278564453125, "eval_runtime": 229.8413, "eval_samples_per_second": 870.166, "eval_steps_per_second": 54.385, "step": 2376000 }, { "epoch": 10.04, "learning_rate": 6.67055782539815e-08, "loss": 3.0132, "step": 2384000 }, { "epoch": 10.04, "eval_loss": 2.857710838317871, "eval_runtime": 229.9337, "eval_samples_per_second": 869.816, "eval_steps_per_second": 54.364, "step": 2384000 }, { "epoch": 10.07, "eval_loss": 2.8534834384918213, "eval_runtime": 230.8863, "eval_samples_per_second": 866.227, "eval_steps_per_second": 54.139, "step": 2392000 }, { "epoch": 10.11, "learning_rate": 0.0, "loss": 3.0202, "step": 2400000 }, { "epoch": 10.11, "eval_loss": 2.8565549850463867, "eval_runtime": 230.1736, "eval_samples_per_second": 868.909, "eval_steps_per_second": 54.307, "step": 2400000 }, { "epoch": 10.11, "step": 2400000, "total_flos": 7.688849395607474e+17, "train_loss": 2.9681437548828127, "train_runtime": 221059.9809, "train_samples_per_second": 173.709, "train_steps_per_second": 10.857 } ], "logging_steps": 16000, "max_steps": 2400000, "num_train_epochs": 11, "save_steps": 32000, "total_flos": 7.688849395607474e+17, "trial_name": null, "trial_params": null }