{ "best_metric": 1.9708876609802246, "best_model_checkpoint": "./model_tweets_2020_Q4_full/checkpoint-2176000", "epoch": 5.052620941850649, "eval_steps": 8000, "global_step": 2400000, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.02, "eval_loss": 2.272641897201538, "eval_runtime": 819.1442, "eval_samples_per_second": 488.315, "eval_steps_per_second": 30.52, "step": 8000 }, { "epoch": 0.03, "learning_rate": 4.0726666666666665e-07, "loss": 2.454, "step": 16000 }, { "epoch": 0.03, "eval_loss": 2.1965091228485107, "eval_runtime": 819.903, "eval_samples_per_second": 487.863, "eval_steps_per_second": 30.491, "step": 16000 }, { "epoch": 0.05, "eval_loss": 2.1550214290618896, "eval_runtime": 820.7683, "eval_samples_per_second": 487.348, "eval_steps_per_second": 30.459, "step": 24000 }, { "epoch": 0.07, "learning_rate": 4.0453333333333336e-07, "loss": 2.2713, "step": 32000 }, { "epoch": 0.07, "eval_loss": 2.1327128410339355, "eval_runtime": 819.7334, "eval_samples_per_second": 487.964, "eval_steps_per_second": 30.498, "step": 32000 }, { "epoch": 0.08, "eval_loss": 2.1083734035491943, "eval_runtime": 819.7208, "eval_samples_per_second": 487.971, "eval_steps_per_second": 30.498, "step": 40000 }, { "epoch": 0.1, "learning_rate": 4.018e-07, "loss": 2.2285, "step": 48000 }, { "epoch": 0.1, "eval_loss": 2.092008113861084, "eval_runtime": 820.7138, "eval_samples_per_second": 487.381, "eval_steps_per_second": 30.461, "step": 48000 }, { "epoch": 0.12, "eval_loss": 2.078997850418091, "eval_runtime": 819.7996, "eval_samples_per_second": 487.924, "eval_steps_per_second": 30.495, "step": 56000 }, { "epoch": 0.13, "learning_rate": 3.9906666666666667e-07, "loss": 2.2116, "step": 64000 }, { "epoch": 0.13, "eval_loss": 2.0765929222106934, "eval_runtime": 819.9644, "eval_samples_per_second": 487.826, "eval_steps_per_second": 30.489, "step": 64000 }, { "epoch": 0.15, "eval_loss": 2.062692880630493, "eval_runtime": 821.0867, "eval_samples_per_second": 487.159, "eval_steps_per_second": 30.447, "step": 72000 }, { "epoch": 0.17, "learning_rate": 3.963333333333333e-07, "loss": 2.1857, "step": 80000 }, { "epoch": 0.17, "eval_loss": 2.0599727630615234, "eval_runtime": 820.2874, "eval_samples_per_second": 487.634, "eval_steps_per_second": 30.477, "step": 80000 }, { "epoch": 0.19, "eval_loss": 2.0540931224823, "eval_runtime": 824.019, "eval_samples_per_second": 485.426, "eval_steps_per_second": 30.339, "step": 88000 }, { "epoch": 0.2, "learning_rate": 3.936e-07, "loss": 2.1716, "step": 96000 }, { "epoch": 0.2, "eval_loss": 2.0404279232025146, "eval_runtime": 821.4964, "eval_samples_per_second": 486.916, "eval_steps_per_second": 30.432, "step": 96000 }, { "epoch": 0.22, "eval_loss": 2.043778896331787, "eval_runtime": 820.9667, "eval_samples_per_second": 487.231, "eval_steps_per_second": 30.452, "step": 104000 }, { "epoch": 0.24, "learning_rate": 3.908666666666667e-07, "loss": 2.1594, "step": 112000 }, { "epoch": 0.24, "eval_loss": 2.034425973892212, "eval_runtime": 822.7203, "eval_samples_per_second": 486.192, "eval_steps_per_second": 30.387, "step": 112000 }, { "epoch": 0.25, "eval_loss": 2.042142391204834, "eval_runtime": 824.2695, "eval_samples_per_second": 485.278, "eval_steps_per_second": 30.33, "step": 120000 }, { "epoch": 0.27, "learning_rate": 3.8813333333333334e-07, "loss": 2.1584, "step": 128000 }, { "epoch": 0.27, "eval_loss": 2.0308806896209717, "eval_runtime": 823.4075, "eval_samples_per_second": 485.786, "eval_steps_per_second": 30.362, "step": 128000 }, { "epoch": 0.29, "eval_loss": 2.0292603969573975, "eval_runtime": 824.2104, "eval_samples_per_second": 485.313, "eval_steps_per_second": 30.332, "step": 136000 }, { "epoch": 0.3, "learning_rate": 3.854e-07, "loss": 2.1426, "step": 144000 }, { "epoch": 0.3, "eval_loss": 2.026230573654175, "eval_runtime": 816.4176, "eval_samples_per_second": 489.945, "eval_steps_per_second": 30.622, "step": 144000 }, { "epoch": 0.32, "eval_loss": 2.024317741394043, "eval_runtime": 819.0007, "eval_samples_per_second": 488.4, "eval_steps_per_second": 30.525, "step": 152000 }, { "epoch": 0.34, "learning_rate": 3.8266666666666665e-07, "loss": 2.1494, "step": 160000 }, { "epoch": 0.34, "eval_loss": 2.0235137939453125, "eval_runtime": 813.9399, "eval_samples_per_second": 491.437, "eval_steps_per_second": 30.715, "step": 160000 }, { "epoch": 0.35, "eval_loss": 2.0238037109375, "eval_runtime": 815.1748, "eval_samples_per_second": 490.692, "eval_steps_per_second": 30.668, "step": 168000 }, { "epoch": 0.37, "learning_rate": 3.799333333333333e-07, "loss": 2.1466, "step": 176000 }, { "epoch": 0.37, "eval_loss": 2.0157933235168457, "eval_runtime": 818.2297, "eval_samples_per_second": 488.86, "eval_steps_per_second": 30.554, "step": 176000 }, { "epoch": 0.39, "eval_loss": 2.019794225692749, "eval_runtime": 818.3454, "eval_samples_per_second": 488.791, "eval_steps_per_second": 30.549, "step": 184000 }, { "epoch": 0.4, "learning_rate": 3.772e-07, "loss": 2.1389, "step": 192000 }, { "epoch": 0.4, "eval_loss": 2.0097880363464355, "eval_runtime": 817.203, "eval_samples_per_second": 489.474, "eval_steps_per_second": 30.592, "step": 192000 }, { "epoch": 0.42, "eval_loss": 2.0161197185516357, "eval_runtime": 817.7452, "eval_samples_per_second": 489.15, "eval_steps_per_second": 30.572, "step": 200000 }, { "epoch": 0.44, "learning_rate": 3.7446666666666667e-07, "loss": 2.1312, "step": 208000 }, { "epoch": 0.44, "eval_loss": 2.0185155868530273, "eval_runtime": 813.8944, "eval_samples_per_second": 491.464, "eval_steps_per_second": 30.717, "step": 208000 }, { "epoch": 0.45, "eval_loss": 2.005812644958496, "eval_runtime": 814.4226, "eval_samples_per_second": 491.146, "eval_steps_per_second": 30.697, "step": 216000 }, { "epoch": 0.47, "learning_rate": 3.7173333333333333e-07, "loss": 2.1404, "step": 224000 }, { "epoch": 0.47, "eval_loss": 2.0142667293548584, "eval_runtime": 814.1474, "eval_samples_per_second": 491.312, "eval_steps_per_second": 30.707, "step": 224000 }, { "epoch": 0.49, "eval_loss": 2.003966808319092, "eval_runtime": 814.7205, "eval_samples_per_second": 490.966, "eval_steps_per_second": 30.685, "step": 232000 }, { "epoch": 0.51, "learning_rate": 3.69e-07, "loss": 2.1385, "step": 240000 }, { "epoch": 0.51, "eval_loss": 2.0060346126556396, "eval_runtime": 814.1316, "eval_samples_per_second": 491.321, "eval_steps_per_second": 30.708, "step": 240000 }, { "epoch": 0.52, "eval_loss": 2.009589433670044, "eval_runtime": 816.4726, "eval_samples_per_second": 489.912, "eval_steps_per_second": 30.62, "step": 248000 }, { "epoch": 0.54, "learning_rate": 3.6626666666666664e-07, "loss": 2.1356, "step": 256000 }, { "epoch": 0.54, "eval_loss": 2.0073416233062744, "eval_runtime": 814.7442, "eval_samples_per_second": 490.952, "eval_steps_per_second": 30.684, "step": 256000 }, { "epoch": 0.56, "eval_loss": 2.007857084274292, "eval_runtime": 815.1219, "eval_samples_per_second": 490.724, "eval_steps_per_second": 30.67, "step": 264000 }, { "epoch": 0.57, "learning_rate": 3.6353333333333335e-07, "loss": 2.1297, "step": 272000 }, { "epoch": 0.57, "eval_loss": 2.0067732334136963, "eval_runtime": 814.45, "eval_samples_per_second": 491.129, "eval_steps_per_second": 30.696, "step": 272000 }, { "epoch": 0.59, "eval_loss": 2.0081980228424072, "eval_runtime": 817.1552, "eval_samples_per_second": 489.503, "eval_steps_per_second": 30.594, "step": 280000 }, { "epoch": 0.61, "learning_rate": 3.608e-07, "loss": 2.1319, "step": 288000 }, { "epoch": 0.61, "eval_loss": 2.007030487060547, "eval_runtime": 814.4857, "eval_samples_per_second": 491.107, "eval_steps_per_second": 30.694, "step": 288000 }, { "epoch": 0.62, "eval_loss": 2.004092216491699, "eval_runtime": 814.8119, "eval_samples_per_second": 490.911, "eval_steps_per_second": 30.682, "step": 296000 }, { "epoch": 0.64, "learning_rate": 3.5806666666666666e-07, "loss": 2.1296, "step": 304000 }, { "epoch": 0.64, "eval_loss": 2.0037667751312256, "eval_runtime": 815.8799, "eval_samples_per_second": 490.268, "eval_steps_per_second": 30.642, "step": 304000 }, { "epoch": 0.66, "eval_loss": 2.0013270378112793, "eval_runtime": 814.3661, "eval_samples_per_second": 491.18, "eval_steps_per_second": 30.699, "step": 312000 }, { "epoch": 0.67, "learning_rate": 3.553333333333333e-07, "loss": 2.1289, "step": 320000 }, { "epoch": 0.67, "eval_loss": 2.0042684078216553, "eval_runtime": 816.3537, "eval_samples_per_second": 489.984, "eval_steps_per_second": 30.624, "step": 320000 }, { "epoch": 0.69, "eval_loss": 2.003631353378296, "eval_runtime": 814.0523, "eval_samples_per_second": 491.369, "eval_steps_per_second": 30.711, "step": 328000 }, { "epoch": 0.71, "learning_rate": 3.5259999999999997e-07, "loss": 2.127, "step": 336000 }, { "epoch": 0.71, "eval_loss": 2.0021262168884277, "eval_runtime": 817.5353, "eval_samples_per_second": 489.276, "eval_steps_per_second": 30.58, "step": 336000 }, { "epoch": 0.72, "eval_loss": 2.0051279067993164, "eval_runtime": 814.2596, "eval_samples_per_second": 491.244, "eval_steps_per_second": 30.703, "step": 344000 }, { "epoch": 0.74, "learning_rate": 3.498666666666667e-07, "loss": 2.1244, "step": 352000 }, { "epoch": 0.74, "eval_loss": 2.000556468963623, "eval_runtime": 817.3044, "eval_samples_per_second": 489.414, "eval_steps_per_second": 30.588, "step": 352000 }, { "epoch": 0.76, "eval_loss": 2.0007591247558594, "eval_runtime": 814.7923, "eval_samples_per_second": 490.923, "eval_steps_per_second": 30.683, "step": 360000 }, { "epoch": 0.77, "learning_rate": 3.4713333333333333e-07, "loss": 2.1271, "step": 368000 }, { "epoch": 0.77, "eval_loss": 2.0028321743011475, "eval_runtime": 815.1065, "eval_samples_per_second": 490.733, "eval_steps_per_second": 30.671, "step": 368000 }, { "epoch": 0.79, "eval_loss": 2.0009915828704834, "eval_runtime": 815.3397, "eval_samples_per_second": 490.593, "eval_steps_per_second": 30.662, "step": 376000 }, { "epoch": 0.81, "learning_rate": 3.444e-07, "loss": 2.1258, "step": 384000 }, { "epoch": 0.81, "eval_loss": 2.000774621963501, "eval_runtime": 815.13, "eval_samples_per_second": 490.719, "eval_steps_per_second": 30.67, "step": 384000 }, { "epoch": 0.83, "eval_loss": 1.9967281818389893, "eval_runtime": 815.8717, "eval_samples_per_second": 490.273, "eval_steps_per_second": 30.642, "step": 392000 }, { "epoch": 0.84, "learning_rate": 3.416666666666667e-07, "loss": 2.121, "step": 400000 }, { "epoch": 0.84, "eval_loss": 2.0009427070617676, "eval_runtime": 814.7366, "eval_samples_per_second": 490.956, "eval_steps_per_second": 30.685, "step": 400000 }, { "epoch": 0.86, "eval_loss": 1.9976019859313965, "eval_runtime": 817.657, "eval_samples_per_second": 489.203, "eval_steps_per_second": 30.575, "step": 408000 }, { "epoch": 0.88, "learning_rate": 3.3893333333333335e-07, "loss": 2.1288, "step": 416000 }, { "epoch": 0.88, "eval_loss": 1.999255895614624, "eval_runtime": 815.024, "eval_samples_per_second": 490.783, "eval_steps_per_second": 30.674, "step": 416000 }, { "epoch": 0.89, "eval_loss": 1.9967526197433472, "eval_runtime": 815.5084, "eval_samples_per_second": 490.492, "eval_steps_per_second": 30.656, "step": 424000 }, { "epoch": 0.91, "learning_rate": 3.3619999999999995e-07, "loss": 2.1358, "step": 432000 }, { "epoch": 0.91, "eval_loss": 1.9999067783355713, "eval_runtime": 815.485, "eval_samples_per_second": 490.506, "eval_steps_per_second": 30.657, "step": 432000 }, { "epoch": 0.93, "eval_loss": 1.9946609735488892, "eval_runtime": 815.2056, "eval_samples_per_second": 490.674, "eval_steps_per_second": 30.667, "step": 440000 }, { "epoch": 0.94, "learning_rate": 3.3346666666666666e-07, "loss": 2.1339, "step": 448000 }, { "epoch": 0.94, "eval_loss": 2.0010650157928467, "eval_runtime": 815.1338, "eval_samples_per_second": 490.717, "eval_steps_per_second": 30.67, "step": 448000 }, { "epoch": 0.96, "eval_loss": 2.002976894378662, "eval_runtime": 817.1665, "eval_samples_per_second": 489.496, "eval_steps_per_second": 30.594, "step": 456000 }, { "epoch": 0.98, "learning_rate": 3.307333333333333e-07, "loss": 2.1256, "step": 464000 }, { "epoch": 0.98, "eval_loss": 1.9871025085449219, "eval_runtime": 817.7892, "eval_samples_per_second": 489.124, "eval_steps_per_second": 30.57, "step": 464000 }, { "epoch": 0.99, "eval_loss": 1.9928302764892578, "eval_runtime": 816.7263, "eval_samples_per_second": 489.76, "eval_steps_per_second": 30.61, "step": 472000 }, { "epoch": 1.01, "learning_rate": 3.28e-07, "loss": 2.1304, "step": 480000 }, { "epoch": 1.01, "eval_loss": 1.9875584840774536, "eval_runtime": 815.8194, "eval_samples_per_second": 490.305, "eval_steps_per_second": 30.644, "step": 480000 }, { "epoch": 1.03, "eval_loss": 1.9955813884735107, "eval_runtime": 815.3139, "eval_samples_per_second": 490.609, "eval_steps_per_second": 30.663, "step": 488000 }, { "epoch": 1.04, "learning_rate": 3.252666666666667e-07, "loss": 2.1224, "step": 496000 }, { "epoch": 1.04, "eval_loss": 1.9979095458984375, "eval_runtime": 815.9893, "eval_samples_per_second": 490.203, "eval_steps_per_second": 30.638, "step": 496000 }, { "epoch": 1.06, "eval_loss": 1.9990485906600952, "eval_runtime": 821.2386, "eval_samples_per_second": 487.069, "eval_steps_per_second": 30.442, "step": 504000 }, { "epoch": 1.08, "learning_rate": 3.2253333333333334e-07, "loss": 2.1274, "step": 512000 }, { "epoch": 1.08, "eval_loss": 1.9969778060913086, "eval_runtime": 819.163, "eval_samples_per_second": 488.303, "eval_steps_per_second": 30.519, "step": 512000 }, { "epoch": 1.09, "eval_loss": 1.9943935871124268, "eval_runtime": 819.4186, "eval_samples_per_second": 488.151, "eval_steps_per_second": 30.509, "step": 520000 }, { "epoch": 1.11, "learning_rate": 3.198e-07, "loss": 2.1215, "step": 528000 }, { "epoch": 1.11, "eval_loss": 1.9923789501190186, "eval_runtime": 821.7035, "eval_samples_per_second": 486.794, "eval_steps_per_second": 30.425, "step": 528000 }, { "epoch": 1.13, "eval_loss": 1.9945484399795532, "eval_runtime": 822.2007, "eval_samples_per_second": 486.499, "eval_steps_per_second": 30.406, "step": 536000 }, { "epoch": 1.15, "learning_rate": 3.1706666666666665e-07, "loss": 2.1246, "step": 544000 }, { "epoch": 1.15, "eval_loss": 1.9916030168533325, "eval_runtime": 826.2878, "eval_samples_per_second": 484.093, "eval_steps_per_second": 30.256, "step": 544000 }, { "epoch": 1.16, "eval_loss": 1.992759108543396, "eval_runtime": 827.6532, "eval_samples_per_second": 483.294, "eval_steps_per_second": 30.206, "step": 552000 }, { "epoch": 1.18, "learning_rate": 3.1433333333333336e-07, "loss": 2.1305, "step": 560000 }, { "epoch": 1.18, "eval_loss": 1.992741584777832, "eval_runtime": 825.1549, "eval_samples_per_second": 484.757, "eval_steps_per_second": 30.297, "step": 560000 }, { "epoch": 1.2, "eval_loss": 1.9952551126480103, "eval_runtime": 820.1887, "eval_samples_per_second": 487.693, "eval_steps_per_second": 30.481, "step": 568000 }, { "epoch": 1.21, "learning_rate": 3.116e-07, "loss": 2.1204, "step": 576000 }, { "epoch": 1.21, "eval_loss": 1.9891945123672485, "eval_runtime": 820.3209, "eval_samples_per_second": 487.614, "eval_steps_per_second": 30.476, "step": 576000 }, { "epoch": 1.23, "eval_loss": 1.9910465478897095, "eval_runtime": 825.4725, "eval_samples_per_second": 484.571, "eval_steps_per_second": 30.286, "step": 584000 }, { "epoch": 1.25, "learning_rate": 3.0886666666666667e-07, "loss": 2.1171, "step": 592000 }, { "epoch": 1.25, "eval_loss": 1.9920326471328735, "eval_runtime": 826.3528, "eval_samples_per_second": 484.055, "eval_steps_per_second": 30.253, "step": 592000 }, { "epoch": 1.26, "eval_loss": 1.9932738542556763, "eval_runtime": 822.2092, "eval_samples_per_second": 486.494, "eval_steps_per_second": 30.406, "step": 600000 }, { "epoch": 1.28, "learning_rate": 3.061333333333333e-07, "loss": 2.121, "step": 608000 }, { "epoch": 1.28, "eval_loss": 1.9892170429229736, "eval_runtime": 820.3272, "eval_samples_per_second": 487.61, "eval_steps_per_second": 30.476, "step": 608000 }, { "epoch": 1.3, "eval_loss": 1.98870050907135, "eval_runtime": 825.2211, "eval_samples_per_second": 484.719, "eval_steps_per_second": 30.295, "step": 616000 }, { "epoch": 1.31, "learning_rate": 3.034e-07, "loss": 2.1238, "step": 624000 }, { "epoch": 1.31, "eval_loss": 1.9916698932647705, "eval_runtime": 822.0883, "eval_samples_per_second": 486.566, "eval_steps_per_second": 30.41, "step": 624000 }, { "epoch": 1.33, "eval_loss": 1.987068772315979, "eval_runtime": 823.3781, "eval_samples_per_second": 485.804, "eval_steps_per_second": 30.363, "step": 632000 }, { "epoch": 1.35, "learning_rate": 3.0066666666666663e-07, "loss": 2.1235, "step": 640000 }, { "epoch": 1.35, "eval_loss": 1.9852222204208374, "eval_runtime": 825.4219, "eval_samples_per_second": 484.601, "eval_steps_per_second": 30.288, "step": 640000 }, { "epoch": 1.36, "eval_loss": 1.9862326383590698, "eval_runtime": 822.3851, "eval_samples_per_second": 486.39, "eval_steps_per_second": 30.399, "step": 648000 }, { "epoch": 1.38, "learning_rate": 2.9793333333333334e-07, "loss": 2.1266, "step": 656000 }, { "epoch": 1.38, "eval_loss": 1.9866079092025757, "eval_runtime": 822.9693, "eval_samples_per_second": 486.045, "eval_steps_per_second": 30.378, "step": 656000 }, { "epoch": 1.4, "eval_loss": 1.9920697212219238, "eval_runtime": 825.8837, "eval_samples_per_second": 484.33, "eval_steps_per_second": 30.271, "step": 664000 }, { "epoch": 1.41, "learning_rate": 2.952e-07, "loss": 2.1236, "step": 672000 }, { "epoch": 1.41, "eval_loss": 1.9807332754135132, "eval_runtime": 822.3431, "eval_samples_per_second": 486.415, "eval_steps_per_second": 30.401, "step": 672000 }, { "epoch": 1.43, "eval_loss": 1.9858804941177368, "eval_runtime": 823.2436, "eval_samples_per_second": 485.883, "eval_steps_per_second": 30.368, "step": 680000 }, { "epoch": 1.45, "learning_rate": 2.9246666666666665e-07, "loss": 2.1278, "step": 688000 }, { "epoch": 1.45, "eval_loss": 1.992494821548462, "eval_runtime": 824.8223, "eval_samples_per_second": 484.953, "eval_steps_per_second": 30.31, "step": 688000 }, { "epoch": 1.47, "eval_loss": 1.985640287399292, "eval_runtime": 822.6808, "eval_samples_per_second": 486.215, "eval_steps_per_second": 30.388, "step": 696000 }, { "epoch": 1.48, "learning_rate": 2.897333333333333e-07, "loss": 2.1116, "step": 704000 }, { "epoch": 1.48, "eval_loss": 1.988182544708252, "eval_runtime": 821.5071, "eval_samples_per_second": 486.91, "eval_steps_per_second": 30.432, "step": 704000 }, { "epoch": 1.5, "eval_loss": 1.9869115352630615, "eval_runtime": 824.2209, "eval_samples_per_second": 485.307, "eval_steps_per_second": 30.332, "step": 712000 }, { "epoch": 1.52, "learning_rate": 2.8699999999999996e-07, "loss": 2.1128, "step": 720000 }, { "epoch": 1.52, "eval_loss": 1.9818884134292603, "eval_runtime": 822.9289, "eval_samples_per_second": 486.069, "eval_steps_per_second": 30.379, "step": 720000 }, { "epoch": 1.53, "eval_loss": 1.9836347103118896, "eval_runtime": 821.7422, "eval_samples_per_second": 486.771, "eval_steps_per_second": 30.423, "step": 728000 }, { "epoch": 1.55, "learning_rate": 2.8426666666666667e-07, "loss": 2.1208, "step": 736000 }, { "epoch": 1.55, "eval_loss": 1.9819457530975342, "eval_runtime": 826.1126, "eval_samples_per_second": 484.196, "eval_steps_per_second": 30.262, "step": 736000 }, { "epoch": 1.57, "eval_loss": 1.9867298603057861, "eval_runtime": 823.8302, "eval_samples_per_second": 485.537, "eval_steps_per_second": 30.346, "step": 744000 }, { "epoch": 1.58, "learning_rate": 2.815333333333333e-07, "loss": 2.1248, "step": 752000 }, { "epoch": 1.58, "eval_loss": 1.9892758131027222, "eval_runtime": 822.1535, "eval_samples_per_second": 486.527, "eval_steps_per_second": 30.408, "step": 752000 }, { "epoch": 1.6, "eval_loss": 1.9866580963134766, "eval_runtime": 825.9107, "eval_samples_per_second": 484.314, "eval_steps_per_second": 30.27, "step": 760000 }, { "epoch": 1.62, "learning_rate": 2.7880000000000003e-07, "loss": 2.1181, "step": 768000 }, { "epoch": 1.62, "eval_loss": 1.9825937747955322, "eval_runtime": 823.3532, "eval_samples_per_second": 485.818, "eval_steps_per_second": 30.364, "step": 768000 }, { "epoch": 1.63, "eval_loss": 1.9859682321548462, "eval_runtime": 823.0164, "eval_samples_per_second": 486.017, "eval_steps_per_second": 30.376, "step": 776000 }, { "epoch": 1.65, "learning_rate": 2.7606666666666664e-07, "loss": 2.117, "step": 784000 }, { "epoch": 1.65, "eval_loss": 1.9858005046844482, "eval_runtime": 825.8773, "eval_samples_per_second": 484.333, "eval_steps_per_second": 30.271, "step": 784000 }, { "epoch": 1.67, "eval_loss": 1.982791543006897, "eval_runtime": 824.8192, "eval_samples_per_second": 484.955, "eval_steps_per_second": 30.31, "step": 792000 }, { "epoch": 1.68, "learning_rate": 2.733333333333333e-07, "loss": 2.1203, "step": 800000 }, { "epoch": 1.68, "eval_loss": 1.984574317932129, "eval_runtime": 825.4935, "eval_samples_per_second": 484.559, "eval_steps_per_second": 30.285, "step": 800000 }, { "epoch": 1.7, "eval_loss": 1.987585425376892, "eval_runtime": 823.0072, "eval_samples_per_second": 486.022, "eval_steps_per_second": 30.376, "step": 808000 }, { "epoch": 1.72, "learning_rate": 2.706e-07, "loss": 2.1219, "step": 816000 }, { "epoch": 1.72, "eval_loss": 1.981570839881897, "eval_runtime": 825.8237, "eval_samples_per_second": 484.365, "eval_steps_per_second": 30.273, "step": 816000 }, { "epoch": 1.73, "eval_loss": 1.9856051206588745, "eval_runtime": 825.8139, "eval_samples_per_second": 484.371, "eval_steps_per_second": 30.273, "step": 824000 }, { "epoch": 1.75, "learning_rate": 2.6786666666666666e-07, "loss": 2.1226, "step": 832000 }, { "epoch": 1.75, "eval_loss": 1.9833366870880127, "eval_runtime": 825.582, "eval_samples_per_second": 484.507, "eval_steps_per_second": 30.282, "step": 832000 }, { "epoch": 1.77, "eval_loss": 1.9828506708145142, "eval_runtime": 824.0758, "eval_samples_per_second": 485.392, "eval_steps_per_second": 30.337, "step": 840000 }, { "epoch": 1.79, "learning_rate": 2.651333333333333e-07, "loss": 2.1218, "step": 848000 }, { "epoch": 1.79, "eval_loss": 1.987038254737854, "eval_runtime": 825.2588, "eval_samples_per_second": 484.696, "eval_steps_per_second": 30.294, "step": 848000 }, { "epoch": 1.8, "eval_loss": 1.9793510437011719, "eval_runtime": 824.2884, "eval_samples_per_second": 485.267, "eval_steps_per_second": 30.329, "step": 856000 }, { "epoch": 1.82, "learning_rate": 2.624e-07, "loss": 2.1207, "step": 864000 }, { "epoch": 1.82, "eval_loss": 1.9860295057296753, "eval_runtime": 825.028, "eval_samples_per_second": 484.832, "eval_steps_per_second": 30.302, "step": 864000 }, { "epoch": 1.84, "eval_loss": 1.9841169118881226, "eval_runtime": 828.6138, "eval_samples_per_second": 482.734, "eval_steps_per_second": 30.171, "step": 872000 }, { "epoch": 1.85, "learning_rate": 2.596666666666667e-07, "loss": 2.1173, "step": 880000 }, { "epoch": 1.85, "eval_loss": 1.9851088523864746, "eval_runtime": 824.3663, "eval_samples_per_second": 485.221, "eval_steps_per_second": 30.326, "step": 880000 }, { "epoch": 1.87, "eval_loss": 1.9808342456817627, "eval_runtime": 825.036, "eval_samples_per_second": 484.827, "eval_steps_per_second": 30.302, "step": 888000 }, { "epoch": 1.89, "learning_rate": 2.5693333333333333e-07, "loss": 2.118, "step": 896000 }, { "epoch": 1.89, "eval_loss": 1.9755035638809204, "eval_runtime": 825.8338, "eval_samples_per_second": 484.359, "eval_steps_per_second": 30.272, "step": 896000 }, { "epoch": 1.9, "eval_loss": 1.9814049005508423, "eval_runtime": 826.5539, "eval_samples_per_second": 483.937, "eval_steps_per_second": 30.246, "step": 904000 }, { "epoch": 1.92, "learning_rate": 2.542e-07, "loss": 2.1085, "step": 912000 }, { "epoch": 1.92, "eval_loss": 1.9834091663360596, "eval_runtime": 826.7203, "eval_samples_per_second": 483.84, "eval_steps_per_second": 30.24, "step": 912000 }, { "epoch": 1.94, "eval_loss": 1.9810645580291748, "eval_runtime": 827.1053, "eval_samples_per_second": 483.614, "eval_steps_per_second": 30.226, "step": 920000 }, { "epoch": 1.95, "learning_rate": 2.5146666666666664e-07, "loss": 2.1213, "step": 928000 }, { "epoch": 1.95, "eval_loss": 1.9837363958358765, "eval_runtime": 828.4747, "eval_samples_per_second": 482.815, "eval_steps_per_second": 30.176, "step": 928000 }, { "epoch": 1.97, "eval_loss": 1.9879738092422485, "eval_runtime": 826.1791, "eval_samples_per_second": 484.157, "eval_steps_per_second": 30.26, "step": 936000 }, { "epoch": 1.99, "learning_rate": 2.4873333333333335e-07, "loss": 2.1254, "step": 944000 }, { "epoch": 1.99, "eval_loss": 1.980188250541687, "eval_runtime": 828.7291, "eval_samples_per_second": 482.667, "eval_steps_per_second": 30.167, "step": 944000 }, { "epoch": 2.0, "eval_loss": 1.977104902267456, "eval_runtime": 829.3928, "eval_samples_per_second": 482.281, "eval_steps_per_second": 30.143, "step": 952000 }, { "epoch": 2.02, "learning_rate": 2.46e-07, "loss": 2.119, "step": 960000 }, { "epoch": 2.02, "eval_loss": 1.9837137460708618, "eval_runtime": 826.3836, "eval_samples_per_second": 484.037, "eval_steps_per_second": 30.252, "step": 960000 }, { "epoch": 2.04, "eval_loss": 1.9814780950546265, "eval_runtime": 825.3523, "eval_samples_per_second": 484.642, "eval_steps_per_second": 30.29, "step": 968000 }, { "epoch": 2.05, "learning_rate": 2.4326666666666666e-07, "loss": 2.1217, "step": 976000 }, { "epoch": 2.05, "eval_loss": 1.979064702987671, "eval_runtime": 827.4989, "eval_samples_per_second": 483.384, "eval_steps_per_second": 30.212, "step": 976000 }, { "epoch": 2.07, "eval_loss": 1.9857829809188843, "eval_runtime": 828.2012, "eval_samples_per_second": 482.974, "eval_steps_per_second": 30.186, "step": 984000 }, { "epoch": 2.09, "learning_rate": 2.405333333333333e-07, "loss": 2.1196, "step": 992000 }, { "epoch": 2.09, "eval_loss": 1.982262372970581, "eval_runtime": 829.1232, "eval_samples_per_second": 482.437, "eval_steps_per_second": 30.152, "step": 992000 }, { "epoch": 2.11, "eval_loss": 1.9849437475204468, "eval_runtime": 831.3718, "eval_samples_per_second": 481.132, "eval_steps_per_second": 30.071, "step": 1000000 }, { "epoch": 2.12, "learning_rate": 2.3779999999999997e-07, "loss": 2.1175, "step": 1008000 }, { "epoch": 2.12, "eval_loss": 1.9832239151000977, "eval_runtime": 832.5828, "eval_samples_per_second": 480.433, "eval_steps_per_second": 30.027, "step": 1008000 }, { "epoch": 2.14, "eval_loss": 1.979453444480896, "eval_runtime": 825.531, "eval_samples_per_second": 484.537, "eval_steps_per_second": 30.284, "step": 1016000 }, { "epoch": 2.16, "learning_rate": 2.3506666666666668e-07, "loss": 2.1165, "step": 1024000 }, { "epoch": 2.16, "eval_loss": 1.9848490953445435, "eval_runtime": 827.9899, "eval_samples_per_second": 483.098, "eval_steps_per_second": 30.194, "step": 1024000 }, { "epoch": 2.17, "eval_loss": 1.9812926054000854, "eval_runtime": 826.4076, "eval_samples_per_second": 484.023, "eval_steps_per_second": 30.251, "step": 1032000 }, { "epoch": 2.19, "learning_rate": 2.3233333333333334e-07, "loss": 2.1223, "step": 1040000 }, { "epoch": 2.19, "eval_loss": 1.9791409969329834, "eval_runtime": 829.059, "eval_samples_per_second": 482.475, "eval_steps_per_second": 30.155, "step": 1040000 }, { "epoch": 2.21, "eval_loss": 1.9790812730789185, "eval_runtime": 828.5751, "eval_samples_per_second": 482.756, "eval_steps_per_second": 30.172, "step": 1048000 }, { "epoch": 2.22, "learning_rate": 2.2960000000000002e-07, "loss": 2.1196, "step": 1056000 }, { "epoch": 2.22, "eval_loss": 1.9723681211471558, "eval_runtime": 826.7944, "eval_samples_per_second": 483.796, "eval_steps_per_second": 30.237, "step": 1056000 }, { "epoch": 2.24, "eval_loss": 1.977889895439148, "eval_runtime": 825.5759, "eval_samples_per_second": 484.51, "eval_steps_per_second": 30.282, "step": 1064000 }, { "epoch": 2.26, "learning_rate": 2.2686666666666667e-07, "loss": 2.1097, "step": 1072000 }, { "epoch": 2.26, "eval_loss": 1.9785293340682983, "eval_runtime": 828.0745, "eval_samples_per_second": 483.048, "eval_steps_per_second": 30.191, "step": 1072000 }, { "epoch": 2.27, "eval_loss": 1.9841850996017456, "eval_runtime": 829.17, "eval_samples_per_second": 482.41, "eval_steps_per_second": 30.151, "step": 1080000 }, { "epoch": 2.29, "learning_rate": 2.2413333333333333e-07, "loss": 2.109, "step": 1088000 }, { "epoch": 2.29, "eval_loss": 1.9791826009750366, "eval_runtime": 826.9889, "eval_samples_per_second": 483.682, "eval_steps_per_second": 30.23, "step": 1088000 }, { "epoch": 2.31, "eval_loss": 1.980352759361267, "eval_runtime": 827.506, "eval_samples_per_second": 483.38, "eval_steps_per_second": 30.211, "step": 1096000 }, { "epoch": 2.32, "learning_rate": 2.214e-07, "loss": 2.1175, "step": 1104000 }, { "epoch": 2.32, "eval_loss": 1.9810823202133179, "eval_runtime": 827.6073, "eval_samples_per_second": 483.321, "eval_steps_per_second": 30.208, "step": 1104000 }, { "epoch": 2.34, "eval_loss": 1.981313705444336, "eval_runtime": 828.6779, "eval_samples_per_second": 482.697, "eval_steps_per_second": 30.169, "step": 1112000 }, { "epoch": 2.36, "learning_rate": 2.1866666666666667e-07, "loss": 2.1239, "step": 1120000 }, { "epoch": 2.36, "eval_loss": 1.9742064476013184, "eval_runtime": 828.7308, "eval_samples_per_second": 482.666, "eval_steps_per_second": 30.167, "step": 1120000 }, { "epoch": 2.37, "eval_loss": 1.9758623838424683, "eval_runtime": 833.0794, "eval_samples_per_second": 480.146, "eval_steps_per_second": 30.009, "step": 1128000 }, { "epoch": 2.39, "learning_rate": 2.1593333333333332e-07, "loss": 2.1141, "step": 1136000 }, { "epoch": 2.39, "eval_loss": 1.983467936515808, "eval_runtime": 828.757, "eval_samples_per_second": 482.651, "eval_steps_per_second": 30.166, "step": 1136000 }, { "epoch": 2.41, "eval_loss": 1.9814101457595825, "eval_runtime": 827.9075, "eval_samples_per_second": 483.146, "eval_steps_per_second": 30.197, "step": 1144000 }, { "epoch": 2.43, "learning_rate": 2.132e-07, "loss": 2.1121, "step": 1152000 }, { "epoch": 2.43, "eval_loss": 1.9752756357192993, "eval_runtime": 831.4143, "eval_samples_per_second": 481.108, "eval_steps_per_second": 30.069, "step": 1152000 }, { "epoch": 2.44, "eval_loss": 1.979575753211975, "eval_runtime": 829.7763, "eval_samples_per_second": 482.058, "eval_steps_per_second": 30.129, "step": 1160000 }, { "epoch": 2.46, "learning_rate": 2.1046666666666666e-07, "loss": 2.1298, "step": 1168000 }, { "epoch": 2.46, "eval_loss": 1.971991777420044, "eval_runtime": 826.9256, "eval_samples_per_second": 483.719, "eval_steps_per_second": 30.232, "step": 1168000 }, { "epoch": 2.48, "eval_loss": 1.9822479486465454, "eval_runtime": 829.5634, "eval_samples_per_second": 482.181, "eval_steps_per_second": 30.136, "step": 1176000 }, { "epoch": 2.49, "learning_rate": 2.0773333333333334e-07, "loss": 2.1113, "step": 1184000 }, { "epoch": 2.49, "eval_loss": 1.977174997329712, "eval_runtime": 829.6202, "eval_samples_per_second": 482.148, "eval_steps_per_second": 30.134, "step": 1184000 }, { "epoch": 2.51, "eval_loss": 1.977932333946228, "eval_runtime": 828.9453, "eval_samples_per_second": 482.541, "eval_steps_per_second": 30.159, "step": 1192000 }, { "epoch": 2.53, "learning_rate": 2.05e-07, "loss": 2.1224, "step": 1200000 }, { "epoch": 2.53, "eval_loss": 1.975991129875183, "eval_runtime": 829.3484, "eval_samples_per_second": 482.306, "eval_steps_per_second": 30.144, "step": 1200000 }, { "epoch": 2.54, "eval_loss": 1.9823347330093384, "eval_runtime": 830.5547, "eval_samples_per_second": 481.606, "eval_steps_per_second": 30.1, "step": 1208000 }, { "epoch": 2.56, "learning_rate": 2.0226666666666668e-07, "loss": 2.1181, "step": 1216000 }, { "epoch": 2.56, "eval_loss": 1.9836112260818481, "eval_runtime": 828.4723, "eval_samples_per_second": 482.816, "eval_steps_per_second": 30.176, "step": 1216000 }, { "epoch": 2.58, "eval_loss": 1.975426435470581, "eval_runtime": 828.9128, "eval_samples_per_second": 482.56, "eval_steps_per_second": 30.16, "step": 1224000 }, { "epoch": 2.59, "learning_rate": 1.9953333333333333e-07, "loss": 2.1152, "step": 1232000 }, { "epoch": 2.59, "eval_loss": 1.9764443635940552, "eval_runtime": 829.9601, "eval_samples_per_second": 481.951, "eval_steps_per_second": 30.122, "step": 1232000 }, { "epoch": 2.61, "eval_loss": 1.9771231412887573, "eval_runtime": 828.3284, "eval_samples_per_second": 482.9, "eval_steps_per_second": 30.181, "step": 1240000 }, { "epoch": 2.63, "learning_rate": 1.968e-07, "loss": 2.1219, "step": 1248000 }, { "epoch": 2.63, "eval_loss": 1.977352499961853, "eval_runtime": 829.3614, "eval_samples_per_second": 482.299, "eval_steps_per_second": 30.144, "step": 1248000 }, { "epoch": 2.64, "eval_loss": 1.97895348072052, "eval_runtime": 829.5491, "eval_samples_per_second": 482.19, "eval_steps_per_second": 30.137, "step": 1256000 }, { "epoch": 2.66, "learning_rate": 1.9406666666666667e-07, "loss": 2.115, "step": 1264000 }, { "epoch": 2.66, "eval_loss": 1.9782521724700928, "eval_runtime": 830.2978, "eval_samples_per_second": 481.755, "eval_steps_per_second": 30.11, "step": 1264000 }, { "epoch": 2.68, "eval_loss": 1.9829373359680176, "eval_runtime": 828.0819, "eval_samples_per_second": 483.044, "eval_steps_per_second": 30.19, "step": 1272000 }, { "epoch": 2.69, "learning_rate": 1.9133333333333333e-07, "loss": 2.1241, "step": 1280000 }, { "epoch": 2.69, "eval_loss": 1.9843811988830566, "eval_runtime": 832.2319, "eval_samples_per_second": 480.635, "eval_steps_per_second": 30.04, "step": 1280000 }, { "epoch": 2.71, "eval_loss": 1.9780951738357544, "eval_runtime": 835.8498, "eval_samples_per_second": 478.555, "eval_steps_per_second": 29.91, "step": 1288000 }, { "epoch": 2.73, "learning_rate": 1.886e-07, "loss": 2.1157, "step": 1296000 }, { "epoch": 2.73, "eval_loss": 1.9807745218276978, "eval_runtime": 832.4111, "eval_samples_per_second": 480.532, "eval_steps_per_second": 30.033, "step": 1296000 }, { "epoch": 2.75, "eval_loss": 1.9820023775100708, "eval_runtime": 831.3183, "eval_samples_per_second": 481.163, "eval_steps_per_second": 30.073, "step": 1304000 }, { "epoch": 2.76, "learning_rate": 1.8586666666666666e-07, "loss": 2.1223, "step": 1312000 }, { "epoch": 2.76, "eval_loss": 1.9811793565750122, "eval_runtime": 833.2785, "eval_samples_per_second": 480.032, "eval_steps_per_second": 30.002, "step": 1312000 }, { "epoch": 2.78, "eval_loss": 1.9810597896575928, "eval_runtime": 829.9455, "eval_samples_per_second": 481.959, "eval_steps_per_second": 30.122, "step": 1320000 }, { "epoch": 2.8, "learning_rate": 1.8313333333333332e-07, "loss": 2.1178, "step": 1328000 }, { "epoch": 2.8, "eval_loss": 1.9779342412948608, "eval_runtime": 831.1251, "eval_samples_per_second": 481.275, "eval_steps_per_second": 30.08, "step": 1328000 }, { "epoch": 2.81, "eval_loss": 1.9760552644729614, "eval_runtime": 831.7829, "eval_samples_per_second": 480.895, "eval_steps_per_second": 30.056, "step": 1336000 }, { "epoch": 2.83, "learning_rate": 1.804e-07, "loss": 2.1204, "step": 1344000 }, { "epoch": 2.83, "eval_loss": 1.9772136211395264, "eval_runtime": 831.6787, "eval_samples_per_second": 480.955, "eval_steps_per_second": 30.06, "step": 1344000 }, { "epoch": 2.85, "eval_loss": 1.9723666906356812, "eval_runtime": 831.0166, "eval_samples_per_second": 481.338, "eval_steps_per_second": 30.084, "step": 1352000 }, { "epoch": 2.86, "learning_rate": 1.7766666666666666e-07, "loss": 2.1205, "step": 1360000 }, { "epoch": 2.86, "eval_loss": 1.977715015411377, "eval_runtime": 832.3396, "eval_samples_per_second": 480.573, "eval_steps_per_second": 30.036, "step": 1360000 }, { "epoch": 2.88, "eval_loss": 1.9721226692199707, "eval_runtime": 831.5525, "eval_samples_per_second": 481.028, "eval_steps_per_second": 30.064, "step": 1368000 }, { "epoch": 2.9, "learning_rate": 1.7493333333333334e-07, "loss": 2.1178, "step": 1376000 }, { "epoch": 2.9, "eval_loss": 1.9767777919769287, "eval_runtime": 830.881, "eval_samples_per_second": 481.417, "eval_steps_per_second": 30.089, "step": 1376000 }, { "epoch": 2.91, "eval_loss": 1.9802054166793823, "eval_runtime": 833.1595, "eval_samples_per_second": 480.1, "eval_steps_per_second": 30.006, "step": 1384000 }, { "epoch": 2.93, "learning_rate": 1.722e-07, "loss": 2.1205, "step": 1392000 }, { "epoch": 2.93, "eval_loss": 1.9758925437927246, "eval_runtime": 832.192, "eval_samples_per_second": 480.658, "eval_steps_per_second": 30.041, "step": 1392000 }, { "epoch": 2.95, "eval_loss": 1.981723427772522, "eval_runtime": 830.7334, "eval_samples_per_second": 481.502, "eval_steps_per_second": 30.094, "step": 1400000 }, { "epoch": 2.96, "learning_rate": 1.6946666666666668e-07, "loss": 2.1193, "step": 1408000 }, { "epoch": 2.96, "eval_loss": 1.9788410663604736, "eval_runtime": 833.0961, "eval_samples_per_second": 480.137, "eval_steps_per_second": 30.009, "step": 1408000 }, { "epoch": 2.98, "eval_loss": 1.9770302772521973, "eval_runtime": 834.1909, "eval_samples_per_second": 479.507, "eval_steps_per_second": 29.969, "step": 1416000 }, { "epoch": 3.0, "learning_rate": 1.6673333333333333e-07, "loss": 2.1195, "step": 1424000 }, { "epoch": 3.0, "eval_loss": 1.9768872261047363, "eval_runtime": 832.2279, "eval_samples_per_second": 480.638, "eval_steps_per_second": 30.04, "step": 1424000 }, { "epoch": 3.01, "eval_loss": 1.984775185585022, "eval_runtime": 833.218, "eval_samples_per_second": 480.066, "eval_steps_per_second": 30.004, "step": 1432000 }, { "epoch": 3.03, "learning_rate": 1.64e-07, "loss": 2.1137, "step": 1440000 }, { "epoch": 3.03, "eval_loss": 1.974687933921814, "eval_runtime": 835.9568, "eval_samples_per_second": 478.494, "eval_steps_per_second": 29.906, "step": 1440000 }, { "epoch": 3.05, "eval_loss": 1.9745441675186157, "eval_runtime": 833.8369, "eval_samples_per_second": 479.71, "eval_steps_per_second": 29.982, "step": 1448000 }, { "epoch": 3.07, "learning_rate": 1.6126666666666667e-07, "loss": 2.12, "step": 1456000 }, { "epoch": 3.07, "eval_loss": 1.9765040874481201, "eval_runtime": 832.1024, "eval_samples_per_second": 480.71, "eval_steps_per_second": 30.044, "step": 1456000 }, { "epoch": 3.08, "eval_loss": 1.9775580167770386, "eval_runtime": 833.3483, "eval_samples_per_second": 479.991, "eval_steps_per_second": 29.999, "step": 1464000 }, { "epoch": 3.1, "learning_rate": 1.5853333333333332e-07, "loss": 2.123, "step": 1472000 }, { "epoch": 3.1, "eval_loss": 1.9799487590789795, "eval_runtime": 833.8148, "eval_samples_per_second": 479.723, "eval_steps_per_second": 29.983, "step": 1472000 }, { "epoch": 3.12, "eval_loss": 1.97369384765625, "eval_runtime": 833.8386, "eval_samples_per_second": 479.709, "eval_steps_per_second": 29.982, "step": 1480000 }, { "epoch": 3.13, "learning_rate": 1.558e-07, "loss": 2.1213, "step": 1488000 }, { "epoch": 3.13, "eval_loss": 1.9774503707885742, "eval_runtime": 836.4963, "eval_samples_per_second": 478.185, "eval_steps_per_second": 29.887, "step": 1488000 }, { "epoch": 3.15, "eval_loss": 1.978287696838379, "eval_runtime": 834.0672, "eval_samples_per_second": 479.578, "eval_steps_per_second": 29.974, "step": 1496000 }, { "epoch": 3.17, "learning_rate": 1.5306666666666666e-07, "loss": 2.1267, "step": 1504000 }, { "epoch": 3.17, "eval_loss": 1.9806185960769653, "eval_runtime": 833.244, "eval_samples_per_second": 480.051, "eval_steps_per_second": 30.003, "step": 1504000 }, { "epoch": 3.18, "eval_loss": 1.976397156715393, "eval_runtime": 834.888, "eval_samples_per_second": 479.106, "eval_steps_per_second": 29.944, "step": 1512000 }, { "epoch": 3.2, "learning_rate": 1.5033333333333332e-07, "loss": 2.1186, "step": 1520000 }, { "epoch": 3.2, "eval_loss": 1.9694900512695312, "eval_runtime": 833.6452, "eval_samples_per_second": 479.82, "eval_steps_per_second": 29.989, "step": 1520000 }, { "epoch": 3.22, "eval_loss": 1.9782640933990479, "eval_runtime": 835.1469, "eval_samples_per_second": 478.958, "eval_steps_per_second": 29.935, "step": 1528000 }, { "epoch": 3.23, "learning_rate": 1.476e-07, "loss": 2.1189, "step": 1536000 }, { "epoch": 3.23, "eval_loss": 1.9773519039154053, "eval_runtime": 834.7344, "eval_samples_per_second": 479.194, "eval_steps_per_second": 29.95, "step": 1536000 }, { "epoch": 3.25, "eval_loss": 1.978074073791504, "eval_runtime": 834.7102, "eval_samples_per_second": 479.208, "eval_steps_per_second": 29.951, "step": 1544000 }, { "epoch": 3.27, "learning_rate": 1.4486666666666665e-07, "loss": 2.1249, "step": 1552000 }, { "epoch": 3.27, "eval_loss": 1.9740216732025146, "eval_runtime": 833.2736, "eval_samples_per_second": 480.034, "eval_steps_per_second": 30.002, "step": 1552000 }, { "epoch": 3.28, "eval_loss": 1.9787108898162842, "eval_runtime": 834.7811, "eval_samples_per_second": 479.168, "eval_steps_per_second": 29.948, "step": 1560000 }, { "epoch": 3.3, "learning_rate": 1.4213333333333334e-07, "loss": 2.1124, "step": 1568000 }, { "epoch": 3.3, "eval_loss": 1.979936122894287, "eval_runtime": 834.2073, "eval_samples_per_second": 479.497, "eval_steps_per_second": 29.969, "step": 1568000 }, { "epoch": 3.32, "eval_loss": 1.973401427268982, "eval_runtime": 834.8304, "eval_samples_per_second": 479.139, "eval_steps_per_second": 29.946, "step": 1576000 }, { "epoch": 3.33, "learning_rate": 1.3940000000000002e-07, "loss": 2.1166, "step": 1584000 }, { "epoch": 3.33, "eval_loss": 1.9763137102127075, "eval_runtime": 834.8064, "eval_samples_per_second": 479.153, "eval_steps_per_second": 29.947, "step": 1584000 }, { "epoch": 3.35, "eval_loss": 1.9797940254211426, "eval_runtime": 834.9213, "eval_samples_per_second": 479.087, "eval_steps_per_second": 29.943, "step": 1592000 }, { "epoch": 3.37, "learning_rate": 1.3666666666666665e-07, "loss": 2.1224, "step": 1600000 }, { "epoch": 3.37, "eval_loss": 1.974062442779541, "eval_runtime": 834.3195, "eval_samples_per_second": 479.433, "eval_steps_per_second": 29.965, "step": 1600000 }, { "epoch": 3.39, "eval_loss": 1.978087067604065, "eval_runtime": 836.4887, "eval_samples_per_second": 478.189, "eval_steps_per_second": 29.887, "step": 1608000 }, { "epoch": 3.4, "learning_rate": 1.3393333333333333e-07, "loss": 2.1178, "step": 1616000 }, { "epoch": 3.4, "eval_loss": 1.9705400466918945, "eval_runtime": 835.9379, "eval_samples_per_second": 478.504, "eval_steps_per_second": 29.907, "step": 1616000 }, { "epoch": 3.42, "eval_loss": 1.9753915071487427, "eval_runtime": 836.7877, "eval_samples_per_second": 478.018, "eval_steps_per_second": 29.876, "step": 1624000 }, { "epoch": 3.44, "learning_rate": 1.312e-07, "loss": 2.1096, "step": 1632000 }, { "epoch": 3.44, "eval_loss": 1.9738069772720337, "eval_runtime": 835.2752, "eval_samples_per_second": 478.884, "eval_steps_per_second": 29.93, "step": 1632000 }, { "epoch": 3.45, "eval_loss": 1.9784822463989258, "eval_runtime": 837.414, "eval_samples_per_second": 477.661, "eval_steps_per_second": 29.854, "step": 1640000 }, { "epoch": 3.47, "learning_rate": 1.2846666666666667e-07, "loss": 2.1157, "step": 1648000 }, { "epoch": 3.47, "eval_loss": 1.974530577659607, "eval_runtime": 835.2372, "eval_samples_per_second": 478.906, "eval_steps_per_second": 29.932, "step": 1648000 }, { "epoch": 3.49, "eval_loss": 1.9787706136703491, "eval_runtime": 837.243, "eval_samples_per_second": 477.759, "eval_steps_per_second": 29.86, "step": 1656000 }, { "epoch": 3.5, "learning_rate": 1.2573333333333332e-07, "loss": 2.1184, "step": 1664000 }, { "epoch": 3.5, "eval_loss": 1.9739311933517456, "eval_runtime": 835.5909, "eval_samples_per_second": 478.703, "eval_steps_per_second": 29.919, "step": 1664000 }, { "epoch": 3.52, "eval_loss": 1.9722193479537964, "eval_runtime": 835.111, "eval_samples_per_second": 478.978, "eval_steps_per_second": 29.936, "step": 1672000 }, { "epoch": 3.54, "learning_rate": 1.23e-07, "loss": 2.1288, "step": 1680000 }, { "epoch": 3.54, "eval_loss": 1.972862958908081, "eval_runtime": 836.8908, "eval_samples_per_second": 477.96, "eval_steps_per_second": 29.872, "step": 1680000 }, { "epoch": 3.55, "eval_loss": 1.9781924486160278, "eval_runtime": 837.6578, "eval_samples_per_second": 477.522, "eval_steps_per_second": 29.845, "step": 1688000 }, { "epoch": 3.57, "learning_rate": 1.2026666666666666e-07, "loss": 2.1247, "step": 1696000 }, { "epoch": 3.57, "eval_loss": 1.9771887063980103, "eval_runtime": 837.0667, "eval_samples_per_second": 477.859, "eval_steps_per_second": 29.866, "step": 1696000 }, { "epoch": 3.59, "eval_loss": 1.9759401082992554, "eval_runtime": 835.9338, "eval_samples_per_second": 478.507, "eval_steps_per_second": 29.907, "step": 1704000 }, { "epoch": 3.6, "learning_rate": 1.1753333333333334e-07, "loss": 2.1113, "step": 1712000 }, { "epoch": 3.6, "eval_loss": 1.9695793390274048, "eval_runtime": 839.8818, "eval_samples_per_second": 476.258, "eval_steps_per_second": 29.766, "step": 1712000 }, { "epoch": 3.62, "eval_loss": 1.9751383066177368, "eval_runtime": 837.8318, "eval_samples_per_second": 477.423, "eval_steps_per_second": 29.839, "step": 1720000 }, { "epoch": 3.64, "learning_rate": 1.1480000000000001e-07, "loss": 2.124, "step": 1728000 }, { "epoch": 3.64, "eval_loss": 1.9740879535675049, "eval_runtime": 840.4275, "eval_samples_per_second": 475.948, "eval_steps_per_second": 29.747, "step": 1728000 }, { "epoch": 3.65, "eval_loss": 1.9779579639434814, "eval_runtime": 840.4483, "eval_samples_per_second": 475.936, "eval_steps_per_second": 29.746, "step": 1736000 }, { "epoch": 3.67, "learning_rate": 1.1206666666666666e-07, "loss": 2.1242, "step": 1744000 }, { "epoch": 3.67, "eval_loss": 1.977715015411377, "eval_runtime": 840.8692, "eval_samples_per_second": 475.698, "eval_steps_per_second": 29.731, "step": 1744000 }, { "epoch": 3.69, "eval_loss": 1.9723716974258423, "eval_runtime": 843.4448, "eval_samples_per_second": 474.246, "eval_steps_per_second": 29.64, "step": 1752000 }, { "epoch": 3.71, "learning_rate": 1.0933333333333333e-07, "loss": 2.1263, "step": 1760000 }, { "epoch": 3.71, "eval_loss": 1.977464199066162, "eval_runtime": 837.9993, "eval_samples_per_second": 477.327, "eval_steps_per_second": 29.833, "step": 1760000 }, { "epoch": 3.72, "eval_loss": 1.9779309034347534, "eval_runtime": 839.9442, "eval_samples_per_second": 476.222, "eval_steps_per_second": 29.764, "step": 1768000 }, { "epoch": 3.74, "learning_rate": 1.066e-07, "loss": 2.1214, "step": 1776000 }, { "epoch": 3.74, "eval_loss": 1.9786217212677002, "eval_runtime": 840.5564, "eval_samples_per_second": 475.875, "eval_steps_per_second": 29.742, "step": 1776000 }, { "epoch": 3.76, "eval_loss": 1.9769537448883057, "eval_runtime": 838.7303, "eval_samples_per_second": 476.911, "eval_steps_per_second": 29.807, "step": 1784000 }, { "epoch": 3.77, "learning_rate": 1.0386666666666667e-07, "loss": 2.1209, "step": 1792000 }, { "epoch": 3.77, "eval_loss": 1.980912208557129, "eval_runtime": 840.2253, "eval_samples_per_second": 476.063, "eval_steps_per_second": 29.754, "step": 1792000 }, { "epoch": 3.79, "eval_loss": 1.975409746170044, "eval_runtime": 837.5398, "eval_samples_per_second": 477.589, "eval_steps_per_second": 29.849, "step": 1800000 }, { "epoch": 3.81, "learning_rate": 1.0113333333333334e-07, "loss": 2.1254, "step": 1808000 }, { "epoch": 3.81, "eval_loss": 1.9769078493118286, "eval_runtime": 837.9271, "eval_samples_per_second": 477.368, "eval_steps_per_second": 29.836, "step": 1808000 }, { "epoch": 3.82, "eval_loss": 1.9781721830368042, "eval_runtime": 838.6389, "eval_samples_per_second": 476.963, "eval_steps_per_second": 29.81, "step": 1816000 }, { "epoch": 3.84, "learning_rate": 9.84e-08, "loss": 2.1225, "step": 1824000 }, { "epoch": 3.84, "eval_loss": 1.9799091815948486, "eval_runtime": 840.1986, "eval_samples_per_second": 476.078, "eval_steps_per_second": 29.755, "step": 1824000 }, { "epoch": 3.86, "eval_loss": 1.9781138896942139, "eval_runtime": 838.6105, "eval_samples_per_second": 476.979, "eval_steps_per_second": 29.811, "step": 1832000 }, { "epoch": 3.87, "learning_rate": 9.566666666666666e-08, "loss": 2.1232, "step": 1840000 }, { "epoch": 3.87, "eval_loss": 1.9752310514450073, "eval_runtime": 838.5172, "eval_samples_per_second": 477.033, "eval_steps_per_second": 29.815, "step": 1840000 }, { "epoch": 3.89, "eval_loss": 1.9748742580413818, "eval_runtime": 838.6203, "eval_samples_per_second": 476.974, "eval_steps_per_second": 29.811, "step": 1848000 }, { "epoch": 3.91, "learning_rate": 9.293333333333333e-08, "loss": 2.1225, "step": 1856000 }, { "epoch": 3.91, "eval_loss": 1.9786981344223022, "eval_runtime": 837.8665, "eval_samples_per_second": 477.403, "eval_steps_per_second": 29.838, "step": 1856000 }, { "epoch": 3.92, "eval_loss": 1.9764823913574219, "eval_runtime": 839.4776, "eval_samples_per_second": 476.487, "eval_steps_per_second": 29.78, "step": 1864000 }, { "epoch": 3.94, "learning_rate": 9.02e-08, "loss": 2.118, "step": 1872000 }, { "epoch": 3.94, "eval_loss": 1.9764275550842285, "eval_runtime": 838.9687, "eval_samples_per_second": 476.776, "eval_steps_per_second": 29.798, "step": 1872000 }, { "epoch": 3.96, "eval_loss": 1.9767355918884277, "eval_runtime": 841.7386, "eval_samples_per_second": 475.207, "eval_steps_per_second": 29.7, "step": 1880000 }, { "epoch": 3.97, "learning_rate": 8.746666666666667e-08, "loss": 2.1158, "step": 1888000 }, { "epoch": 3.97, "eval_loss": 1.9775059223175049, "eval_runtime": 841.1451, "eval_samples_per_second": 475.542, "eval_steps_per_second": 29.721, "step": 1888000 }, { "epoch": 3.99, "eval_loss": 1.9774997234344482, "eval_runtime": 839.7152, "eval_samples_per_second": 476.352, "eval_steps_per_second": 29.772, "step": 1896000 }, { "epoch": 4.01, "learning_rate": 8.473333333333334e-08, "loss": 2.1257, "step": 1904000 }, { "epoch": 4.01, "eval_loss": 1.975029706954956, "eval_runtime": 838.6023, "eval_samples_per_second": 476.984, "eval_steps_per_second": 29.812, "step": 1904000 }, { "epoch": 4.03, "eval_loss": 1.9755631685256958, "eval_runtime": 839.8455, "eval_samples_per_second": 476.278, "eval_steps_per_second": 29.767, "step": 1912000 }, { "epoch": 4.04, "learning_rate": 8.2e-08, "loss": 2.122, "step": 1920000 }, { "epoch": 4.04, "eval_loss": 1.981199860572815, "eval_runtime": 841.9953, "eval_samples_per_second": 475.062, "eval_steps_per_second": 29.691, "step": 1920000 }, { "epoch": 4.06, "eval_loss": 1.975250482559204, "eval_runtime": 842.4298, "eval_samples_per_second": 474.817, "eval_steps_per_second": 29.676, "step": 1928000 }, { "epoch": 4.08, "learning_rate": 7.926666666666666e-08, "loss": 2.1223, "step": 1936000 }, { "epoch": 4.08, "eval_loss": 1.978774070739746, "eval_runtime": 840.2911, "eval_samples_per_second": 476.026, "eval_steps_per_second": 29.752, "step": 1936000 }, { "epoch": 4.09, "eval_loss": 1.9772640466690063, "eval_runtime": 842.2179, "eval_samples_per_second": 474.936, "eval_steps_per_second": 29.684, "step": 1944000 }, { "epoch": 4.11, "learning_rate": 7.653333333333333e-08, "loss": 2.1189, "step": 1952000 }, { "epoch": 4.11, "eval_loss": 1.979813575744629, "eval_runtime": 847.3463, "eval_samples_per_second": 472.062, "eval_steps_per_second": 29.504, "step": 1952000 }, { "epoch": 4.13, "eval_loss": 1.9723551273345947, "eval_runtime": 842.2019, "eval_samples_per_second": 474.946, "eval_steps_per_second": 29.684, "step": 1960000 }, { "epoch": 4.14, "learning_rate": 7.38e-08, "loss": 2.1182, "step": 1968000 }, { "epoch": 4.14, "eval_loss": 1.981265902519226, "eval_runtime": 842.6861, "eval_samples_per_second": 474.673, "eval_steps_per_second": 29.667, "step": 1968000 }, { "epoch": 4.16, "eval_loss": 1.9821213483810425, "eval_runtime": 841.3164, "eval_samples_per_second": 475.445, "eval_steps_per_second": 29.715, "step": 1976000 }, { "epoch": 4.18, "learning_rate": 7.106666666666667e-08, "loss": 2.118, "step": 1984000 }, { "epoch": 4.18, "eval_loss": 1.9766249656677246, "eval_runtime": 839.8385, "eval_samples_per_second": 476.282, "eval_steps_per_second": 29.768, "step": 1984000 }, { "epoch": 4.19, "eval_loss": 1.9779284000396729, "eval_runtime": 841.2514, "eval_samples_per_second": 475.482, "eval_steps_per_second": 29.718, "step": 1992000 }, { "epoch": 4.21, "learning_rate": 6.833333333333332e-08, "loss": 2.1188, "step": 2000000 }, { "epoch": 4.21, "eval_loss": 1.970005750656128, "eval_runtime": 843.0319, "eval_samples_per_second": 474.478, "eval_steps_per_second": 29.655, "step": 2000000 }, { "epoch": 4.23, "eval_loss": 1.9782909154891968, "eval_runtime": 843.121, "eval_samples_per_second": 474.428, "eval_steps_per_second": 29.652, "step": 2008000 }, { "epoch": 4.24, "learning_rate": 6.56e-08, "loss": 2.1207, "step": 2016000 }, { "epoch": 4.24, "eval_loss": 1.9743865728378296, "eval_runtime": 842.0723, "eval_samples_per_second": 475.019, "eval_steps_per_second": 29.689, "step": 2016000 }, { "epoch": 4.26, "eval_loss": 1.9799559116363525, "eval_runtime": 841.052, "eval_samples_per_second": 475.595, "eval_steps_per_second": 29.725, "step": 2024000 }, { "epoch": 4.28, "learning_rate": 6.286666666666666e-08, "loss": 2.1181, "step": 2032000 }, { "epoch": 4.28, "eval_loss": 1.9768708944320679, "eval_runtime": 840.6632, "eval_samples_per_second": 475.815, "eval_steps_per_second": 29.738, "step": 2032000 }, { "epoch": 4.29, "eval_loss": 1.9769923686981201, "eval_runtime": 844.3041, "eval_samples_per_second": 473.763, "eval_steps_per_second": 29.61, "step": 2040000 }, { "epoch": 4.31, "learning_rate": 6.013333333333333e-08, "loss": 2.1219, "step": 2048000 }, { "epoch": 4.31, "eval_loss": 1.9744817018508911, "eval_runtime": 841.6997, "eval_samples_per_second": 475.229, "eval_steps_per_second": 29.702, "step": 2048000 }, { "epoch": 4.33, "eval_loss": 1.9719310998916626, "eval_runtime": 843.9683, "eval_samples_per_second": 473.951, "eval_steps_per_second": 29.622, "step": 2056000 }, { "epoch": 4.35, "learning_rate": 5.7400000000000004e-08, "loss": 2.1264, "step": 2064000 }, { "epoch": 4.35, "eval_loss": 1.9765992164611816, "eval_runtime": 843.4371, "eval_samples_per_second": 474.25, "eval_steps_per_second": 29.641, "step": 2064000 }, { "epoch": 4.36, "eval_loss": 1.9752711057662964, "eval_runtime": 843.5201, "eval_samples_per_second": 474.203, "eval_steps_per_second": 29.638, "step": 2072000 }, { "epoch": 4.38, "learning_rate": 5.4666666666666666e-08, "loss": 2.1188, "step": 2080000 }, { "epoch": 4.38, "eval_loss": 1.9752399921417236, "eval_runtime": 842.1885, "eval_samples_per_second": 474.953, "eval_steps_per_second": 29.685, "step": 2080000 }, { "epoch": 4.4, "eval_loss": 1.97865891456604, "eval_runtime": 842.5772, "eval_samples_per_second": 474.734, "eval_steps_per_second": 29.671, "step": 2088000 }, { "epoch": 4.41, "learning_rate": 5.1933333333333335e-08, "loss": 2.1132, "step": 2096000 }, { "epoch": 4.41, "eval_loss": 1.9755377769470215, "eval_runtime": 843.2787, "eval_samples_per_second": 474.339, "eval_steps_per_second": 29.646, "step": 2096000 }, { "epoch": 4.43, "eval_loss": 1.982397198677063, "eval_runtime": 845.3821, "eval_samples_per_second": 473.159, "eval_steps_per_second": 29.572, "step": 2104000 }, { "epoch": 4.45, "learning_rate": 4.92e-08, "loss": 2.1284, "step": 2112000 }, { "epoch": 4.45, "eval_loss": 1.9787956476211548, "eval_runtime": 844.0963, "eval_samples_per_second": 473.88, "eval_steps_per_second": 29.617, "step": 2112000 }, { "epoch": 4.46, "eval_loss": 1.9768098592758179, "eval_runtime": 847.082, "eval_samples_per_second": 472.209, "eval_steps_per_second": 29.513, "step": 2120000 }, { "epoch": 4.48, "learning_rate": 4.6466666666666666e-08, "loss": 2.1197, "step": 2128000 }, { "epoch": 4.48, "eval_loss": 1.979955792427063, "eval_runtime": 844.8654, "eval_samples_per_second": 473.448, "eval_steps_per_second": 29.591, "step": 2128000 }, { "epoch": 4.5, "eval_loss": 1.9770689010620117, "eval_runtime": 847.4301, "eval_samples_per_second": 472.015, "eval_steps_per_second": 29.501, "step": 2136000 }, { "epoch": 4.51, "learning_rate": 4.3733333333333335e-08, "loss": 2.1208, "step": 2144000 }, { "epoch": 4.51, "eval_loss": 1.9768630266189575, "eval_runtime": 856.5896, "eval_samples_per_second": 466.968, "eval_steps_per_second": 29.186, "step": 2144000 }, { "epoch": 4.53, "eval_loss": 1.9769912958145142, "eval_runtime": 854.5273, "eval_samples_per_second": 468.095, "eval_steps_per_second": 29.256, "step": 2152000 }, { "epoch": 4.55, "learning_rate": 4.1e-08, "loss": 2.1174, "step": 2160000 }, { "epoch": 4.55, "eval_loss": 1.9727282524108887, "eval_runtime": 846.4451, "eval_samples_per_second": 472.565, "eval_steps_per_second": 29.535, "step": 2160000 }, { "epoch": 4.56, "eval_loss": 1.9771779775619507, "eval_runtime": 849.3361, "eval_samples_per_second": 470.956, "eval_steps_per_second": 29.435, "step": 2168000 }, { "epoch": 4.58, "learning_rate": 3.8266666666666665e-08, "loss": 2.1222, "step": 2176000 }, { "epoch": 4.58, "eval_loss": 1.9708876609802246, "eval_runtime": 848.9168, "eval_samples_per_second": 471.189, "eval_steps_per_second": 29.449, "step": 2176000 }, { "epoch": 4.6, "eval_loss": 1.9767884016036987, "eval_runtime": 847.0221, "eval_samples_per_second": 472.243, "eval_steps_per_second": 29.515, "step": 2184000 }, { "epoch": 4.61, "learning_rate": 3.5533333333333334e-08, "loss": 2.1306, "step": 2192000 }, { "epoch": 4.61, "eval_loss": 1.9720683097839355, "eval_runtime": 848.1207, "eval_samples_per_second": 471.631, "eval_steps_per_second": 29.477, "step": 2192000 }, { "epoch": 4.63, "eval_loss": 1.97303307056427, "eval_runtime": 847.196, "eval_samples_per_second": 472.146, "eval_steps_per_second": 29.509, "step": 2200000 }, { "epoch": 4.65, "learning_rate": 3.28e-08, "loss": 2.1224, "step": 2208000 }, { "epoch": 4.65, "eval_loss": 1.9755624532699585, "eval_runtime": 843.71, "eval_samples_per_second": 474.097, "eval_steps_per_second": 29.631, "step": 2208000 }, { "epoch": 4.67, "eval_loss": 1.9702823162078857, "eval_runtime": 846.8549, "eval_samples_per_second": 472.336, "eval_steps_per_second": 29.521, "step": 2216000 }, { "epoch": 4.68, "learning_rate": 3.0066666666666665e-08, "loss": 2.1317, "step": 2224000 }, { "epoch": 4.68, "eval_loss": 1.9788029193878174, "eval_runtime": 848.3806, "eval_samples_per_second": 471.487, "eval_steps_per_second": 29.468, "step": 2224000 }, { "epoch": 4.7, "eval_loss": 1.97597074508667, "eval_runtime": 844.36, "eval_samples_per_second": 473.732, "eval_steps_per_second": 29.608, "step": 2232000 }, { "epoch": 4.72, "learning_rate": 2.7333333333333333e-08, "loss": 2.1215, "step": 2240000 }, { "epoch": 4.72, "eval_loss": 1.9794765710830688, "eval_runtime": 846.527, "eval_samples_per_second": 472.519, "eval_steps_per_second": 29.532, "step": 2240000 }, { "epoch": 4.73, "eval_loss": 1.9747477769851685, "eval_runtime": 844.7038, "eval_samples_per_second": 473.539, "eval_steps_per_second": 29.596, "step": 2248000 }, { "epoch": 4.75, "learning_rate": 2.46e-08, "loss": 2.1093, "step": 2256000 }, { "epoch": 4.75, "eval_loss": 1.9798070192337036, "eval_runtime": 847.6533, "eval_samples_per_second": 471.891, "eval_steps_per_second": 29.493, "step": 2256000 }, { "epoch": 4.77, "eval_loss": 1.973381519317627, "eval_runtime": 845.2193, "eval_samples_per_second": 473.25, "eval_steps_per_second": 29.578, "step": 2264000 }, { "epoch": 4.78, "learning_rate": 2.1866666666666667e-08, "loss": 2.1168, "step": 2272000 }, { "epoch": 4.78, "eval_loss": 1.9769169092178345, "eval_runtime": 845.4278, "eval_samples_per_second": 473.133, "eval_steps_per_second": 29.571, "step": 2272000 }, { "epoch": 4.8, "eval_loss": 1.9767026901245117, "eval_runtime": 848.2554, "eval_samples_per_second": 471.556, "eval_steps_per_second": 29.472, "step": 2280000 }, { "epoch": 4.82, "learning_rate": 1.9133333333333333e-08, "loss": 2.1209, "step": 2288000 }, { "epoch": 4.82, "eval_loss": 1.9757850170135498, "eval_runtime": 846.8397, "eval_samples_per_second": 472.344, "eval_steps_per_second": 29.522, "step": 2288000 }, { "epoch": 4.83, "eval_loss": 1.9793760776519775, "eval_runtime": 847.2624, "eval_samples_per_second": 472.109, "eval_steps_per_second": 29.507, "step": 2296000 }, { "epoch": 4.85, "learning_rate": 1.64e-08, "loss": 2.1295, "step": 2304000 }, { "epoch": 4.85, "eval_loss": 1.9805538654327393, "eval_runtime": 845.0584, "eval_samples_per_second": 473.34, "eval_steps_per_second": 29.584, "step": 2304000 }, { "epoch": 4.87, "eval_loss": 1.9777742624282837, "eval_runtime": 847.5576, "eval_samples_per_second": 471.944, "eval_steps_per_second": 29.497, "step": 2312000 }, { "epoch": 4.88, "learning_rate": 1.3666666666666667e-08, "loss": 2.1095, "step": 2320000 }, { "epoch": 4.88, "eval_loss": 1.973996877670288, "eval_runtime": 848.2426, "eval_samples_per_second": 471.563, "eval_steps_per_second": 29.473, "step": 2320000 }, { "epoch": 4.9, "eval_loss": 1.9752956628799438, "eval_runtime": 844.2546, "eval_samples_per_second": 473.791, "eval_steps_per_second": 29.612, "step": 2328000 }, { "epoch": 4.92, "learning_rate": 1.0933333333333334e-08, "loss": 2.1141, "step": 2336000 }, { "epoch": 4.92, "eval_loss": 1.9768184423446655, "eval_runtime": 841.2335, "eval_samples_per_second": 475.492, "eval_steps_per_second": 29.718, "step": 2336000 }, { "epoch": 4.93, "eval_loss": 1.9744023084640503, "eval_runtime": 870.0719, "eval_samples_per_second": 459.732, "eval_steps_per_second": 28.733, "step": 2344000 }, { "epoch": 4.95, "learning_rate": 8.2e-09, "loss": 2.1208, "step": 2352000 }, { "epoch": 4.95, "eval_loss": 1.9785023927688599, "eval_runtime": 884.0259, "eval_samples_per_second": 452.475, "eval_steps_per_second": 28.28, "step": 2352000 }, { "epoch": 4.97, "eval_loss": 1.982853651046753, "eval_runtime": 879.2318, "eval_samples_per_second": 454.943, "eval_steps_per_second": 28.434, "step": 2360000 }, { "epoch": 4.99, "learning_rate": 5.466666666666667e-09, "loss": 2.1257, "step": 2368000 }, { "epoch": 4.99, "eval_loss": 1.9744242429733276, "eval_runtime": 880.509, "eval_samples_per_second": 454.283, "eval_steps_per_second": 28.393, "step": 2368000 }, { "epoch": 5.0, "eval_loss": 1.9828633069992065, "eval_runtime": 881.6057, "eval_samples_per_second": 453.718, "eval_steps_per_second": 28.357, "step": 2376000 }, { "epoch": 5.02, "learning_rate": 2.7333333333333334e-09, "loss": 2.1202, "step": 2384000 }, { "epoch": 5.02, "eval_loss": 1.972878336906433, "eval_runtime": 879.2288, "eval_samples_per_second": 454.944, "eval_steps_per_second": 28.434, "step": 2384000 }, { "epoch": 5.04, "eval_loss": 1.9803715944290161, "eval_runtime": 885.3821, "eval_samples_per_second": 451.782, "eval_steps_per_second": 28.236, "step": 2392000 }, { "epoch": 5.05, "learning_rate": 0.0, "loss": 2.1221, "step": 2400000 }, { "epoch": 5.05, "eval_loss": 1.9802659749984741, "eval_runtime": 882.1976, "eval_samples_per_second": 453.413, "eval_steps_per_second": 28.338, "step": 2400000 }, { "epoch": 5.05, "step": 2400000, "total_flos": 7.576530301685805e+17, "train_loss": 2.127910384114583, "train_runtime": 518149.7655, "train_samples_per_second": 74.11, "train_steps_per_second": 4.632 } ], "logging_steps": 16000, "max_steps": 2400000, "num_train_epochs": 6, "save_steps": 32000, "total_flos": 7.576530301685805e+17, "trial_name": null, "trial_params": null }