|
{ |
|
"best_metric": 3.23770809173584, |
|
"best_model_checkpoint": "./model_tweets_2020_Q1_90/checkpoint-128000", |
|
"epoch": 49.171259398881354, |
|
"eval_steps": 8000, |
|
"global_step": 2400000, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.16, |
|
"eval_loss": 3.4494731426239014, |
|
"eval_runtime": 46.3964, |
|
"eval_samples_per_second": 885.888, |
|
"eval_steps_per_second": 55.371, |
|
"step": 8000 |
|
}, |
|
{ |
|
"epoch": 0.33, |
|
"learning_rate": 9.939131159843243e-06, |
|
"loss": 3.5684, |
|
"step": 16000 |
|
}, |
|
{ |
|
"epoch": 0.33, |
|
"eval_loss": 3.416630744934082, |
|
"eval_runtime": 46.3565, |
|
"eval_samples_per_second": 886.65, |
|
"eval_steps_per_second": 55.418, |
|
"step": 16000 |
|
}, |
|
{ |
|
"epoch": 0.49, |
|
"eval_loss": 3.3847219944000244, |
|
"eval_runtime": 47.2297, |
|
"eval_samples_per_second": 870.258, |
|
"eval_steps_per_second": 54.394, |
|
"step": 24000 |
|
}, |
|
{ |
|
"epoch": 0.66, |
|
"learning_rate": 9.872425581589261e-06, |
|
"loss": 3.3755, |
|
"step": 32000 |
|
}, |
|
{ |
|
"epoch": 0.66, |
|
"eval_loss": 3.3664660453796387, |
|
"eval_runtime": 47.0495, |
|
"eval_samples_per_second": 873.591, |
|
"eval_steps_per_second": 54.602, |
|
"step": 32000 |
|
}, |
|
{ |
|
"epoch": 0.82, |
|
"eval_loss": 3.3654134273529053, |
|
"eval_runtime": 46.3932, |
|
"eval_samples_per_second": 885.949, |
|
"eval_steps_per_second": 55.374, |
|
"step": 40000 |
|
}, |
|
{ |
|
"epoch": 0.98, |
|
"learning_rate": 9.80572000333528e-06, |
|
"loss": 3.3533, |
|
"step": 48000 |
|
}, |
|
{ |
|
"epoch": 0.98, |
|
"eval_loss": 3.3654167652130127, |
|
"eval_runtime": 46.5322, |
|
"eval_samples_per_second": 883.301, |
|
"eval_steps_per_second": 55.209, |
|
"step": 48000 |
|
}, |
|
{ |
|
"epoch": 1.15, |
|
"eval_loss": 3.332759380340576, |
|
"eval_runtime": 46.4492, |
|
"eval_samples_per_second": 884.88, |
|
"eval_steps_per_second": 55.308, |
|
"step": 56000 |
|
}, |
|
{ |
|
"epoch": 1.31, |
|
"learning_rate": 9.739014425081299e-06, |
|
"loss": 3.3014, |
|
"step": 64000 |
|
}, |
|
{ |
|
"epoch": 1.31, |
|
"eval_loss": 3.3209590911865234, |
|
"eval_runtime": 45.8973, |
|
"eval_samples_per_second": 895.521, |
|
"eval_steps_per_second": 55.973, |
|
"step": 64000 |
|
}, |
|
{ |
|
"epoch": 1.48, |
|
"eval_loss": 3.3491690158843994, |
|
"eval_runtime": 46.3252, |
|
"eval_samples_per_second": 887.249, |
|
"eval_steps_per_second": 55.456, |
|
"step": 72000 |
|
}, |
|
{ |
|
"epoch": 1.64, |
|
"learning_rate": 9.672308846827316e-06, |
|
"loss": 3.2888, |
|
"step": 80000 |
|
}, |
|
{ |
|
"epoch": 1.64, |
|
"eval_loss": 3.3213465213775635, |
|
"eval_runtime": 45.915, |
|
"eval_samples_per_second": 895.177, |
|
"eval_steps_per_second": 55.951, |
|
"step": 80000 |
|
}, |
|
{ |
|
"epoch": 1.8, |
|
"eval_loss": 3.2708065509796143, |
|
"eval_runtime": 45.9723, |
|
"eval_samples_per_second": 894.061, |
|
"eval_steps_per_second": 55.882, |
|
"step": 88000 |
|
}, |
|
{ |
|
"epoch": 1.97, |
|
"learning_rate": 9.605603268573334e-06, |
|
"loss": 3.2609, |
|
"step": 96000 |
|
}, |
|
{ |
|
"epoch": 1.97, |
|
"eval_loss": 3.290764808654785, |
|
"eval_runtime": 46.6916, |
|
"eval_samples_per_second": 880.287, |
|
"eval_steps_per_second": 55.021, |
|
"step": 96000 |
|
}, |
|
{ |
|
"epoch": 2.13, |
|
"eval_loss": 3.2766778469085693, |
|
"eval_runtime": 45.6527, |
|
"eval_samples_per_second": 900.318, |
|
"eval_steps_per_second": 56.273, |
|
"step": 104000 |
|
}, |
|
{ |
|
"epoch": 2.29, |
|
"learning_rate": 9.538897690319354e-06, |
|
"loss": 3.2159, |
|
"step": 112000 |
|
}, |
|
{ |
|
"epoch": 2.29, |
|
"eval_loss": 3.259241819381714, |
|
"eval_runtime": 45.9077, |
|
"eval_samples_per_second": 895.319, |
|
"eval_steps_per_second": 55.96, |
|
"step": 112000 |
|
}, |
|
{ |
|
"epoch": 2.46, |
|
"eval_loss": 3.2411258220672607, |
|
"eval_runtime": 46.8974, |
|
"eval_samples_per_second": 876.424, |
|
"eval_steps_per_second": 54.779, |
|
"step": 120000 |
|
}, |
|
{ |
|
"epoch": 2.62, |
|
"learning_rate": 9.472192112065373e-06, |
|
"loss": 3.2167, |
|
"step": 128000 |
|
}, |
|
{ |
|
"epoch": 2.62, |
|
"eval_loss": 3.23770809173584, |
|
"eval_runtime": 46.0285, |
|
"eval_samples_per_second": 892.969, |
|
"eval_steps_per_second": 55.813, |
|
"step": 128000 |
|
}, |
|
{ |
|
"epoch": 2.79, |
|
"eval_loss": 3.2485291957855225, |
|
"eval_runtime": 46.313, |
|
"eval_samples_per_second": 887.483, |
|
"eval_steps_per_second": 55.47, |
|
"step": 136000 |
|
}, |
|
{ |
|
"epoch": 2.95, |
|
"learning_rate": 9.405486533811392e-06, |
|
"loss": 3.199, |
|
"step": 144000 |
|
}, |
|
{ |
|
"epoch": 2.95, |
|
"eval_loss": 3.2608513832092285, |
|
"eval_runtime": 46.3737, |
|
"eval_samples_per_second": 886.322, |
|
"eval_steps_per_second": 55.398, |
|
"step": 144000 |
|
}, |
|
{ |
|
"epoch": 3.11, |
|
"eval_loss": 3.2552711963653564, |
|
"eval_runtime": 45.7073, |
|
"eval_samples_per_second": 899.243, |
|
"eval_steps_per_second": 56.205, |
|
"step": 152000 |
|
}, |
|
{ |
|
"epoch": 3.28, |
|
"learning_rate": 9.338780955557409e-06, |
|
"loss": 3.1905, |
|
"step": 160000 |
|
}, |
|
{ |
|
"epoch": 3.28, |
|
"eval_loss": 3.2425193786621094, |
|
"eval_runtime": 46.3189, |
|
"eval_samples_per_second": 887.37, |
|
"eval_steps_per_second": 55.463, |
|
"step": 160000 |
|
}, |
|
{ |
|
"epoch": 3.44, |
|
"eval_loss": 3.2421696186065674, |
|
"eval_runtime": 46.3489, |
|
"eval_samples_per_second": 886.796, |
|
"eval_steps_per_second": 55.427, |
|
"step": 168000 |
|
}, |
|
{ |
|
"epoch": 3.61, |
|
"learning_rate": 9.272075377303427e-06, |
|
"loss": 3.1822, |
|
"step": 176000 |
|
}, |
|
{ |
|
"epoch": 3.61, |
|
"eval_loss": 3.262392997741699, |
|
"eval_runtime": 46.6763, |
|
"eval_samples_per_second": 880.575, |
|
"eval_steps_per_second": 55.039, |
|
"step": 176000 |
|
}, |
|
{ |
|
"epoch": 3.77, |
|
"eval_loss": 3.2507119178771973, |
|
"eval_runtime": 46.8277, |
|
"eval_samples_per_second": 877.728, |
|
"eval_steps_per_second": 54.861, |
|
"step": 184000 |
|
}, |
|
{ |
|
"epoch": 3.93, |
|
"learning_rate": 9.205369799049446e-06, |
|
"loss": 3.1852, |
|
"step": 192000 |
|
}, |
|
{ |
|
"epoch": 3.93, |
|
"eval_loss": 3.2483315467834473, |
|
"eval_runtime": 45.7607, |
|
"eval_samples_per_second": 898.195, |
|
"eval_steps_per_second": 56.14, |
|
"step": 192000 |
|
}, |
|
{ |
|
"epoch": 4.1, |
|
"eval_loss": 3.251424789428711, |
|
"eval_runtime": 46.3642, |
|
"eval_samples_per_second": 886.503, |
|
"eval_steps_per_second": 55.409, |
|
"step": 200000 |
|
}, |
|
{ |
|
"epoch": 4.26, |
|
"learning_rate": 9.138664220795464e-06, |
|
"loss": 3.1767, |
|
"step": 208000 |
|
}, |
|
{ |
|
"epoch": 4.26, |
|
"eval_loss": 3.242562770843506, |
|
"eval_runtime": 46.886, |
|
"eval_samples_per_second": 876.637, |
|
"eval_steps_per_second": 54.792, |
|
"step": 208000 |
|
}, |
|
{ |
|
"epoch": 4.43, |
|
"eval_loss": 3.234778642654419, |
|
"eval_runtime": 46.4949, |
|
"eval_samples_per_second": 884.01, |
|
"eval_steps_per_second": 55.253, |
|
"step": 216000 |
|
}, |
|
{ |
|
"epoch": 4.59, |
|
"learning_rate": 9.071958642541483e-06, |
|
"loss": 3.1767, |
|
"step": 224000 |
|
}, |
|
{ |
|
"epoch": 4.59, |
|
"eval_loss": 3.2734625339508057, |
|
"eval_runtime": 46.0486, |
|
"eval_samples_per_second": 892.58, |
|
"eval_steps_per_second": 55.789, |
|
"step": 224000 |
|
}, |
|
{ |
|
"epoch": 4.75, |
|
"eval_loss": 3.2471694946289062, |
|
"eval_runtime": 46.5054, |
|
"eval_samples_per_second": 883.811, |
|
"eval_steps_per_second": 55.241, |
|
"step": 232000 |
|
}, |
|
{ |
|
"epoch": 4.92, |
|
"learning_rate": 9.005253064287502e-06, |
|
"loss": 3.1973, |
|
"step": 240000 |
|
}, |
|
{ |
|
"epoch": 4.92, |
|
"eval_loss": 3.259644031524658, |
|
"eval_runtime": 45.8405, |
|
"eval_samples_per_second": 896.631, |
|
"eval_steps_per_second": 56.042, |
|
"step": 240000 |
|
}, |
|
{ |
|
"epoch": 5.08, |
|
"eval_loss": 3.2605602741241455, |
|
"eval_runtime": 45.4485, |
|
"eval_samples_per_second": 904.365, |
|
"eval_steps_per_second": 56.526, |
|
"step": 248000 |
|
}, |
|
{ |
|
"epoch": 5.24, |
|
"learning_rate": 8.93854748603352e-06, |
|
"loss": 3.1781, |
|
"step": 256000 |
|
}, |
|
{ |
|
"epoch": 5.24, |
|
"eval_loss": 3.281527519226074, |
|
"eval_runtime": 46.3336, |
|
"eval_samples_per_second": 887.089, |
|
"eval_steps_per_second": 55.446, |
|
"step": 256000 |
|
}, |
|
{ |
|
"epoch": 5.41, |
|
"eval_loss": 3.273421049118042, |
|
"eval_runtime": 45.4558, |
|
"eval_samples_per_second": 904.218, |
|
"eval_steps_per_second": 56.516, |
|
"step": 264000 |
|
}, |
|
{ |
|
"epoch": 5.57, |
|
"learning_rate": 8.871841907779539e-06, |
|
"loss": 3.1803, |
|
"step": 272000 |
|
}, |
|
{ |
|
"epoch": 5.57, |
|
"eval_loss": 3.2739477157592773, |
|
"eval_runtime": 45.6455, |
|
"eval_samples_per_second": 900.462, |
|
"eval_steps_per_second": 56.282, |
|
"step": 272000 |
|
}, |
|
{ |
|
"epoch": 5.74, |
|
"eval_loss": 3.2712481021881104, |
|
"eval_runtime": 46.5973, |
|
"eval_samples_per_second": 882.068, |
|
"eval_steps_per_second": 55.132, |
|
"step": 280000 |
|
}, |
|
{ |
|
"epoch": 5.9, |
|
"learning_rate": 8.805136329525557e-06, |
|
"loss": 3.1989, |
|
"step": 288000 |
|
}, |
|
{ |
|
"epoch": 5.9, |
|
"eval_loss": 3.273439884185791, |
|
"eval_runtime": 46.318, |
|
"eval_samples_per_second": 887.387, |
|
"eval_steps_per_second": 55.464, |
|
"step": 288000 |
|
}, |
|
{ |
|
"epoch": 6.06, |
|
"eval_loss": 3.293893814086914, |
|
"eval_runtime": 45.8003, |
|
"eval_samples_per_second": 897.418, |
|
"eval_steps_per_second": 56.091, |
|
"step": 296000 |
|
}, |
|
{ |
|
"epoch": 6.23, |
|
"learning_rate": 8.738430751271576e-06, |
|
"loss": 3.1929, |
|
"step": 304000 |
|
}, |
|
{ |
|
"epoch": 6.23, |
|
"eval_loss": 3.288043737411499, |
|
"eval_runtime": 46.6462, |
|
"eval_samples_per_second": 881.144, |
|
"eval_steps_per_second": 55.074, |
|
"step": 304000 |
|
}, |
|
{ |
|
"epoch": 6.39, |
|
"eval_loss": 3.289358139038086, |
|
"eval_runtime": 45.8512, |
|
"eval_samples_per_second": 896.422, |
|
"eval_steps_per_second": 56.029, |
|
"step": 312000 |
|
}, |
|
{ |
|
"epoch": 6.56, |
|
"learning_rate": 8.671725173017595e-06, |
|
"loss": 3.2083, |
|
"step": 320000 |
|
}, |
|
{ |
|
"epoch": 6.56, |
|
"eval_loss": 3.308645725250244, |
|
"eval_runtime": 46.2317, |
|
"eval_samples_per_second": 889.043, |
|
"eval_steps_per_second": 55.568, |
|
"step": 320000 |
|
}, |
|
{ |
|
"epoch": 6.72, |
|
"eval_loss": 3.3066623210906982, |
|
"eval_runtime": 46.8669, |
|
"eval_samples_per_second": 876.995, |
|
"eval_steps_per_second": 54.815, |
|
"step": 328000 |
|
}, |
|
{ |
|
"epoch": 6.88, |
|
"learning_rate": 8.605019594763613e-06, |
|
"loss": 3.2013, |
|
"step": 336000 |
|
}, |
|
{ |
|
"epoch": 6.88, |
|
"eval_loss": 3.278655529022217, |
|
"eval_runtime": 45.904, |
|
"eval_samples_per_second": 895.391, |
|
"eval_steps_per_second": 55.965, |
|
"step": 336000 |
|
}, |
|
{ |
|
"epoch": 7.05, |
|
"eval_loss": 3.3152964115142822, |
|
"eval_runtime": 46.5312, |
|
"eval_samples_per_second": 883.322, |
|
"eval_steps_per_second": 55.21, |
|
"step": 344000 |
|
}, |
|
{ |
|
"epoch": 7.21, |
|
"learning_rate": 8.538314016509632e-06, |
|
"loss": 3.2111, |
|
"step": 352000 |
|
}, |
|
{ |
|
"epoch": 7.21, |
|
"eval_loss": 3.3246278762817383, |
|
"eval_runtime": 46.7247, |
|
"eval_samples_per_second": 879.664, |
|
"eval_steps_per_second": 54.982, |
|
"step": 352000 |
|
}, |
|
{ |
|
"epoch": 7.38, |
|
"eval_loss": 3.3322579860687256, |
|
"eval_runtime": 45.9989, |
|
"eval_samples_per_second": 893.543, |
|
"eval_steps_per_second": 55.849, |
|
"step": 360000 |
|
}, |
|
{ |
|
"epoch": 7.54, |
|
"learning_rate": 8.471608438255649e-06, |
|
"loss": 3.2186, |
|
"step": 368000 |
|
}, |
|
{ |
|
"epoch": 7.54, |
|
"eval_loss": 3.2938337326049805, |
|
"eval_runtime": 46.144, |
|
"eval_samples_per_second": 890.734, |
|
"eval_steps_per_second": 55.674, |
|
"step": 368000 |
|
}, |
|
{ |
|
"epoch": 7.7, |
|
"eval_loss": 3.3499817848205566, |
|
"eval_runtime": 45.582, |
|
"eval_samples_per_second": 901.717, |
|
"eval_steps_per_second": 56.36, |
|
"step": 376000 |
|
}, |
|
{ |
|
"epoch": 7.87, |
|
"learning_rate": 8.404902860001667e-06, |
|
"loss": 3.2268, |
|
"step": 384000 |
|
}, |
|
{ |
|
"epoch": 7.87, |
|
"eval_loss": 3.3179759979248047, |
|
"eval_runtime": 45.2091, |
|
"eval_samples_per_second": 909.153, |
|
"eval_steps_per_second": 56.825, |
|
"step": 384000 |
|
}, |
|
{ |
|
"epoch": 8.03, |
|
"eval_loss": 3.3171069622039795, |
|
"eval_runtime": 46.0196, |
|
"eval_samples_per_second": 893.141, |
|
"eval_steps_per_second": 55.824, |
|
"step": 392000 |
|
}, |
|
{ |
|
"epoch": 8.2, |
|
"learning_rate": 8.338197281747686e-06, |
|
"loss": 3.233, |
|
"step": 400000 |
|
}, |
|
{ |
|
"epoch": 8.2, |
|
"eval_loss": 3.3461642265319824, |
|
"eval_runtime": 45.5487, |
|
"eval_samples_per_second": 902.375, |
|
"eval_steps_per_second": 56.401, |
|
"step": 400000 |
|
}, |
|
{ |
|
"epoch": 8.36, |
|
"eval_loss": 3.341256618499756, |
|
"eval_runtime": 45.4264, |
|
"eval_samples_per_second": 904.804, |
|
"eval_steps_per_second": 56.553, |
|
"step": 408000 |
|
}, |
|
{ |
|
"epoch": 8.52, |
|
"learning_rate": 8.271491703493705e-06, |
|
"loss": 3.2432, |
|
"step": 416000 |
|
}, |
|
{ |
|
"epoch": 8.52, |
|
"eval_loss": 3.328122615814209, |
|
"eval_runtime": 45.9787, |
|
"eval_samples_per_second": 893.936, |
|
"eval_steps_per_second": 55.874, |
|
"step": 416000 |
|
}, |
|
{ |
|
"epoch": 8.69, |
|
"eval_loss": 3.342041492462158, |
|
"eval_runtime": 45.4274, |
|
"eval_samples_per_second": 904.784, |
|
"eval_steps_per_second": 56.552, |
|
"step": 424000 |
|
}, |
|
{ |
|
"epoch": 8.85, |
|
"learning_rate": 8.204786125239725e-06, |
|
"loss": 3.2586, |
|
"step": 432000 |
|
}, |
|
{ |
|
"epoch": 8.85, |
|
"eval_loss": 3.3609066009521484, |
|
"eval_runtime": 45.3913, |
|
"eval_samples_per_second": 905.504, |
|
"eval_steps_per_second": 56.597, |
|
"step": 432000 |
|
}, |
|
{ |
|
"epoch": 9.01, |
|
"eval_loss": 3.352691173553467, |
|
"eval_runtime": 46.0515, |
|
"eval_samples_per_second": 892.522, |
|
"eval_steps_per_second": 55.785, |
|
"step": 440000 |
|
}, |
|
{ |
|
"epoch": 9.18, |
|
"learning_rate": 8.138080546985743e-06, |
|
"loss": 3.2567, |
|
"step": 448000 |
|
}, |
|
{ |
|
"epoch": 9.18, |
|
"eval_loss": 3.359393358230591, |
|
"eval_runtime": 45.57, |
|
"eval_samples_per_second": 901.953, |
|
"eval_steps_per_second": 56.375, |
|
"step": 448000 |
|
}, |
|
{ |
|
"epoch": 9.34, |
|
"eval_loss": 3.3497443199157715, |
|
"eval_runtime": 45.4208, |
|
"eval_samples_per_second": 904.915, |
|
"eval_steps_per_second": 56.56, |
|
"step": 456000 |
|
}, |
|
{ |
|
"epoch": 9.51, |
|
"learning_rate": 8.07137496873176e-06, |
|
"loss": 3.2592, |
|
"step": 464000 |
|
}, |
|
{ |
|
"epoch": 9.51, |
|
"eval_loss": 3.3606550693511963, |
|
"eval_runtime": 46.15, |
|
"eval_samples_per_second": 890.617, |
|
"eval_steps_per_second": 55.666, |
|
"step": 464000 |
|
}, |
|
{ |
|
"epoch": 9.67, |
|
"eval_loss": 3.3839540481567383, |
|
"eval_runtime": 45.5702, |
|
"eval_samples_per_second": 901.95, |
|
"eval_steps_per_second": 56.375, |
|
"step": 472000 |
|
}, |
|
{ |
|
"epoch": 9.83, |
|
"learning_rate": 8.004669390477779e-06, |
|
"loss": 3.2793, |
|
"step": 480000 |
|
}, |
|
{ |
|
"epoch": 9.83, |
|
"eval_loss": 3.366785764694214, |
|
"eval_runtime": 45.749, |
|
"eval_samples_per_second": 898.424, |
|
"eval_steps_per_second": 56.154, |
|
"step": 480000 |
|
}, |
|
{ |
|
"epoch": 10.0, |
|
"eval_loss": 3.3609416484832764, |
|
"eval_runtime": 47.1383, |
|
"eval_samples_per_second": 871.945, |
|
"eval_steps_per_second": 54.499, |
|
"step": 488000 |
|
}, |
|
{ |
|
"epoch": 10.16, |
|
"learning_rate": 7.937963812223798e-06, |
|
"loss": 3.257, |
|
"step": 496000 |
|
}, |
|
{ |
|
"epoch": 10.16, |
|
"eval_loss": 3.368229389190674, |
|
"eval_runtime": 45.5778, |
|
"eval_samples_per_second": 901.798, |
|
"eval_steps_per_second": 56.365, |
|
"step": 496000 |
|
}, |
|
{ |
|
"epoch": 10.33, |
|
"eval_loss": 3.4005918502807617, |
|
"eval_runtime": 46.5843, |
|
"eval_samples_per_second": 882.314, |
|
"eval_steps_per_second": 55.147, |
|
"step": 504000 |
|
}, |
|
{ |
|
"epoch": 10.49, |
|
"learning_rate": 7.871258233969816e-06, |
|
"loss": 3.2656, |
|
"step": 512000 |
|
}, |
|
{ |
|
"epoch": 10.49, |
|
"eval_loss": 3.358835220336914, |
|
"eval_runtime": 46.2545, |
|
"eval_samples_per_second": 888.605, |
|
"eval_steps_per_second": 55.541, |
|
"step": 512000 |
|
}, |
|
{ |
|
"epoch": 10.65, |
|
"eval_loss": 3.379861831665039, |
|
"eval_runtime": 45.613, |
|
"eval_samples_per_second": 901.103, |
|
"eval_steps_per_second": 56.322, |
|
"step": 520000 |
|
}, |
|
{ |
|
"epoch": 10.82, |
|
"learning_rate": 7.804552655715835e-06, |
|
"loss": 3.2727, |
|
"step": 528000 |
|
}, |
|
{ |
|
"epoch": 10.82, |
|
"eval_loss": 3.383315086364746, |
|
"eval_runtime": 46.0041, |
|
"eval_samples_per_second": 893.442, |
|
"eval_steps_per_second": 55.843, |
|
"step": 528000 |
|
}, |
|
{ |
|
"epoch": 10.98, |
|
"eval_loss": 3.356590747833252, |
|
"eval_runtime": 45.9202, |
|
"eval_samples_per_second": 895.074, |
|
"eval_steps_per_second": 55.945, |
|
"step": 536000 |
|
}, |
|
{ |
|
"epoch": 11.15, |
|
"learning_rate": 7.737847077461853e-06, |
|
"loss": 3.2705, |
|
"step": 544000 |
|
}, |
|
{ |
|
"epoch": 11.15, |
|
"eval_loss": 3.3793959617614746, |
|
"eval_runtime": 45.6075, |
|
"eval_samples_per_second": 901.211, |
|
"eval_steps_per_second": 56.328, |
|
"step": 544000 |
|
}, |
|
{ |
|
"epoch": 11.31, |
|
"eval_loss": 3.3838233947753906, |
|
"eval_runtime": 46.1859, |
|
"eval_samples_per_second": 889.925, |
|
"eval_steps_per_second": 55.623, |
|
"step": 552000 |
|
}, |
|
{ |
|
"epoch": 11.47, |
|
"learning_rate": 7.671141499207872e-06, |
|
"loss": 3.2676, |
|
"step": 560000 |
|
}, |
|
{ |
|
"epoch": 11.47, |
|
"eval_loss": 3.3659656047821045, |
|
"eval_runtime": 45.7183, |
|
"eval_samples_per_second": 899.027, |
|
"eval_steps_per_second": 56.192, |
|
"step": 560000 |
|
}, |
|
{ |
|
"epoch": 11.64, |
|
"eval_loss": 3.3937699794769287, |
|
"eval_runtime": 45.9326, |
|
"eval_samples_per_second": 894.832, |
|
"eval_steps_per_second": 55.93, |
|
"step": 568000 |
|
}, |
|
{ |
|
"epoch": 11.8, |
|
"learning_rate": 7.604435920953891e-06, |
|
"loss": 3.258, |
|
"step": 576000 |
|
}, |
|
{ |
|
"epoch": 11.8, |
|
"eval_loss": 3.3661420345306396, |
|
"eval_runtime": 46.4625, |
|
"eval_samples_per_second": 884.627, |
|
"eval_steps_per_second": 55.292, |
|
"step": 576000 |
|
}, |
|
{ |
|
"epoch": 11.97, |
|
"eval_loss": 3.3490447998046875, |
|
"eval_runtime": 45.8318, |
|
"eval_samples_per_second": 896.801, |
|
"eval_steps_per_second": 56.053, |
|
"step": 584000 |
|
}, |
|
{ |
|
"epoch": 12.13, |
|
"learning_rate": 7.537730342699909e-06, |
|
"loss": 3.2646, |
|
"step": 592000 |
|
}, |
|
{ |
|
"epoch": 12.13, |
|
"eval_loss": 3.3716230392456055, |
|
"eval_runtime": 45.6734, |
|
"eval_samples_per_second": 899.91, |
|
"eval_steps_per_second": 56.247, |
|
"step": 592000 |
|
}, |
|
{ |
|
"epoch": 12.29, |
|
"eval_loss": 3.3877346515655518, |
|
"eval_runtime": 46.2161, |
|
"eval_samples_per_second": 889.344, |
|
"eval_steps_per_second": 55.587, |
|
"step": 600000 |
|
}, |
|
{ |
|
"epoch": 12.46, |
|
"learning_rate": 7.471024764445928e-06, |
|
"loss": 3.2578, |
|
"step": 608000 |
|
}, |
|
{ |
|
"epoch": 12.46, |
|
"eval_loss": 3.3930206298828125, |
|
"eval_runtime": 45.3985, |
|
"eval_samples_per_second": 905.361, |
|
"eval_steps_per_second": 56.588, |
|
"step": 608000 |
|
}, |
|
{ |
|
"epoch": 12.62, |
|
"eval_loss": 3.392077922821045, |
|
"eval_runtime": 45.1724, |
|
"eval_samples_per_second": 909.893, |
|
"eval_steps_per_second": 56.871, |
|
"step": 616000 |
|
}, |
|
{ |
|
"epoch": 12.78, |
|
"learning_rate": 7.4043191861919465e-06, |
|
"loss": 3.2719, |
|
"step": 624000 |
|
}, |
|
{ |
|
"epoch": 12.78, |
|
"eval_loss": 3.395730495452881, |
|
"eval_runtime": 45.8195, |
|
"eval_samples_per_second": 897.042, |
|
"eval_steps_per_second": 56.068, |
|
"step": 624000 |
|
}, |
|
{ |
|
"epoch": 12.95, |
|
"eval_loss": 3.4196434020996094, |
|
"eval_runtime": 45.2614, |
|
"eval_samples_per_second": 908.103, |
|
"eval_steps_per_second": 56.759, |
|
"step": 632000 |
|
}, |
|
{ |
|
"epoch": 13.11, |
|
"learning_rate": 7.337613607937964e-06, |
|
"loss": 3.2828, |
|
"step": 640000 |
|
}, |
|
{ |
|
"epoch": 13.11, |
|
"eval_loss": 3.4077515602111816, |
|
"eval_runtime": 45.5674, |
|
"eval_samples_per_second": 902.004, |
|
"eval_steps_per_second": 56.378, |
|
"step": 640000 |
|
}, |
|
{ |
|
"epoch": 13.28, |
|
"eval_loss": 3.4202864170074463, |
|
"eval_runtime": 46.3249, |
|
"eval_samples_per_second": 887.255, |
|
"eval_steps_per_second": 55.456, |
|
"step": 648000 |
|
}, |
|
{ |
|
"epoch": 13.44, |
|
"learning_rate": 7.270908029683983e-06, |
|
"loss": 3.2805, |
|
"step": 656000 |
|
}, |
|
{ |
|
"epoch": 13.44, |
|
"eval_loss": 3.3899548053741455, |
|
"eval_runtime": 46.1588, |
|
"eval_samples_per_second": 890.448, |
|
"eval_steps_per_second": 55.656, |
|
"step": 656000 |
|
}, |
|
{ |
|
"epoch": 13.6, |
|
"eval_loss": 3.4037835597991943, |
|
"eval_runtime": 46.9454, |
|
"eval_samples_per_second": 875.527, |
|
"eval_steps_per_second": 54.723, |
|
"step": 664000 |
|
}, |
|
{ |
|
"epoch": 13.77, |
|
"learning_rate": 7.2042024514300015e-06, |
|
"loss": 3.2975, |
|
"step": 672000 |
|
}, |
|
{ |
|
"epoch": 13.77, |
|
"eval_loss": 3.405585765838623, |
|
"eval_runtime": 46.2706, |
|
"eval_samples_per_second": 888.297, |
|
"eval_steps_per_second": 55.521, |
|
"step": 672000 |
|
}, |
|
{ |
|
"epoch": 13.93, |
|
"eval_loss": 3.428373336791992, |
|
"eval_runtime": 45.9889, |
|
"eval_samples_per_second": 893.738, |
|
"eval_steps_per_second": 55.861, |
|
"step": 680000 |
|
}, |
|
{ |
|
"epoch": 14.1, |
|
"learning_rate": 7.13749687317602e-06, |
|
"loss": 3.2965, |
|
"step": 688000 |
|
}, |
|
{ |
|
"epoch": 14.1, |
|
"eval_loss": 3.41803240776062, |
|
"eval_runtime": 46.9126, |
|
"eval_samples_per_second": 876.14, |
|
"eval_steps_per_second": 54.761, |
|
"step": 688000 |
|
}, |
|
{ |
|
"epoch": 14.26, |
|
"eval_loss": 3.419599771499634, |
|
"eval_runtime": 46.1796, |
|
"eval_samples_per_second": 890.047, |
|
"eval_steps_per_second": 55.631, |
|
"step": 696000 |
|
}, |
|
{ |
|
"epoch": 14.42, |
|
"learning_rate": 7.070791294922038e-06, |
|
"loss": 3.3069, |
|
"step": 704000 |
|
}, |
|
{ |
|
"epoch": 14.42, |
|
"eval_loss": 3.425711154937744, |
|
"eval_runtime": 46.2298, |
|
"eval_samples_per_second": 889.08, |
|
"eval_steps_per_second": 55.57, |
|
"step": 704000 |
|
}, |
|
{ |
|
"epoch": 14.59, |
|
"eval_loss": 3.4299447536468506, |
|
"eval_runtime": 46.768, |
|
"eval_samples_per_second": 878.85, |
|
"eval_steps_per_second": 54.931, |
|
"step": 712000 |
|
}, |
|
{ |
|
"epoch": 14.75, |
|
"learning_rate": 7.0040857166680564e-06, |
|
"loss": 3.3152, |
|
"step": 720000 |
|
}, |
|
{ |
|
"epoch": 14.75, |
|
"eval_loss": 3.4787514209747314, |
|
"eval_runtime": 46.0913, |
|
"eval_samples_per_second": 891.752, |
|
"eval_steps_per_second": 55.737, |
|
"step": 720000 |
|
}, |
|
{ |
|
"epoch": 14.92, |
|
"eval_loss": 3.4424662590026855, |
|
"eval_runtime": 46.3411, |
|
"eval_samples_per_second": 886.945, |
|
"eval_steps_per_second": 55.437, |
|
"step": 728000 |
|
}, |
|
{ |
|
"epoch": 15.08, |
|
"learning_rate": 6.937380138414076e-06, |
|
"loss": 3.3125, |
|
"step": 736000 |
|
}, |
|
{ |
|
"epoch": 15.08, |
|
"eval_loss": 3.430126667022705, |
|
"eval_runtime": 46.9882, |
|
"eval_samples_per_second": 874.73, |
|
"eval_steps_per_second": 54.673, |
|
"step": 736000 |
|
}, |
|
{ |
|
"epoch": 15.24, |
|
"eval_loss": 3.4440979957580566, |
|
"eval_runtime": 46.1825, |
|
"eval_samples_per_second": 889.99, |
|
"eval_steps_per_second": 55.627, |
|
"step": 744000 |
|
}, |
|
{ |
|
"epoch": 15.41, |
|
"learning_rate": 6.8706745601600945e-06, |
|
"loss": 3.3174, |
|
"step": 752000 |
|
}, |
|
{ |
|
"epoch": 15.41, |
|
"eval_loss": 3.4396116733551025, |
|
"eval_runtime": 46.2686, |
|
"eval_samples_per_second": 888.334, |
|
"eval_steps_per_second": 55.524, |
|
"step": 752000 |
|
}, |
|
{ |
|
"epoch": 15.57, |
|
"eval_loss": 3.463931083679199, |
|
"eval_runtime": 46.7798, |
|
"eval_samples_per_second": 878.627, |
|
"eval_steps_per_second": 54.917, |
|
"step": 760000 |
|
}, |
|
{ |
|
"epoch": 15.73, |
|
"learning_rate": 6.803968981906113e-06, |
|
"loss": 3.3242, |
|
"step": 768000 |
|
}, |
|
{ |
|
"epoch": 15.73, |
|
"eval_loss": 3.4523837566375732, |
|
"eval_runtime": 45.7867, |
|
"eval_samples_per_second": 897.685, |
|
"eval_steps_per_second": 56.108, |
|
"step": 768000 |
|
}, |
|
{ |
|
"epoch": 15.9, |
|
"eval_loss": 3.455958366394043, |
|
"eval_runtime": 45.3124, |
|
"eval_samples_per_second": 907.08, |
|
"eval_steps_per_second": 56.695, |
|
"step": 776000 |
|
}, |
|
{ |
|
"epoch": 16.06, |
|
"learning_rate": 6.737263403652131e-06, |
|
"loss": 3.3385, |
|
"step": 784000 |
|
}, |
|
{ |
|
"epoch": 16.06, |
|
"eval_loss": 3.4779999256134033, |
|
"eval_runtime": 46.0072, |
|
"eval_samples_per_second": 893.383, |
|
"eval_steps_per_second": 55.839, |
|
"step": 784000 |
|
}, |
|
{ |
|
"epoch": 16.23, |
|
"eval_loss": 3.4773714542388916, |
|
"eval_runtime": 45.131, |
|
"eval_samples_per_second": 910.727, |
|
"eval_steps_per_second": 56.923, |
|
"step": 792000 |
|
}, |
|
{ |
|
"epoch": 16.39, |
|
"learning_rate": 6.6705578253981495e-06, |
|
"loss": 3.3371, |
|
"step": 800000 |
|
}, |
|
{ |
|
"epoch": 16.39, |
|
"eval_loss": 3.47719669342041, |
|
"eval_runtime": 45.6308, |
|
"eval_samples_per_second": 900.751, |
|
"eval_steps_per_second": 56.3, |
|
"step": 800000 |
|
}, |
|
{ |
|
"epoch": 16.55, |
|
"eval_loss": 3.4955241680145264, |
|
"eval_runtime": 46.0477, |
|
"eval_samples_per_second": 892.597, |
|
"eval_steps_per_second": 55.79, |
|
"step": 808000 |
|
}, |
|
{ |
|
"epoch": 16.72, |
|
"learning_rate": 6.603852247144168e-06, |
|
"loss": 3.3633, |
|
"step": 816000 |
|
}, |
|
{ |
|
"epoch": 16.72, |
|
"eval_loss": 3.486057996749878, |
|
"eval_runtime": 44.9231, |
|
"eval_samples_per_second": 914.941, |
|
"eval_steps_per_second": 57.187, |
|
"step": 816000 |
|
}, |
|
{ |
|
"epoch": 16.88, |
|
"eval_loss": 3.506316661834717, |
|
"eval_runtime": 45.7078, |
|
"eval_samples_per_second": 899.234, |
|
"eval_steps_per_second": 56.205, |
|
"step": 824000 |
|
}, |
|
{ |
|
"epoch": 17.05, |
|
"learning_rate": 6.537146668890187e-06, |
|
"loss": 3.3678, |
|
"step": 832000 |
|
}, |
|
{ |
|
"epoch": 17.05, |
|
"eval_loss": 3.50439190864563, |
|
"eval_runtime": 45.0245, |
|
"eval_samples_per_second": 912.882, |
|
"eval_steps_per_second": 57.058, |
|
"step": 832000 |
|
}, |
|
{ |
|
"epoch": 17.21, |
|
"eval_loss": 3.520247220993042, |
|
"eval_runtime": 45.2071, |
|
"eval_samples_per_second": 909.193, |
|
"eval_steps_per_second": 56.827, |
|
"step": 840000 |
|
}, |
|
{ |
|
"epoch": 17.37, |
|
"learning_rate": 6.4704410906362044e-06, |
|
"loss": 3.3634, |
|
"step": 848000 |
|
}, |
|
{ |
|
"epoch": 17.37, |
|
"eval_loss": 3.4941418170928955, |
|
"eval_runtime": 46.4208, |
|
"eval_samples_per_second": 885.423, |
|
"eval_steps_per_second": 55.342, |
|
"step": 848000 |
|
}, |
|
{ |
|
"epoch": 17.54, |
|
"eval_loss": 3.522303819656372, |
|
"eval_runtime": 46.164, |
|
"eval_samples_per_second": 890.347, |
|
"eval_steps_per_second": 55.649, |
|
"step": 856000 |
|
}, |
|
{ |
|
"epoch": 17.7, |
|
"learning_rate": 6.403735512382223e-06, |
|
"loss": 3.3797, |
|
"step": 864000 |
|
}, |
|
{ |
|
"epoch": 17.7, |
|
"eval_loss": 3.502774715423584, |
|
"eval_runtime": 45.8285, |
|
"eval_samples_per_second": 896.865, |
|
"eval_steps_per_second": 56.057, |
|
"step": 864000 |
|
}, |
|
{ |
|
"epoch": 17.87, |
|
"eval_loss": 3.526393175125122, |
|
"eval_runtime": 46.6422, |
|
"eval_samples_per_second": 881.219, |
|
"eval_steps_per_second": 55.079, |
|
"step": 872000 |
|
}, |
|
{ |
|
"epoch": 18.03, |
|
"learning_rate": 6.337029934128242e-06, |
|
"loss": 3.3802, |
|
"step": 880000 |
|
}, |
|
{ |
|
"epoch": 18.03, |
|
"eval_loss": 3.531257152557373, |
|
"eval_runtime": 46.217, |
|
"eval_samples_per_second": 889.327, |
|
"eval_steps_per_second": 55.586, |
|
"step": 880000 |
|
}, |
|
{ |
|
"epoch": 18.19, |
|
"eval_loss": 3.496319055557251, |
|
"eval_runtime": 45.9803, |
|
"eval_samples_per_second": 893.904, |
|
"eval_steps_per_second": 55.872, |
|
"step": 888000 |
|
}, |
|
{ |
|
"epoch": 18.36, |
|
"learning_rate": 6.270324355874261e-06, |
|
"loss": 3.357, |
|
"step": 896000 |
|
}, |
|
{ |
|
"epoch": 18.36, |
|
"eval_loss": 3.5171141624450684, |
|
"eval_runtime": 47.1622, |
|
"eval_samples_per_second": 871.504, |
|
"eval_steps_per_second": 54.472, |
|
"step": 896000 |
|
}, |
|
{ |
|
"epoch": 18.52, |
|
"eval_loss": 3.530701160430908, |
|
"eval_runtime": 46.113, |
|
"eval_samples_per_second": 891.332, |
|
"eval_steps_per_second": 55.711, |
|
"step": 904000 |
|
}, |
|
{ |
|
"epoch": 18.69, |
|
"learning_rate": 6.20361877762028e-06, |
|
"loss": 3.3866, |
|
"step": 912000 |
|
}, |
|
{ |
|
"epoch": 18.69, |
|
"eval_loss": 3.5221967697143555, |
|
"eval_runtime": 46.035, |
|
"eval_samples_per_second": 892.843, |
|
"eval_steps_per_second": 55.805, |
|
"step": 912000 |
|
}, |
|
{ |
|
"epoch": 18.85, |
|
"eval_loss": 3.5319056510925293, |
|
"eval_runtime": 46.8446, |
|
"eval_samples_per_second": 877.412, |
|
"eval_steps_per_second": 54.841, |
|
"step": 920000 |
|
}, |
|
{ |
|
"epoch": 19.01, |
|
"learning_rate": 6.1369131993662975e-06, |
|
"loss": 3.3818, |
|
"step": 928000 |
|
}, |
|
{ |
|
"epoch": 19.01, |
|
"eval_loss": 3.532552480697632, |
|
"eval_runtime": 46.3901, |
|
"eval_samples_per_second": 886.007, |
|
"eval_steps_per_second": 55.378, |
|
"step": 928000 |
|
}, |
|
{ |
|
"epoch": 19.18, |
|
"eval_loss": 3.5116307735443115, |
|
"eval_runtime": 45.2931, |
|
"eval_samples_per_second": 907.466, |
|
"eval_steps_per_second": 56.719, |
|
"step": 936000 |
|
}, |
|
{ |
|
"epoch": 19.34, |
|
"learning_rate": 6.070207621112316e-06, |
|
"loss": 3.3754, |
|
"step": 944000 |
|
}, |
|
{ |
|
"epoch": 19.34, |
|
"eval_loss": 3.5228991508483887, |
|
"eval_runtime": 47.0715, |
|
"eval_samples_per_second": 873.183, |
|
"eval_steps_per_second": 54.577, |
|
"step": 944000 |
|
}, |
|
{ |
|
"epoch": 19.5, |
|
"eval_loss": 3.538318634033203, |
|
"eval_runtime": 45.9256, |
|
"eval_samples_per_second": 894.97, |
|
"eval_steps_per_second": 55.938, |
|
"step": 952000 |
|
}, |
|
{ |
|
"epoch": 19.67, |
|
"learning_rate": 6.003502042858335e-06, |
|
"loss": 3.3893, |
|
"step": 960000 |
|
}, |
|
{ |
|
"epoch": 19.67, |
|
"eval_loss": 3.544513463973999, |
|
"eval_runtime": 46.8245, |
|
"eval_samples_per_second": 877.788, |
|
"eval_steps_per_second": 54.864, |
|
"step": 960000 |
|
}, |
|
{ |
|
"epoch": 19.83, |
|
"eval_loss": 3.5230634212493896, |
|
"eval_runtime": 47.3348, |
|
"eval_samples_per_second": 868.325, |
|
"eval_steps_per_second": 54.273, |
|
"step": 968000 |
|
}, |
|
{ |
|
"epoch": 20.0, |
|
"learning_rate": 5.936796464604353e-06, |
|
"loss": 3.3899, |
|
"step": 976000 |
|
}, |
|
{ |
|
"epoch": 20.0, |
|
"eval_loss": 3.531026840209961, |
|
"eval_runtime": 45.7886, |
|
"eval_samples_per_second": 897.647, |
|
"eval_steps_per_second": 56.106, |
|
"step": 976000 |
|
}, |
|
{ |
|
"epoch": 20.16, |
|
"eval_loss": 3.53287935256958, |
|
"eval_runtime": 46.7771, |
|
"eval_samples_per_second": 878.677, |
|
"eval_steps_per_second": 54.92, |
|
"step": 984000 |
|
}, |
|
{ |
|
"epoch": 20.32, |
|
"learning_rate": 5.870090886350371e-06, |
|
"loss": 3.3918, |
|
"step": 992000 |
|
}, |
|
{ |
|
"epoch": 20.32, |
|
"eval_loss": 3.5158653259277344, |
|
"eval_runtime": 46.2173, |
|
"eval_samples_per_second": 889.32, |
|
"eval_steps_per_second": 55.585, |
|
"step": 992000 |
|
}, |
|
{ |
|
"epoch": 20.49, |
|
"eval_loss": 3.562788486480713, |
|
"eval_runtime": 45.7474, |
|
"eval_samples_per_second": 898.456, |
|
"eval_steps_per_second": 56.156, |
|
"step": 1000000 |
|
}, |
|
{ |
|
"epoch": 20.65, |
|
"learning_rate": 5.80338530809639e-06, |
|
"loss": 3.3786, |
|
"step": 1008000 |
|
}, |
|
{ |
|
"epoch": 20.65, |
|
"eval_loss": 3.5290534496307373, |
|
"eval_runtime": 46.4581, |
|
"eval_samples_per_second": 884.711, |
|
"eval_steps_per_second": 55.297, |
|
"step": 1008000 |
|
}, |
|
{ |
|
"epoch": 20.82, |
|
"eval_loss": 3.5163111686706543, |
|
"eval_runtime": 45.899, |
|
"eval_samples_per_second": 895.487, |
|
"eval_steps_per_second": 55.971, |
|
"step": 1016000 |
|
}, |
|
{ |
|
"epoch": 20.98, |
|
"learning_rate": 5.736679729842408e-06, |
|
"loss": 3.3862, |
|
"step": 1024000 |
|
}, |
|
{ |
|
"epoch": 20.98, |
|
"eval_loss": 3.531219959259033, |
|
"eval_runtime": 45.4959, |
|
"eval_samples_per_second": 903.423, |
|
"eval_steps_per_second": 56.467, |
|
"step": 1024000 |
|
}, |
|
{ |
|
"epoch": 21.14, |
|
"eval_loss": 3.514033317565918, |
|
"eval_runtime": 46.6408, |
|
"eval_samples_per_second": 881.245, |
|
"eval_steps_per_second": 55.08, |
|
"step": 1032000 |
|
}, |
|
{ |
|
"epoch": 21.31, |
|
"learning_rate": 5.669974151588427e-06, |
|
"loss": 3.3855, |
|
"step": 1040000 |
|
}, |
|
{ |
|
"epoch": 21.31, |
|
"eval_loss": 3.5617153644561768, |
|
"eval_runtime": 45.7071, |
|
"eval_samples_per_second": 899.248, |
|
"eval_steps_per_second": 56.206, |
|
"step": 1040000 |
|
}, |
|
{ |
|
"epoch": 21.47, |
|
"eval_loss": 3.5374927520751953, |
|
"eval_runtime": 45.668, |
|
"eval_samples_per_second": 900.018, |
|
"eval_steps_per_second": 56.254, |
|
"step": 1048000 |
|
}, |
|
{ |
|
"epoch": 21.64, |
|
"learning_rate": 5.603268573334446e-06, |
|
"loss": 3.3872, |
|
"step": 1056000 |
|
}, |
|
{ |
|
"epoch": 21.64, |
|
"eval_loss": 3.532823085784912, |
|
"eval_runtime": 46.5514, |
|
"eval_samples_per_second": 882.938, |
|
"eval_steps_per_second": 55.186, |
|
"step": 1056000 |
|
}, |
|
{ |
|
"epoch": 21.8, |
|
"eval_loss": 3.561626434326172, |
|
"eval_runtime": 45.9586, |
|
"eval_samples_per_second": 894.327, |
|
"eval_steps_per_second": 55.898, |
|
"step": 1064000 |
|
}, |
|
{ |
|
"epoch": 21.96, |
|
"learning_rate": 5.536562995080464e-06, |
|
"loss": 3.3931, |
|
"step": 1072000 |
|
}, |
|
{ |
|
"epoch": 21.96, |
|
"eval_loss": 3.5647873878479004, |
|
"eval_runtime": 46.8936, |
|
"eval_samples_per_second": 876.495, |
|
"eval_steps_per_second": 54.784, |
|
"step": 1072000 |
|
}, |
|
{ |
|
"epoch": 22.13, |
|
"eval_loss": 3.544335126876831, |
|
"eval_runtime": 46.3686, |
|
"eval_samples_per_second": 886.419, |
|
"eval_steps_per_second": 55.404, |
|
"step": 1080000 |
|
}, |
|
{ |
|
"epoch": 22.29, |
|
"learning_rate": 5.469857416826483e-06, |
|
"loss": 3.3708, |
|
"step": 1088000 |
|
}, |
|
{ |
|
"epoch": 22.29, |
|
"eval_loss": 3.5400941371917725, |
|
"eval_runtime": 45.8359, |
|
"eval_samples_per_second": 896.72, |
|
"eval_steps_per_second": 56.048, |
|
"step": 1088000 |
|
}, |
|
{ |
|
"epoch": 22.45, |
|
"eval_loss": 3.55292010307312, |
|
"eval_runtime": 46.8082, |
|
"eval_samples_per_second": 878.095, |
|
"eval_steps_per_second": 54.884, |
|
"step": 1096000 |
|
}, |
|
{ |
|
"epoch": 22.62, |
|
"learning_rate": 5.403151838572501e-06, |
|
"loss": 3.4099, |
|
"step": 1104000 |
|
}, |
|
{ |
|
"epoch": 22.62, |
|
"eval_loss": 3.533414602279663, |
|
"eval_runtime": 46.1107, |
|
"eval_samples_per_second": 891.377, |
|
"eval_steps_per_second": 55.714, |
|
"step": 1104000 |
|
}, |
|
{ |
|
"epoch": 22.78, |
|
"eval_loss": 3.5325212478637695, |
|
"eval_runtime": 46.1351, |
|
"eval_samples_per_second": 890.905, |
|
"eval_steps_per_second": 55.684, |
|
"step": 1112000 |
|
}, |
|
{ |
|
"epoch": 22.95, |
|
"learning_rate": 5.33644626031852e-06, |
|
"loss": 3.4027, |
|
"step": 1120000 |
|
}, |
|
{ |
|
"epoch": 22.95, |
|
"eval_loss": 3.5818660259246826, |
|
"eval_runtime": 46.7428, |
|
"eval_samples_per_second": 879.323, |
|
"eval_steps_per_second": 54.96, |
|
"step": 1120000 |
|
}, |
|
{ |
|
"epoch": 23.11, |
|
"eval_loss": 3.5470829010009766, |
|
"eval_runtime": 46.1344, |
|
"eval_samples_per_second": 890.92, |
|
"eval_steps_per_second": 55.685, |
|
"step": 1128000 |
|
}, |
|
{ |
|
"epoch": 23.27, |
|
"learning_rate": 5.269740682064538e-06, |
|
"loss": 3.4035, |
|
"step": 1136000 |
|
}, |
|
{ |
|
"epoch": 23.27, |
|
"eval_loss": 3.548552989959717, |
|
"eval_runtime": 46.1071, |
|
"eval_samples_per_second": 891.446, |
|
"eval_steps_per_second": 55.718, |
|
"step": 1136000 |
|
}, |
|
{ |
|
"epoch": 23.44, |
|
"eval_loss": 3.5470151901245117, |
|
"eval_runtime": 46.849, |
|
"eval_samples_per_second": 877.33, |
|
"eval_steps_per_second": 54.836, |
|
"step": 1144000 |
|
}, |
|
{ |
|
"epoch": 23.6, |
|
"learning_rate": 5.203035103810556e-06, |
|
"loss": 3.3964, |
|
"step": 1152000 |
|
}, |
|
{ |
|
"epoch": 23.6, |
|
"eval_loss": 3.572176694869995, |
|
"eval_runtime": 46.3661, |
|
"eval_samples_per_second": 886.467, |
|
"eval_steps_per_second": 55.407, |
|
"step": 1152000 |
|
}, |
|
{ |
|
"epoch": 23.77, |
|
"eval_loss": 3.55098295211792, |
|
"eval_runtime": 46.1812, |
|
"eval_samples_per_second": 890.015, |
|
"eval_steps_per_second": 55.629, |
|
"step": 1160000 |
|
}, |
|
{ |
|
"epoch": 23.93, |
|
"learning_rate": 5.136329525556575e-06, |
|
"loss": 3.4115, |
|
"step": 1168000 |
|
}, |
|
{ |
|
"epoch": 23.93, |
|
"eval_loss": 3.561007499694824, |
|
"eval_runtime": 47.5429, |
|
"eval_samples_per_second": 864.525, |
|
"eval_steps_per_second": 54.035, |
|
"step": 1168000 |
|
}, |
|
{ |
|
"epoch": 24.09, |
|
"eval_loss": 3.5757482051849365, |
|
"eval_runtime": 46.3962, |
|
"eval_samples_per_second": 885.891, |
|
"eval_steps_per_second": 55.371, |
|
"step": 1176000 |
|
}, |
|
{ |
|
"epoch": 24.26, |
|
"learning_rate": 5.0696239473025935e-06, |
|
"loss": 3.4173, |
|
"step": 1184000 |
|
}, |
|
{ |
|
"epoch": 24.26, |
|
"eval_loss": 3.554094076156616, |
|
"eval_runtime": 45.5708, |
|
"eval_samples_per_second": 901.936, |
|
"eval_steps_per_second": 56.374, |
|
"step": 1184000 |
|
}, |
|
{ |
|
"epoch": 24.42, |
|
"eval_loss": 3.577660083770752, |
|
"eval_runtime": 47.0565, |
|
"eval_samples_per_second": 873.461, |
|
"eval_steps_per_second": 54.594, |
|
"step": 1192000 |
|
}, |
|
{ |
|
"epoch": 24.59, |
|
"learning_rate": 5.002918369048611e-06, |
|
"loss": 3.4169, |
|
"step": 1200000 |
|
}, |
|
{ |
|
"epoch": 24.59, |
|
"eval_loss": 3.5637948513031006, |
|
"eval_runtime": 47.0711, |
|
"eval_samples_per_second": 873.19, |
|
"eval_steps_per_second": 54.577, |
|
"step": 1200000 |
|
}, |
|
{ |
|
"epoch": 24.75, |
|
"eval_loss": 3.5462896823883057, |
|
"eval_runtime": 46.8215, |
|
"eval_samples_per_second": 877.845, |
|
"eval_steps_per_second": 54.868, |
|
"step": 1208000 |
|
}, |
|
{ |
|
"epoch": 24.91, |
|
"learning_rate": 4.936212790794631e-06, |
|
"loss": 3.4031, |
|
"step": 1216000 |
|
}, |
|
{ |
|
"epoch": 24.91, |
|
"eval_loss": 3.5299670696258545, |
|
"eval_runtime": 46.9742, |
|
"eval_samples_per_second": 874.99, |
|
"eval_steps_per_second": 54.69, |
|
"step": 1216000 |
|
}, |
|
{ |
|
"epoch": 25.08, |
|
"eval_loss": 3.558427333831787, |
|
"eval_runtime": 46.1322, |
|
"eval_samples_per_second": 890.961, |
|
"eval_steps_per_second": 55.688, |
|
"step": 1224000 |
|
}, |
|
{ |
|
"epoch": 25.24, |
|
"learning_rate": 4.869507212540649e-06, |
|
"loss": 3.4094, |
|
"step": 1232000 |
|
}, |
|
{ |
|
"epoch": 25.24, |
|
"eval_loss": 3.568174123764038, |
|
"eval_runtime": 46.3049, |
|
"eval_samples_per_second": 887.638, |
|
"eval_steps_per_second": 55.48, |
|
"step": 1232000 |
|
}, |
|
{ |
|
"epoch": 25.41, |
|
"eval_loss": 3.555844783782959, |
|
"eval_runtime": 46.0676, |
|
"eval_samples_per_second": 892.211, |
|
"eval_steps_per_second": 55.766, |
|
"step": 1240000 |
|
}, |
|
{ |
|
"epoch": 25.57, |
|
"learning_rate": 4.802801634286667e-06, |
|
"loss": 3.4116, |
|
"step": 1248000 |
|
}, |
|
{ |
|
"epoch": 25.57, |
|
"eval_loss": 3.5629091262817383, |
|
"eval_runtime": 45.5765, |
|
"eval_samples_per_second": 901.825, |
|
"eval_steps_per_second": 56.367, |
|
"step": 1248000 |
|
}, |
|
{ |
|
"epoch": 25.73, |
|
"eval_loss": 3.5490224361419678, |
|
"eval_runtime": 46.4409, |
|
"eval_samples_per_second": 885.039, |
|
"eval_steps_per_second": 55.318, |
|
"step": 1256000 |
|
}, |
|
{ |
|
"epoch": 25.9, |
|
"learning_rate": 4.7360960560326865e-06, |
|
"loss": 3.4199, |
|
"step": 1264000 |
|
}, |
|
{ |
|
"epoch": 25.9, |
|
"eval_loss": 3.567878484725952, |
|
"eval_runtime": 46.1595, |
|
"eval_samples_per_second": 890.434, |
|
"eval_steps_per_second": 55.655, |
|
"step": 1264000 |
|
}, |
|
{ |
|
"epoch": 26.06, |
|
"eval_loss": 3.5885465145111084, |
|
"eval_runtime": 45.9316, |
|
"eval_samples_per_second": 894.853, |
|
"eval_steps_per_second": 55.931, |
|
"step": 1272000 |
|
}, |
|
{ |
|
"epoch": 26.22, |
|
"learning_rate": 4.669390477778704e-06, |
|
"loss": 3.412, |
|
"step": 1280000 |
|
}, |
|
{ |
|
"epoch": 26.22, |
|
"eval_loss": 3.5578629970550537, |
|
"eval_runtime": 46.4337, |
|
"eval_samples_per_second": 885.176, |
|
"eval_steps_per_second": 55.326, |
|
"step": 1280000 |
|
}, |
|
{ |
|
"epoch": 26.39, |
|
"eval_loss": 3.5465352535247803, |
|
"eval_runtime": 45.7517, |
|
"eval_samples_per_second": 898.371, |
|
"eval_steps_per_second": 56.151, |
|
"step": 1288000 |
|
}, |
|
{ |
|
"epoch": 26.55, |
|
"learning_rate": 4.602684899524723e-06, |
|
"loss": 3.4123, |
|
"step": 1296000 |
|
}, |
|
{ |
|
"epoch": 26.55, |
|
"eval_loss": 3.572610855102539, |
|
"eval_runtime": 45.5426, |
|
"eval_samples_per_second": 902.496, |
|
"eval_steps_per_second": 56.409, |
|
"step": 1296000 |
|
}, |
|
{ |
|
"epoch": 26.72, |
|
"eval_loss": 3.577484130859375, |
|
"eval_runtime": 46.4204, |
|
"eval_samples_per_second": 885.431, |
|
"eval_steps_per_second": 55.342, |
|
"step": 1304000 |
|
}, |
|
{ |
|
"epoch": 26.88, |
|
"learning_rate": 4.5359793212707415e-06, |
|
"loss": 3.4132, |
|
"step": 1312000 |
|
}, |
|
{ |
|
"epoch": 26.88, |
|
"eval_loss": 3.5477850437164307, |
|
"eval_runtime": 45.6512, |
|
"eval_samples_per_second": 900.348, |
|
"eval_steps_per_second": 56.275, |
|
"step": 1312000 |
|
}, |
|
{ |
|
"epoch": 27.04, |
|
"eval_loss": 3.5588574409484863, |
|
"eval_runtime": 46.0446, |
|
"eval_samples_per_second": 892.657, |
|
"eval_steps_per_second": 55.794, |
|
"step": 1320000 |
|
}, |
|
{ |
|
"epoch": 27.21, |
|
"learning_rate": 4.46927374301676e-06, |
|
"loss": 3.4161, |
|
"step": 1328000 |
|
}, |
|
{ |
|
"epoch": 27.21, |
|
"eval_loss": 3.56620717048645, |
|
"eval_runtime": 46.4839, |
|
"eval_samples_per_second": 884.22, |
|
"eval_steps_per_second": 55.266, |
|
"step": 1328000 |
|
}, |
|
{ |
|
"epoch": 27.37, |
|
"eval_loss": 3.589487075805664, |
|
"eval_runtime": 46.3966, |
|
"eval_samples_per_second": 885.884, |
|
"eval_steps_per_second": 55.37, |
|
"step": 1336000 |
|
}, |
|
{ |
|
"epoch": 27.54, |
|
"learning_rate": 4.402568164762779e-06, |
|
"loss": 3.4097, |
|
"step": 1344000 |
|
}, |
|
{ |
|
"epoch": 27.54, |
|
"eval_loss": 3.5940632820129395, |
|
"eval_runtime": 46.4364, |
|
"eval_samples_per_second": 885.125, |
|
"eval_steps_per_second": 55.323, |
|
"step": 1344000 |
|
}, |
|
{ |
|
"epoch": 27.7, |
|
"eval_loss": 3.5912110805511475, |
|
"eval_runtime": 45.9687, |
|
"eval_samples_per_second": 894.131, |
|
"eval_steps_per_second": 55.886, |
|
"step": 1352000 |
|
}, |
|
{ |
|
"epoch": 27.86, |
|
"learning_rate": 4.335862586508797e-06, |
|
"loss": 3.415, |
|
"step": 1360000 |
|
}, |
|
{ |
|
"epoch": 27.86, |
|
"eval_loss": 3.565756320953369, |
|
"eval_runtime": 45.7621, |
|
"eval_samples_per_second": 898.168, |
|
"eval_steps_per_second": 56.138, |
|
"step": 1360000 |
|
}, |
|
{ |
|
"epoch": 28.03, |
|
"eval_loss": 3.5553781986236572, |
|
"eval_runtime": 46.2903, |
|
"eval_samples_per_second": 887.919, |
|
"eval_steps_per_second": 55.498, |
|
"step": 1368000 |
|
}, |
|
{ |
|
"epoch": 28.19, |
|
"learning_rate": 4.269157008254816e-06, |
|
"loss": 3.4193, |
|
"step": 1376000 |
|
}, |
|
{ |
|
"epoch": 28.19, |
|
"eval_loss": 3.589851140975952, |
|
"eval_runtime": 45.8411, |
|
"eval_samples_per_second": 896.618, |
|
"eval_steps_per_second": 56.041, |
|
"step": 1376000 |
|
}, |
|
{ |
|
"epoch": 28.36, |
|
"eval_loss": 3.5652260780334473, |
|
"eval_runtime": 45.5538, |
|
"eval_samples_per_second": 902.275, |
|
"eval_steps_per_second": 56.395, |
|
"step": 1384000 |
|
}, |
|
{ |
|
"epoch": 28.52, |
|
"learning_rate": 4.202451430000834e-06, |
|
"loss": 3.4136, |
|
"step": 1392000 |
|
}, |
|
{ |
|
"epoch": 28.52, |
|
"eval_loss": 3.5832390785217285, |
|
"eval_runtime": 46.575, |
|
"eval_samples_per_second": 882.491, |
|
"eval_steps_per_second": 55.158, |
|
"step": 1392000 |
|
}, |
|
{ |
|
"epoch": 28.68, |
|
"eval_loss": 3.5885210037231445, |
|
"eval_runtime": 45.9659, |
|
"eval_samples_per_second": 894.184, |
|
"eval_steps_per_second": 55.889, |
|
"step": 1400000 |
|
}, |
|
{ |
|
"epoch": 28.85, |
|
"learning_rate": 4.135745851746852e-06, |
|
"loss": 3.4294, |
|
"step": 1408000 |
|
}, |
|
{ |
|
"epoch": 28.85, |
|
"eval_loss": 3.583249807357788, |
|
"eval_runtime": 45.7927, |
|
"eval_samples_per_second": 897.568, |
|
"eval_steps_per_second": 56.101, |
|
"step": 1408000 |
|
}, |
|
{ |
|
"epoch": 29.01, |
|
"eval_loss": 3.6025209426879883, |
|
"eval_runtime": 46.362, |
|
"eval_samples_per_second": 886.546, |
|
"eval_steps_per_second": 55.412, |
|
"step": 1416000 |
|
}, |
|
{ |
|
"epoch": 29.17, |
|
"learning_rate": 4.069040273492872e-06, |
|
"loss": 3.4243, |
|
"step": 1424000 |
|
}, |
|
{ |
|
"epoch": 29.17, |
|
"eval_loss": 3.6040360927581787, |
|
"eval_runtime": 45.7855, |
|
"eval_samples_per_second": 897.708, |
|
"eval_steps_per_second": 56.11, |
|
"step": 1424000 |
|
}, |
|
{ |
|
"epoch": 29.34, |
|
"eval_loss": 3.5890395641326904, |
|
"eval_runtime": 46.5109, |
|
"eval_samples_per_second": 883.707, |
|
"eval_steps_per_second": 55.234, |
|
"step": 1432000 |
|
}, |
|
{ |
|
"epoch": 29.5, |
|
"learning_rate": 4.0023346952388895e-06, |
|
"loss": 3.4427, |
|
"step": 1440000 |
|
}, |
|
{ |
|
"epoch": 29.5, |
|
"eval_loss": 3.58347749710083, |
|
"eval_runtime": 46.2896, |
|
"eval_samples_per_second": 887.931, |
|
"eval_steps_per_second": 55.498, |
|
"step": 1440000 |
|
}, |
|
{ |
|
"epoch": 29.67, |
|
"eval_loss": 3.6185286045074463, |
|
"eval_runtime": 46.4189, |
|
"eval_samples_per_second": 885.459, |
|
"eval_steps_per_second": 55.344, |
|
"step": 1448000 |
|
}, |
|
{ |
|
"epoch": 29.83, |
|
"learning_rate": 3.935629116984908e-06, |
|
"loss": 3.4293, |
|
"step": 1456000 |
|
}, |
|
{ |
|
"epoch": 29.83, |
|
"eval_loss": 3.6028919219970703, |
|
"eval_runtime": 46.7251, |
|
"eval_samples_per_second": 879.656, |
|
"eval_steps_per_second": 54.981, |
|
"step": 1456000 |
|
}, |
|
{ |
|
"epoch": 29.99, |
|
"eval_loss": 3.616161823272705, |
|
"eval_runtime": 45.7265, |
|
"eval_samples_per_second": 898.865, |
|
"eval_steps_per_second": 56.182, |
|
"step": 1464000 |
|
}, |
|
{ |
|
"epoch": 30.16, |
|
"learning_rate": 3.868923538730927e-06, |
|
"loss": 3.4363, |
|
"step": 1472000 |
|
}, |
|
{ |
|
"epoch": 30.16, |
|
"eval_loss": 3.6257941722869873, |
|
"eval_runtime": 45.6532, |
|
"eval_samples_per_second": 900.308, |
|
"eval_steps_per_second": 56.272, |
|
"step": 1472000 |
|
}, |
|
{ |
|
"epoch": 30.32, |
|
"eval_loss": 3.6038014888763428, |
|
"eval_runtime": 46.717, |
|
"eval_samples_per_second": 879.808, |
|
"eval_steps_per_second": 54.991, |
|
"step": 1480000 |
|
}, |
|
{ |
|
"epoch": 30.49, |
|
"learning_rate": 3.8022179604769453e-06, |
|
"loss": 3.4532, |
|
"step": 1488000 |
|
}, |
|
{ |
|
"epoch": 30.49, |
|
"eval_loss": 3.6039483547210693, |
|
"eval_runtime": 45.742, |
|
"eval_samples_per_second": 898.562, |
|
"eval_steps_per_second": 56.163, |
|
"step": 1488000 |
|
}, |
|
{ |
|
"epoch": 30.65, |
|
"eval_loss": 3.605367422103882, |
|
"eval_runtime": 45.7078, |
|
"eval_samples_per_second": 899.234, |
|
"eval_steps_per_second": 56.205, |
|
"step": 1496000 |
|
}, |
|
{ |
|
"epoch": 30.81, |
|
"learning_rate": 3.735512382222964e-06, |
|
"loss": 3.4401, |
|
"step": 1504000 |
|
}, |
|
{ |
|
"epoch": 30.81, |
|
"eval_loss": 3.6269376277923584, |
|
"eval_runtime": 46.6124, |
|
"eval_samples_per_second": 881.783, |
|
"eval_steps_per_second": 55.114, |
|
"step": 1504000 |
|
}, |
|
{ |
|
"epoch": 30.98, |
|
"eval_loss": 3.600417137145996, |
|
"eval_runtime": 47.0146, |
|
"eval_samples_per_second": 874.239, |
|
"eval_steps_per_second": 54.643, |
|
"step": 1512000 |
|
}, |
|
{ |
|
"epoch": 31.14, |
|
"learning_rate": 3.668806803968982e-06, |
|
"loss": 3.4491, |
|
"step": 1520000 |
|
}, |
|
{ |
|
"epoch": 31.14, |
|
"eval_loss": 3.6095597743988037, |
|
"eval_runtime": 47.1653, |
|
"eval_samples_per_second": 871.446, |
|
"eval_steps_per_second": 54.468, |
|
"step": 1520000 |
|
}, |
|
{ |
|
"epoch": 31.31, |
|
"eval_loss": 3.6216766834259033, |
|
"eval_runtime": 48.343, |
|
"eval_samples_per_second": 850.216, |
|
"eval_steps_per_second": 53.141, |
|
"step": 1528000 |
|
}, |
|
{ |
|
"epoch": 31.47, |
|
"learning_rate": 3.6021012257150007e-06, |
|
"loss": 3.4438, |
|
"step": 1536000 |
|
}, |
|
{ |
|
"epoch": 31.47, |
|
"eval_loss": 3.6081080436706543, |
|
"eval_runtime": 47.4804, |
|
"eval_samples_per_second": 865.663, |
|
"eval_steps_per_second": 54.107, |
|
"step": 1536000 |
|
}, |
|
{ |
|
"epoch": 31.63, |
|
"eval_loss": 3.6190168857574463, |
|
"eval_runtime": 48.3587, |
|
"eval_samples_per_second": 849.941, |
|
"eval_steps_per_second": 53.124, |
|
"step": 1544000 |
|
}, |
|
{ |
|
"epoch": 31.8, |
|
"learning_rate": 3.535395647461019e-06, |
|
"loss": 3.4337, |
|
"step": 1552000 |
|
}, |
|
{ |
|
"epoch": 31.8, |
|
"eval_loss": 3.611992835998535, |
|
"eval_runtime": 47.5342, |
|
"eval_samples_per_second": 864.683, |
|
"eval_steps_per_second": 54.045, |
|
"step": 1552000 |
|
}, |
|
{ |
|
"epoch": 31.96, |
|
"eval_loss": 3.586127996444702, |
|
"eval_runtime": 46.8726, |
|
"eval_samples_per_second": 876.888, |
|
"eval_steps_per_second": 54.808, |
|
"step": 1560000 |
|
}, |
|
{ |
|
"epoch": 32.13, |
|
"learning_rate": 3.468690069207038e-06, |
|
"loss": 3.4475, |
|
"step": 1568000 |
|
}, |
|
{ |
|
"epoch": 32.13, |
|
"eval_loss": 3.620932102203369, |
|
"eval_runtime": 48.2654, |
|
"eval_samples_per_second": 851.582, |
|
"eval_steps_per_second": 53.226, |
|
"step": 1568000 |
|
}, |
|
{ |
|
"epoch": 32.29, |
|
"eval_loss": 3.6301937103271484, |
|
"eval_runtime": 47.2416, |
|
"eval_samples_per_second": 870.039, |
|
"eval_steps_per_second": 54.38, |
|
"step": 1576000 |
|
}, |
|
{ |
|
"epoch": 32.45, |
|
"learning_rate": 3.4019844909530565e-06, |
|
"loss": 3.4406, |
|
"step": 1584000 |
|
}, |
|
{ |
|
"epoch": 32.45, |
|
"eval_loss": 3.6052932739257812, |
|
"eval_runtime": 46.0861, |
|
"eval_samples_per_second": 891.852, |
|
"eval_steps_per_second": 55.743, |
|
"step": 1584000 |
|
}, |
|
{ |
|
"epoch": 32.62, |
|
"eval_loss": 3.593369960784912, |
|
"eval_runtime": 49.6475, |
|
"eval_samples_per_second": 827.876, |
|
"eval_steps_per_second": 51.745, |
|
"step": 1592000 |
|
}, |
|
{ |
|
"epoch": 32.78, |
|
"learning_rate": 3.3352789126990747e-06, |
|
"loss": 3.4392, |
|
"step": 1600000 |
|
}, |
|
{ |
|
"epoch": 32.78, |
|
"eval_loss": 3.594203472137451, |
|
"eval_runtime": 47.8907, |
|
"eval_samples_per_second": 858.246, |
|
"eval_steps_per_second": 53.643, |
|
"step": 1600000 |
|
}, |
|
{ |
|
"epoch": 32.94, |
|
"eval_loss": 3.601329803466797, |
|
"eval_runtime": 46.6549, |
|
"eval_samples_per_second": 880.98, |
|
"eval_steps_per_second": 55.064, |
|
"step": 1608000 |
|
}, |
|
{ |
|
"epoch": 33.11, |
|
"learning_rate": 3.2685733344450933e-06, |
|
"loss": 3.4514, |
|
"step": 1616000 |
|
}, |
|
{ |
|
"epoch": 33.11, |
|
"eval_loss": 3.6505630016326904, |
|
"eval_runtime": 47.3453, |
|
"eval_samples_per_second": 868.132, |
|
"eval_steps_per_second": 54.261, |
|
"step": 1616000 |
|
}, |
|
{ |
|
"epoch": 33.27, |
|
"eval_loss": 3.604905128479004, |
|
"eval_runtime": 47.3478, |
|
"eval_samples_per_second": 868.087, |
|
"eval_steps_per_second": 54.258, |
|
"step": 1624000 |
|
}, |
|
{ |
|
"epoch": 33.44, |
|
"learning_rate": 3.2018677561911115e-06, |
|
"loss": 3.4406, |
|
"step": 1632000 |
|
}, |
|
{ |
|
"epoch": 33.44, |
|
"eval_loss": 3.6285159587860107, |
|
"eval_runtime": 45.2665, |
|
"eval_samples_per_second": 908.001, |
|
"eval_steps_per_second": 56.753, |
|
"step": 1632000 |
|
}, |
|
{ |
|
"epoch": 33.6, |
|
"eval_loss": 3.6107122898101807, |
|
"eval_runtime": 47.0075, |
|
"eval_samples_per_second": 874.372, |
|
"eval_steps_per_second": 54.651, |
|
"step": 1640000 |
|
}, |
|
{ |
|
"epoch": 33.76, |
|
"learning_rate": 3.1351621779371306e-06, |
|
"loss": 3.4522, |
|
"step": 1648000 |
|
}, |
|
{ |
|
"epoch": 33.76, |
|
"eval_loss": 3.6080775260925293, |
|
"eval_runtime": 46.384, |
|
"eval_samples_per_second": 886.124, |
|
"eval_steps_per_second": 55.385, |
|
"step": 1648000 |
|
}, |
|
{ |
|
"epoch": 33.93, |
|
"eval_loss": 3.6121394634246826, |
|
"eval_runtime": 47.5808, |
|
"eval_samples_per_second": 863.836, |
|
"eval_steps_per_second": 53.992, |
|
"step": 1656000 |
|
}, |
|
{ |
|
"epoch": 34.09, |
|
"learning_rate": 3.0684565996831487e-06, |
|
"loss": 3.4592, |
|
"step": 1664000 |
|
}, |
|
{ |
|
"epoch": 34.09, |
|
"eval_loss": 3.639568567276001, |
|
"eval_runtime": 47.4907, |
|
"eval_samples_per_second": 865.474, |
|
"eval_steps_per_second": 54.095, |
|
"step": 1664000 |
|
}, |
|
{ |
|
"epoch": 34.26, |
|
"eval_loss": 3.628408432006836, |
|
"eval_runtime": 45.8805, |
|
"eval_samples_per_second": 895.849, |
|
"eval_steps_per_second": 55.993, |
|
"step": 1672000 |
|
}, |
|
{ |
|
"epoch": 34.42, |
|
"learning_rate": 3.0017510214291673e-06, |
|
"loss": 3.4587, |
|
"step": 1680000 |
|
}, |
|
{ |
|
"epoch": 34.42, |
|
"eval_loss": 3.619464635848999, |
|
"eval_runtime": 46.7813, |
|
"eval_samples_per_second": 878.599, |
|
"eval_steps_per_second": 54.915, |
|
"step": 1680000 |
|
}, |
|
{ |
|
"epoch": 34.58, |
|
"eval_loss": 3.6168148517608643, |
|
"eval_runtime": 46.0408, |
|
"eval_samples_per_second": 892.731, |
|
"eval_steps_per_second": 55.798, |
|
"step": 1688000 |
|
}, |
|
{ |
|
"epoch": 34.75, |
|
"learning_rate": 2.9350454431751855e-06, |
|
"loss": 3.4589, |
|
"step": 1696000 |
|
}, |
|
{ |
|
"epoch": 34.75, |
|
"eval_loss": 3.631527900695801, |
|
"eval_runtime": 45.9831, |
|
"eval_samples_per_second": 893.85, |
|
"eval_steps_per_second": 55.868, |
|
"step": 1696000 |
|
}, |
|
{ |
|
"epoch": 34.91, |
|
"eval_loss": 3.6044745445251465, |
|
"eval_runtime": 46.5293, |
|
"eval_samples_per_second": 883.356, |
|
"eval_steps_per_second": 55.212, |
|
"step": 1704000 |
|
}, |
|
{ |
|
"epoch": 35.08, |
|
"learning_rate": 2.868339864921204e-06, |
|
"loss": 3.4703, |
|
"step": 1712000 |
|
}, |
|
{ |
|
"epoch": 35.08, |
|
"eval_loss": 3.6251227855682373, |
|
"eval_runtime": 45.5912, |
|
"eval_samples_per_second": 901.533, |
|
"eval_steps_per_second": 56.349, |
|
"step": 1712000 |
|
}, |
|
{ |
|
"epoch": 35.24, |
|
"eval_loss": 3.6251931190490723, |
|
"eval_runtime": 45.7404, |
|
"eval_samples_per_second": 898.593, |
|
"eval_steps_per_second": 56.165, |
|
"step": 1720000 |
|
}, |
|
{ |
|
"epoch": 35.4, |
|
"learning_rate": 2.801634286667223e-06, |
|
"loss": 3.4565, |
|
"step": 1728000 |
|
}, |
|
{ |
|
"epoch": 35.4, |
|
"eval_loss": 3.62538743019104, |
|
"eval_runtime": 46.4207, |
|
"eval_samples_per_second": 885.423, |
|
"eval_steps_per_second": 55.342, |
|
"step": 1728000 |
|
}, |
|
{ |
|
"epoch": 35.57, |
|
"eval_loss": 3.6544113159179688, |
|
"eval_runtime": 45.7864, |
|
"eval_samples_per_second": 897.691, |
|
"eval_steps_per_second": 56.108, |
|
"step": 1736000 |
|
}, |
|
{ |
|
"epoch": 35.73, |
|
"learning_rate": 2.7349287084132413e-06, |
|
"loss": 3.4634, |
|
"step": 1744000 |
|
}, |
|
{ |
|
"epoch": 35.73, |
|
"eval_loss": 3.629049062728882, |
|
"eval_runtime": 46.556, |
|
"eval_samples_per_second": 882.85, |
|
"eval_steps_per_second": 55.181, |
|
"step": 1744000 |
|
}, |
|
{ |
|
"epoch": 35.9, |
|
"eval_loss": 3.612429618835449, |
|
"eval_runtime": 46.5059, |
|
"eval_samples_per_second": 883.802, |
|
"eval_steps_per_second": 55.24, |
|
"step": 1752000 |
|
}, |
|
{ |
|
"epoch": 36.06, |
|
"learning_rate": 2.66822313015926e-06, |
|
"loss": 3.4625, |
|
"step": 1760000 |
|
}, |
|
{ |
|
"epoch": 36.06, |
|
"eval_loss": 3.6262378692626953, |
|
"eval_runtime": 45.8554, |
|
"eval_samples_per_second": 896.34, |
|
"eval_steps_per_second": 56.024, |
|
"step": 1760000 |
|
}, |
|
{ |
|
"epoch": 36.22, |
|
"eval_loss": 3.6317975521087646, |
|
"eval_runtime": 46.7318, |
|
"eval_samples_per_second": 879.529, |
|
"eval_steps_per_second": 54.973, |
|
"step": 1768000 |
|
}, |
|
{ |
|
"epoch": 36.39, |
|
"learning_rate": 2.601517551905278e-06, |
|
"loss": 3.457, |
|
"step": 1776000 |
|
}, |
|
{ |
|
"epoch": 36.39, |
|
"eval_loss": 3.640812397003174, |
|
"eval_runtime": 45.9688, |
|
"eval_samples_per_second": 894.129, |
|
"eval_steps_per_second": 55.886, |
|
"step": 1776000 |
|
}, |
|
{ |
|
"epoch": 36.55, |
|
"eval_loss": 3.6433026790618896, |
|
"eval_runtime": 45.8154, |
|
"eval_samples_per_second": 897.122, |
|
"eval_steps_per_second": 56.073, |
|
"step": 1784000 |
|
}, |
|
{ |
|
"epoch": 36.71, |
|
"learning_rate": 2.5348119736512967e-06, |
|
"loss": 3.4618, |
|
"step": 1792000 |
|
}, |
|
{ |
|
"epoch": 36.71, |
|
"eval_loss": 3.627612352371216, |
|
"eval_runtime": 46.6149, |
|
"eval_samples_per_second": 881.735, |
|
"eval_steps_per_second": 55.111, |
|
"step": 1792000 |
|
}, |
|
{ |
|
"epoch": 36.88, |
|
"eval_loss": 3.631366014480591, |
|
"eval_runtime": 46.0925, |
|
"eval_samples_per_second": 891.729, |
|
"eval_steps_per_second": 55.736, |
|
"step": 1800000 |
|
}, |
|
{ |
|
"epoch": 37.04, |
|
"learning_rate": 2.4681063953973154e-06, |
|
"loss": 3.4611, |
|
"step": 1808000 |
|
}, |
|
{ |
|
"epoch": 37.04, |
|
"eval_loss": 3.6415860652923584, |
|
"eval_runtime": 46.287, |
|
"eval_samples_per_second": 887.982, |
|
"eval_steps_per_second": 55.502, |
|
"step": 1808000 |
|
}, |
|
{ |
|
"epoch": 37.21, |
|
"eval_loss": 3.665800094604492, |
|
"eval_runtime": 46.839, |
|
"eval_samples_per_second": 877.517, |
|
"eval_steps_per_second": 54.847, |
|
"step": 1816000 |
|
}, |
|
{ |
|
"epoch": 37.37, |
|
"learning_rate": 2.4014008171433335e-06, |
|
"loss": 3.4651, |
|
"step": 1824000 |
|
}, |
|
{ |
|
"epoch": 37.37, |
|
"eval_loss": 3.638195037841797, |
|
"eval_runtime": 46.0815, |
|
"eval_samples_per_second": 891.942, |
|
"eval_steps_per_second": 55.749, |
|
"step": 1824000 |
|
}, |
|
{ |
|
"epoch": 37.53, |
|
"eval_loss": 3.656243085861206, |
|
"eval_runtime": 45.3257, |
|
"eval_samples_per_second": 906.815, |
|
"eval_steps_per_second": 56.679, |
|
"step": 1832000 |
|
}, |
|
{ |
|
"epoch": 37.7, |
|
"learning_rate": 2.334695238889352e-06, |
|
"loss": 3.4625, |
|
"step": 1840000 |
|
}, |
|
{ |
|
"epoch": 37.7, |
|
"eval_loss": 3.6376214027404785, |
|
"eval_runtime": 47.1734, |
|
"eval_samples_per_second": 871.296, |
|
"eval_steps_per_second": 54.459, |
|
"step": 1840000 |
|
}, |
|
{ |
|
"epoch": 37.86, |
|
"eval_loss": 3.651963710784912, |
|
"eval_runtime": 46.059, |
|
"eval_samples_per_second": 892.377, |
|
"eval_steps_per_second": 55.776, |
|
"step": 1848000 |
|
}, |
|
{ |
|
"epoch": 38.03, |
|
"learning_rate": 2.2679896606353707e-06, |
|
"loss": 3.4561, |
|
"step": 1856000 |
|
}, |
|
{ |
|
"epoch": 38.03, |
|
"eval_loss": 3.6300716400146484, |
|
"eval_runtime": 46.8158, |
|
"eval_samples_per_second": 877.951, |
|
"eval_steps_per_second": 54.875, |
|
"step": 1856000 |
|
}, |
|
{ |
|
"epoch": 38.19, |
|
"eval_loss": 3.619462728500366, |
|
"eval_runtime": 45.8596, |
|
"eval_samples_per_second": 896.258, |
|
"eval_steps_per_second": 56.019, |
|
"step": 1864000 |
|
}, |
|
{ |
|
"epoch": 38.35, |
|
"learning_rate": 2.2012840823813894e-06, |
|
"loss": 3.4655, |
|
"step": 1872000 |
|
}, |
|
{ |
|
"epoch": 38.35, |
|
"eval_loss": 3.6279447078704834, |
|
"eval_runtime": 46.2215, |
|
"eval_samples_per_second": 889.241, |
|
"eval_steps_per_second": 55.58, |
|
"step": 1872000 |
|
}, |
|
{ |
|
"epoch": 38.52, |
|
"eval_loss": 3.636460542678833, |
|
"eval_runtime": 46.7533, |
|
"eval_samples_per_second": 879.125, |
|
"eval_steps_per_second": 54.948, |
|
"step": 1880000 |
|
}, |
|
{ |
|
"epoch": 38.68, |
|
"learning_rate": 2.134578504127408e-06, |
|
"loss": 3.4637, |
|
"step": 1888000 |
|
}, |
|
{ |
|
"epoch": 38.68, |
|
"eval_loss": 3.638620138168335, |
|
"eval_runtime": 46.2177, |
|
"eval_samples_per_second": 889.313, |
|
"eval_steps_per_second": 55.585, |
|
"step": 1888000 |
|
}, |
|
{ |
|
"epoch": 38.85, |
|
"eval_loss": 3.643373489379883, |
|
"eval_runtime": 45.9947, |
|
"eval_samples_per_second": 893.624, |
|
"eval_steps_per_second": 55.854, |
|
"step": 1896000 |
|
}, |
|
{ |
|
"epoch": 39.01, |
|
"learning_rate": 2.067872925873426e-06, |
|
"loss": 3.458, |
|
"step": 1904000 |
|
}, |
|
{ |
|
"epoch": 39.01, |
|
"eval_loss": 3.65189266204834, |
|
"eval_runtime": 46.7003, |
|
"eval_samples_per_second": 880.122, |
|
"eval_steps_per_second": 55.01, |
|
"step": 1904000 |
|
}, |
|
{ |
|
"epoch": 39.17, |
|
"eval_loss": 3.6438076496124268, |
|
"eval_runtime": 46.3785, |
|
"eval_samples_per_second": 886.229, |
|
"eval_steps_per_second": 55.392, |
|
"step": 1912000 |
|
}, |
|
{ |
|
"epoch": 39.34, |
|
"learning_rate": 2.0011673476194448e-06, |
|
"loss": 3.4523, |
|
"step": 1920000 |
|
}, |
|
{ |
|
"epoch": 39.34, |
|
"eval_loss": 3.640777349472046, |
|
"eval_runtime": 46.701, |
|
"eval_samples_per_second": 880.109, |
|
"eval_steps_per_second": 55.01, |
|
"step": 1920000 |
|
}, |
|
{ |
|
"epoch": 39.5, |
|
"eval_loss": 3.6513171195983887, |
|
"eval_runtime": 46.884, |
|
"eval_samples_per_second": 876.675, |
|
"eval_steps_per_second": 54.795, |
|
"step": 1928000 |
|
}, |
|
{ |
|
"epoch": 39.66, |
|
"learning_rate": 1.9344617693654634e-06, |
|
"loss": 3.4743, |
|
"step": 1936000 |
|
}, |
|
{ |
|
"epoch": 39.66, |
|
"eval_loss": 3.6177797317504883, |
|
"eval_runtime": 46.0686, |
|
"eval_samples_per_second": 892.192, |
|
"eval_steps_per_second": 55.765, |
|
"step": 1936000 |
|
}, |
|
{ |
|
"epoch": 39.83, |
|
"eval_loss": 3.6398518085479736, |
|
"eval_runtime": 46.8575, |
|
"eval_samples_per_second": 877.171, |
|
"eval_steps_per_second": 54.826, |
|
"step": 1944000 |
|
}, |
|
{ |
|
"epoch": 39.99, |
|
"learning_rate": 1.867756191111482e-06, |
|
"loss": 3.4626, |
|
"step": 1952000 |
|
}, |
|
{ |
|
"epoch": 39.99, |
|
"eval_loss": 3.624283790588379, |
|
"eval_runtime": 46.1682, |
|
"eval_samples_per_second": 890.266, |
|
"eval_steps_per_second": 55.644, |
|
"step": 1952000 |
|
}, |
|
{ |
|
"epoch": 40.16, |
|
"eval_loss": 3.6325714588165283, |
|
"eval_runtime": 45.9837, |
|
"eval_samples_per_second": 893.838, |
|
"eval_steps_per_second": 55.868, |
|
"step": 1960000 |
|
}, |
|
{ |
|
"epoch": 40.32, |
|
"learning_rate": 1.8010506128575004e-06, |
|
"loss": 3.4692, |
|
"step": 1968000 |
|
}, |
|
{ |
|
"epoch": 40.32, |
|
"eval_loss": 3.6723103523254395, |
|
"eval_runtime": 46.8787, |
|
"eval_samples_per_second": 876.773, |
|
"eval_steps_per_second": 54.801, |
|
"step": 1968000 |
|
}, |
|
{ |
|
"epoch": 40.48, |
|
"eval_loss": 3.6456410884857178, |
|
"eval_runtime": 46.0442, |
|
"eval_samples_per_second": 892.664, |
|
"eval_steps_per_second": 55.794, |
|
"step": 1976000 |
|
}, |
|
{ |
|
"epoch": 40.65, |
|
"learning_rate": 1.734345034603519e-06, |
|
"loss": 3.4765, |
|
"step": 1984000 |
|
}, |
|
{ |
|
"epoch": 40.65, |
|
"eval_loss": 3.6437156200408936, |
|
"eval_runtime": 45.2826, |
|
"eval_samples_per_second": 907.678, |
|
"eval_steps_per_second": 56.733, |
|
"step": 1984000 |
|
}, |
|
{ |
|
"epoch": 40.81, |
|
"eval_loss": 3.647704839706421, |
|
"eval_runtime": 46.8981, |
|
"eval_samples_per_second": 876.41, |
|
"eval_steps_per_second": 54.778, |
|
"step": 1992000 |
|
}, |
|
{ |
|
"epoch": 40.98, |
|
"learning_rate": 1.6676394563495374e-06, |
|
"loss": 3.4747, |
|
"step": 2000000 |
|
}, |
|
{ |
|
"epoch": 40.98, |
|
"eval_loss": 3.638388156890869, |
|
"eval_runtime": 46.0328, |
|
"eval_samples_per_second": 892.886, |
|
"eval_steps_per_second": 55.808, |
|
"step": 2000000 |
|
}, |
|
{ |
|
"epoch": 41.14, |
|
"eval_loss": 3.6370368003845215, |
|
"eval_runtime": 46.7372, |
|
"eval_samples_per_second": 879.427, |
|
"eval_steps_per_second": 54.967, |
|
"step": 2008000 |
|
}, |
|
{ |
|
"epoch": 41.3, |
|
"learning_rate": 1.6009338780955558e-06, |
|
"loss": 3.4683, |
|
"step": 2016000 |
|
}, |
|
{ |
|
"epoch": 41.3, |
|
"eval_loss": 3.662468433380127, |
|
"eval_runtime": 46.61, |
|
"eval_samples_per_second": 881.828, |
|
"eval_steps_per_second": 55.117, |
|
"step": 2016000 |
|
}, |
|
{ |
|
"epoch": 41.47, |
|
"eval_loss": 3.6453213691711426, |
|
"eval_runtime": 45.8611, |
|
"eval_samples_per_second": 896.229, |
|
"eval_steps_per_second": 56.017, |
|
"step": 2024000 |
|
}, |
|
{ |
|
"epoch": 41.63, |
|
"learning_rate": 1.5342282998415744e-06, |
|
"loss": 3.4599, |
|
"step": 2032000 |
|
}, |
|
{ |
|
"epoch": 41.63, |
|
"eval_loss": 3.64886212348938, |
|
"eval_runtime": 46.762, |
|
"eval_samples_per_second": 878.962, |
|
"eval_steps_per_second": 54.938, |
|
"step": 2032000 |
|
}, |
|
{ |
|
"epoch": 41.8, |
|
"eval_loss": 3.6310884952545166, |
|
"eval_runtime": 46.4576, |
|
"eval_samples_per_second": 884.72, |
|
"eval_steps_per_second": 55.298, |
|
"step": 2040000 |
|
}, |
|
{ |
|
"epoch": 41.96, |
|
"learning_rate": 1.4675227215875928e-06, |
|
"loss": 3.4713, |
|
"step": 2048000 |
|
}, |
|
{ |
|
"epoch": 41.96, |
|
"eval_loss": 3.619154691696167, |
|
"eval_runtime": 45.9184, |
|
"eval_samples_per_second": 895.109, |
|
"eval_steps_per_second": 55.947, |
|
"step": 2048000 |
|
}, |
|
{ |
|
"epoch": 42.12, |
|
"eval_loss": 3.651060104370117, |
|
"eval_runtime": 47.0032, |
|
"eval_samples_per_second": 874.451, |
|
"eval_steps_per_second": 54.656, |
|
"step": 2056000 |
|
}, |
|
{ |
|
"epoch": 42.29, |
|
"learning_rate": 1.4008171433336116e-06, |
|
"loss": 3.4677, |
|
"step": 2064000 |
|
}, |
|
{ |
|
"epoch": 42.29, |
|
"eval_loss": 3.6425869464874268, |
|
"eval_runtime": 46.3503, |
|
"eval_samples_per_second": 886.769, |
|
"eval_steps_per_second": 55.426, |
|
"step": 2064000 |
|
}, |
|
{ |
|
"epoch": 42.45, |
|
"eval_loss": 3.6362836360931396, |
|
"eval_runtime": 46.2845, |
|
"eval_samples_per_second": 888.029, |
|
"eval_steps_per_second": 55.505, |
|
"step": 2072000 |
|
}, |
|
{ |
|
"epoch": 42.62, |
|
"learning_rate": 1.33411156507963e-06, |
|
"loss": 3.4689, |
|
"step": 2080000 |
|
}, |
|
{ |
|
"epoch": 42.62, |
|
"eval_loss": 3.6378438472747803, |
|
"eval_runtime": 47.0132, |
|
"eval_samples_per_second": 874.265, |
|
"eval_steps_per_second": 54.644, |
|
"step": 2080000 |
|
}, |
|
{ |
|
"epoch": 42.78, |
|
"eval_loss": 3.6450445652008057, |
|
"eval_runtime": 46.1055, |
|
"eval_samples_per_second": 891.478, |
|
"eval_steps_per_second": 55.72, |
|
"step": 2088000 |
|
}, |
|
{ |
|
"epoch": 42.94, |
|
"learning_rate": 1.2674059868256484e-06, |
|
"loss": 3.4598, |
|
"step": 2096000 |
|
}, |
|
{ |
|
"epoch": 42.94, |
|
"eval_loss": 3.64805006980896, |
|
"eval_runtime": 46.8684, |
|
"eval_samples_per_second": 876.967, |
|
"eval_steps_per_second": 54.813, |
|
"step": 2096000 |
|
}, |
|
{ |
|
"epoch": 43.11, |
|
"eval_loss": 3.6675028800964355, |
|
"eval_runtime": 46.4765, |
|
"eval_samples_per_second": 884.36, |
|
"eval_steps_per_second": 55.275, |
|
"step": 2104000 |
|
}, |
|
{ |
|
"epoch": 43.27, |
|
"learning_rate": 1.2007004085716668e-06, |
|
"loss": 3.4487, |
|
"step": 2112000 |
|
}, |
|
{ |
|
"epoch": 43.27, |
|
"eval_loss": 3.6557657718658447, |
|
"eval_runtime": 46.0356, |
|
"eval_samples_per_second": 892.83, |
|
"eval_steps_per_second": 55.805, |
|
"step": 2112000 |
|
}, |
|
{ |
|
"epoch": 43.43, |
|
"eval_loss": 3.6451427936553955, |
|
"eval_runtime": 47.3121, |
|
"eval_samples_per_second": 868.741, |
|
"eval_steps_per_second": 54.299, |
|
"step": 2120000 |
|
}, |
|
{ |
|
"epoch": 43.6, |
|
"learning_rate": 1.1339948303176854e-06, |
|
"loss": 3.4555, |
|
"step": 2128000 |
|
}, |
|
{ |
|
"epoch": 43.6, |
|
"eval_loss": 3.643132448196411, |
|
"eval_runtime": 46.2499, |
|
"eval_samples_per_second": 888.694, |
|
"eval_steps_per_second": 55.546, |
|
"step": 2128000 |
|
}, |
|
{ |
|
"epoch": 43.76, |
|
"eval_loss": 3.6470389366149902, |
|
"eval_runtime": 45.8331, |
|
"eval_samples_per_second": 896.776, |
|
"eval_steps_per_second": 56.051, |
|
"step": 2136000 |
|
}, |
|
{ |
|
"epoch": 43.93, |
|
"learning_rate": 1.067289252063704e-06, |
|
"loss": 3.4727, |
|
"step": 2144000 |
|
}, |
|
{ |
|
"epoch": 43.93, |
|
"eval_loss": 3.6265406608581543, |
|
"eval_runtime": 47.1162, |
|
"eval_samples_per_second": 872.353, |
|
"eval_steps_per_second": 54.525, |
|
"step": 2144000 |
|
}, |
|
{ |
|
"epoch": 44.09, |
|
"eval_loss": 3.6335132122039795, |
|
"eval_runtime": 45.9499, |
|
"eval_samples_per_second": 894.497, |
|
"eval_steps_per_second": 55.909, |
|
"step": 2152000 |
|
}, |
|
{ |
|
"epoch": 44.25, |
|
"learning_rate": 1.0005836738097224e-06, |
|
"loss": 3.4626, |
|
"step": 2160000 |
|
}, |
|
{ |
|
"epoch": 44.25, |
|
"eval_loss": 3.639557123184204, |
|
"eval_runtime": 46.75, |
|
"eval_samples_per_second": 879.187, |
|
"eval_steps_per_second": 54.952, |
|
"step": 2160000 |
|
}, |
|
{ |
|
"epoch": 44.42, |
|
"eval_loss": 3.653687000274658, |
|
"eval_runtime": 47.165, |
|
"eval_samples_per_second": 871.452, |
|
"eval_steps_per_second": 54.468, |
|
"step": 2168000 |
|
}, |
|
{ |
|
"epoch": 44.58, |
|
"learning_rate": 9.33878095555741e-07, |
|
"loss": 3.4724, |
|
"step": 2176000 |
|
}, |
|
{ |
|
"epoch": 44.58, |
|
"eval_loss": 3.61678409576416, |
|
"eval_runtime": 46.2585, |
|
"eval_samples_per_second": 888.528, |
|
"eval_steps_per_second": 55.536, |
|
"step": 2176000 |
|
}, |
|
{ |
|
"epoch": 44.75, |
|
"eval_loss": 3.644352674484253, |
|
"eval_runtime": 47.0469, |
|
"eval_samples_per_second": 873.64, |
|
"eval_steps_per_second": 54.605, |
|
"step": 2184000 |
|
}, |
|
{ |
|
"epoch": 44.91, |
|
"learning_rate": 8.671725173017595e-07, |
|
"loss": 3.4545, |
|
"step": 2192000 |
|
}, |
|
{ |
|
"epoch": 44.91, |
|
"eval_loss": 3.6440114974975586, |
|
"eval_runtime": 46.2426, |
|
"eval_samples_per_second": 888.835, |
|
"eval_steps_per_second": 55.555, |
|
"step": 2192000 |
|
}, |
|
{ |
|
"epoch": 45.07, |
|
"eval_loss": 3.6327061653137207, |
|
"eval_runtime": 46.09, |
|
"eval_samples_per_second": 891.776, |
|
"eval_steps_per_second": 55.739, |
|
"step": 2200000 |
|
}, |
|
{ |
|
"epoch": 45.24, |
|
"learning_rate": 8.004669390477779e-07, |
|
"loss": 3.461, |
|
"step": 2208000 |
|
}, |
|
{ |
|
"epoch": 45.24, |
|
"eval_loss": 3.6362533569335938, |
|
"eval_runtime": 47.1445, |
|
"eval_samples_per_second": 871.831, |
|
"eval_steps_per_second": 54.492, |
|
"step": 2208000 |
|
}, |
|
{ |
|
"epoch": 45.4, |
|
"eval_loss": 3.653747081756592, |
|
"eval_runtime": 46.2235, |
|
"eval_samples_per_second": 889.202, |
|
"eval_steps_per_second": 55.578, |
|
"step": 2216000 |
|
}, |
|
{ |
|
"epoch": 45.57, |
|
"learning_rate": 7.337613607937964e-07, |
|
"loss": 3.4702, |
|
"step": 2224000 |
|
}, |
|
{ |
|
"epoch": 45.57, |
|
"eval_loss": 3.6123247146606445, |
|
"eval_runtime": 46.168, |
|
"eval_samples_per_second": 890.27, |
|
"eval_steps_per_second": 55.645, |
|
"step": 2224000 |
|
}, |
|
{ |
|
"epoch": 45.73, |
|
"eval_loss": 3.6554455757141113, |
|
"eval_runtime": 47.1193, |
|
"eval_samples_per_second": 872.296, |
|
"eval_steps_per_second": 54.521, |
|
"step": 2232000 |
|
}, |
|
{ |
|
"epoch": 45.89, |
|
"learning_rate": 6.67055782539815e-07, |
|
"loss": 3.4565, |
|
"step": 2240000 |
|
}, |
|
{ |
|
"epoch": 45.89, |
|
"eval_loss": 3.6522979736328125, |
|
"eval_runtime": 46.1449, |
|
"eval_samples_per_second": 890.716, |
|
"eval_steps_per_second": 55.672, |
|
"step": 2240000 |
|
}, |
|
{ |
|
"epoch": 46.06, |
|
"eval_loss": 3.6339659690856934, |
|
"eval_runtime": 47.2579, |
|
"eval_samples_per_second": 869.739, |
|
"eval_steps_per_second": 54.361, |
|
"step": 2248000 |
|
}, |
|
{ |
|
"epoch": 46.22, |
|
"learning_rate": 6.003502042858334e-07, |
|
"loss": 3.4517, |
|
"step": 2256000 |
|
}, |
|
{ |
|
"epoch": 46.22, |
|
"eval_loss": 3.6459498405456543, |
|
"eval_runtime": 46.9038, |
|
"eval_samples_per_second": 876.305, |
|
"eval_steps_per_second": 54.772, |
|
"step": 2256000 |
|
}, |
|
{ |
|
"epoch": 46.38, |
|
"eval_loss": 3.656141996383667, |
|
"eval_runtime": 46.3654, |
|
"eval_samples_per_second": 886.48, |
|
"eval_steps_per_second": 55.408, |
|
"step": 2264000 |
|
}, |
|
{ |
|
"epoch": 46.55, |
|
"learning_rate": 5.33644626031852e-07, |
|
"loss": 3.4631, |
|
"step": 2272000 |
|
}, |
|
{ |
|
"epoch": 46.55, |
|
"eval_loss": 3.6547927856445312, |
|
"eval_runtime": 47.1154, |
|
"eval_samples_per_second": 872.368, |
|
"eval_steps_per_second": 54.526, |
|
"step": 2272000 |
|
}, |
|
{ |
|
"epoch": 46.71, |
|
"eval_loss": 3.6228716373443604, |
|
"eval_runtime": 46.2908, |
|
"eval_samples_per_second": 887.908, |
|
"eval_steps_per_second": 55.497, |
|
"step": 2280000 |
|
}, |
|
{ |
|
"epoch": 46.88, |
|
"learning_rate": 4.669390477778705e-07, |
|
"loss": 3.4518, |
|
"step": 2288000 |
|
}, |
|
{ |
|
"epoch": 46.88, |
|
"eval_loss": 3.6350128650665283, |
|
"eval_runtime": 46.3584, |
|
"eval_samples_per_second": 886.613, |
|
"eval_steps_per_second": 55.416, |
|
"step": 2288000 |
|
}, |
|
{ |
|
"epoch": 47.04, |
|
"eval_loss": 3.6483192443847656, |
|
"eval_runtime": 47.24, |
|
"eval_samples_per_second": 870.067, |
|
"eval_steps_per_second": 54.382, |
|
"step": 2296000 |
|
}, |
|
{ |
|
"epoch": 47.2, |
|
"learning_rate": 4.0023346952388894e-07, |
|
"loss": 3.4592, |
|
"step": 2304000 |
|
}, |
|
{ |
|
"epoch": 47.2, |
|
"eval_loss": 3.6263089179992676, |
|
"eval_runtime": 47.0185, |
|
"eval_samples_per_second": 874.166, |
|
"eval_steps_per_second": 54.638, |
|
"step": 2304000 |
|
}, |
|
{ |
|
"epoch": 47.37, |
|
"eval_loss": 3.6339097023010254, |
|
"eval_runtime": 46.0199, |
|
"eval_samples_per_second": 893.135, |
|
"eval_steps_per_second": 55.824, |
|
"step": 2312000 |
|
}, |
|
{ |
|
"epoch": 47.53, |
|
"learning_rate": 3.335278912699075e-07, |
|
"loss": 3.4569, |
|
"step": 2320000 |
|
}, |
|
{ |
|
"epoch": 47.53, |
|
"eval_loss": 3.659444808959961, |
|
"eval_runtime": 47.1636, |
|
"eval_samples_per_second": 871.477, |
|
"eval_steps_per_second": 54.47, |
|
"step": 2320000 |
|
}, |
|
{ |
|
"epoch": 47.7, |
|
"eval_loss": 3.638535737991333, |
|
"eval_runtime": 46.1693, |
|
"eval_samples_per_second": 890.246, |
|
"eval_steps_per_second": 55.643, |
|
"step": 2328000 |
|
}, |
|
{ |
|
"epoch": 47.86, |
|
"learning_rate": 2.66822313015926e-07, |
|
"loss": 3.4524, |
|
"step": 2336000 |
|
}, |
|
{ |
|
"epoch": 47.86, |
|
"eval_loss": 3.6434078216552734, |
|
"eval_runtime": 47.0318, |
|
"eval_samples_per_second": 873.919, |
|
"eval_steps_per_second": 54.623, |
|
"step": 2336000 |
|
}, |
|
{ |
|
"epoch": 48.02, |
|
"eval_loss": 3.650230646133423, |
|
"eval_runtime": 46.5514, |
|
"eval_samples_per_second": 882.938, |
|
"eval_steps_per_second": 55.186, |
|
"step": 2344000 |
|
}, |
|
{ |
|
"epoch": 48.19, |
|
"learning_rate": 2.0011673476194447e-07, |
|
"loss": 3.4644, |
|
"step": 2352000 |
|
}, |
|
{ |
|
"epoch": 48.19, |
|
"eval_loss": 3.617619276046753, |
|
"eval_runtime": 46.2116, |
|
"eval_samples_per_second": 889.43, |
|
"eval_steps_per_second": 55.592, |
|
"step": 2352000 |
|
}, |
|
{ |
|
"epoch": 48.35, |
|
"eval_loss": 3.6293184757232666, |
|
"eval_runtime": 47.399, |
|
"eval_samples_per_second": 867.15, |
|
"eval_steps_per_second": 54.199, |
|
"step": 2360000 |
|
}, |
|
{ |
|
"epoch": 48.52, |
|
"learning_rate": 1.33411156507963e-07, |
|
"loss": 3.4586, |
|
"step": 2368000 |
|
}, |
|
{ |
|
"epoch": 48.52, |
|
"eval_loss": 3.630380392074585, |
|
"eval_runtime": 46.3912, |
|
"eval_samples_per_second": 885.987, |
|
"eval_steps_per_second": 55.377, |
|
"step": 2368000 |
|
}, |
|
{ |
|
"epoch": 48.68, |
|
"eval_loss": 3.6343326568603516, |
|
"eval_runtime": 46.2144, |
|
"eval_samples_per_second": 889.376, |
|
"eval_steps_per_second": 55.589, |
|
"step": 2376000 |
|
}, |
|
{ |
|
"epoch": 48.84, |
|
"learning_rate": 6.67055782539815e-08, |
|
"loss": 3.4439, |
|
"step": 2384000 |
|
}, |
|
{ |
|
"epoch": 48.84, |
|
"eval_loss": 3.6090333461761475, |
|
"eval_runtime": 47.3482, |
|
"eval_samples_per_second": 868.08, |
|
"eval_steps_per_second": 54.258, |
|
"step": 2384000 |
|
}, |
|
{ |
|
"epoch": 49.01, |
|
"eval_loss": 3.6414153575897217, |
|
"eval_runtime": 46.5994, |
|
"eval_samples_per_second": 882.029, |
|
"eval_steps_per_second": 55.13, |
|
"step": 2392000 |
|
}, |
|
{ |
|
"epoch": 49.17, |
|
"learning_rate": 0.0, |
|
"loss": 3.4474, |
|
"step": 2400000 |
|
}, |
|
{ |
|
"epoch": 49.17, |
|
"eval_loss": 3.620838165283203, |
|
"eval_runtime": 46.9825, |
|
"eval_samples_per_second": 874.835, |
|
"eval_steps_per_second": 54.68, |
|
"step": 2400000 |
|
}, |
|
{ |
|
"epoch": 49.17, |
|
"step": 2400000, |
|
"total_flos": 6.906141294629226e+17, |
|
"train_loss": 3.376089767252604, |
|
"train_runtime": 158003.2062, |
|
"train_samples_per_second": 243.033, |
|
"train_steps_per_second": 15.19 |
|
} |
|
], |
|
"logging_steps": 16000, |
|
"max_steps": 2400000, |
|
"num_train_epochs": 50, |
|
"save_steps": 32000, |
|
"total_flos": 6.906141294629226e+17, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|