|
{ |
|
"best_metric": 1.9023408889770508, |
|
"best_model_checkpoint": "./model_tweets_2020_Q1_75/checkpoint-2336000", |
|
"epoch": 19.569471624266146, |
|
"eval_steps": 8000, |
|
"global_step": 2400000, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.07, |
|
"eval_loss": 2.2396600246429443, |
|
"eval_runtime": 113.355, |
|
"eval_samples_per_second": 911.075, |
|
"eval_steps_per_second": 56.945, |
|
"step": 8000 |
|
}, |
|
{ |
|
"epoch": 0.13, |
|
"learning_rate": 4.0726666666666665e-07, |
|
"loss": 2.4342, |
|
"step": 16000 |
|
}, |
|
{ |
|
"epoch": 0.13, |
|
"eval_loss": 2.1510801315307617, |
|
"eval_runtime": 111.953, |
|
"eval_samples_per_second": 922.486, |
|
"eval_steps_per_second": 57.658, |
|
"step": 16000 |
|
}, |
|
{ |
|
"epoch": 0.2, |
|
"eval_loss": 2.1108760833740234, |
|
"eval_runtime": 111.7901, |
|
"eval_samples_per_second": 923.83, |
|
"eval_steps_per_second": 57.742, |
|
"step": 24000 |
|
}, |
|
{ |
|
"epoch": 0.26, |
|
"learning_rate": 4.0453333333333336e-07, |
|
"loss": 2.2417, |
|
"step": 32000 |
|
}, |
|
{ |
|
"epoch": 0.26, |
|
"eval_loss": 2.0788779258728027, |
|
"eval_runtime": 111.8843, |
|
"eval_samples_per_second": 923.051, |
|
"eval_steps_per_second": 57.694, |
|
"step": 32000 |
|
}, |
|
{ |
|
"epoch": 0.33, |
|
"eval_loss": 2.065674066543579, |
|
"eval_runtime": 111.7346, |
|
"eval_samples_per_second": 924.288, |
|
"eval_steps_per_second": 57.771, |
|
"step": 40000 |
|
}, |
|
{ |
|
"epoch": 0.39, |
|
"learning_rate": 4.018e-07, |
|
"loss": 2.1852, |
|
"step": 48000 |
|
}, |
|
{ |
|
"epoch": 0.39, |
|
"eval_loss": 2.0397331714630127, |
|
"eval_runtime": 114.2687, |
|
"eval_samples_per_second": 903.791, |
|
"eval_steps_per_second": 56.49, |
|
"step": 48000 |
|
}, |
|
{ |
|
"epoch": 0.46, |
|
"eval_loss": 2.0303494930267334, |
|
"eval_runtime": 112.0167, |
|
"eval_samples_per_second": 921.961, |
|
"eval_steps_per_second": 57.625, |
|
"step": 56000 |
|
}, |
|
{ |
|
"epoch": 0.52, |
|
"learning_rate": 3.9906666666666667e-07, |
|
"loss": 2.1511, |
|
"step": 64000 |
|
}, |
|
{ |
|
"epoch": 0.52, |
|
"eval_loss": 2.024770975112915, |
|
"eval_runtime": 112.2634, |
|
"eval_samples_per_second": 919.935, |
|
"eval_steps_per_second": 57.499, |
|
"step": 64000 |
|
}, |
|
{ |
|
"epoch": 0.59, |
|
"eval_loss": 2.022064685821533, |
|
"eval_runtime": 112.5031, |
|
"eval_samples_per_second": 917.975, |
|
"eval_steps_per_second": 57.376, |
|
"step": 72000 |
|
}, |
|
{ |
|
"epoch": 0.65, |
|
"learning_rate": 3.963333333333333e-07, |
|
"loss": 2.1261, |
|
"step": 80000 |
|
}, |
|
{ |
|
"epoch": 0.65, |
|
"eval_loss": 2.0128211975097656, |
|
"eval_runtime": 113.0139, |
|
"eval_samples_per_second": 913.826, |
|
"eval_steps_per_second": 57.117, |
|
"step": 80000 |
|
}, |
|
{ |
|
"epoch": 0.72, |
|
"eval_loss": 2.0067052841186523, |
|
"eval_runtime": 112.3731, |
|
"eval_samples_per_second": 919.037, |
|
"eval_steps_per_second": 57.443, |
|
"step": 88000 |
|
}, |
|
{ |
|
"epoch": 0.78, |
|
"learning_rate": 3.936e-07, |
|
"loss": 2.1179, |
|
"step": 96000 |
|
}, |
|
{ |
|
"epoch": 0.78, |
|
"eval_loss": 2.003864288330078, |
|
"eval_runtime": 113.5425, |
|
"eval_samples_per_second": 909.571, |
|
"eval_steps_per_second": 56.851, |
|
"step": 96000 |
|
}, |
|
{ |
|
"epoch": 0.85, |
|
"eval_loss": 1.9972714185714722, |
|
"eval_runtime": 112.6781, |
|
"eval_samples_per_second": 916.549, |
|
"eval_steps_per_second": 57.287, |
|
"step": 104000 |
|
}, |
|
{ |
|
"epoch": 0.91, |
|
"learning_rate": 3.908666666666667e-07, |
|
"loss": 2.1097, |
|
"step": 112000 |
|
}, |
|
{ |
|
"epoch": 0.91, |
|
"eval_loss": 1.98354971408844, |
|
"eval_runtime": 112.0904, |
|
"eval_samples_per_second": 921.355, |
|
"eval_steps_per_second": 57.587, |
|
"step": 112000 |
|
}, |
|
{ |
|
"epoch": 0.98, |
|
"eval_loss": 1.9983153343200684, |
|
"eval_runtime": 112.7204, |
|
"eval_samples_per_second": 916.205, |
|
"eval_steps_per_second": 57.266, |
|
"step": 120000 |
|
}, |
|
{ |
|
"epoch": 1.04, |
|
"learning_rate": 3.8813333333333334e-07, |
|
"loss": 2.1031, |
|
"step": 128000 |
|
}, |
|
{ |
|
"epoch": 1.04, |
|
"eval_loss": 1.9898955821990967, |
|
"eval_runtime": 114.024, |
|
"eval_samples_per_second": 905.731, |
|
"eval_steps_per_second": 56.611, |
|
"step": 128000 |
|
}, |
|
{ |
|
"epoch": 1.11, |
|
"eval_loss": 1.9755431413650513, |
|
"eval_runtime": 114.5549, |
|
"eval_samples_per_second": 901.533, |
|
"eval_steps_per_second": 56.349, |
|
"step": 136000 |
|
}, |
|
{ |
|
"epoch": 1.17, |
|
"learning_rate": 3.854e-07, |
|
"loss": 2.0977, |
|
"step": 144000 |
|
}, |
|
{ |
|
"epoch": 1.17, |
|
"eval_loss": 1.9855457544326782, |
|
"eval_runtime": 113.3525, |
|
"eval_samples_per_second": 911.096, |
|
"eval_steps_per_second": 56.946, |
|
"step": 144000 |
|
}, |
|
{ |
|
"epoch": 1.24, |
|
"eval_loss": 1.9721323251724243, |
|
"eval_runtime": 114.0229, |
|
"eval_samples_per_second": 905.739, |
|
"eval_steps_per_second": 56.611, |
|
"step": 152000 |
|
}, |
|
{ |
|
"epoch": 1.3, |
|
"learning_rate": 3.8266666666666665e-07, |
|
"loss": 2.0892, |
|
"step": 160000 |
|
}, |
|
{ |
|
"epoch": 1.3, |
|
"eval_loss": 1.9813446998596191, |
|
"eval_runtime": 113.5565, |
|
"eval_samples_per_second": 909.459, |
|
"eval_steps_per_second": 56.844, |
|
"step": 160000 |
|
}, |
|
{ |
|
"epoch": 1.37, |
|
"eval_loss": 1.9827616214752197, |
|
"eval_runtime": 113.4289, |
|
"eval_samples_per_second": 910.482, |
|
"eval_steps_per_second": 56.908, |
|
"step": 168000 |
|
}, |
|
{ |
|
"epoch": 1.44, |
|
"learning_rate": 3.799333333333333e-07, |
|
"loss": 2.0882, |
|
"step": 176000 |
|
}, |
|
{ |
|
"epoch": 1.44, |
|
"eval_loss": 1.9703537225723267, |
|
"eval_runtime": 112.6429, |
|
"eval_samples_per_second": 916.835, |
|
"eval_steps_per_second": 57.305, |
|
"step": 176000 |
|
}, |
|
{ |
|
"epoch": 1.5, |
|
"eval_loss": 1.9728624820709229, |
|
"eval_runtime": 113.7442, |
|
"eval_samples_per_second": 907.958, |
|
"eval_steps_per_second": 56.75, |
|
"step": 184000 |
|
}, |
|
{ |
|
"epoch": 1.57, |
|
"learning_rate": 3.772e-07, |
|
"loss": 2.0884, |
|
"step": 192000 |
|
}, |
|
{ |
|
"epoch": 1.57, |
|
"eval_loss": 1.9721413850784302, |
|
"eval_runtime": 113.2577, |
|
"eval_samples_per_second": 911.859, |
|
"eval_steps_per_second": 56.994, |
|
"step": 192000 |
|
}, |
|
{ |
|
"epoch": 1.63, |
|
"eval_loss": 1.9663499593734741, |
|
"eval_runtime": 115.0819, |
|
"eval_samples_per_second": 897.404, |
|
"eval_steps_per_second": 56.09, |
|
"step": 200000 |
|
}, |
|
{ |
|
"epoch": 1.7, |
|
"learning_rate": 3.7446666666666667e-07, |
|
"loss": 2.0814, |
|
"step": 208000 |
|
}, |
|
{ |
|
"epoch": 1.7, |
|
"eval_loss": 1.9612431526184082, |
|
"eval_runtime": 113.9384, |
|
"eval_samples_per_second": 906.411, |
|
"eval_steps_per_second": 56.653, |
|
"step": 208000 |
|
}, |
|
{ |
|
"epoch": 1.76, |
|
"eval_loss": 1.971712350845337, |
|
"eval_runtime": 113.4328, |
|
"eval_samples_per_second": 910.451, |
|
"eval_steps_per_second": 56.906, |
|
"step": 216000 |
|
}, |
|
{ |
|
"epoch": 1.83, |
|
"learning_rate": 3.7173333333333333e-07, |
|
"loss": 2.0806, |
|
"step": 224000 |
|
}, |
|
{ |
|
"epoch": 1.83, |
|
"eval_loss": 1.959405541419983, |
|
"eval_runtime": 113.1738, |
|
"eval_samples_per_second": 912.535, |
|
"eval_steps_per_second": 57.036, |
|
"step": 224000 |
|
}, |
|
{ |
|
"epoch": 1.89, |
|
"eval_loss": 1.960507869720459, |
|
"eval_runtime": 112.7387, |
|
"eval_samples_per_second": 916.056, |
|
"eval_steps_per_second": 57.256, |
|
"step": 232000 |
|
}, |
|
{ |
|
"epoch": 1.96, |
|
"learning_rate": 3.69e-07, |
|
"loss": 2.0838, |
|
"step": 240000 |
|
}, |
|
{ |
|
"epoch": 1.96, |
|
"eval_loss": 1.9555588960647583, |
|
"eval_runtime": 113.0742, |
|
"eval_samples_per_second": 913.338, |
|
"eval_steps_per_second": 57.086, |
|
"step": 240000 |
|
}, |
|
{ |
|
"epoch": 2.02, |
|
"eval_loss": 1.955155849456787, |
|
"eval_runtime": 114.9852, |
|
"eval_samples_per_second": 898.159, |
|
"eval_steps_per_second": 56.138, |
|
"step": 248000 |
|
}, |
|
{ |
|
"epoch": 2.09, |
|
"learning_rate": 3.6626666666666664e-07, |
|
"loss": 2.0711, |
|
"step": 256000 |
|
}, |
|
{ |
|
"epoch": 2.09, |
|
"eval_loss": 1.965279459953308, |
|
"eval_runtime": 115.2918, |
|
"eval_samples_per_second": 895.771, |
|
"eval_steps_per_second": 55.988, |
|
"step": 256000 |
|
}, |
|
{ |
|
"epoch": 2.15, |
|
"eval_loss": 1.9581040143966675, |
|
"eval_runtime": 112.9905, |
|
"eval_samples_per_second": 914.015, |
|
"eval_steps_per_second": 57.129, |
|
"step": 264000 |
|
}, |
|
{ |
|
"epoch": 2.22, |
|
"learning_rate": 3.6353333333333335e-07, |
|
"loss": 2.065, |
|
"step": 272000 |
|
}, |
|
{ |
|
"epoch": 2.22, |
|
"eval_loss": 1.9558922052383423, |
|
"eval_runtime": 113.2393, |
|
"eval_samples_per_second": 912.007, |
|
"eval_steps_per_second": 57.003, |
|
"step": 272000 |
|
}, |
|
{ |
|
"epoch": 2.28, |
|
"eval_loss": 1.9615230560302734, |
|
"eval_runtime": 113.7321, |
|
"eval_samples_per_second": 908.055, |
|
"eval_steps_per_second": 56.756, |
|
"step": 280000 |
|
}, |
|
{ |
|
"epoch": 2.35, |
|
"learning_rate": 3.608e-07, |
|
"loss": 2.0769, |
|
"step": 288000 |
|
}, |
|
{ |
|
"epoch": 2.35, |
|
"eval_loss": 1.9493736028671265, |
|
"eval_runtime": 113.1393, |
|
"eval_samples_per_second": 912.813, |
|
"eval_steps_per_second": 57.054, |
|
"step": 288000 |
|
}, |
|
{ |
|
"epoch": 2.41, |
|
"eval_loss": 1.9487217664718628, |
|
"eval_runtime": 112.3053, |
|
"eval_samples_per_second": 919.591, |
|
"eval_steps_per_second": 57.477, |
|
"step": 296000 |
|
}, |
|
{ |
|
"epoch": 2.48, |
|
"learning_rate": 3.5806666666666666e-07, |
|
"loss": 2.0733, |
|
"step": 304000 |
|
}, |
|
{ |
|
"epoch": 2.48, |
|
"eval_loss": 1.9546173810958862, |
|
"eval_runtime": 113.1729, |
|
"eval_samples_per_second": 912.542, |
|
"eval_steps_per_second": 57.037, |
|
"step": 304000 |
|
}, |
|
{ |
|
"epoch": 2.54, |
|
"eval_loss": 1.9445385932922363, |
|
"eval_runtime": 112.3458, |
|
"eval_samples_per_second": 919.26, |
|
"eval_steps_per_second": 57.457, |
|
"step": 312000 |
|
}, |
|
{ |
|
"epoch": 2.61, |
|
"learning_rate": 3.553333333333333e-07, |
|
"loss": 2.0675, |
|
"step": 320000 |
|
}, |
|
{ |
|
"epoch": 2.61, |
|
"eval_loss": 1.9534730911254883, |
|
"eval_runtime": 113.0946, |
|
"eval_samples_per_second": 913.174, |
|
"eval_steps_per_second": 57.076, |
|
"step": 320000 |
|
}, |
|
{ |
|
"epoch": 2.67, |
|
"eval_loss": 1.9580506086349487, |
|
"eval_runtime": 112.7099, |
|
"eval_samples_per_second": 916.291, |
|
"eval_steps_per_second": 57.271, |
|
"step": 328000 |
|
}, |
|
{ |
|
"epoch": 2.74, |
|
"learning_rate": 3.5259999999999997e-07, |
|
"loss": 2.0599, |
|
"step": 336000 |
|
}, |
|
{ |
|
"epoch": 2.74, |
|
"eval_loss": 1.9472216367721558, |
|
"eval_runtime": 113.0268, |
|
"eval_samples_per_second": 913.722, |
|
"eval_steps_per_second": 57.11, |
|
"step": 336000 |
|
}, |
|
{ |
|
"epoch": 2.8, |
|
"eval_loss": 1.9545352458953857, |
|
"eval_runtime": 112.8522, |
|
"eval_samples_per_second": 915.135, |
|
"eval_steps_per_second": 57.199, |
|
"step": 344000 |
|
}, |
|
{ |
|
"epoch": 2.87, |
|
"learning_rate": 3.498666666666667e-07, |
|
"loss": 2.0675, |
|
"step": 352000 |
|
}, |
|
{ |
|
"epoch": 2.87, |
|
"eval_loss": 1.9551931619644165, |
|
"eval_runtime": 112.8344, |
|
"eval_samples_per_second": 915.28, |
|
"eval_steps_per_second": 57.208, |
|
"step": 352000 |
|
}, |
|
{ |
|
"epoch": 2.94, |
|
"eval_loss": 1.9397163391113281, |
|
"eval_runtime": 112.7266, |
|
"eval_samples_per_second": 916.155, |
|
"eval_steps_per_second": 57.262, |
|
"step": 360000 |
|
}, |
|
{ |
|
"epoch": 3.0, |
|
"learning_rate": 3.4713333333333333e-07, |
|
"loss": 2.0711, |
|
"step": 368000 |
|
}, |
|
{ |
|
"epoch": 3.0, |
|
"eval_loss": 1.9475340843200684, |
|
"eval_runtime": 113.527, |
|
"eval_samples_per_second": 909.695, |
|
"eval_steps_per_second": 56.859, |
|
"step": 368000 |
|
}, |
|
{ |
|
"epoch": 3.07, |
|
"eval_loss": 1.9387180805206299, |
|
"eval_runtime": 112.9318, |
|
"eval_samples_per_second": 914.49, |
|
"eval_steps_per_second": 57.158, |
|
"step": 376000 |
|
}, |
|
{ |
|
"epoch": 3.13, |
|
"learning_rate": 3.444e-07, |
|
"loss": 2.0663, |
|
"step": 384000 |
|
}, |
|
{ |
|
"epoch": 3.13, |
|
"eval_loss": 1.948356032371521, |
|
"eval_runtime": 113.6939, |
|
"eval_samples_per_second": 908.36, |
|
"eval_steps_per_second": 56.775, |
|
"step": 384000 |
|
}, |
|
{ |
|
"epoch": 3.2, |
|
"eval_loss": 1.942366361618042, |
|
"eval_runtime": 112.9142, |
|
"eval_samples_per_second": 914.633, |
|
"eval_steps_per_second": 57.167, |
|
"step": 392000 |
|
}, |
|
{ |
|
"epoch": 3.26, |
|
"learning_rate": 3.416666666666667e-07, |
|
"loss": 2.0628, |
|
"step": 400000 |
|
}, |
|
{ |
|
"epoch": 3.26, |
|
"eval_loss": 1.941139578819275, |
|
"eval_runtime": 112.7091, |
|
"eval_samples_per_second": 916.297, |
|
"eval_steps_per_second": 57.271, |
|
"step": 400000 |
|
}, |
|
{ |
|
"epoch": 3.33, |
|
"eval_loss": 1.940949559211731, |
|
"eval_runtime": 112.6381, |
|
"eval_samples_per_second": 916.875, |
|
"eval_steps_per_second": 57.307, |
|
"step": 408000 |
|
}, |
|
{ |
|
"epoch": 3.39, |
|
"learning_rate": 3.3893333333333335e-07, |
|
"loss": 2.0651, |
|
"step": 416000 |
|
}, |
|
{ |
|
"epoch": 3.39, |
|
"eval_loss": 1.9446566104888916, |
|
"eval_runtime": 112.949, |
|
"eval_samples_per_second": 914.351, |
|
"eval_steps_per_second": 57.15, |
|
"step": 416000 |
|
}, |
|
{ |
|
"epoch": 3.46, |
|
"eval_loss": 1.940216064453125, |
|
"eval_runtime": 114.0012, |
|
"eval_samples_per_second": 905.912, |
|
"eval_steps_per_second": 56.622, |
|
"step": 424000 |
|
}, |
|
{ |
|
"epoch": 3.52, |
|
"learning_rate": 3.3619999999999995e-07, |
|
"loss": 2.0598, |
|
"step": 432000 |
|
}, |
|
{ |
|
"epoch": 3.52, |
|
"eval_loss": 1.9503767490386963, |
|
"eval_runtime": 113.8999, |
|
"eval_samples_per_second": 906.717, |
|
"eval_steps_per_second": 56.673, |
|
"step": 432000 |
|
}, |
|
{ |
|
"epoch": 3.59, |
|
"eval_loss": 1.9414310455322266, |
|
"eval_runtime": 113.4917, |
|
"eval_samples_per_second": 909.978, |
|
"eval_steps_per_second": 56.876, |
|
"step": 440000 |
|
}, |
|
{ |
|
"epoch": 3.65, |
|
"learning_rate": 3.3346666666666666e-07, |
|
"loss": 2.0612, |
|
"step": 448000 |
|
}, |
|
{ |
|
"epoch": 3.65, |
|
"eval_loss": 1.9329679012298584, |
|
"eval_runtime": 113.8065, |
|
"eval_samples_per_second": 907.462, |
|
"eval_steps_per_second": 56.719, |
|
"step": 448000 |
|
}, |
|
{ |
|
"epoch": 3.72, |
|
"eval_loss": 1.942373514175415, |
|
"eval_runtime": 112.7208, |
|
"eval_samples_per_second": 916.202, |
|
"eval_steps_per_second": 57.265, |
|
"step": 456000 |
|
}, |
|
{ |
|
"epoch": 3.78, |
|
"learning_rate": 3.307333333333333e-07, |
|
"loss": 2.0653, |
|
"step": 464000 |
|
}, |
|
{ |
|
"epoch": 3.78, |
|
"eval_loss": 1.930959939956665, |
|
"eval_runtime": 113.0386, |
|
"eval_samples_per_second": 913.626, |
|
"eval_steps_per_second": 57.104, |
|
"step": 464000 |
|
}, |
|
{ |
|
"epoch": 3.85, |
|
"eval_loss": 1.9363598823547363, |
|
"eval_runtime": 112.8486, |
|
"eval_samples_per_second": 915.164, |
|
"eval_steps_per_second": 57.201, |
|
"step": 472000 |
|
}, |
|
{ |
|
"epoch": 3.91, |
|
"learning_rate": 3.28e-07, |
|
"loss": 2.0585, |
|
"step": 480000 |
|
}, |
|
{ |
|
"epoch": 3.91, |
|
"eval_loss": 1.9507150650024414, |
|
"eval_runtime": 114.0147, |
|
"eval_samples_per_second": 905.804, |
|
"eval_steps_per_second": 56.615, |
|
"step": 480000 |
|
}, |
|
{ |
|
"epoch": 3.98, |
|
"eval_loss": 1.9320358037948608, |
|
"eval_runtime": 113.3729, |
|
"eval_samples_per_second": 910.932, |
|
"eval_steps_per_second": 56.936, |
|
"step": 488000 |
|
}, |
|
{ |
|
"epoch": 4.04, |
|
"learning_rate": 3.252666666666667e-07, |
|
"loss": 2.0593, |
|
"step": 496000 |
|
}, |
|
{ |
|
"epoch": 4.04, |
|
"eval_loss": 1.9416472911834717, |
|
"eval_runtime": 113.4866, |
|
"eval_samples_per_second": 910.02, |
|
"eval_steps_per_second": 56.879, |
|
"step": 496000 |
|
}, |
|
{ |
|
"epoch": 4.11, |
|
"eval_loss": 1.934741735458374, |
|
"eval_runtime": 112.805, |
|
"eval_samples_per_second": 915.518, |
|
"eval_steps_per_second": 57.223, |
|
"step": 504000 |
|
}, |
|
{ |
|
"epoch": 4.17, |
|
"learning_rate": 3.2253333333333334e-07, |
|
"loss": 2.0671, |
|
"step": 512000 |
|
}, |
|
{ |
|
"epoch": 4.17, |
|
"eval_loss": 1.9390867948532104, |
|
"eval_runtime": 112.908, |
|
"eval_samples_per_second": 914.683, |
|
"eval_steps_per_second": 57.17, |
|
"step": 512000 |
|
}, |
|
{ |
|
"epoch": 4.24, |
|
"eval_loss": 1.9453818798065186, |
|
"eval_runtime": 112.778, |
|
"eval_samples_per_second": 915.737, |
|
"eval_steps_per_second": 57.236, |
|
"step": 520000 |
|
}, |
|
{ |
|
"epoch": 4.31, |
|
"learning_rate": 3.198e-07, |
|
"loss": 2.0552, |
|
"step": 528000 |
|
}, |
|
{ |
|
"epoch": 4.31, |
|
"eval_loss": 1.9501063823699951, |
|
"eval_runtime": 112.534, |
|
"eval_samples_per_second": 917.723, |
|
"eval_steps_per_second": 57.36, |
|
"step": 528000 |
|
}, |
|
{ |
|
"epoch": 4.37, |
|
"eval_loss": 1.935518741607666, |
|
"eval_runtime": 113.9789, |
|
"eval_samples_per_second": 906.089, |
|
"eval_steps_per_second": 56.633, |
|
"step": 536000 |
|
}, |
|
{ |
|
"epoch": 4.44, |
|
"learning_rate": 3.1706666666666665e-07, |
|
"loss": 2.0626, |
|
"step": 544000 |
|
}, |
|
{ |
|
"epoch": 4.44, |
|
"eval_loss": 1.9239717721939087, |
|
"eval_runtime": 112.9585, |
|
"eval_samples_per_second": 914.273, |
|
"eval_steps_per_second": 57.145, |
|
"step": 544000 |
|
}, |
|
{ |
|
"epoch": 4.5, |
|
"eval_loss": 1.9398826360702515, |
|
"eval_runtime": 113.7219, |
|
"eval_samples_per_second": 908.137, |
|
"eval_steps_per_second": 56.761, |
|
"step": 552000 |
|
}, |
|
{ |
|
"epoch": 4.57, |
|
"learning_rate": 3.1433333333333336e-07, |
|
"loss": 2.0592, |
|
"step": 560000 |
|
}, |
|
{ |
|
"epoch": 4.57, |
|
"eval_loss": 1.9360318183898926, |
|
"eval_runtime": 113.1836, |
|
"eval_samples_per_second": 912.456, |
|
"eval_steps_per_second": 57.031, |
|
"step": 560000 |
|
}, |
|
{ |
|
"epoch": 4.63, |
|
"eval_loss": 1.9377766847610474, |
|
"eval_runtime": 113.185, |
|
"eval_samples_per_second": 912.444, |
|
"eval_steps_per_second": 57.031, |
|
"step": 568000 |
|
}, |
|
{ |
|
"epoch": 4.7, |
|
"learning_rate": 3.116e-07, |
|
"loss": 2.0584, |
|
"step": 576000 |
|
}, |
|
{ |
|
"epoch": 4.7, |
|
"eval_loss": 1.9293311834335327, |
|
"eval_runtime": 113.6435, |
|
"eval_samples_per_second": 908.763, |
|
"eval_steps_per_second": 56.8, |
|
"step": 576000 |
|
}, |
|
{ |
|
"epoch": 4.76, |
|
"eval_loss": 1.943053126335144, |
|
"eval_runtime": 113.0332, |
|
"eval_samples_per_second": 913.67, |
|
"eval_steps_per_second": 57.107, |
|
"step": 584000 |
|
}, |
|
{ |
|
"epoch": 4.83, |
|
"learning_rate": 3.0886666666666667e-07, |
|
"loss": 2.0515, |
|
"step": 592000 |
|
}, |
|
{ |
|
"epoch": 4.83, |
|
"eval_loss": 1.9324830770492554, |
|
"eval_runtime": 113.1852, |
|
"eval_samples_per_second": 912.443, |
|
"eval_steps_per_second": 57.03, |
|
"step": 592000 |
|
}, |
|
{ |
|
"epoch": 4.89, |
|
"eval_loss": 1.9265968799591064, |
|
"eval_runtime": 113.5248, |
|
"eval_samples_per_second": 909.713, |
|
"eval_steps_per_second": 56.86, |
|
"step": 600000 |
|
}, |
|
{ |
|
"epoch": 4.96, |
|
"learning_rate": 3.061333333333333e-07, |
|
"loss": 2.0545, |
|
"step": 608000 |
|
}, |
|
{ |
|
"epoch": 4.96, |
|
"eval_loss": 1.921515941619873, |
|
"eval_runtime": 113.9086, |
|
"eval_samples_per_second": 906.648, |
|
"eval_steps_per_second": 56.668, |
|
"step": 608000 |
|
}, |
|
{ |
|
"epoch": 5.02, |
|
"eval_loss": 1.924493670463562, |
|
"eval_runtime": 113.1157, |
|
"eval_samples_per_second": 913.003, |
|
"eval_steps_per_second": 57.065, |
|
"step": 616000 |
|
}, |
|
{ |
|
"epoch": 5.09, |
|
"learning_rate": 3.034e-07, |
|
"loss": 2.0525, |
|
"step": 624000 |
|
}, |
|
{ |
|
"epoch": 5.09, |
|
"eval_loss": 1.9372978210449219, |
|
"eval_runtime": 113.8901, |
|
"eval_samples_per_second": 906.795, |
|
"eval_steps_per_second": 56.677, |
|
"step": 624000 |
|
}, |
|
{ |
|
"epoch": 5.15, |
|
"eval_loss": 1.934131145477295, |
|
"eval_runtime": 112.8536, |
|
"eval_samples_per_second": 915.124, |
|
"eval_steps_per_second": 57.198, |
|
"step": 632000 |
|
}, |
|
{ |
|
"epoch": 5.22, |
|
"learning_rate": 3.0066666666666663e-07, |
|
"loss": 2.0556, |
|
"step": 640000 |
|
}, |
|
{ |
|
"epoch": 5.22, |
|
"eval_loss": 1.9312899112701416, |
|
"eval_runtime": 113.0744, |
|
"eval_samples_per_second": 913.336, |
|
"eval_steps_per_second": 57.086, |
|
"step": 640000 |
|
}, |
|
{ |
|
"epoch": 5.28, |
|
"eval_loss": 1.922965407371521, |
|
"eval_runtime": 113.8801, |
|
"eval_samples_per_second": 906.875, |
|
"eval_steps_per_second": 56.682, |
|
"step": 648000 |
|
}, |
|
{ |
|
"epoch": 5.35, |
|
"learning_rate": 2.9793333333333334e-07, |
|
"loss": 2.0567, |
|
"step": 656000 |
|
}, |
|
{ |
|
"epoch": 5.35, |
|
"eval_loss": 1.930014729499817, |
|
"eval_runtime": 112.8937, |
|
"eval_samples_per_second": 914.799, |
|
"eval_steps_per_second": 57.178, |
|
"step": 656000 |
|
}, |
|
{ |
|
"epoch": 5.41, |
|
"eval_loss": 1.9337064027786255, |
|
"eval_runtime": 113.4421, |
|
"eval_samples_per_second": 910.376, |
|
"eval_steps_per_second": 56.901, |
|
"step": 664000 |
|
}, |
|
{ |
|
"epoch": 5.48, |
|
"learning_rate": 2.952e-07, |
|
"loss": 2.0506, |
|
"step": 672000 |
|
}, |
|
{ |
|
"epoch": 5.48, |
|
"eval_loss": 1.9316705465316772, |
|
"eval_runtime": 113.3757, |
|
"eval_samples_per_second": 910.91, |
|
"eval_steps_per_second": 56.935, |
|
"step": 672000 |
|
}, |
|
{ |
|
"epoch": 5.54, |
|
"eval_loss": 1.9275363683700562, |
|
"eval_runtime": 113.4405, |
|
"eval_samples_per_second": 910.389, |
|
"eval_steps_per_second": 56.902, |
|
"step": 680000 |
|
}, |
|
{ |
|
"epoch": 5.61, |
|
"learning_rate": 2.9246666666666665e-07, |
|
"loss": 2.0561, |
|
"step": 688000 |
|
}, |
|
{ |
|
"epoch": 5.61, |
|
"eval_loss": 1.9376088380813599, |
|
"eval_runtime": 113.0322, |
|
"eval_samples_per_second": 913.678, |
|
"eval_steps_per_second": 57.108, |
|
"step": 688000 |
|
}, |
|
{ |
|
"epoch": 5.68, |
|
"eval_loss": 1.9461050033569336, |
|
"eval_runtime": 113.738, |
|
"eval_samples_per_second": 908.008, |
|
"eval_steps_per_second": 56.753, |
|
"step": 696000 |
|
}, |
|
{ |
|
"epoch": 5.74, |
|
"learning_rate": 2.897333333333333e-07, |
|
"loss": 2.0496, |
|
"step": 704000 |
|
}, |
|
{ |
|
"epoch": 5.74, |
|
"eval_loss": 1.9239321947097778, |
|
"eval_runtime": 113.8527, |
|
"eval_samples_per_second": 907.093, |
|
"eval_steps_per_second": 56.696, |
|
"step": 704000 |
|
}, |
|
{ |
|
"epoch": 5.81, |
|
"eval_loss": 1.9250530004501343, |
|
"eval_runtime": 113.6483, |
|
"eval_samples_per_second": 908.725, |
|
"eval_steps_per_second": 56.798, |
|
"step": 712000 |
|
}, |
|
{ |
|
"epoch": 5.87, |
|
"learning_rate": 2.8699999999999996e-07, |
|
"loss": 2.045, |
|
"step": 720000 |
|
}, |
|
{ |
|
"epoch": 5.87, |
|
"eval_loss": 1.9309498071670532, |
|
"eval_runtime": 114.1187, |
|
"eval_samples_per_second": 904.979, |
|
"eval_steps_per_second": 56.564, |
|
"step": 720000 |
|
}, |
|
{ |
|
"epoch": 5.94, |
|
"eval_loss": 1.925881266593933, |
|
"eval_runtime": 113.5651, |
|
"eval_samples_per_second": 909.391, |
|
"eval_steps_per_second": 56.84, |
|
"step": 728000 |
|
}, |
|
{ |
|
"epoch": 6.0, |
|
"learning_rate": 2.8426666666666667e-07, |
|
"loss": 2.0512, |
|
"step": 736000 |
|
}, |
|
{ |
|
"epoch": 6.0, |
|
"eval_loss": 1.9236810207366943, |
|
"eval_runtime": 113.1127, |
|
"eval_samples_per_second": 913.027, |
|
"eval_steps_per_second": 57.067, |
|
"step": 736000 |
|
}, |
|
{ |
|
"epoch": 6.07, |
|
"eval_loss": 1.9148136377334595, |
|
"eval_runtime": 113.2705, |
|
"eval_samples_per_second": 911.756, |
|
"eval_steps_per_second": 56.987, |
|
"step": 744000 |
|
}, |
|
{ |
|
"epoch": 6.13, |
|
"learning_rate": 2.815333333333333e-07, |
|
"loss": 2.0512, |
|
"step": 752000 |
|
}, |
|
{ |
|
"epoch": 6.13, |
|
"eval_loss": 1.9219812154769897, |
|
"eval_runtime": 114.1512, |
|
"eval_samples_per_second": 904.721, |
|
"eval_steps_per_second": 56.548, |
|
"step": 752000 |
|
}, |
|
{ |
|
"epoch": 6.2, |
|
"eval_loss": 1.9397040605545044, |
|
"eval_runtime": 113.2005, |
|
"eval_samples_per_second": 912.319, |
|
"eval_steps_per_second": 57.023, |
|
"step": 760000 |
|
}, |
|
{ |
|
"epoch": 6.26, |
|
"learning_rate": 2.7880000000000003e-07, |
|
"loss": 2.0445, |
|
"step": 768000 |
|
}, |
|
{ |
|
"epoch": 6.26, |
|
"eval_loss": 1.9240758419036865, |
|
"eval_runtime": 113.1775, |
|
"eval_samples_per_second": 912.505, |
|
"eval_steps_per_second": 57.034, |
|
"step": 768000 |
|
}, |
|
{ |
|
"epoch": 6.33, |
|
"eval_loss": 1.9330027103424072, |
|
"eval_runtime": 113.0566, |
|
"eval_samples_per_second": 913.481, |
|
"eval_steps_per_second": 57.095, |
|
"step": 776000 |
|
}, |
|
{ |
|
"epoch": 6.39, |
|
"learning_rate": 2.7606666666666664e-07, |
|
"loss": 2.0481, |
|
"step": 784000 |
|
}, |
|
{ |
|
"epoch": 6.39, |
|
"eval_loss": 1.9123960733413696, |
|
"eval_runtime": 113.268, |
|
"eval_samples_per_second": 911.775, |
|
"eval_steps_per_second": 56.989, |
|
"step": 784000 |
|
}, |
|
{ |
|
"epoch": 6.46, |
|
"eval_loss": 1.9267631769180298, |
|
"eval_runtime": 113.7712, |
|
"eval_samples_per_second": 907.743, |
|
"eval_steps_per_second": 56.737, |
|
"step": 792000 |
|
}, |
|
{ |
|
"epoch": 6.52, |
|
"learning_rate": 2.733333333333333e-07, |
|
"loss": 2.048, |
|
"step": 800000 |
|
}, |
|
{ |
|
"epoch": 6.52, |
|
"eval_loss": 1.921078085899353, |
|
"eval_runtime": 113.8106, |
|
"eval_samples_per_second": 907.429, |
|
"eval_steps_per_second": 56.717, |
|
"step": 800000 |
|
}, |
|
{ |
|
"epoch": 6.59, |
|
"eval_loss": 1.9279391765594482, |
|
"eval_runtime": 113.2864, |
|
"eval_samples_per_second": 911.627, |
|
"eval_steps_per_second": 56.979, |
|
"step": 808000 |
|
}, |
|
{ |
|
"epoch": 6.65, |
|
"learning_rate": 2.706e-07, |
|
"loss": 2.0555, |
|
"step": 816000 |
|
}, |
|
{ |
|
"epoch": 6.65, |
|
"eval_loss": 1.9168628454208374, |
|
"eval_runtime": 113.2491, |
|
"eval_samples_per_second": 911.928, |
|
"eval_steps_per_second": 56.998, |
|
"step": 816000 |
|
}, |
|
{ |
|
"epoch": 6.72, |
|
"eval_loss": 1.922944188117981, |
|
"eval_runtime": 113.2414, |
|
"eval_samples_per_second": 911.99, |
|
"eval_steps_per_second": 57.002, |
|
"step": 824000 |
|
}, |
|
{ |
|
"epoch": 6.78, |
|
"learning_rate": 2.6786666666666666e-07, |
|
"loss": 2.052, |
|
"step": 832000 |
|
}, |
|
{ |
|
"epoch": 6.78, |
|
"eval_loss": 1.9253454208374023, |
|
"eval_runtime": 114.0468, |
|
"eval_samples_per_second": 905.549, |
|
"eval_steps_per_second": 56.6, |
|
"step": 832000 |
|
}, |
|
{ |
|
"epoch": 6.85, |
|
"eval_loss": 1.9244375228881836, |
|
"eval_runtime": 113.2582, |
|
"eval_samples_per_second": 911.855, |
|
"eval_steps_per_second": 56.994, |
|
"step": 840000 |
|
}, |
|
{ |
|
"epoch": 6.91, |
|
"learning_rate": 2.651333333333333e-07, |
|
"loss": 2.0475, |
|
"step": 848000 |
|
}, |
|
{ |
|
"epoch": 6.91, |
|
"eval_loss": 1.9191728830337524, |
|
"eval_runtime": 113.2946, |
|
"eval_samples_per_second": 911.561, |
|
"eval_steps_per_second": 56.975, |
|
"step": 848000 |
|
}, |
|
{ |
|
"epoch": 6.98, |
|
"eval_loss": 1.9167262315750122, |
|
"eval_runtime": 113.3788, |
|
"eval_samples_per_second": 910.884, |
|
"eval_steps_per_second": 56.933, |
|
"step": 856000 |
|
}, |
|
{ |
|
"epoch": 7.05, |
|
"learning_rate": 2.624e-07, |
|
"loss": 2.0521, |
|
"step": 864000 |
|
}, |
|
{ |
|
"epoch": 7.05, |
|
"eval_loss": 1.9202110767364502, |
|
"eval_runtime": 113.3134, |
|
"eval_samples_per_second": 911.41, |
|
"eval_steps_per_second": 56.966, |
|
"step": 864000 |
|
}, |
|
{ |
|
"epoch": 7.11, |
|
"eval_loss": 1.9240491390228271, |
|
"eval_runtime": 113.6592, |
|
"eval_samples_per_second": 908.638, |
|
"eval_steps_per_second": 56.793, |
|
"step": 872000 |
|
}, |
|
{ |
|
"epoch": 7.18, |
|
"learning_rate": 2.596666666666667e-07, |
|
"loss": 2.0516, |
|
"step": 880000 |
|
}, |
|
{ |
|
"epoch": 7.18, |
|
"eval_loss": 1.923065423965454, |
|
"eval_runtime": 113.5487, |
|
"eval_samples_per_second": 909.522, |
|
"eval_steps_per_second": 56.848, |
|
"step": 880000 |
|
}, |
|
{ |
|
"epoch": 7.24, |
|
"eval_loss": 1.9245978593826294, |
|
"eval_runtime": 114.3166, |
|
"eval_samples_per_second": 903.412, |
|
"eval_steps_per_second": 56.466, |
|
"step": 888000 |
|
}, |
|
{ |
|
"epoch": 7.31, |
|
"learning_rate": 2.5693333333333333e-07, |
|
"loss": 2.0526, |
|
"step": 896000 |
|
}, |
|
{ |
|
"epoch": 7.31, |
|
"eval_loss": 1.9173697233200073, |
|
"eval_runtime": 113.4859, |
|
"eval_samples_per_second": 910.025, |
|
"eval_steps_per_second": 56.879, |
|
"step": 896000 |
|
}, |
|
{ |
|
"epoch": 7.37, |
|
"eval_loss": 1.9256370067596436, |
|
"eval_runtime": 114.6588, |
|
"eval_samples_per_second": 900.716, |
|
"eval_steps_per_second": 56.297, |
|
"step": 904000 |
|
}, |
|
{ |
|
"epoch": 7.44, |
|
"learning_rate": 2.542e-07, |
|
"loss": 2.044, |
|
"step": 912000 |
|
}, |
|
{ |
|
"epoch": 7.44, |
|
"eval_loss": 1.9233709573745728, |
|
"eval_runtime": 114.8311, |
|
"eval_samples_per_second": 899.364, |
|
"eval_steps_per_second": 56.213, |
|
"step": 912000 |
|
}, |
|
{ |
|
"epoch": 7.5, |
|
"eval_loss": 1.9208239316940308, |
|
"eval_runtime": 114.4555, |
|
"eval_samples_per_second": 902.316, |
|
"eval_steps_per_second": 56.397, |
|
"step": 920000 |
|
}, |
|
{ |
|
"epoch": 7.57, |
|
"learning_rate": 2.5146666666666664e-07, |
|
"loss": 2.0493, |
|
"step": 928000 |
|
}, |
|
{ |
|
"epoch": 7.57, |
|
"eval_loss": 1.9232600927352905, |
|
"eval_runtime": 113.2901, |
|
"eval_samples_per_second": 911.598, |
|
"eval_steps_per_second": 56.978, |
|
"step": 928000 |
|
}, |
|
{ |
|
"epoch": 7.63, |
|
"eval_loss": 1.918021321296692, |
|
"eval_runtime": 114.0382, |
|
"eval_samples_per_second": 905.617, |
|
"eval_steps_per_second": 56.604, |
|
"step": 936000 |
|
}, |
|
{ |
|
"epoch": 7.7, |
|
"learning_rate": 2.4873333333333335e-07, |
|
"loss": 2.0535, |
|
"step": 944000 |
|
}, |
|
{ |
|
"epoch": 7.7, |
|
"eval_loss": 1.919961929321289, |
|
"eval_runtime": 114.7577, |
|
"eval_samples_per_second": 899.94, |
|
"eval_steps_per_second": 56.249, |
|
"step": 944000 |
|
}, |
|
{ |
|
"epoch": 7.76, |
|
"eval_loss": 1.9151924848556519, |
|
"eval_runtime": 113.434, |
|
"eval_samples_per_second": 910.441, |
|
"eval_steps_per_second": 56.905, |
|
"step": 952000 |
|
}, |
|
{ |
|
"epoch": 7.83, |
|
"learning_rate": 2.46e-07, |
|
"loss": 2.0454, |
|
"step": 960000 |
|
}, |
|
{ |
|
"epoch": 7.83, |
|
"eval_loss": 1.926845669746399, |
|
"eval_runtime": 114.0309, |
|
"eval_samples_per_second": 905.676, |
|
"eval_steps_per_second": 56.607, |
|
"step": 960000 |
|
}, |
|
{ |
|
"epoch": 7.89, |
|
"eval_loss": 1.9206236600875854, |
|
"eval_runtime": 113.4283, |
|
"eval_samples_per_second": 910.487, |
|
"eval_steps_per_second": 56.908, |
|
"step": 968000 |
|
}, |
|
{ |
|
"epoch": 7.96, |
|
"learning_rate": 2.4326666666666666e-07, |
|
"loss": 2.0428, |
|
"step": 976000 |
|
}, |
|
{ |
|
"epoch": 7.96, |
|
"eval_loss": 1.9169600009918213, |
|
"eval_runtime": 113.0231, |
|
"eval_samples_per_second": 913.751, |
|
"eval_steps_per_second": 57.112, |
|
"step": 976000 |
|
}, |
|
{ |
|
"epoch": 8.02, |
|
"eval_loss": 1.923983097076416, |
|
"eval_runtime": 114.0029, |
|
"eval_samples_per_second": 905.898, |
|
"eval_steps_per_second": 56.621, |
|
"step": 984000 |
|
}, |
|
{ |
|
"epoch": 8.09, |
|
"learning_rate": 2.405333333333333e-07, |
|
"loss": 2.052, |
|
"step": 992000 |
|
}, |
|
{ |
|
"epoch": 8.09, |
|
"eval_loss": 1.9306118488311768, |
|
"eval_runtime": 114.2567, |
|
"eval_samples_per_second": 903.886, |
|
"eval_steps_per_second": 56.496, |
|
"step": 992000 |
|
}, |
|
{ |
|
"epoch": 8.15, |
|
"eval_loss": 1.9191603660583496, |
|
"eval_runtime": 114.2092, |
|
"eval_samples_per_second": 904.262, |
|
"eval_steps_per_second": 56.519, |
|
"step": 1000000 |
|
}, |
|
{ |
|
"epoch": 8.22, |
|
"learning_rate": 2.3779999999999997e-07, |
|
"loss": 2.0472, |
|
"step": 1008000 |
|
}, |
|
{ |
|
"epoch": 8.22, |
|
"eval_loss": 1.9313241243362427, |
|
"eval_runtime": 114.3737, |
|
"eval_samples_per_second": 902.961, |
|
"eval_steps_per_second": 56.438, |
|
"step": 1008000 |
|
}, |
|
{ |
|
"epoch": 8.28, |
|
"eval_loss": 1.9238113164901733, |
|
"eval_runtime": 114.2747, |
|
"eval_samples_per_second": 903.743, |
|
"eval_steps_per_second": 56.487, |
|
"step": 1016000 |
|
}, |
|
{ |
|
"epoch": 8.35, |
|
"learning_rate": 2.3506666666666668e-07, |
|
"loss": 2.0454, |
|
"step": 1024000 |
|
}, |
|
{ |
|
"epoch": 8.35, |
|
"eval_loss": 1.9162325859069824, |
|
"eval_runtime": 114.6251, |
|
"eval_samples_per_second": 900.98, |
|
"eval_steps_per_second": 56.314, |
|
"step": 1024000 |
|
}, |
|
{ |
|
"epoch": 8.41, |
|
"eval_loss": 1.913014531135559, |
|
"eval_runtime": 113.9073, |
|
"eval_samples_per_second": 906.658, |
|
"eval_steps_per_second": 56.669, |
|
"step": 1032000 |
|
}, |
|
{ |
|
"epoch": 8.48, |
|
"learning_rate": 2.3233333333333334e-07, |
|
"loss": 2.0503, |
|
"step": 1040000 |
|
}, |
|
{ |
|
"epoch": 8.48, |
|
"eval_loss": 1.9260133504867554, |
|
"eval_runtime": 114.9945, |
|
"eval_samples_per_second": 898.086, |
|
"eval_steps_per_second": 56.133, |
|
"step": 1040000 |
|
}, |
|
{ |
|
"epoch": 8.55, |
|
"eval_loss": 1.9212397336959839, |
|
"eval_runtime": 113.1012, |
|
"eval_samples_per_second": 913.12, |
|
"eval_steps_per_second": 57.073, |
|
"step": 1048000 |
|
}, |
|
{ |
|
"epoch": 8.61, |
|
"learning_rate": 2.2960000000000002e-07, |
|
"loss": 2.0511, |
|
"step": 1056000 |
|
}, |
|
{ |
|
"epoch": 8.61, |
|
"eval_loss": 1.9114716053009033, |
|
"eval_runtime": 113.5853, |
|
"eval_samples_per_second": 909.229, |
|
"eval_steps_per_second": 56.83, |
|
"step": 1056000 |
|
}, |
|
{ |
|
"epoch": 8.68, |
|
"eval_loss": 1.9123215675354004, |
|
"eval_runtime": 113.8372, |
|
"eval_samples_per_second": 907.217, |
|
"eval_steps_per_second": 56.704, |
|
"step": 1064000 |
|
}, |
|
{ |
|
"epoch": 8.74, |
|
"learning_rate": 2.2686666666666667e-07, |
|
"loss": 2.049, |
|
"step": 1072000 |
|
}, |
|
{ |
|
"epoch": 8.74, |
|
"eval_loss": 1.9258580207824707, |
|
"eval_runtime": 115.4682, |
|
"eval_samples_per_second": 894.402, |
|
"eval_steps_per_second": 55.903, |
|
"step": 1072000 |
|
}, |
|
{ |
|
"epoch": 8.81, |
|
"eval_loss": 1.932053804397583, |
|
"eval_runtime": 113.8053, |
|
"eval_samples_per_second": 907.471, |
|
"eval_steps_per_second": 56.72, |
|
"step": 1080000 |
|
}, |
|
{ |
|
"epoch": 8.87, |
|
"learning_rate": 2.2413333333333333e-07, |
|
"loss": 2.0463, |
|
"step": 1088000 |
|
}, |
|
{ |
|
"epoch": 8.87, |
|
"eval_loss": 1.9148298501968384, |
|
"eval_runtime": 114.0323, |
|
"eval_samples_per_second": 905.664, |
|
"eval_steps_per_second": 56.607, |
|
"step": 1088000 |
|
}, |
|
{ |
|
"epoch": 8.94, |
|
"eval_loss": 1.9144847393035889, |
|
"eval_runtime": 113.7624, |
|
"eval_samples_per_second": 907.813, |
|
"eval_steps_per_second": 56.741, |
|
"step": 1096000 |
|
}, |
|
{ |
|
"epoch": 9.0, |
|
"learning_rate": 2.214e-07, |
|
"loss": 2.0494, |
|
"step": 1104000 |
|
}, |
|
{ |
|
"epoch": 9.0, |
|
"eval_loss": 1.9097198247909546, |
|
"eval_runtime": 114.8448, |
|
"eval_samples_per_second": 899.257, |
|
"eval_steps_per_second": 56.206, |
|
"step": 1104000 |
|
}, |
|
{ |
|
"epoch": 9.07, |
|
"eval_loss": 1.9135308265686035, |
|
"eval_runtime": 114.2552, |
|
"eval_samples_per_second": 903.898, |
|
"eval_steps_per_second": 56.496, |
|
"step": 1112000 |
|
}, |
|
{ |
|
"epoch": 9.13, |
|
"learning_rate": 2.1866666666666667e-07, |
|
"loss": 2.0467, |
|
"step": 1120000 |
|
}, |
|
{ |
|
"epoch": 9.13, |
|
"eval_loss": 1.9163955450057983, |
|
"eval_runtime": 114.3157, |
|
"eval_samples_per_second": 903.419, |
|
"eval_steps_per_second": 56.466, |
|
"step": 1120000 |
|
}, |
|
{ |
|
"epoch": 9.2, |
|
"eval_loss": 1.9224337339401245, |
|
"eval_runtime": 113.5325, |
|
"eval_samples_per_second": 909.652, |
|
"eval_steps_per_second": 56.856, |
|
"step": 1128000 |
|
}, |
|
{ |
|
"epoch": 9.26, |
|
"learning_rate": 2.1593333333333332e-07, |
|
"loss": 2.0483, |
|
"step": 1136000 |
|
}, |
|
{ |
|
"epoch": 9.26, |
|
"eval_loss": 1.9134596586227417, |
|
"eval_runtime": 113.9566, |
|
"eval_samples_per_second": 906.266, |
|
"eval_steps_per_second": 56.644, |
|
"step": 1136000 |
|
}, |
|
{ |
|
"epoch": 9.33, |
|
"eval_loss": 1.919922947883606, |
|
"eval_runtime": 114.7403, |
|
"eval_samples_per_second": 900.076, |
|
"eval_steps_per_second": 56.257, |
|
"step": 1144000 |
|
}, |
|
{ |
|
"epoch": 9.39, |
|
"learning_rate": 2.132e-07, |
|
"loss": 2.0437, |
|
"step": 1152000 |
|
}, |
|
{ |
|
"epoch": 9.39, |
|
"eval_loss": 1.9213168621063232, |
|
"eval_runtime": 113.9265, |
|
"eval_samples_per_second": 906.505, |
|
"eval_steps_per_second": 56.659, |
|
"step": 1152000 |
|
}, |
|
{ |
|
"epoch": 9.46, |
|
"eval_loss": 1.9161458015441895, |
|
"eval_runtime": 114.3737, |
|
"eval_samples_per_second": 902.961, |
|
"eval_steps_per_second": 56.438, |
|
"step": 1160000 |
|
}, |
|
{ |
|
"epoch": 9.52, |
|
"learning_rate": 2.1046666666666666e-07, |
|
"loss": 2.0526, |
|
"step": 1168000 |
|
}, |
|
{ |
|
"epoch": 9.52, |
|
"eval_loss": 1.9148198366165161, |
|
"eval_runtime": 113.75, |
|
"eval_samples_per_second": 907.912, |
|
"eval_steps_per_second": 56.747, |
|
"step": 1168000 |
|
}, |
|
{ |
|
"epoch": 9.59, |
|
"eval_loss": 1.9182627201080322, |
|
"eval_runtime": 114.4421, |
|
"eval_samples_per_second": 902.421, |
|
"eval_steps_per_second": 56.404, |
|
"step": 1176000 |
|
}, |
|
{ |
|
"epoch": 9.65, |
|
"learning_rate": 2.0773333333333334e-07, |
|
"loss": 2.0408, |
|
"step": 1184000 |
|
}, |
|
{ |
|
"epoch": 9.65, |
|
"eval_loss": 1.9078502655029297, |
|
"eval_runtime": 116.0253, |
|
"eval_samples_per_second": 890.108, |
|
"eval_steps_per_second": 55.634, |
|
"step": 1184000 |
|
}, |
|
{ |
|
"epoch": 9.72, |
|
"eval_loss": 1.918637752532959, |
|
"eval_runtime": 116.0532, |
|
"eval_samples_per_second": 889.893, |
|
"eval_steps_per_second": 55.621, |
|
"step": 1192000 |
|
}, |
|
{ |
|
"epoch": 9.78, |
|
"learning_rate": 2.05e-07, |
|
"loss": 2.0488, |
|
"step": 1200000 |
|
}, |
|
{ |
|
"epoch": 9.78, |
|
"eval_loss": 1.9140615463256836, |
|
"eval_runtime": 114.9098, |
|
"eval_samples_per_second": 898.748, |
|
"eval_steps_per_second": 56.174, |
|
"step": 1200000 |
|
}, |
|
{ |
|
"epoch": 9.85, |
|
"eval_loss": 1.907893419265747, |
|
"eval_runtime": 114.1171, |
|
"eval_samples_per_second": 904.991, |
|
"eval_steps_per_second": 56.565, |
|
"step": 1208000 |
|
}, |
|
{ |
|
"epoch": 9.92, |
|
"learning_rate": 2.0226666666666668e-07, |
|
"loss": 2.0441, |
|
"step": 1216000 |
|
}, |
|
{ |
|
"epoch": 9.92, |
|
"eval_loss": 1.9250520467758179, |
|
"eval_runtime": 113.8841, |
|
"eval_samples_per_second": 906.843, |
|
"eval_steps_per_second": 56.68, |
|
"step": 1216000 |
|
}, |
|
{ |
|
"epoch": 9.98, |
|
"eval_loss": 1.9254791736602783, |
|
"eval_runtime": 114.3655, |
|
"eval_samples_per_second": 903.026, |
|
"eval_steps_per_second": 56.442, |
|
"step": 1224000 |
|
}, |
|
{ |
|
"epoch": 10.05, |
|
"learning_rate": 1.9953333333333333e-07, |
|
"loss": 2.0483, |
|
"step": 1232000 |
|
}, |
|
{ |
|
"epoch": 10.05, |
|
"eval_loss": 1.9108103513717651, |
|
"eval_runtime": 114.7681, |
|
"eval_samples_per_second": 899.858, |
|
"eval_steps_per_second": 56.244, |
|
"step": 1232000 |
|
}, |
|
{ |
|
"epoch": 10.11, |
|
"eval_loss": 1.904534935951233, |
|
"eval_runtime": 115.2497, |
|
"eval_samples_per_second": 896.097, |
|
"eval_steps_per_second": 56.009, |
|
"step": 1240000 |
|
}, |
|
{ |
|
"epoch": 10.18, |
|
"learning_rate": 1.968e-07, |
|
"loss": 2.0503, |
|
"step": 1248000 |
|
}, |
|
{ |
|
"epoch": 10.18, |
|
"eval_loss": 1.9169738292694092, |
|
"eval_runtime": 115.5795, |
|
"eval_samples_per_second": 893.541, |
|
"eval_steps_per_second": 55.849, |
|
"step": 1248000 |
|
}, |
|
{ |
|
"epoch": 10.24, |
|
"eval_loss": 1.9024699926376343, |
|
"eval_runtime": 115.6553, |
|
"eval_samples_per_second": 892.955, |
|
"eval_steps_per_second": 55.812, |
|
"step": 1256000 |
|
}, |
|
{ |
|
"epoch": 10.31, |
|
"learning_rate": 1.9406666666666667e-07, |
|
"loss": 2.0334, |
|
"step": 1264000 |
|
}, |
|
{ |
|
"epoch": 10.31, |
|
"eval_loss": 1.9198503494262695, |
|
"eval_runtime": 115.9185, |
|
"eval_samples_per_second": 890.927, |
|
"eval_steps_per_second": 55.686, |
|
"step": 1264000 |
|
}, |
|
{ |
|
"epoch": 10.37, |
|
"eval_loss": 1.9187484979629517, |
|
"eval_runtime": 114.9362, |
|
"eval_samples_per_second": 898.542, |
|
"eval_steps_per_second": 56.162, |
|
"step": 1272000 |
|
}, |
|
{ |
|
"epoch": 10.44, |
|
"learning_rate": 1.9133333333333333e-07, |
|
"loss": 2.0388, |
|
"step": 1280000 |
|
}, |
|
{ |
|
"epoch": 10.44, |
|
"eval_loss": 1.902976393699646, |
|
"eval_runtime": 115.6842, |
|
"eval_samples_per_second": 892.732, |
|
"eval_steps_per_second": 55.798, |
|
"step": 1280000 |
|
}, |
|
{ |
|
"epoch": 10.5, |
|
"eval_loss": 1.9231475591659546, |
|
"eval_runtime": 114.4315, |
|
"eval_samples_per_second": 902.505, |
|
"eval_steps_per_second": 56.409, |
|
"step": 1288000 |
|
}, |
|
{ |
|
"epoch": 10.57, |
|
"learning_rate": 1.886e-07, |
|
"loss": 2.0489, |
|
"step": 1296000 |
|
}, |
|
{ |
|
"epoch": 10.57, |
|
"eval_loss": 1.9084066152572632, |
|
"eval_runtime": 114.2511, |
|
"eval_samples_per_second": 903.93, |
|
"eval_steps_per_second": 56.498, |
|
"step": 1296000 |
|
}, |
|
{ |
|
"epoch": 10.63, |
|
"eval_loss": 1.9184343814849854, |
|
"eval_runtime": 115.1565, |
|
"eval_samples_per_second": 896.823, |
|
"eval_steps_per_second": 56.054, |
|
"step": 1304000 |
|
}, |
|
{ |
|
"epoch": 10.7, |
|
"learning_rate": 1.8586666666666666e-07, |
|
"loss": 2.0476, |
|
"step": 1312000 |
|
}, |
|
{ |
|
"epoch": 10.7, |
|
"eval_loss": 1.9159677028656006, |
|
"eval_runtime": 114.4329, |
|
"eval_samples_per_second": 902.494, |
|
"eval_steps_per_second": 56.409, |
|
"step": 1312000 |
|
}, |
|
{ |
|
"epoch": 10.76, |
|
"eval_loss": 1.9276108741760254, |
|
"eval_runtime": 114.2813, |
|
"eval_samples_per_second": 903.691, |
|
"eval_steps_per_second": 56.483, |
|
"step": 1320000 |
|
}, |
|
{ |
|
"epoch": 10.83, |
|
"learning_rate": 1.8313333333333332e-07, |
|
"loss": 2.037, |
|
"step": 1328000 |
|
}, |
|
{ |
|
"epoch": 10.83, |
|
"eval_loss": 1.9041118621826172, |
|
"eval_runtime": 114.8143, |
|
"eval_samples_per_second": 899.496, |
|
"eval_steps_per_second": 56.221, |
|
"step": 1328000 |
|
}, |
|
{ |
|
"epoch": 10.89, |
|
"eval_loss": 1.9227638244628906, |
|
"eval_runtime": 115.0142, |
|
"eval_samples_per_second": 897.933, |
|
"eval_steps_per_second": 56.124, |
|
"step": 1336000 |
|
}, |
|
{ |
|
"epoch": 10.96, |
|
"learning_rate": 1.804e-07, |
|
"loss": 2.0447, |
|
"step": 1344000 |
|
}, |
|
{ |
|
"epoch": 10.96, |
|
"eval_loss": 1.9151026010513306, |
|
"eval_runtime": 115.3034, |
|
"eval_samples_per_second": 895.68, |
|
"eval_steps_per_second": 55.983, |
|
"step": 1344000 |
|
}, |
|
{ |
|
"epoch": 11.02, |
|
"eval_loss": 1.9068875312805176, |
|
"eval_runtime": 114.8441, |
|
"eval_samples_per_second": 899.263, |
|
"eval_steps_per_second": 56.207, |
|
"step": 1352000 |
|
}, |
|
{ |
|
"epoch": 11.09, |
|
"learning_rate": 1.7766666666666666e-07, |
|
"loss": 2.039, |
|
"step": 1360000 |
|
}, |
|
{ |
|
"epoch": 11.09, |
|
"eval_loss": 1.9274860620498657, |
|
"eval_runtime": 116.0333, |
|
"eval_samples_per_second": 890.046, |
|
"eval_steps_per_second": 55.631, |
|
"step": 1360000 |
|
}, |
|
{ |
|
"epoch": 11.15, |
|
"eval_loss": 1.9066658020019531, |
|
"eval_runtime": 115.792, |
|
"eval_samples_per_second": 891.901, |
|
"eval_steps_per_second": 55.747, |
|
"step": 1368000 |
|
}, |
|
{ |
|
"epoch": 11.22, |
|
"learning_rate": 1.7493333333333334e-07, |
|
"loss": 2.0434, |
|
"step": 1376000 |
|
}, |
|
{ |
|
"epoch": 11.22, |
|
"eval_loss": 1.9086920022964478, |
|
"eval_runtime": 115.0319, |
|
"eval_samples_per_second": 897.795, |
|
"eval_steps_per_second": 56.115, |
|
"step": 1376000 |
|
}, |
|
{ |
|
"epoch": 11.29, |
|
"eval_loss": 1.9041084051132202, |
|
"eval_runtime": 115.5247, |
|
"eval_samples_per_second": 893.965, |
|
"eval_steps_per_second": 55.876, |
|
"step": 1384000 |
|
}, |
|
{ |
|
"epoch": 11.35, |
|
"learning_rate": 1.722e-07, |
|
"loss": 2.0501, |
|
"step": 1392000 |
|
}, |
|
{ |
|
"epoch": 11.35, |
|
"eval_loss": 1.9032894372940063, |
|
"eval_runtime": 115.2083, |
|
"eval_samples_per_second": 896.42, |
|
"eval_steps_per_second": 56.029, |
|
"step": 1392000 |
|
}, |
|
{ |
|
"epoch": 11.42, |
|
"eval_loss": 1.9152663946151733, |
|
"eval_runtime": 115.0179, |
|
"eval_samples_per_second": 897.903, |
|
"eval_steps_per_second": 56.122, |
|
"step": 1400000 |
|
}, |
|
{ |
|
"epoch": 11.48, |
|
"learning_rate": 1.6946666666666668e-07, |
|
"loss": 2.0455, |
|
"step": 1408000 |
|
}, |
|
{ |
|
"epoch": 11.48, |
|
"eval_loss": 1.9173645973205566, |
|
"eval_runtime": 115.462, |
|
"eval_samples_per_second": 894.45, |
|
"eval_steps_per_second": 55.906, |
|
"step": 1408000 |
|
}, |
|
{ |
|
"epoch": 11.55, |
|
"eval_loss": 1.9174134731292725, |
|
"eval_runtime": 114.825, |
|
"eval_samples_per_second": 899.412, |
|
"eval_steps_per_second": 56.216, |
|
"step": 1416000 |
|
}, |
|
{ |
|
"epoch": 11.61, |
|
"learning_rate": 1.6673333333333333e-07, |
|
"loss": 2.0466, |
|
"step": 1424000 |
|
}, |
|
{ |
|
"epoch": 11.61, |
|
"eval_loss": 1.9260660409927368, |
|
"eval_runtime": 114.5404, |
|
"eval_samples_per_second": 901.647, |
|
"eval_steps_per_second": 56.356, |
|
"step": 1424000 |
|
}, |
|
{ |
|
"epoch": 11.68, |
|
"eval_loss": 1.9181084632873535, |
|
"eval_runtime": 115.0034, |
|
"eval_samples_per_second": 898.017, |
|
"eval_steps_per_second": 56.129, |
|
"step": 1432000 |
|
}, |
|
{ |
|
"epoch": 11.74, |
|
"learning_rate": 1.64e-07, |
|
"loss": 2.0424, |
|
"step": 1440000 |
|
}, |
|
{ |
|
"epoch": 11.74, |
|
"eval_loss": 1.9141377210617065, |
|
"eval_runtime": 114.2837, |
|
"eval_samples_per_second": 903.672, |
|
"eval_steps_per_second": 56.482, |
|
"step": 1440000 |
|
}, |
|
{ |
|
"epoch": 11.81, |
|
"eval_loss": 1.9004480838775635, |
|
"eval_runtime": 114.3666, |
|
"eval_samples_per_second": 903.017, |
|
"eval_steps_per_second": 56.441, |
|
"step": 1448000 |
|
}, |
|
{ |
|
"epoch": 11.87, |
|
"learning_rate": 1.6126666666666667e-07, |
|
"loss": 2.0441, |
|
"step": 1456000 |
|
}, |
|
{ |
|
"epoch": 11.87, |
|
"eval_loss": 1.919699788093567, |
|
"eval_runtime": 115.2012, |
|
"eval_samples_per_second": 896.475, |
|
"eval_steps_per_second": 56.032, |
|
"step": 1456000 |
|
}, |
|
{ |
|
"epoch": 11.94, |
|
"eval_loss": 1.9074804782867432, |
|
"eval_runtime": 114.4122, |
|
"eval_samples_per_second": 902.658, |
|
"eval_steps_per_second": 56.419, |
|
"step": 1464000 |
|
}, |
|
{ |
|
"epoch": 12.0, |
|
"learning_rate": 1.5853333333333332e-07, |
|
"loss": 2.04, |
|
"step": 1472000 |
|
}, |
|
{ |
|
"epoch": 12.0, |
|
"eval_loss": 1.9121414422988892, |
|
"eval_runtime": 114.3242, |
|
"eval_samples_per_second": 903.352, |
|
"eval_steps_per_second": 56.462, |
|
"step": 1472000 |
|
}, |
|
{ |
|
"epoch": 12.07, |
|
"eval_loss": 1.9210638999938965, |
|
"eval_runtime": 114.213, |
|
"eval_samples_per_second": 904.231, |
|
"eval_steps_per_second": 56.517, |
|
"step": 1480000 |
|
}, |
|
{ |
|
"epoch": 12.13, |
|
"learning_rate": 1.558e-07, |
|
"loss": 2.0375, |
|
"step": 1488000 |
|
}, |
|
{ |
|
"epoch": 12.13, |
|
"eval_loss": 1.9110891819000244, |
|
"eval_runtime": 114.5716, |
|
"eval_samples_per_second": 901.401, |
|
"eval_steps_per_second": 56.34, |
|
"step": 1488000 |
|
}, |
|
{ |
|
"epoch": 12.2, |
|
"eval_loss": 1.9187558889389038, |
|
"eval_runtime": 114.4912, |
|
"eval_samples_per_second": 902.034, |
|
"eval_steps_per_second": 56.38, |
|
"step": 1496000 |
|
}, |
|
{ |
|
"epoch": 12.26, |
|
"learning_rate": 1.5306666666666666e-07, |
|
"loss": 2.0482, |
|
"step": 1504000 |
|
}, |
|
{ |
|
"epoch": 12.26, |
|
"eval_loss": 1.9099169969558716, |
|
"eval_runtime": 114.3549, |
|
"eval_samples_per_second": 903.109, |
|
"eval_steps_per_second": 56.447, |
|
"step": 1504000 |
|
}, |
|
{ |
|
"epoch": 12.33, |
|
"eval_loss": 1.9160943031311035, |
|
"eval_runtime": 116.1161, |
|
"eval_samples_per_second": 889.412, |
|
"eval_steps_per_second": 55.591, |
|
"step": 1512000 |
|
}, |
|
{ |
|
"epoch": 12.39, |
|
"learning_rate": 1.5033333333333332e-07, |
|
"loss": 2.0432, |
|
"step": 1520000 |
|
}, |
|
{ |
|
"epoch": 12.39, |
|
"eval_loss": 1.9197900295257568, |
|
"eval_runtime": 114.3248, |
|
"eval_samples_per_second": 903.347, |
|
"eval_steps_per_second": 56.462, |
|
"step": 1520000 |
|
}, |
|
{ |
|
"epoch": 12.46, |
|
"eval_loss": 1.9154330492019653, |
|
"eval_runtime": 114.7975, |
|
"eval_samples_per_second": 899.627, |
|
"eval_steps_per_second": 56.229, |
|
"step": 1528000 |
|
}, |
|
{ |
|
"epoch": 12.52, |
|
"learning_rate": 1.476e-07, |
|
"loss": 2.0514, |
|
"step": 1536000 |
|
}, |
|
{ |
|
"epoch": 12.52, |
|
"eval_loss": 1.9058637619018555, |
|
"eval_runtime": 114.484, |
|
"eval_samples_per_second": 902.091, |
|
"eval_steps_per_second": 56.383, |
|
"step": 1536000 |
|
}, |
|
{ |
|
"epoch": 12.59, |
|
"eval_loss": 1.920427680015564, |
|
"eval_runtime": 114.3098, |
|
"eval_samples_per_second": 903.466, |
|
"eval_steps_per_second": 56.469, |
|
"step": 1544000 |
|
}, |
|
{ |
|
"epoch": 12.65, |
|
"learning_rate": 1.4486666666666665e-07, |
|
"loss": 2.0397, |
|
"step": 1552000 |
|
}, |
|
{ |
|
"epoch": 12.65, |
|
"eval_loss": 1.9054511785507202, |
|
"eval_runtime": 114.3602, |
|
"eval_samples_per_second": 903.068, |
|
"eval_steps_per_second": 56.444, |
|
"step": 1552000 |
|
}, |
|
{ |
|
"epoch": 12.72, |
|
"eval_loss": 1.896202802658081, |
|
"eval_runtime": 115.3412, |
|
"eval_samples_per_second": 895.387, |
|
"eval_steps_per_second": 55.964, |
|
"step": 1560000 |
|
}, |
|
{ |
|
"epoch": 12.79, |
|
"learning_rate": 1.4213333333333334e-07, |
|
"loss": 2.0454, |
|
"step": 1568000 |
|
}, |
|
{ |
|
"epoch": 12.79, |
|
"eval_loss": 1.9040275812149048, |
|
"eval_runtime": 114.7741, |
|
"eval_samples_per_second": 899.811, |
|
"eval_steps_per_second": 56.241, |
|
"step": 1568000 |
|
}, |
|
{ |
|
"epoch": 12.85, |
|
"eval_loss": 1.916807770729065, |
|
"eval_runtime": 114.6956, |
|
"eval_samples_per_second": 900.427, |
|
"eval_steps_per_second": 56.279, |
|
"step": 1576000 |
|
}, |
|
{ |
|
"epoch": 12.92, |
|
"learning_rate": 1.3940000000000002e-07, |
|
"loss": 2.0391, |
|
"step": 1584000 |
|
}, |
|
{ |
|
"epoch": 12.92, |
|
"eval_loss": 1.9037362337112427, |
|
"eval_runtime": 114.764, |
|
"eval_samples_per_second": 899.89, |
|
"eval_steps_per_second": 56.246, |
|
"step": 1584000 |
|
}, |
|
{ |
|
"epoch": 12.98, |
|
"eval_loss": 1.9186286926269531, |
|
"eval_runtime": 114.5005, |
|
"eval_samples_per_second": 901.961, |
|
"eval_steps_per_second": 56.375, |
|
"step": 1592000 |
|
}, |
|
{ |
|
"epoch": 13.05, |
|
"learning_rate": 1.3666666666666665e-07, |
|
"loss": 2.0414, |
|
"step": 1600000 |
|
}, |
|
{ |
|
"epoch": 13.05, |
|
"eval_loss": 1.9122203588485718, |
|
"eval_runtime": 114.4898, |
|
"eval_samples_per_second": 902.045, |
|
"eval_steps_per_second": 56.381, |
|
"step": 1600000 |
|
}, |
|
{ |
|
"epoch": 13.11, |
|
"eval_loss": 1.9115867614746094, |
|
"eval_runtime": 115.3456, |
|
"eval_samples_per_second": 895.352, |
|
"eval_steps_per_second": 55.962, |
|
"step": 1608000 |
|
}, |
|
{ |
|
"epoch": 13.18, |
|
"learning_rate": 1.3393333333333333e-07, |
|
"loss": 2.0431, |
|
"step": 1616000 |
|
}, |
|
{ |
|
"epoch": 13.18, |
|
"eval_loss": 1.9056520462036133, |
|
"eval_runtime": 114.6382, |
|
"eval_samples_per_second": 900.878, |
|
"eval_steps_per_second": 56.308, |
|
"step": 1616000 |
|
}, |
|
{ |
|
"epoch": 13.24, |
|
"eval_loss": 1.9115238189697266, |
|
"eval_runtime": 114.36, |
|
"eval_samples_per_second": 903.07, |
|
"eval_steps_per_second": 56.445, |
|
"step": 1624000 |
|
}, |
|
{ |
|
"epoch": 13.31, |
|
"learning_rate": 1.312e-07, |
|
"loss": 2.0368, |
|
"step": 1632000 |
|
}, |
|
{ |
|
"epoch": 13.31, |
|
"eval_loss": 1.911974549293518, |
|
"eval_runtime": 114.4999, |
|
"eval_samples_per_second": 901.966, |
|
"eval_steps_per_second": 56.376, |
|
"step": 1632000 |
|
}, |
|
{ |
|
"epoch": 13.37, |
|
"eval_loss": 1.9113932847976685, |
|
"eval_runtime": 115.0038, |
|
"eval_samples_per_second": 898.014, |
|
"eval_steps_per_second": 56.129, |
|
"step": 1640000 |
|
}, |
|
{ |
|
"epoch": 13.44, |
|
"learning_rate": 1.2846666666666667e-07, |
|
"loss": 2.0427, |
|
"step": 1648000 |
|
}, |
|
{ |
|
"epoch": 13.44, |
|
"eval_loss": 1.9128488302230835, |
|
"eval_runtime": 115.5184, |
|
"eval_samples_per_second": 894.013, |
|
"eval_steps_per_second": 55.879, |
|
"step": 1648000 |
|
}, |
|
{ |
|
"epoch": 13.5, |
|
"eval_loss": 1.9200862646102905, |
|
"eval_runtime": 115.9784, |
|
"eval_samples_per_second": 890.467, |
|
"eval_steps_per_second": 55.657, |
|
"step": 1656000 |
|
}, |
|
{ |
|
"epoch": 13.57, |
|
"learning_rate": 1.2573333333333332e-07, |
|
"loss": 2.0366, |
|
"step": 1664000 |
|
}, |
|
{ |
|
"epoch": 13.57, |
|
"eval_loss": 1.9053164720535278, |
|
"eval_runtime": 115.3446, |
|
"eval_samples_per_second": 895.361, |
|
"eval_steps_per_second": 55.963, |
|
"step": 1664000 |
|
}, |
|
{ |
|
"epoch": 13.63, |
|
"eval_loss": 1.9077204465866089, |
|
"eval_runtime": 114.7784, |
|
"eval_samples_per_second": 899.777, |
|
"eval_steps_per_second": 56.239, |
|
"step": 1672000 |
|
}, |
|
{ |
|
"epoch": 13.7, |
|
"learning_rate": 1.23e-07, |
|
"loss": 2.0423, |
|
"step": 1680000 |
|
}, |
|
{ |
|
"epoch": 13.7, |
|
"eval_loss": 1.9155118465423584, |
|
"eval_runtime": 114.5734, |
|
"eval_samples_per_second": 901.387, |
|
"eval_steps_per_second": 56.339, |
|
"step": 1680000 |
|
}, |
|
{ |
|
"epoch": 13.76, |
|
"eval_loss": 1.9025253057479858, |
|
"eval_runtime": 115.4889, |
|
"eval_samples_per_second": 894.242, |
|
"eval_steps_per_second": 55.893, |
|
"step": 1688000 |
|
}, |
|
{ |
|
"epoch": 13.83, |
|
"learning_rate": 1.2026666666666666e-07, |
|
"loss": 2.0345, |
|
"step": 1696000 |
|
}, |
|
{ |
|
"epoch": 13.83, |
|
"eval_loss": 1.911736011505127, |
|
"eval_runtime": 115.8028, |
|
"eval_samples_per_second": 891.818, |
|
"eval_steps_per_second": 55.741, |
|
"step": 1696000 |
|
}, |
|
{ |
|
"epoch": 13.89, |
|
"eval_loss": 1.9146357774734497, |
|
"eval_runtime": 115.518, |
|
"eval_samples_per_second": 894.017, |
|
"eval_steps_per_second": 55.879, |
|
"step": 1704000 |
|
}, |
|
{ |
|
"epoch": 13.96, |
|
"learning_rate": 1.1753333333333334e-07, |
|
"loss": 2.0523, |
|
"step": 1712000 |
|
}, |
|
{ |
|
"epoch": 13.96, |
|
"eval_loss": 1.9094045162200928, |
|
"eval_runtime": 115.0577, |
|
"eval_samples_per_second": 897.593, |
|
"eval_steps_per_second": 56.102, |
|
"step": 1712000 |
|
}, |
|
{ |
|
"epoch": 14.02, |
|
"eval_loss": 1.9028066396713257, |
|
"eval_runtime": 115.3284, |
|
"eval_samples_per_second": 895.486, |
|
"eval_steps_per_second": 55.971, |
|
"step": 1720000 |
|
}, |
|
{ |
|
"epoch": 14.09, |
|
"learning_rate": 1.1480000000000001e-07, |
|
"loss": 2.0405, |
|
"step": 1728000 |
|
}, |
|
{ |
|
"epoch": 14.09, |
|
"eval_loss": 1.9033746719360352, |
|
"eval_runtime": 116.3993, |
|
"eval_samples_per_second": 887.247, |
|
"eval_steps_per_second": 55.456, |
|
"step": 1728000 |
|
}, |
|
{ |
|
"epoch": 14.16, |
|
"eval_loss": 1.903308629989624, |
|
"eval_runtime": 115.3224, |
|
"eval_samples_per_second": 895.533, |
|
"eval_steps_per_second": 55.973, |
|
"step": 1736000 |
|
}, |
|
{ |
|
"epoch": 14.22, |
|
"learning_rate": 1.1206666666666666e-07, |
|
"loss": 2.0416, |
|
"step": 1744000 |
|
}, |
|
{ |
|
"epoch": 14.22, |
|
"eval_loss": 1.8958499431610107, |
|
"eval_runtime": 115.3629, |
|
"eval_samples_per_second": 895.218, |
|
"eval_steps_per_second": 55.954, |
|
"step": 1744000 |
|
}, |
|
{ |
|
"epoch": 14.29, |
|
"eval_loss": 1.9071624279022217, |
|
"eval_runtime": 114.3411, |
|
"eval_samples_per_second": 903.218, |
|
"eval_steps_per_second": 56.454, |
|
"step": 1752000 |
|
}, |
|
{ |
|
"epoch": 14.35, |
|
"learning_rate": 1.0933333333333333e-07, |
|
"loss": 2.0453, |
|
"step": 1760000 |
|
}, |
|
{ |
|
"epoch": 14.35, |
|
"eval_loss": 1.90669846534729, |
|
"eval_runtime": 114.9673, |
|
"eval_samples_per_second": 898.299, |
|
"eval_steps_per_second": 56.146, |
|
"step": 1760000 |
|
}, |
|
{ |
|
"epoch": 14.42, |
|
"eval_loss": 1.9112778902053833, |
|
"eval_runtime": 115.0041, |
|
"eval_samples_per_second": 898.012, |
|
"eval_steps_per_second": 56.128, |
|
"step": 1768000 |
|
}, |
|
{ |
|
"epoch": 14.48, |
|
"learning_rate": 1.066e-07, |
|
"loss": 2.0425, |
|
"step": 1776000 |
|
}, |
|
{ |
|
"epoch": 14.48, |
|
"eval_loss": 1.9103703498840332, |
|
"eval_runtime": 115.7959, |
|
"eval_samples_per_second": 891.871, |
|
"eval_steps_per_second": 55.745, |
|
"step": 1776000 |
|
}, |
|
{ |
|
"epoch": 14.55, |
|
"eval_loss": 1.9110212326049805, |
|
"eval_runtime": 115.8835, |
|
"eval_samples_per_second": 891.197, |
|
"eval_steps_per_second": 55.702, |
|
"step": 1784000 |
|
}, |
|
{ |
|
"epoch": 14.61, |
|
"learning_rate": 1.0386666666666667e-07, |
|
"loss": 2.0404, |
|
"step": 1792000 |
|
}, |
|
{ |
|
"epoch": 14.61, |
|
"eval_loss": 1.9037020206451416, |
|
"eval_runtime": 115.7942, |
|
"eval_samples_per_second": 891.884, |
|
"eval_steps_per_second": 55.745, |
|
"step": 1792000 |
|
}, |
|
{ |
|
"epoch": 14.68, |
|
"eval_loss": 1.9003052711486816, |
|
"eval_runtime": 115.8783, |
|
"eval_samples_per_second": 891.236, |
|
"eval_steps_per_second": 55.705, |
|
"step": 1800000 |
|
}, |
|
{ |
|
"epoch": 14.74, |
|
"learning_rate": 1.0113333333333334e-07, |
|
"loss": 2.0427, |
|
"step": 1808000 |
|
}, |
|
{ |
|
"epoch": 14.74, |
|
"eval_loss": 1.911608099937439, |
|
"eval_runtime": 116.1597, |
|
"eval_samples_per_second": 889.078, |
|
"eval_steps_per_second": 55.57, |
|
"step": 1808000 |
|
}, |
|
{ |
|
"epoch": 14.81, |
|
"eval_loss": 1.9105613231658936, |
|
"eval_runtime": 116.0079, |
|
"eval_samples_per_second": 890.241, |
|
"eval_steps_per_second": 55.643, |
|
"step": 1816000 |
|
}, |
|
{ |
|
"epoch": 14.87, |
|
"learning_rate": 9.84e-08, |
|
"loss": 2.0368, |
|
"step": 1824000 |
|
}, |
|
{ |
|
"epoch": 14.87, |
|
"eval_loss": 1.9095083475112915, |
|
"eval_runtime": 116.8018, |
|
"eval_samples_per_second": 884.19, |
|
"eval_steps_per_second": 55.265, |
|
"step": 1824000 |
|
}, |
|
{ |
|
"epoch": 14.94, |
|
"eval_loss": 1.8979859352111816, |
|
"eval_runtime": 114.5417, |
|
"eval_samples_per_second": 901.637, |
|
"eval_steps_per_second": 56.355, |
|
"step": 1832000 |
|
}, |
|
{ |
|
"epoch": 15.0, |
|
"learning_rate": 9.566666666666666e-08, |
|
"loss": 2.0441, |
|
"step": 1840000 |
|
}, |
|
{ |
|
"epoch": 15.0, |
|
"eval_loss": 1.918567419052124, |
|
"eval_runtime": 114.9208, |
|
"eval_samples_per_second": 898.662, |
|
"eval_steps_per_second": 56.169, |
|
"step": 1840000 |
|
}, |
|
{ |
|
"epoch": 15.07, |
|
"eval_loss": 1.903983473777771, |
|
"eval_runtime": 114.4082, |
|
"eval_samples_per_second": 902.689, |
|
"eval_steps_per_second": 56.421, |
|
"step": 1848000 |
|
}, |
|
{ |
|
"epoch": 15.13, |
|
"learning_rate": 9.293333333333333e-08, |
|
"loss": 2.0313, |
|
"step": 1856000 |
|
}, |
|
{ |
|
"epoch": 15.13, |
|
"eval_loss": 1.9185600280761719, |
|
"eval_runtime": 115.728, |
|
"eval_samples_per_second": 892.394, |
|
"eval_steps_per_second": 55.777, |
|
"step": 1856000 |
|
}, |
|
{ |
|
"epoch": 15.2, |
|
"eval_loss": 1.9016015529632568, |
|
"eval_runtime": 114.5834, |
|
"eval_samples_per_second": 901.308, |
|
"eval_steps_per_second": 56.335, |
|
"step": 1864000 |
|
}, |
|
{ |
|
"epoch": 15.26, |
|
"learning_rate": 9.02e-08, |
|
"loss": 2.0488, |
|
"step": 1872000 |
|
}, |
|
{ |
|
"epoch": 15.26, |
|
"eval_loss": 1.9047600030899048, |
|
"eval_runtime": 115.0099, |
|
"eval_samples_per_second": 897.966, |
|
"eval_steps_per_second": 56.126, |
|
"step": 1872000 |
|
}, |
|
{ |
|
"epoch": 15.33, |
|
"eval_loss": 1.899457335472107, |
|
"eval_runtime": 115.8151, |
|
"eval_samples_per_second": 891.723, |
|
"eval_steps_per_second": 55.735, |
|
"step": 1880000 |
|
}, |
|
{ |
|
"epoch": 15.39, |
|
"learning_rate": 8.746666666666667e-08, |
|
"loss": 2.0361, |
|
"step": 1888000 |
|
}, |
|
{ |
|
"epoch": 15.39, |
|
"eval_loss": 1.9119617938995361, |
|
"eval_runtime": 116.351, |
|
"eval_samples_per_second": 887.616, |
|
"eval_steps_per_second": 55.479, |
|
"step": 1888000 |
|
}, |
|
{ |
|
"epoch": 15.46, |
|
"eval_loss": 1.907942295074463, |
|
"eval_runtime": 116.3149, |
|
"eval_samples_per_second": 887.892, |
|
"eval_steps_per_second": 55.496, |
|
"step": 1896000 |
|
}, |
|
{ |
|
"epoch": 15.53, |
|
"learning_rate": 8.473333333333334e-08, |
|
"loss": 2.0449, |
|
"step": 1904000 |
|
}, |
|
{ |
|
"epoch": 15.53, |
|
"eval_loss": 1.9109671115875244, |
|
"eval_runtime": 114.9775, |
|
"eval_samples_per_second": 898.219, |
|
"eval_steps_per_second": 56.141, |
|
"step": 1904000 |
|
}, |
|
{ |
|
"epoch": 15.59, |
|
"eval_loss": 1.909091591835022, |
|
"eval_runtime": 115.2151, |
|
"eval_samples_per_second": 896.367, |
|
"eval_steps_per_second": 56.026, |
|
"step": 1912000 |
|
}, |
|
{ |
|
"epoch": 15.66, |
|
"learning_rate": 8.2e-08, |
|
"loss": 2.043, |
|
"step": 1920000 |
|
}, |
|
{ |
|
"epoch": 15.66, |
|
"eval_loss": 1.9061814546585083, |
|
"eval_runtime": 115.9012, |
|
"eval_samples_per_second": 891.06, |
|
"eval_steps_per_second": 55.694, |
|
"step": 1920000 |
|
}, |
|
{ |
|
"epoch": 15.72, |
|
"eval_loss": 1.9070407152175903, |
|
"eval_runtime": 115.1701, |
|
"eval_samples_per_second": 896.717, |
|
"eval_steps_per_second": 56.048, |
|
"step": 1928000 |
|
}, |
|
{ |
|
"epoch": 15.79, |
|
"learning_rate": 7.926666666666666e-08, |
|
"loss": 2.0414, |
|
"step": 1936000 |
|
}, |
|
{ |
|
"epoch": 15.79, |
|
"eval_loss": 1.913381576538086, |
|
"eval_runtime": 115.5442, |
|
"eval_samples_per_second": 893.814, |
|
"eval_steps_per_second": 55.866, |
|
"step": 1936000 |
|
}, |
|
{ |
|
"epoch": 15.85, |
|
"eval_loss": 1.9079296588897705, |
|
"eval_runtime": 115.3858, |
|
"eval_samples_per_second": 895.041, |
|
"eval_steps_per_second": 55.943, |
|
"step": 1944000 |
|
}, |
|
{ |
|
"epoch": 15.92, |
|
"learning_rate": 7.653333333333333e-08, |
|
"loss": 2.0419, |
|
"step": 1952000 |
|
}, |
|
{ |
|
"epoch": 15.92, |
|
"eval_loss": 1.9060734510421753, |
|
"eval_runtime": 115.6219, |
|
"eval_samples_per_second": 893.213, |
|
"eval_steps_per_second": 55.829, |
|
"step": 1952000 |
|
}, |
|
{ |
|
"epoch": 15.98, |
|
"eval_loss": 1.9058138132095337, |
|
"eval_runtime": 115.3288, |
|
"eval_samples_per_second": 895.483, |
|
"eval_steps_per_second": 55.97, |
|
"step": 1960000 |
|
}, |
|
{ |
|
"epoch": 16.05, |
|
"learning_rate": 7.38e-08, |
|
"loss": 2.0384, |
|
"step": 1968000 |
|
}, |
|
{ |
|
"epoch": 16.05, |
|
"eval_loss": 1.9113844633102417, |
|
"eval_runtime": 115.36, |
|
"eval_samples_per_second": 895.241, |
|
"eval_steps_per_second": 55.955, |
|
"step": 1968000 |
|
}, |
|
{ |
|
"epoch": 16.11, |
|
"eval_loss": 1.904008150100708, |
|
"eval_runtime": 115.3314, |
|
"eval_samples_per_second": 895.463, |
|
"eval_steps_per_second": 55.969, |
|
"step": 1976000 |
|
}, |
|
{ |
|
"epoch": 16.18, |
|
"learning_rate": 7.106666666666667e-08, |
|
"loss": 2.0391, |
|
"step": 1984000 |
|
}, |
|
{ |
|
"epoch": 16.18, |
|
"eval_loss": 1.9094995260238647, |
|
"eval_runtime": 116.0038, |
|
"eval_samples_per_second": 890.273, |
|
"eval_steps_per_second": 55.645, |
|
"step": 1984000 |
|
}, |
|
{ |
|
"epoch": 16.24, |
|
"eval_loss": 1.918182134628296, |
|
"eval_runtime": 117.9985, |
|
"eval_samples_per_second": 875.223, |
|
"eval_steps_per_second": 54.704, |
|
"step": 1992000 |
|
}, |
|
{ |
|
"epoch": 16.31, |
|
"learning_rate": 6.833333333333332e-08, |
|
"loss": 2.0405, |
|
"step": 2000000 |
|
}, |
|
{ |
|
"epoch": 16.31, |
|
"eval_loss": 1.9111247062683105, |
|
"eval_runtime": 115.7318, |
|
"eval_samples_per_second": 892.365, |
|
"eval_steps_per_second": 55.776, |
|
"step": 2000000 |
|
}, |
|
{ |
|
"epoch": 16.37, |
|
"eval_loss": 1.9056226015090942, |
|
"eval_runtime": 115.3321, |
|
"eval_samples_per_second": 895.458, |
|
"eval_steps_per_second": 55.969, |
|
"step": 2008000 |
|
}, |
|
{ |
|
"epoch": 16.44, |
|
"learning_rate": 6.56e-08, |
|
"loss": 2.0404, |
|
"step": 2016000 |
|
}, |
|
{ |
|
"epoch": 16.44, |
|
"eval_loss": 1.9133949279785156, |
|
"eval_runtime": 115.6177, |
|
"eval_samples_per_second": 893.245, |
|
"eval_steps_per_second": 55.831, |
|
"step": 2016000 |
|
}, |
|
{ |
|
"epoch": 16.5, |
|
"eval_loss": 1.9069831371307373, |
|
"eval_runtime": 116.2332, |
|
"eval_samples_per_second": 888.516, |
|
"eval_steps_per_second": 55.535, |
|
"step": 2024000 |
|
}, |
|
{ |
|
"epoch": 16.57, |
|
"learning_rate": 6.286666666666666e-08, |
|
"loss": 2.0414, |
|
"step": 2032000 |
|
}, |
|
{ |
|
"epoch": 16.57, |
|
"eval_loss": 1.9084620475769043, |
|
"eval_runtime": 115.922, |
|
"eval_samples_per_second": 890.901, |
|
"eval_steps_per_second": 55.684, |
|
"step": 2032000 |
|
}, |
|
{ |
|
"epoch": 16.63, |
|
"eval_loss": 1.9063148498535156, |
|
"eval_runtime": 116.5212, |
|
"eval_samples_per_second": 886.319, |
|
"eval_steps_per_second": 55.398, |
|
"step": 2040000 |
|
}, |
|
{ |
|
"epoch": 16.7, |
|
"learning_rate": 6.013333333333333e-08, |
|
"loss": 2.0483, |
|
"step": 2048000 |
|
}, |
|
{ |
|
"epoch": 16.7, |
|
"eval_loss": 1.9186962842941284, |
|
"eval_runtime": 116.2964, |
|
"eval_samples_per_second": 888.032, |
|
"eval_steps_per_second": 55.505, |
|
"step": 2048000 |
|
}, |
|
{ |
|
"epoch": 16.76, |
|
"eval_loss": 1.9105137586593628, |
|
"eval_runtime": 115.5049, |
|
"eval_samples_per_second": 894.118, |
|
"eval_steps_per_second": 55.885, |
|
"step": 2056000 |
|
}, |
|
{ |
|
"epoch": 16.83, |
|
"learning_rate": 5.7400000000000004e-08, |
|
"loss": 2.0452, |
|
"step": 2064000 |
|
}, |
|
{ |
|
"epoch": 16.83, |
|
"eval_loss": 1.9117952585220337, |
|
"eval_runtime": 116.0281, |
|
"eval_samples_per_second": 890.086, |
|
"eval_steps_per_second": 55.633, |
|
"step": 2064000 |
|
}, |
|
{ |
|
"epoch": 16.89, |
|
"eval_loss": 1.9091888666152954, |
|
"eval_runtime": 117.9897, |
|
"eval_samples_per_second": 875.288, |
|
"eval_steps_per_second": 54.708, |
|
"step": 2072000 |
|
}, |
|
{ |
|
"epoch": 16.96, |
|
"learning_rate": 5.4666666666666666e-08, |
|
"loss": 2.0401, |
|
"step": 2080000 |
|
}, |
|
{ |
|
"epoch": 16.96, |
|
"eval_loss": 1.9113515615463257, |
|
"eval_runtime": 116.2625, |
|
"eval_samples_per_second": 888.291, |
|
"eval_steps_per_second": 55.521, |
|
"step": 2080000 |
|
}, |
|
{ |
|
"epoch": 17.03, |
|
"eval_loss": 1.9098221063613892, |
|
"eval_runtime": 115.7155, |
|
"eval_samples_per_second": 892.491, |
|
"eval_steps_per_second": 55.783, |
|
"step": 2088000 |
|
}, |
|
{ |
|
"epoch": 17.09, |
|
"learning_rate": 5.1933333333333335e-08, |
|
"loss": 2.0353, |
|
"step": 2096000 |
|
}, |
|
{ |
|
"epoch": 17.09, |
|
"eval_loss": 1.9069087505340576, |
|
"eval_runtime": 116.5348, |
|
"eval_samples_per_second": 886.216, |
|
"eval_steps_per_second": 55.391, |
|
"step": 2096000 |
|
}, |
|
{ |
|
"epoch": 17.16, |
|
"eval_loss": 1.9027125835418701, |
|
"eval_runtime": 115.7058, |
|
"eval_samples_per_second": 892.566, |
|
"eval_steps_per_second": 55.788, |
|
"step": 2104000 |
|
}, |
|
{ |
|
"epoch": 17.22, |
|
"learning_rate": 4.92e-08, |
|
"loss": 2.0468, |
|
"step": 2112000 |
|
}, |
|
{ |
|
"epoch": 17.22, |
|
"eval_loss": 1.910232424736023, |
|
"eval_runtime": 115.6722, |
|
"eval_samples_per_second": 892.825, |
|
"eval_steps_per_second": 55.804, |
|
"step": 2112000 |
|
}, |
|
{ |
|
"epoch": 17.29, |
|
"eval_loss": 1.9046436548233032, |
|
"eval_runtime": 116.4733, |
|
"eval_samples_per_second": 886.684, |
|
"eval_steps_per_second": 55.42, |
|
"step": 2120000 |
|
}, |
|
{ |
|
"epoch": 17.35, |
|
"learning_rate": 4.6466666666666666e-08, |
|
"loss": 2.0448, |
|
"step": 2128000 |
|
}, |
|
{ |
|
"epoch": 17.35, |
|
"eval_loss": 1.9024384021759033, |
|
"eval_runtime": 115.5381, |
|
"eval_samples_per_second": 893.861, |
|
"eval_steps_per_second": 55.869, |
|
"step": 2128000 |
|
}, |
|
{ |
|
"epoch": 17.42, |
|
"eval_loss": 1.910799264907837, |
|
"eval_runtime": 116.2371, |
|
"eval_samples_per_second": 888.486, |
|
"eval_steps_per_second": 55.533, |
|
"step": 2136000 |
|
}, |
|
{ |
|
"epoch": 17.48, |
|
"learning_rate": 4.3733333333333335e-08, |
|
"loss": 2.0435, |
|
"step": 2144000 |
|
}, |
|
{ |
|
"epoch": 17.48, |
|
"eval_loss": 1.9122228622436523, |
|
"eval_runtime": 116.6097, |
|
"eval_samples_per_second": 885.647, |
|
"eval_steps_per_second": 55.356, |
|
"step": 2144000 |
|
}, |
|
{ |
|
"epoch": 17.55, |
|
"eval_loss": 1.9043642282485962, |
|
"eval_runtime": 115.6604, |
|
"eval_samples_per_second": 892.916, |
|
"eval_steps_per_second": 55.81, |
|
"step": 2152000 |
|
}, |
|
{ |
|
"epoch": 17.61, |
|
"learning_rate": 4.1e-08, |
|
"loss": 2.0421, |
|
"step": 2160000 |
|
}, |
|
{ |
|
"epoch": 17.61, |
|
"eval_loss": 1.9069358110427856, |
|
"eval_runtime": 116.7738, |
|
"eval_samples_per_second": 884.402, |
|
"eval_steps_per_second": 55.278, |
|
"step": 2160000 |
|
}, |
|
{ |
|
"epoch": 17.68, |
|
"eval_loss": 1.9019508361816406, |
|
"eval_runtime": 116.2558, |
|
"eval_samples_per_second": 888.342, |
|
"eval_steps_per_second": 55.524, |
|
"step": 2168000 |
|
}, |
|
{ |
|
"epoch": 17.74, |
|
"learning_rate": 3.8266666666666665e-08, |
|
"loss": 2.0366, |
|
"step": 2176000 |
|
}, |
|
{ |
|
"epoch": 17.74, |
|
"eval_loss": 1.9152798652648926, |
|
"eval_runtime": 116.0194, |
|
"eval_samples_per_second": 890.153, |
|
"eval_steps_per_second": 55.637, |
|
"step": 2176000 |
|
}, |
|
{ |
|
"epoch": 17.81, |
|
"eval_loss": 1.9072139263153076, |
|
"eval_runtime": 115.6971, |
|
"eval_samples_per_second": 892.633, |
|
"eval_steps_per_second": 55.792, |
|
"step": 2184000 |
|
}, |
|
{ |
|
"epoch": 17.87, |
|
"learning_rate": 3.5533333333333334e-08, |
|
"loss": 2.034, |
|
"step": 2192000 |
|
}, |
|
{ |
|
"epoch": 17.87, |
|
"eval_loss": 1.9181559085845947, |
|
"eval_runtime": 116.0059, |
|
"eval_samples_per_second": 890.256, |
|
"eval_steps_per_second": 55.644, |
|
"step": 2192000 |
|
}, |
|
{ |
|
"epoch": 17.94, |
|
"eval_loss": 1.9085872173309326, |
|
"eval_runtime": 116.4771, |
|
"eval_samples_per_second": 886.655, |
|
"eval_steps_per_second": 55.419, |
|
"step": 2200000 |
|
}, |
|
{ |
|
"epoch": 18.0, |
|
"learning_rate": 3.28e-08, |
|
"loss": 2.0397, |
|
"step": 2208000 |
|
}, |
|
{ |
|
"epoch": 18.0, |
|
"eval_loss": 1.9070638418197632, |
|
"eval_runtime": 116.2437, |
|
"eval_samples_per_second": 888.435, |
|
"eval_steps_per_second": 55.53, |
|
"step": 2208000 |
|
}, |
|
{ |
|
"epoch": 18.07, |
|
"eval_loss": 1.9146629571914673, |
|
"eval_runtime": 117.2397, |
|
"eval_samples_per_second": 880.887, |
|
"eval_steps_per_second": 55.058, |
|
"step": 2216000 |
|
}, |
|
{ |
|
"epoch": 18.13, |
|
"learning_rate": 3.0066666666666665e-08, |
|
"loss": 2.0374, |
|
"step": 2224000 |
|
}, |
|
{ |
|
"epoch": 18.13, |
|
"eval_loss": 1.9199682474136353, |
|
"eval_runtime": 116.9772, |
|
"eval_samples_per_second": 882.865, |
|
"eval_steps_per_second": 55.182, |
|
"step": 2224000 |
|
}, |
|
{ |
|
"epoch": 18.2, |
|
"eval_loss": 1.917845368385315, |
|
"eval_runtime": 116.926, |
|
"eval_samples_per_second": 883.251, |
|
"eval_steps_per_second": 55.206, |
|
"step": 2232000 |
|
}, |
|
{ |
|
"epoch": 18.26, |
|
"learning_rate": 2.7333333333333333e-08, |
|
"loss": 2.0413, |
|
"step": 2240000 |
|
}, |
|
{ |
|
"epoch": 18.26, |
|
"eval_loss": 1.9089611768722534, |
|
"eval_runtime": 116.5053, |
|
"eval_samples_per_second": 886.44, |
|
"eval_steps_per_second": 55.405, |
|
"step": 2240000 |
|
}, |
|
{ |
|
"epoch": 18.33, |
|
"eval_loss": 1.9036976099014282, |
|
"eval_runtime": 115.8055, |
|
"eval_samples_per_second": 891.797, |
|
"eval_steps_per_second": 55.74, |
|
"step": 2248000 |
|
}, |
|
{ |
|
"epoch": 18.4, |
|
"learning_rate": 2.46e-08, |
|
"loss": 2.047, |
|
"step": 2256000 |
|
}, |
|
{ |
|
"epoch": 18.4, |
|
"eval_loss": 1.9126322269439697, |
|
"eval_runtime": 116.2519, |
|
"eval_samples_per_second": 888.372, |
|
"eval_steps_per_second": 55.526, |
|
"step": 2256000 |
|
}, |
|
{ |
|
"epoch": 18.46, |
|
"eval_loss": 1.9117310047149658, |
|
"eval_runtime": 116.138, |
|
"eval_samples_per_second": 889.244, |
|
"eval_steps_per_second": 55.58, |
|
"step": 2264000 |
|
}, |
|
{ |
|
"epoch": 18.53, |
|
"learning_rate": 2.1866666666666667e-08, |
|
"loss": 2.0395, |
|
"step": 2272000 |
|
}, |
|
{ |
|
"epoch": 18.53, |
|
"eval_loss": 1.911027431488037, |
|
"eval_runtime": 116.0632, |
|
"eval_samples_per_second": 889.817, |
|
"eval_steps_per_second": 55.616, |
|
"step": 2272000 |
|
}, |
|
{ |
|
"epoch": 18.59, |
|
"eval_loss": 1.9157801866531372, |
|
"eval_runtime": 116.0777, |
|
"eval_samples_per_second": 889.706, |
|
"eval_steps_per_second": 55.609, |
|
"step": 2280000 |
|
}, |
|
{ |
|
"epoch": 18.66, |
|
"learning_rate": 1.9133333333333333e-08, |
|
"loss": 2.0447, |
|
"step": 2288000 |
|
}, |
|
{ |
|
"epoch": 18.66, |
|
"eval_loss": 1.9016647338867188, |
|
"eval_runtime": 116.0944, |
|
"eval_samples_per_second": 889.578, |
|
"eval_steps_per_second": 55.601, |
|
"step": 2288000 |
|
}, |
|
{ |
|
"epoch": 18.72, |
|
"eval_loss": 1.9071747064590454, |
|
"eval_runtime": 117.2998, |
|
"eval_samples_per_second": 880.436, |
|
"eval_steps_per_second": 55.03, |
|
"step": 2296000 |
|
}, |
|
{ |
|
"epoch": 18.79, |
|
"learning_rate": 1.64e-08, |
|
"loss": 2.0377, |
|
"step": 2304000 |
|
}, |
|
{ |
|
"epoch": 18.79, |
|
"eval_loss": 1.91358482837677, |
|
"eval_runtime": 118.15, |
|
"eval_samples_per_second": 874.101, |
|
"eval_steps_per_second": 54.634, |
|
"step": 2304000 |
|
}, |
|
{ |
|
"epoch": 18.85, |
|
"eval_loss": 1.9084006547927856, |
|
"eval_runtime": 118.6409, |
|
"eval_samples_per_second": 870.484, |
|
"eval_steps_per_second": 54.408, |
|
"step": 2312000 |
|
}, |
|
{ |
|
"epoch": 18.92, |
|
"learning_rate": 1.3666666666666667e-08, |
|
"loss": 2.0312, |
|
"step": 2320000 |
|
}, |
|
{ |
|
"epoch": 18.92, |
|
"eval_loss": 1.909172773361206, |
|
"eval_runtime": 117.7813, |
|
"eval_samples_per_second": 876.837, |
|
"eval_steps_per_second": 54.805, |
|
"step": 2320000 |
|
}, |
|
{ |
|
"epoch": 18.98, |
|
"eval_loss": 1.9103314876556396, |
|
"eval_runtime": 117.0394, |
|
"eval_samples_per_second": 882.395, |
|
"eval_steps_per_second": 55.152, |
|
"step": 2328000 |
|
}, |
|
{ |
|
"epoch": 19.05, |
|
"learning_rate": 1.0933333333333334e-08, |
|
"loss": 2.0387, |
|
"step": 2336000 |
|
}, |
|
{ |
|
"epoch": 19.05, |
|
"eval_loss": 1.9023408889770508, |
|
"eval_runtime": 117.1786, |
|
"eval_samples_per_second": 881.347, |
|
"eval_steps_per_second": 55.087, |
|
"step": 2336000 |
|
}, |
|
{ |
|
"epoch": 19.11, |
|
"eval_loss": 1.9034806489944458, |
|
"eval_runtime": 118.479, |
|
"eval_samples_per_second": 871.674, |
|
"eval_steps_per_second": 54.482, |
|
"step": 2344000 |
|
}, |
|
{ |
|
"epoch": 19.18, |
|
"learning_rate": 8.2e-09, |
|
"loss": 2.0358, |
|
"step": 2352000 |
|
}, |
|
{ |
|
"epoch": 19.18, |
|
"eval_loss": 1.9131251573562622, |
|
"eval_runtime": 116.6651, |
|
"eval_samples_per_second": 885.226, |
|
"eval_steps_per_second": 55.329, |
|
"step": 2352000 |
|
}, |
|
{ |
|
"epoch": 19.24, |
|
"eval_loss": 1.9065865278244019, |
|
"eval_runtime": 118.6652, |
|
"eval_samples_per_second": 870.306, |
|
"eval_steps_per_second": 54.397, |
|
"step": 2360000 |
|
}, |
|
{ |
|
"epoch": 19.31, |
|
"learning_rate": 5.466666666666667e-09, |
|
"loss": 2.0402, |
|
"step": 2368000 |
|
}, |
|
{ |
|
"epoch": 19.31, |
|
"eval_loss": 1.9083107709884644, |
|
"eval_runtime": 117.2736, |
|
"eval_samples_per_second": 880.633, |
|
"eval_steps_per_second": 55.042, |
|
"step": 2368000 |
|
}, |
|
{ |
|
"epoch": 19.37, |
|
"eval_loss": 1.9068409204483032, |
|
"eval_runtime": 117.4261, |
|
"eval_samples_per_second": 879.49, |
|
"eval_steps_per_second": 54.971, |
|
"step": 2376000 |
|
}, |
|
{ |
|
"epoch": 19.44, |
|
"learning_rate": 2.7333333333333334e-09, |
|
"loss": 2.0319, |
|
"step": 2384000 |
|
}, |
|
{ |
|
"epoch": 19.44, |
|
"eval_loss": 1.9011958837509155, |
|
"eval_runtime": 117.8174, |
|
"eval_samples_per_second": 876.568, |
|
"eval_steps_per_second": 54.788, |
|
"step": 2384000 |
|
}, |
|
{ |
|
"epoch": 19.5, |
|
"eval_loss": 1.927274465560913, |
|
"eval_runtime": 118.5235, |
|
"eval_samples_per_second": 871.346, |
|
"eval_steps_per_second": 54.462, |
|
"step": 2392000 |
|
}, |
|
{ |
|
"epoch": 19.57, |
|
"learning_rate": 0.0, |
|
"loss": 2.0436, |
|
"step": 2400000 |
|
}, |
|
{ |
|
"epoch": 19.57, |
|
"eval_loss": 1.905896782875061, |
|
"eval_runtime": 116.8523, |
|
"eval_samples_per_second": 883.808, |
|
"eval_steps_per_second": 55.241, |
|
"step": 2400000 |
|
}, |
|
{ |
|
"epoch": 19.57, |
|
"step": 2400000, |
|
"total_flos": 7.485113755399533e+17, |
|
"train_loss": 2.0565961393229166, |
|
"train_runtime": 185876.2415, |
|
"train_samples_per_second": 206.589, |
|
"train_steps_per_second": 12.912 |
|
} |
|
], |
|
"logging_steps": 16000, |
|
"max_steps": 2400000, |
|
"num_train_epochs": 20, |
|
"save_steps": 32000, |
|
"total_flos": 7.485113755399533e+17, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|