|
{ |
|
"best_metric": 2.671285629272461, |
|
"best_model_checkpoint": "./model_tweets_2020_Q2/checkpoint-32000", |
|
"epoch": 19.569471624266146, |
|
"eval_steps": 8000, |
|
"global_step": 2400000, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.07, |
|
"eval_loss": 2.635031223297119, |
|
"eval_runtime": 126.0563, |
|
"eval_samples_per_second": 819.277, |
|
"eval_steps_per_second": 51.207, |
|
"step": 8000 |
|
}, |
|
{ |
|
"epoch": 0.13, |
|
"learning_rate": 9.939131159843243e-06, |
|
"loss": 2.7848, |
|
"step": 16000 |
|
}, |
|
{ |
|
"epoch": 0.13, |
|
"eval_loss": 2.6555588245391846, |
|
"eval_runtime": 126.7417, |
|
"eval_samples_per_second": 814.846, |
|
"eval_steps_per_second": 50.93, |
|
"step": 16000 |
|
}, |
|
{ |
|
"epoch": 0.2, |
|
"eval_loss": 2.6695027351379395, |
|
"eval_runtime": 125.9524, |
|
"eval_samples_per_second": 819.953, |
|
"eval_steps_per_second": 51.25, |
|
"step": 24000 |
|
}, |
|
{ |
|
"epoch": 0.26, |
|
"learning_rate": 9.872425581589261e-06, |
|
"loss": 2.7545, |
|
"step": 32000 |
|
}, |
|
{ |
|
"epoch": 0.26, |
|
"eval_loss": 2.671285629272461, |
|
"eval_runtime": 126.9886, |
|
"eval_samples_per_second": 813.262, |
|
"eval_steps_per_second": 50.831, |
|
"step": 32000 |
|
}, |
|
{ |
|
"epoch": 0.33, |
|
"eval_loss": 2.708911895751953, |
|
"eval_runtime": 126.0433, |
|
"eval_samples_per_second": 819.361, |
|
"eval_steps_per_second": 51.213, |
|
"step": 40000 |
|
}, |
|
{ |
|
"epoch": 0.39, |
|
"learning_rate": 9.80572000333528e-06, |
|
"loss": 2.7717, |
|
"step": 48000 |
|
}, |
|
{ |
|
"epoch": 0.39, |
|
"eval_loss": 2.7143805027008057, |
|
"eval_runtime": 126.2378, |
|
"eval_samples_per_second": 818.099, |
|
"eval_steps_per_second": 51.134, |
|
"step": 48000 |
|
}, |
|
{ |
|
"epoch": 0.46, |
|
"eval_loss": 2.7240307331085205, |
|
"eval_runtime": 125.5002, |
|
"eval_samples_per_second": 822.907, |
|
"eval_steps_per_second": 51.434, |
|
"step": 56000 |
|
}, |
|
{ |
|
"epoch": 0.52, |
|
"learning_rate": 9.739014425081299e-06, |
|
"loss": 2.8043, |
|
"step": 64000 |
|
}, |
|
{ |
|
"epoch": 0.52, |
|
"eval_loss": 2.749925374984741, |
|
"eval_runtime": 126.3275, |
|
"eval_samples_per_second": 817.518, |
|
"eval_steps_per_second": 51.097, |
|
"step": 64000 |
|
}, |
|
{ |
|
"epoch": 0.59, |
|
"eval_loss": 2.770448684692383, |
|
"eval_runtime": 115.1543, |
|
"eval_samples_per_second": 896.84, |
|
"eval_steps_per_second": 56.055, |
|
"step": 72000 |
|
}, |
|
{ |
|
"epoch": 0.65, |
|
"learning_rate": 9.672308846827316e-06, |
|
"loss": 2.8401, |
|
"step": 80000 |
|
}, |
|
{ |
|
"epoch": 0.65, |
|
"eval_loss": 2.782008409500122, |
|
"eval_runtime": 116.1441, |
|
"eval_samples_per_second": 889.197, |
|
"eval_steps_per_second": 55.578, |
|
"step": 80000 |
|
}, |
|
{ |
|
"epoch": 0.72, |
|
"eval_loss": 2.8068478107452393, |
|
"eval_runtime": 116.2984, |
|
"eval_samples_per_second": 888.017, |
|
"eval_steps_per_second": 55.504, |
|
"step": 88000 |
|
}, |
|
{ |
|
"epoch": 0.78, |
|
"learning_rate": 9.605603268573334e-06, |
|
"loss": 2.8723, |
|
"step": 96000 |
|
}, |
|
{ |
|
"epoch": 0.78, |
|
"eval_loss": 2.8150370121002197, |
|
"eval_runtime": 116.0456, |
|
"eval_samples_per_second": 889.952, |
|
"eval_steps_per_second": 55.625, |
|
"step": 96000 |
|
}, |
|
{ |
|
"epoch": 0.85, |
|
"eval_loss": 2.8410351276397705, |
|
"eval_runtime": 114.7666, |
|
"eval_samples_per_second": 899.87, |
|
"eval_steps_per_second": 56.245, |
|
"step": 104000 |
|
}, |
|
{ |
|
"epoch": 0.91, |
|
"learning_rate": 9.538897690319354e-06, |
|
"loss": 2.9004, |
|
"step": 112000 |
|
}, |
|
{ |
|
"epoch": 0.91, |
|
"eval_loss": 2.865703582763672, |
|
"eval_runtime": 115.4628, |
|
"eval_samples_per_second": 894.444, |
|
"eval_steps_per_second": 55.905, |
|
"step": 112000 |
|
}, |
|
{ |
|
"epoch": 0.98, |
|
"eval_loss": 2.882617950439453, |
|
"eval_runtime": 116.5627, |
|
"eval_samples_per_second": 886.004, |
|
"eval_steps_per_second": 55.378, |
|
"step": 120000 |
|
}, |
|
{ |
|
"epoch": 1.04, |
|
"learning_rate": 9.472192112065373e-06, |
|
"loss": 2.9396, |
|
"step": 128000 |
|
}, |
|
{ |
|
"epoch": 1.04, |
|
"eval_loss": 2.9071033000946045, |
|
"eval_runtime": 116.4638, |
|
"eval_samples_per_second": 886.756, |
|
"eval_steps_per_second": 55.425, |
|
"step": 128000 |
|
}, |
|
{ |
|
"epoch": 1.11, |
|
"eval_loss": 2.949030876159668, |
|
"eval_runtime": 115.1354, |
|
"eval_samples_per_second": 896.987, |
|
"eval_steps_per_second": 56.064, |
|
"step": 136000 |
|
}, |
|
{ |
|
"epoch": 1.17, |
|
"learning_rate": 9.405486533811392e-06, |
|
"loss": 2.9801, |
|
"step": 144000 |
|
}, |
|
{ |
|
"epoch": 1.17, |
|
"eval_loss": 2.951450824737549, |
|
"eval_runtime": 114.8755, |
|
"eval_samples_per_second": 899.017, |
|
"eval_steps_per_second": 56.191, |
|
"step": 144000 |
|
}, |
|
{ |
|
"epoch": 1.24, |
|
"eval_loss": 2.9862585067749023, |
|
"eval_runtime": 116.1529, |
|
"eval_samples_per_second": 889.129, |
|
"eval_steps_per_second": 55.573, |
|
"step": 152000 |
|
}, |
|
{ |
|
"epoch": 1.3, |
|
"learning_rate": 9.338780955557409e-06, |
|
"loss": 3.0173, |
|
"step": 160000 |
|
}, |
|
{ |
|
"epoch": 1.3, |
|
"eval_loss": 2.991586685180664, |
|
"eval_runtime": 116.0798, |
|
"eval_samples_per_second": 889.69, |
|
"eval_steps_per_second": 55.608, |
|
"step": 160000 |
|
}, |
|
{ |
|
"epoch": 1.37, |
|
"eval_loss": 3.0230655670166016, |
|
"eval_runtime": 115.2701, |
|
"eval_samples_per_second": 895.939, |
|
"eval_steps_per_second": 55.999, |
|
"step": 168000 |
|
}, |
|
{ |
|
"epoch": 1.44, |
|
"learning_rate": 9.272075377303427e-06, |
|
"loss": 3.0674, |
|
"step": 176000 |
|
}, |
|
{ |
|
"epoch": 1.44, |
|
"eval_loss": 3.0447049140930176, |
|
"eval_runtime": 115.1489, |
|
"eval_samples_per_second": 896.882, |
|
"eval_steps_per_second": 56.058, |
|
"step": 176000 |
|
}, |
|
{ |
|
"epoch": 1.5, |
|
"eval_loss": 3.0638155937194824, |
|
"eval_runtime": 116.1134, |
|
"eval_samples_per_second": 889.432, |
|
"eval_steps_per_second": 55.592, |
|
"step": 184000 |
|
}, |
|
{ |
|
"epoch": 1.57, |
|
"learning_rate": 9.205369799049446e-06, |
|
"loss": 3.1059, |
|
"step": 192000 |
|
}, |
|
{ |
|
"epoch": 1.57, |
|
"eval_loss": 3.094524383544922, |
|
"eval_runtime": 114.9725, |
|
"eval_samples_per_second": 898.258, |
|
"eval_steps_per_second": 56.144, |
|
"step": 192000 |
|
}, |
|
{ |
|
"epoch": 1.63, |
|
"eval_loss": 3.1008002758026123, |
|
"eval_runtime": 116.6453, |
|
"eval_samples_per_second": 885.377, |
|
"eval_steps_per_second": 55.339, |
|
"step": 200000 |
|
}, |
|
{ |
|
"epoch": 1.7, |
|
"learning_rate": 9.138664220795464e-06, |
|
"loss": 3.1283, |
|
"step": 208000 |
|
}, |
|
{ |
|
"epoch": 1.7, |
|
"eval_loss": 3.1256680488586426, |
|
"eval_runtime": 115.0624, |
|
"eval_samples_per_second": 897.556, |
|
"eval_steps_per_second": 56.1, |
|
"step": 208000 |
|
}, |
|
{ |
|
"epoch": 1.76, |
|
"eval_loss": 3.1262004375457764, |
|
"eval_runtime": 114.9392, |
|
"eval_samples_per_second": 898.518, |
|
"eval_steps_per_second": 56.16, |
|
"step": 216000 |
|
}, |
|
{ |
|
"epoch": 1.83, |
|
"learning_rate": 9.071958642541483e-06, |
|
"loss": 3.1684, |
|
"step": 224000 |
|
}, |
|
{ |
|
"epoch": 1.83, |
|
"eval_loss": 3.152285099029541, |
|
"eval_runtime": 115.5854, |
|
"eval_samples_per_second": 893.495, |
|
"eval_steps_per_second": 55.846, |
|
"step": 224000 |
|
}, |
|
{ |
|
"epoch": 1.89, |
|
"eval_loss": 3.1842401027679443, |
|
"eval_runtime": 114.9021, |
|
"eval_samples_per_second": 898.809, |
|
"eval_steps_per_second": 56.178, |
|
"step": 232000 |
|
}, |
|
{ |
|
"epoch": 1.96, |
|
"learning_rate": 9.005253064287502e-06, |
|
"loss": 3.1966, |
|
"step": 240000 |
|
}, |
|
{ |
|
"epoch": 1.96, |
|
"eval_loss": 3.1820068359375, |
|
"eval_runtime": 117.5401, |
|
"eval_samples_per_second": 878.637, |
|
"eval_steps_per_second": 54.917, |
|
"step": 240000 |
|
}, |
|
{ |
|
"epoch": 2.02, |
|
"eval_loss": 3.197575569152832, |
|
"eval_runtime": 119.2185, |
|
"eval_samples_per_second": 866.266, |
|
"eval_steps_per_second": 54.144, |
|
"step": 248000 |
|
}, |
|
{ |
|
"epoch": 2.09, |
|
"learning_rate": 8.93854748603352e-06, |
|
"loss": 3.2055, |
|
"step": 256000 |
|
}, |
|
{ |
|
"epoch": 2.09, |
|
"eval_loss": 3.2012782096862793, |
|
"eval_runtime": 116.0617, |
|
"eval_samples_per_second": 889.829, |
|
"eval_steps_per_second": 55.617, |
|
"step": 256000 |
|
}, |
|
{ |
|
"epoch": 2.15, |
|
"eval_loss": 3.219731092453003, |
|
"eval_runtime": 115.3459, |
|
"eval_samples_per_second": 895.351, |
|
"eval_steps_per_second": 55.962, |
|
"step": 264000 |
|
}, |
|
{ |
|
"epoch": 2.22, |
|
"learning_rate": 8.871841907779539e-06, |
|
"loss": 3.2186, |
|
"step": 272000 |
|
}, |
|
{ |
|
"epoch": 2.22, |
|
"eval_loss": 3.2258596420288086, |
|
"eval_runtime": 117.0782, |
|
"eval_samples_per_second": 882.102, |
|
"eval_steps_per_second": 55.134, |
|
"step": 272000 |
|
}, |
|
{ |
|
"epoch": 2.28, |
|
"eval_loss": 3.2410128116607666, |
|
"eval_runtime": 115.7081, |
|
"eval_samples_per_second": 892.547, |
|
"eval_steps_per_second": 55.787, |
|
"step": 280000 |
|
}, |
|
{ |
|
"epoch": 2.35, |
|
"learning_rate": 8.805136329525557e-06, |
|
"loss": 3.2518, |
|
"step": 288000 |
|
}, |
|
{ |
|
"epoch": 2.35, |
|
"eval_loss": 3.2448806762695312, |
|
"eval_runtime": 116.2706, |
|
"eval_samples_per_second": 888.23, |
|
"eval_steps_per_second": 55.517, |
|
"step": 288000 |
|
}, |
|
{ |
|
"epoch": 2.41, |
|
"eval_loss": 3.2685933113098145, |
|
"eval_runtime": 117.0296, |
|
"eval_samples_per_second": 882.469, |
|
"eval_steps_per_second": 55.157, |
|
"step": 296000 |
|
}, |
|
{ |
|
"epoch": 2.48, |
|
"learning_rate": 8.738430751271576e-06, |
|
"loss": 3.2705, |
|
"step": 304000 |
|
}, |
|
{ |
|
"epoch": 2.48, |
|
"eval_loss": 3.270232915878296, |
|
"eval_runtime": 115.7748, |
|
"eval_samples_per_second": 892.034, |
|
"eval_steps_per_second": 55.755, |
|
"step": 304000 |
|
}, |
|
{ |
|
"epoch": 2.54, |
|
"eval_loss": 3.271563768386841, |
|
"eval_runtime": 114.7956, |
|
"eval_samples_per_second": 899.643, |
|
"eval_steps_per_second": 56.23, |
|
"step": 312000 |
|
}, |
|
{ |
|
"epoch": 2.61, |
|
"learning_rate": 8.671725173017595e-06, |
|
"loss": 3.2677, |
|
"step": 320000 |
|
}, |
|
{ |
|
"epoch": 2.61, |
|
"eval_loss": 3.2934534549713135, |
|
"eval_runtime": 116.4472, |
|
"eval_samples_per_second": 886.883, |
|
"eval_steps_per_second": 55.433, |
|
"step": 320000 |
|
}, |
|
{ |
|
"epoch": 2.67, |
|
"eval_loss": 3.2941575050354004, |
|
"eval_runtime": 115.658, |
|
"eval_samples_per_second": 892.934, |
|
"eval_steps_per_second": 55.811, |
|
"step": 328000 |
|
}, |
|
{ |
|
"epoch": 2.74, |
|
"learning_rate": 8.605019594763613e-06, |
|
"loss": 3.2955, |
|
"step": 336000 |
|
}, |
|
{ |
|
"epoch": 2.74, |
|
"eval_loss": 3.304429054260254, |
|
"eval_runtime": 115.4488, |
|
"eval_samples_per_second": 894.552, |
|
"eval_steps_per_second": 55.912, |
|
"step": 336000 |
|
}, |
|
{ |
|
"epoch": 2.8, |
|
"eval_loss": 3.3109662532806396, |
|
"eval_runtime": 114.8039, |
|
"eval_samples_per_second": 899.577, |
|
"eval_steps_per_second": 56.226, |
|
"step": 344000 |
|
}, |
|
{ |
|
"epoch": 2.87, |
|
"learning_rate": 8.538314016509632e-06, |
|
"loss": 3.2966, |
|
"step": 352000 |
|
}, |
|
{ |
|
"epoch": 2.87, |
|
"eval_loss": 3.3053431510925293, |
|
"eval_runtime": 115.0477, |
|
"eval_samples_per_second": 897.671, |
|
"eval_steps_per_second": 56.107, |
|
"step": 352000 |
|
}, |
|
{ |
|
"epoch": 2.94, |
|
"eval_loss": 3.3276007175445557, |
|
"eval_runtime": 115.8876, |
|
"eval_samples_per_second": 891.165, |
|
"eval_steps_per_second": 55.701, |
|
"step": 360000 |
|
}, |
|
{ |
|
"epoch": 3.0, |
|
"learning_rate": 8.471608438255649e-06, |
|
"loss": 3.311, |
|
"step": 368000 |
|
}, |
|
{ |
|
"epoch": 3.0, |
|
"eval_loss": 3.3256120681762695, |
|
"eval_runtime": 117.3196, |
|
"eval_samples_per_second": 880.288, |
|
"eval_steps_per_second": 55.021, |
|
"step": 368000 |
|
}, |
|
{ |
|
"epoch": 3.07, |
|
"eval_loss": 3.3292236328125, |
|
"eval_runtime": 117.5646, |
|
"eval_samples_per_second": 878.453, |
|
"eval_steps_per_second": 54.906, |
|
"step": 376000 |
|
}, |
|
{ |
|
"epoch": 3.13, |
|
"learning_rate": 8.404902860001667e-06, |
|
"loss": 3.3217, |
|
"step": 384000 |
|
}, |
|
{ |
|
"epoch": 3.13, |
|
"eval_loss": 3.333477258682251, |
|
"eval_runtime": 116.7284, |
|
"eval_samples_per_second": 884.746, |
|
"eval_steps_per_second": 55.299, |
|
"step": 384000 |
|
}, |
|
{ |
|
"epoch": 3.2, |
|
"eval_loss": 3.316025972366333, |
|
"eval_runtime": 118.1544, |
|
"eval_samples_per_second": 874.068, |
|
"eval_steps_per_second": 54.632, |
|
"step": 392000 |
|
}, |
|
{ |
|
"epoch": 3.26, |
|
"learning_rate": 8.338197281747686e-06, |
|
"loss": 3.3145, |
|
"step": 400000 |
|
}, |
|
{ |
|
"epoch": 3.26, |
|
"eval_loss": 3.337838649749756, |
|
"eval_runtime": 116.066, |
|
"eval_samples_per_second": 889.796, |
|
"eval_steps_per_second": 55.615, |
|
"step": 400000 |
|
}, |
|
{ |
|
"epoch": 3.33, |
|
"eval_loss": 3.3306798934936523, |
|
"eval_runtime": 117.4533, |
|
"eval_samples_per_second": 879.285, |
|
"eval_steps_per_second": 54.958, |
|
"step": 408000 |
|
}, |
|
{ |
|
"epoch": 3.39, |
|
"learning_rate": 8.271491703493705e-06, |
|
"loss": 3.3246, |
|
"step": 416000 |
|
}, |
|
{ |
|
"epoch": 3.39, |
|
"eval_loss": 3.342693567276001, |
|
"eval_runtime": 115.6289, |
|
"eval_samples_per_second": 893.159, |
|
"eval_steps_per_second": 55.825, |
|
"step": 416000 |
|
}, |
|
{ |
|
"epoch": 3.46, |
|
"eval_loss": 3.3543155193328857, |
|
"eval_runtime": 115.7056, |
|
"eval_samples_per_second": 892.567, |
|
"eval_steps_per_second": 55.788, |
|
"step": 424000 |
|
}, |
|
{ |
|
"epoch": 3.52, |
|
"learning_rate": 8.204786125239725e-06, |
|
"loss": 3.3131, |
|
"step": 432000 |
|
}, |
|
{ |
|
"epoch": 3.52, |
|
"eval_loss": 3.340524196624756, |
|
"eval_runtime": 116.2105, |
|
"eval_samples_per_second": 888.689, |
|
"eval_steps_per_second": 55.546, |
|
"step": 432000 |
|
}, |
|
{ |
|
"epoch": 3.59, |
|
"eval_loss": 3.336106777191162, |
|
"eval_runtime": 114.9141, |
|
"eval_samples_per_second": 898.714, |
|
"eval_steps_per_second": 56.172, |
|
"step": 440000 |
|
}, |
|
{ |
|
"epoch": 3.65, |
|
"learning_rate": 8.138080546985743e-06, |
|
"loss": 3.3266, |
|
"step": 448000 |
|
}, |
|
{ |
|
"epoch": 3.65, |
|
"eval_loss": 3.370443344116211, |
|
"eval_runtime": 115.193, |
|
"eval_samples_per_second": 896.539, |
|
"eval_steps_per_second": 56.036, |
|
"step": 448000 |
|
}, |
|
{ |
|
"epoch": 3.72, |
|
"eval_loss": 3.354923963546753, |
|
"eval_runtime": 115.5245, |
|
"eval_samples_per_second": 893.967, |
|
"eval_steps_per_second": 55.876, |
|
"step": 456000 |
|
}, |
|
{ |
|
"epoch": 3.78, |
|
"learning_rate": 8.07137496873176e-06, |
|
"loss": 3.3358, |
|
"step": 464000 |
|
}, |
|
{ |
|
"epoch": 3.78, |
|
"eval_loss": 3.360276937484741, |
|
"eval_runtime": 116.1443, |
|
"eval_samples_per_second": 889.196, |
|
"eval_steps_per_second": 55.577, |
|
"step": 464000 |
|
}, |
|
{ |
|
"epoch": 3.85, |
|
"eval_loss": 3.3641881942749023, |
|
"eval_runtime": 115.4508, |
|
"eval_samples_per_second": 894.537, |
|
"eval_steps_per_second": 55.911, |
|
"step": 472000 |
|
}, |
|
{ |
|
"epoch": 3.91, |
|
"learning_rate": 8.004669390477779e-06, |
|
"loss": 3.3385, |
|
"step": 480000 |
|
}, |
|
{ |
|
"epoch": 3.91, |
|
"eval_loss": 3.3572633266448975, |
|
"eval_runtime": 114.9449, |
|
"eval_samples_per_second": 898.474, |
|
"eval_steps_per_second": 56.157, |
|
"step": 480000 |
|
}, |
|
{ |
|
"epoch": 3.98, |
|
"eval_loss": 3.3658275604248047, |
|
"eval_runtime": 115.0066, |
|
"eval_samples_per_second": 897.992, |
|
"eval_steps_per_second": 56.127, |
|
"step": 488000 |
|
}, |
|
{ |
|
"epoch": 4.04, |
|
"learning_rate": 7.937963812223798e-06, |
|
"loss": 3.3375, |
|
"step": 496000 |
|
}, |
|
{ |
|
"epoch": 4.04, |
|
"eval_loss": 3.345881700515747, |
|
"eval_runtime": 115.316, |
|
"eval_samples_per_second": 895.583, |
|
"eval_steps_per_second": 55.977, |
|
"step": 496000 |
|
}, |
|
{ |
|
"epoch": 4.11, |
|
"eval_loss": 3.3702762126922607, |
|
"eval_runtime": 114.9631, |
|
"eval_samples_per_second": 898.331, |
|
"eval_steps_per_second": 56.148, |
|
"step": 504000 |
|
}, |
|
{ |
|
"epoch": 4.17, |
|
"learning_rate": 7.871258233969816e-06, |
|
"loss": 3.3237, |
|
"step": 512000 |
|
}, |
|
{ |
|
"epoch": 4.17, |
|
"eval_loss": 3.3564202785491943, |
|
"eval_runtime": 116.3254, |
|
"eval_samples_per_second": 887.811, |
|
"eval_steps_per_second": 55.491, |
|
"step": 512000 |
|
}, |
|
{ |
|
"epoch": 4.24, |
|
"eval_loss": 3.3553359508514404, |
|
"eval_runtime": 115.6968, |
|
"eval_samples_per_second": 892.635, |
|
"eval_steps_per_second": 55.792, |
|
"step": 520000 |
|
}, |
|
{ |
|
"epoch": 4.31, |
|
"learning_rate": 7.804552655715835e-06, |
|
"loss": 3.34, |
|
"step": 528000 |
|
}, |
|
{ |
|
"epoch": 4.31, |
|
"eval_loss": 3.35756778717041, |
|
"eval_runtime": 114.9307, |
|
"eval_samples_per_second": 898.585, |
|
"eval_steps_per_second": 56.164, |
|
"step": 528000 |
|
}, |
|
{ |
|
"epoch": 4.37, |
|
"eval_loss": 3.3548436164855957, |
|
"eval_runtime": 116.9698, |
|
"eval_samples_per_second": 882.92, |
|
"eval_steps_per_second": 55.185, |
|
"step": 536000 |
|
}, |
|
{ |
|
"epoch": 4.44, |
|
"learning_rate": 7.737847077461853e-06, |
|
"loss": 3.3247, |
|
"step": 544000 |
|
}, |
|
{ |
|
"epoch": 4.44, |
|
"eval_loss": 3.3525540828704834, |
|
"eval_runtime": 114.951, |
|
"eval_samples_per_second": 898.427, |
|
"eval_steps_per_second": 56.154, |
|
"step": 544000 |
|
}, |
|
{ |
|
"epoch": 4.5, |
|
"eval_loss": 3.367372512817383, |
|
"eval_runtime": 116.891, |
|
"eval_samples_per_second": 883.515, |
|
"eval_steps_per_second": 55.222, |
|
"step": 552000 |
|
}, |
|
{ |
|
"epoch": 4.57, |
|
"learning_rate": 7.671141499207872e-06, |
|
"loss": 3.318, |
|
"step": 560000 |
|
}, |
|
{ |
|
"epoch": 4.57, |
|
"eval_loss": 3.3607981204986572, |
|
"eval_runtime": 115.5047, |
|
"eval_samples_per_second": 894.12, |
|
"eval_steps_per_second": 55.885, |
|
"step": 560000 |
|
}, |
|
{ |
|
"epoch": 4.63, |
|
"eval_loss": 3.3527328968048096, |
|
"eval_runtime": 116.278, |
|
"eval_samples_per_second": 888.173, |
|
"eval_steps_per_second": 55.514, |
|
"step": 568000 |
|
}, |
|
{ |
|
"epoch": 4.7, |
|
"learning_rate": 7.604435920953891e-06, |
|
"loss": 3.3318, |
|
"step": 576000 |
|
}, |
|
{ |
|
"epoch": 4.7, |
|
"eval_loss": 3.3600049018859863, |
|
"eval_runtime": 115.0864, |
|
"eval_samples_per_second": 897.369, |
|
"eval_steps_per_second": 56.088, |
|
"step": 576000 |
|
}, |
|
{ |
|
"epoch": 4.76, |
|
"eval_loss": 3.366177797317505, |
|
"eval_runtime": 116.1802, |
|
"eval_samples_per_second": 888.921, |
|
"eval_steps_per_second": 55.56, |
|
"step": 584000 |
|
}, |
|
{ |
|
"epoch": 4.83, |
|
"learning_rate": 7.537730342699909e-06, |
|
"loss": 3.3211, |
|
"step": 592000 |
|
}, |
|
{ |
|
"epoch": 4.83, |
|
"eval_loss": 3.36027193069458, |
|
"eval_runtime": 115.5036, |
|
"eval_samples_per_second": 894.128, |
|
"eval_steps_per_second": 55.886, |
|
"step": 592000 |
|
}, |
|
{ |
|
"epoch": 4.89, |
|
"eval_loss": 3.364029884338379, |
|
"eval_runtime": 114.9019, |
|
"eval_samples_per_second": 898.81, |
|
"eval_steps_per_second": 56.178, |
|
"step": 600000 |
|
}, |
|
{ |
|
"epoch": 4.96, |
|
"learning_rate": 7.471024764445928e-06, |
|
"loss": 3.3344, |
|
"step": 608000 |
|
}, |
|
{ |
|
"epoch": 4.96, |
|
"eval_loss": 3.376020669937134, |
|
"eval_runtime": 115.5882, |
|
"eval_samples_per_second": 893.473, |
|
"eval_steps_per_second": 55.845, |
|
"step": 608000 |
|
}, |
|
{ |
|
"epoch": 5.02, |
|
"eval_loss": 3.3876428604125977, |
|
"eval_runtime": 115.0301, |
|
"eval_samples_per_second": 897.809, |
|
"eval_steps_per_second": 56.116, |
|
"step": 616000 |
|
}, |
|
{ |
|
"epoch": 5.09, |
|
"learning_rate": 7.4043191861919465e-06, |
|
"loss": 3.331, |
|
"step": 624000 |
|
}, |
|
{ |
|
"epoch": 5.09, |
|
"eval_loss": 3.351862668991089, |
|
"eval_runtime": 115.49, |
|
"eval_samples_per_second": 894.233, |
|
"eval_steps_per_second": 55.892, |
|
"step": 624000 |
|
}, |
|
{ |
|
"epoch": 5.15, |
|
"eval_loss": 3.373405933380127, |
|
"eval_runtime": 115.9525, |
|
"eval_samples_per_second": 890.666, |
|
"eval_steps_per_second": 55.669, |
|
"step": 632000 |
|
}, |
|
{ |
|
"epoch": 5.22, |
|
"learning_rate": 7.337613607937964e-06, |
|
"loss": 3.3293, |
|
"step": 640000 |
|
}, |
|
{ |
|
"epoch": 5.22, |
|
"eval_loss": 3.373460531234741, |
|
"eval_runtime": 115.1854, |
|
"eval_samples_per_second": 896.598, |
|
"eval_steps_per_second": 56.04, |
|
"step": 640000 |
|
}, |
|
{ |
|
"epoch": 5.28, |
|
"eval_loss": 3.3703157901763916, |
|
"eval_runtime": 115.0036, |
|
"eval_samples_per_second": 898.016, |
|
"eval_steps_per_second": 56.129, |
|
"step": 648000 |
|
}, |
|
{ |
|
"epoch": 5.35, |
|
"learning_rate": 7.270908029683983e-06, |
|
"loss": 3.3317, |
|
"step": 656000 |
|
}, |
|
{ |
|
"epoch": 5.35, |
|
"eval_loss": 3.382647752761841, |
|
"eval_runtime": 115.8086, |
|
"eval_samples_per_second": 891.773, |
|
"eval_steps_per_second": 55.739, |
|
"step": 656000 |
|
}, |
|
{ |
|
"epoch": 5.41, |
|
"eval_loss": 3.3825886249542236, |
|
"eval_runtime": 115.3628, |
|
"eval_samples_per_second": 895.219, |
|
"eval_steps_per_second": 55.954, |
|
"step": 664000 |
|
}, |
|
{ |
|
"epoch": 5.48, |
|
"learning_rate": 7.2042024514300015e-06, |
|
"loss": 3.3291, |
|
"step": 672000 |
|
}, |
|
{ |
|
"epoch": 5.48, |
|
"eval_loss": 3.391868829727173, |
|
"eval_runtime": 115.4028, |
|
"eval_samples_per_second": 894.909, |
|
"eval_steps_per_second": 55.935, |
|
"step": 672000 |
|
}, |
|
{ |
|
"epoch": 5.54, |
|
"eval_loss": 3.378626585006714, |
|
"eval_runtime": 115.4498, |
|
"eval_samples_per_second": 894.545, |
|
"eval_steps_per_second": 55.912, |
|
"step": 680000 |
|
}, |
|
{ |
|
"epoch": 5.61, |
|
"learning_rate": 7.13749687317602e-06, |
|
"loss": 3.3423, |
|
"step": 688000 |
|
}, |
|
{ |
|
"epoch": 5.61, |
|
"eval_loss": 3.377542734146118, |
|
"eval_runtime": 115.2629, |
|
"eval_samples_per_second": 895.995, |
|
"eval_steps_per_second": 56.002, |
|
"step": 688000 |
|
}, |
|
{ |
|
"epoch": 5.68, |
|
"eval_loss": 3.373429298400879, |
|
"eval_runtime": 115.5205, |
|
"eval_samples_per_second": 893.997, |
|
"eval_steps_per_second": 55.878, |
|
"step": 696000 |
|
}, |
|
{ |
|
"epoch": 5.74, |
|
"learning_rate": 7.070791294922038e-06, |
|
"loss": 3.3364, |
|
"step": 704000 |
|
}, |
|
{ |
|
"epoch": 5.74, |
|
"eval_loss": 3.372532367706299, |
|
"eval_runtime": 115.5543, |
|
"eval_samples_per_second": 893.735, |
|
"eval_steps_per_second": 55.861, |
|
"step": 704000 |
|
}, |
|
{ |
|
"epoch": 5.81, |
|
"eval_loss": 3.3855302333831787, |
|
"eval_runtime": 115.9379, |
|
"eval_samples_per_second": 890.778, |
|
"eval_steps_per_second": 55.676, |
|
"step": 712000 |
|
}, |
|
{ |
|
"epoch": 5.87, |
|
"learning_rate": 7.0040857166680564e-06, |
|
"loss": 3.347, |
|
"step": 720000 |
|
}, |
|
{ |
|
"epoch": 5.87, |
|
"eval_loss": 3.3774046897888184, |
|
"eval_runtime": 114.6511, |
|
"eval_samples_per_second": 900.776, |
|
"eval_steps_per_second": 56.301, |
|
"step": 720000 |
|
}, |
|
{ |
|
"epoch": 5.94, |
|
"eval_loss": 3.3717195987701416, |
|
"eval_runtime": 115.9173, |
|
"eval_samples_per_second": 890.937, |
|
"eval_steps_per_second": 55.686, |
|
"step": 728000 |
|
}, |
|
{ |
|
"epoch": 6.0, |
|
"learning_rate": 6.937380138414076e-06, |
|
"loss": 3.3311, |
|
"step": 736000 |
|
}, |
|
{ |
|
"epoch": 6.0, |
|
"eval_loss": 3.392944097518921, |
|
"eval_runtime": 115.7013, |
|
"eval_samples_per_second": 892.6, |
|
"eval_steps_per_second": 55.79, |
|
"step": 736000 |
|
}, |
|
{ |
|
"epoch": 6.07, |
|
"eval_loss": 3.389941930770874, |
|
"eval_runtime": 117.4363, |
|
"eval_samples_per_second": 879.413, |
|
"eval_steps_per_second": 54.966, |
|
"step": 744000 |
|
}, |
|
{ |
|
"epoch": 6.13, |
|
"learning_rate": 6.8706745601600945e-06, |
|
"loss": 3.3445, |
|
"step": 752000 |
|
}, |
|
{ |
|
"epoch": 6.13, |
|
"eval_loss": 3.3985016345977783, |
|
"eval_runtime": 115.5779, |
|
"eval_samples_per_second": 893.553, |
|
"eval_steps_per_second": 55.85, |
|
"step": 752000 |
|
}, |
|
{ |
|
"epoch": 6.2, |
|
"eval_loss": 3.3865506649017334, |
|
"eval_runtime": 114.8487, |
|
"eval_samples_per_second": 899.227, |
|
"eval_steps_per_second": 56.204, |
|
"step": 760000 |
|
}, |
|
{ |
|
"epoch": 6.26, |
|
"learning_rate": 6.803968981906113e-06, |
|
"loss": 3.345, |
|
"step": 768000 |
|
}, |
|
{ |
|
"epoch": 6.26, |
|
"eval_loss": 3.3942770957946777, |
|
"eval_runtime": 115.533, |
|
"eval_samples_per_second": 893.901, |
|
"eval_steps_per_second": 55.871, |
|
"step": 768000 |
|
}, |
|
{ |
|
"epoch": 6.33, |
|
"eval_loss": 3.373379945755005, |
|
"eval_runtime": 115.2598, |
|
"eval_samples_per_second": 896.019, |
|
"eval_steps_per_second": 56.004, |
|
"step": 776000 |
|
}, |
|
{ |
|
"epoch": 6.39, |
|
"learning_rate": 6.737263403652131e-06, |
|
"loss": 3.3427, |
|
"step": 784000 |
|
}, |
|
{ |
|
"epoch": 6.39, |
|
"eval_loss": 3.383202314376831, |
|
"eval_runtime": 114.9199, |
|
"eval_samples_per_second": 898.669, |
|
"eval_steps_per_second": 56.17, |
|
"step": 784000 |
|
}, |
|
{ |
|
"epoch": 6.46, |
|
"eval_loss": 3.3966336250305176, |
|
"eval_runtime": 115.6206, |
|
"eval_samples_per_second": 893.223, |
|
"eval_steps_per_second": 55.829, |
|
"step": 792000 |
|
}, |
|
{ |
|
"epoch": 6.52, |
|
"learning_rate": 6.6705578253981495e-06, |
|
"loss": 3.3406, |
|
"step": 800000 |
|
}, |
|
{ |
|
"epoch": 6.52, |
|
"eval_loss": 3.3891854286193848, |
|
"eval_runtime": 115.5059, |
|
"eval_samples_per_second": 894.11, |
|
"eval_steps_per_second": 55.885, |
|
"step": 800000 |
|
}, |
|
{ |
|
"epoch": 6.59, |
|
"eval_loss": 3.390401601791382, |
|
"eval_runtime": 116.1612, |
|
"eval_samples_per_second": 889.066, |
|
"eval_steps_per_second": 55.569, |
|
"step": 808000 |
|
}, |
|
{ |
|
"epoch": 6.65, |
|
"learning_rate": 6.603852247144168e-06, |
|
"loss": 3.3406, |
|
"step": 816000 |
|
}, |
|
{ |
|
"epoch": 6.65, |
|
"eval_loss": 3.386686086654663, |
|
"eval_runtime": 115.3671, |
|
"eval_samples_per_second": 895.186, |
|
"eval_steps_per_second": 55.952, |
|
"step": 816000 |
|
}, |
|
{ |
|
"epoch": 6.72, |
|
"eval_loss": 3.390192747116089, |
|
"eval_runtime": 114.8586, |
|
"eval_samples_per_second": 899.149, |
|
"eval_steps_per_second": 56.2, |
|
"step": 824000 |
|
}, |
|
{ |
|
"epoch": 6.78, |
|
"learning_rate": 6.537146668890187e-06, |
|
"loss": 3.3354, |
|
"step": 832000 |
|
}, |
|
{ |
|
"epoch": 6.78, |
|
"eval_loss": 3.371840000152588, |
|
"eval_runtime": 115.0229, |
|
"eval_samples_per_second": 897.865, |
|
"eval_steps_per_second": 56.119, |
|
"step": 832000 |
|
}, |
|
{ |
|
"epoch": 6.85, |
|
"eval_loss": 3.383141279220581, |
|
"eval_runtime": 115.453, |
|
"eval_samples_per_second": 894.52, |
|
"eval_steps_per_second": 55.91, |
|
"step": 840000 |
|
}, |
|
{ |
|
"epoch": 6.91, |
|
"learning_rate": 6.4704410906362044e-06, |
|
"loss": 3.3521, |
|
"step": 848000 |
|
}, |
|
{ |
|
"epoch": 6.91, |
|
"eval_loss": 3.3909192085266113, |
|
"eval_runtime": 115.5241, |
|
"eval_samples_per_second": 893.97, |
|
"eval_steps_per_second": 55.876, |
|
"step": 848000 |
|
}, |
|
{ |
|
"epoch": 6.98, |
|
"eval_loss": 3.3798959255218506, |
|
"eval_runtime": 115.2184, |
|
"eval_samples_per_second": 896.342, |
|
"eval_steps_per_second": 56.024, |
|
"step": 856000 |
|
}, |
|
{ |
|
"epoch": 7.05, |
|
"learning_rate": 6.403735512382223e-06, |
|
"loss": 3.3538, |
|
"step": 864000 |
|
}, |
|
{ |
|
"epoch": 7.05, |
|
"eval_loss": 3.3828136920928955, |
|
"eval_runtime": 115.3784, |
|
"eval_samples_per_second": 895.098, |
|
"eval_steps_per_second": 55.946, |
|
"step": 864000 |
|
}, |
|
{ |
|
"epoch": 7.11, |
|
"eval_loss": 3.378514051437378, |
|
"eval_runtime": 115.0377, |
|
"eval_samples_per_second": 897.749, |
|
"eval_steps_per_second": 56.112, |
|
"step": 872000 |
|
}, |
|
{ |
|
"epoch": 7.18, |
|
"learning_rate": 6.337029934128242e-06, |
|
"loss": 3.3363, |
|
"step": 880000 |
|
}, |
|
{ |
|
"epoch": 7.18, |
|
"eval_loss": 3.3993334770202637, |
|
"eval_runtime": 115.5145, |
|
"eval_samples_per_second": 894.043, |
|
"eval_steps_per_second": 55.88, |
|
"step": 880000 |
|
}, |
|
{ |
|
"epoch": 7.24, |
|
"eval_loss": 3.3849687576293945, |
|
"eval_runtime": 114.7628, |
|
"eval_samples_per_second": 899.9, |
|
"eval_steps_per_second": 56.246, |
|
"step": 888000 |
|
}, |
|
{ |
|
"epoch": 7.31, |
|
"learning_rate": 6.270324355874261e-06, |
|
"loss": 3.3341, |
|
"step": 896000 |
|
}, |
|
{ |
|
"epoch": 7.31, |
|
"eval_loss": 3.3932485580444336, |
|
"eval_runtime": 115.0217, |
|
"eval_samples_per_second": 897.874, |
|
"eval_steps_per_second": 56.12, |
|
"step": 896000 |
|
}, |
|
{ |
|
"epoch": 7.37, |
|
"eval_loss": 3.398083209991455, |
|
"eval_runtime": 115.1782, |
|
"eval_samples_per_second": 896.654, |
|
"eval_steps_per_second": 56.044, |
|
"step": 904000 |
|
}, |
|
{ |
|
"epoch": 7.44, |
|
"learning_rate": 6.20361877762028e-06, |
|
"loss": 3.3458, |
|
"step": 912000 |
|
}, |
|
{ |
|
"epoch": 7.44, |
|
"eval_loss": 3.393594741821289, |
|
"eval_runtime": 116.8302, |
|
"eval_samples_per_second": 883.975, |
|
"eval_steps_per_second": 55.251, |
|
"step": 912000 |
|
}, |
|
{ |
|
"epoch": 7.5, |
|
"eval_loss": 3.4032301902770996, |
|
"eval_runtime": 115.4692, |
|
"eval_samples_per_second": 894.394, |
|
"eval_steps_per_second": 55.902, |
|
"step": 920000 |
|
}, |
|
{ |
|
"epoch": 7.57, |
|
"learning_rate": 6.1369131993662975e-06, |
|
"loss": 3.3327, |
|
"step": 928000 |
|
}, |
|
{ |
|
"epoch": 7.57, |
|
"eval_loss": 3.385192394256592, |
|
"eval_runtime": 115.7558, |
|
"eval_samples_per_second": 892.18, |
|
"eval_steps_per_second": 55.764, |
|
"step": 928000 |
|
}, |
|
{ |
|
"epoch": 7.63, |
|
"eval_loss": 3.38653826713562, |
|
"eval_runtime": 116.1964, |
|
"eval_samples_per_second": 888.797, |
|
"eval_steps_per_second": 55.553, |
|
"step": 936000 |
|
}, |
|
{ |
|
"epoch": 7.7, |
|
"learning_rate": 6.070207621112316e-06, |
|
"loss": 3.3507, |
|
"step": 944000 |
|
}, |
|
{ |
|
"epoch": 7.7, |
|
"eval_loss": 3.390004873275757, |
|
"eval_runtime": 115.6497, |
|
"eval_samples_per_second": 892.999, |
|
"eval_steps_per_second": 55.815, |
|
"step": 944000 |
|
}, |
|
{ |
|
"epoch": 7.76, |
|
"eval_loss": 3.3772072792053223, |
|
"eval_runtime": 115.4517, |
|
"eval_samples_per_second": 894.53, |
|
"eval_steps_per_second": 55.911, |
|
"step": 952000 |
|
}, |
|
{ |
|
"epoch": 7.83, |
|
"learning_rate": 6.003502042858335e-06, |
|
"loss": 3.3493, |
|
"step": 960000 |
|
}, |
|
{ |
|
"epoch": 7.83, |
|
"eval_loss": 3.388688802719116, |
|
"eval_runtime": 115.7986, |
|
"eval_samples_per_second": 891.85, |
|
"eval_steps_per_second": 55.743, |
|
"step": 960000 |
|
}, |
|
{ |
|
"epoch": 7.89, |
|
"eval_loss": 3.395124912261963, |
|
"eval_runtime": 115.4739, |
|
"eval_samples_per_second": 894.358, |
|
"eval_steps_per_second": 55.9, |
|
"step": 968000 |
|
}, |
|
{ |
|
"epoch": 7.96, |
|
"learning_rate": 5.936796464604353e-06, |
|
"loss": 3.3412, |
|
"step": 976000 |
|
}, |
|
{ |
|
"epoch": 7.96, |
|
"eval_loss": 3.3833136558532715, |
|
"eval_runtime": 114.7504, |
|
"eval_samples_per_second": 899.997, |
|
"eval_steps_per_second": 56.253, |
|
"step": 976000 |
|
}, |
|
{ |
|
"epoch": 8.02, |
|
"eval_loss": 3.381627321243286, |
|
"eval_runtime": 115.0253, |
|
"eval_samples_per_second": 897.846, |
|
"eval_steps_per_second": 56.118, |
|
"step": 984000 |
|
}, |
|
{ |
|
"epoch": 8.09, |
|
"learning_rate": 5.870090886350371e-06, |
|
"loss": 3.3232, |
|
"step": 992000 |
|
}, |
|
{ |
|
"epoch": 8.09, |
|
"eval_loss": 3.37522292137146, |
|
"eval_runtime": 114.2933, |
|
"eval_samples_per_second": 903.597, |
|
"eval_steps_per_second": 56.478, |
|
"step": 992000 |
|
}, |
|
{ |
|
"epoch": 8.15, |
|
"eval_loss": 3.384525775909424, |
|
"eval_runtime": 115.119, |
|
"eval_samples_per_second": 897.115, |
|
"eval_steps_per_second": 56.072, |
|
"step": 1000000 |
|
}, |
|
{ |
|
"epoch": 8.22, |
|
"learning_rate": 5.80338530809639e-06, |
|
"loss": 3.333, |
|
"step": 1008000 |
|
}, |
|
{ |
|
"epoch": 8.22, |
|
"eval_loss": 3.3906686305999756, |
|
"eval_runtime": 115.1127, |
|
"eval_samples_per_second": 897.164, |
|
"eval_steps_per_second": 56.075, |
|
"step": 1008000 |
|
}, |
|
{ |
|
"epoch": 8.28, |
|
"eval_loss": 3.3822684288024902, |
|
"eval_runtime": 114.8049, |
|
"eval_samples_per_second": 899.569, |
|
"eval_steps_per_second": 56.226, |
|
"step": 1016000 |
|
}, |
|
{ |
|
"epoch": 8.35, |
|
"learning_rate": 5.736679729842408e-06, |
|
"loss": 3.3449, |
|
"step": 1024000 |
|
}, |
|
{ |
|
"epoch": 8.35, |
|
"eval_loss": 3.3724589347839355, |
|
"eval_runtime": 114.8265, |
|
"eval_samples_per_second": 899.4, |
|
"eval_steps_per_second": 56.215, |
|
"step": 1024000 |
|
}, |
|
{ |
|
"epoch": 8.41, |
|
"eval_loss": 3.37973952293396, |
|
"eval_runtime": 115.0872, |
|
"eval_samples_per_second": 897.363, |
|
"eval_steps_per_second": 56.088, |
|
"step": 1032000 |
|
}, |
|
{ |
|
"epoch": 8.48, |
|
"learning_rate": 5.669974151588427e-06, |
|
"loss": 3.3336, |
|
"step": 1040000 |
|
}, |
|
{ |
|
"epoch": 8.48, |
|
"eval_loss": 3.38781476020813, |
|
"eval_runtime": 116.3835, |
|
"eval_samples_per_second": 887.368, |
|
"eval_steps_per_second": 55.463, |
|
"step": 1040000 |
|
}, |
|
{ |
|
"epoch": 8.55, |
|
"eval_loss": 3.384516716003418, |
|
"eval_runtime": 115.2938, |
|
"eval_samples_per_second": 895.755, |
|
"eval_steps_per_second": 55.987, |
|
"step": 1048000 |
|
}, |
|
{ |
|
"epoch": 8.61, |
|
"learning_rate": 5.603268573334446e-06, |
|
"loss": 3.3307, |
|
"step": 1056000 |
|
}, |
|
{ |
|
"epoch": 8.61, |
|
"eval_loss": 3.390652894973755, |
|
"eval_runtime": 116.7145, |
|
"eval_samples_per_second": 884.851, |
|
"eval_steps_per_second": 55.306, |
|
"step": 1056000 |
|
}, |
|
{ |
|
"epoch": 8.68, |
|
"eval_loss": 3.3857922554016113, |
|
"eval_runtime": 115.6915, |
|
"eval_samples_per_second": 892.676, |
|
"eval_steps_per_second": 55.795, |
|
"step": 1064000 |
|
}, |
|
{ |
|
"epoch": 8.74, |
|
"learning_rate": 5.536562995080464e-06, |
|
"loss": 3.3267, |
|
"step": 1072000 |
|
}, |
|
{ |
|
"epoch": 8.74, |
|
"eval_loss": 3.3951947689056396, |
|
"eval_runtime": 115.1111, |
|
"eval_samples_per_second": 897.177, |
|
"eval_steps_per_second": 56.076, |
|
"step": 1072000 |
|
}, |
|
{ |
|
"epoch": 8.81, |
|
"eval_loss": 3.391402006149292, |
|
"eval_runtime": 114.8898, |
|
"eval_samples_per_second": 898.905, |
|
"eval_steps_per_second": 56.184, |
|
"step": 1080000 |
|
}, |
|
{ |
|
"epoch": 8.87, |
|
"learning_rate": 5.469857416826483e-06, |
|
"loss": 3.335, |
|
"step": 1088000 |
|
}, |
|
{ |
|
"epoch": 8.87, |
|
"eval_loss": 3.3904380798339844, |
|
"eval_runtime": 116.7468, |
|
"eval_samples_per_second": 884.607, |
|
"eval_steps_per_second": 55.291, |
|
"step": 1088000 |
|
}, |
|
{ |
|
"epoch": 8.94, |
|
"eval_loss": 3.3894879817962646, |
|
"eval_runtime": 115.0778, |
|
"eval_samples_per_second": 897.437, |
|
"eval_steps_per_second": 56.092, |
|
"step": 1096000 |
|
}, |
|
{ |
|
"epoch": 9.0, |
|
"learning_rate": 5.403151838572501e-06, |
|
"loss": 3.3411, |
|
"step": 1104000 |
|
}, |
|
{ |
|
"epoch": 9.0, |
|
"eval_loss": 3.395911455154419, |
|
"eval_runtime": 116.3802, |
|
"eval_samples_per_second": 887.393, |
|
"eval_steps_per_second": 55.465, |
|
"step": 1104000 |
|
}, |
|
{ |
|
"epoch": 9.07, |
|
"eval_loss": 3.391462802886963, |
|
"eval_runtime": 115.5689, |
|
"eval_samples_per_second": 893.623, |
|
"eval_steps_per_second": 55.854, |
|
"step": 1112000 |
|
}, |
|
{ |
|
"epoch": 9.13, |
|
"learning_rate": 5.33644626031852e-06, |
|
"loss": 3.3324, |
|
"step": 1120000 |
|
}, |
|
{ |
|
"epoch": 9.13, |
|
"eval_loss": 3.4030401706695557, |
|
"eval_runtime": 115.7261, |
|
"eval_samples_per_second": 892.409, |
|
"eval_steps_per_second": 55.778, |
|
"step": 1120000 |
|
}, |
|
{ |
|
"epoch": 9.2, |
|
"eval_loss": 3.4083750247955322, |
|
"eval_runtime": 118.5809, |
|
"eval_samples_per_second": 870.924, |
|
"eval_steps_per_second": 54.435, |
|
"step": 1128000 |
|
}, |
|
{ |
|
"epoch": 9.26, |
|
"learning_rate": 5.269740682064538e-06, |
|
"loss": 3.3297, |
|
"step": 1136000 |
|
}, |
|
{ |
|
"epoch": 9.26, |
|
"eval_loss": 3.402348518371582, |
|
"eval_runtime": 115.6049, |
|
"eval_samples_per_second": 893.344, |
|
"eval_steps_per_second": 55.837, |
|
"step": 1136000 |
|
}, |
|
{ |
|
"epoch": 9.33, |
|
"eval_loss": 3.3967323303222656, |
|
"eval_runtime": 115.5344, |
|
"eval_samples_per_second": 893.889, |
|
"eval_steps_per_second": 55.871, |
|
"step": 1144000 |
|
}, |
|
{ |
|
"epoch": 9.39, |
|
"learning_rate": 5.203035103810556e-06, |
|
"loss": 3.3492, |
|
"step": 1152000 |
|
}, |
|
{ |
|
"epoch": 9.39, |
|
"eval_loss": 3.393101215362549, |
|
"eval_runtime": 115.5769, |
|
"eval_samples_per_second": 893.561, |
|
"eval_steps_per_second": 55.85, |
|
"step": 1152000 |
|
}, |
|
{ |
|
"epoch": 9.46, |
|
"eval_loss": 3.4064693450927734, |
|
"eval_runtime": 114.7523, |
|
"eval_samples_per_second": 899.982, |
|
"eval_steps_per_second": 56.252, |
|
"step": 1160000 |
|
}, |
|
{ |
|
"epoch": 9.52, |
|
"learning_rate": 5.136329525556575e-06, |
|
"loss": 3.3317, |
|
"step": 1168000 |
|
}, |
|
{ |
|
"epoch": 9.52, |
|
"eval_loss": 3.3905270099639893, |
|
"eval_runtime": 115.5534, |
|
"eval_samples_per_second": 893.743, |
|
"eval_steps_per_second": 55.862, |
|
"step": 1168000 |
|
}, |
|
{ |
|
"epoch": 9.59, |
|
"eval_loss": 3.402090072631836, |
|
"eval_runtime": 114.6435, |
|
"eval_samples_per_second": 900.836, |
|
"eval_steps_per_second": 56.305, |
|
"step": 1176000 |
|
}, |
|
{ |
|
"epoch": 9.65, |
|
"learning_rate": 5.0696239473025935e-06, |
|
"loss": 3.3447, |
|
"step": 1184000 |
|
}, |
|
{ |
|
"epoch": 9.65, |
|
"eval_loss": 3.400120735168457, |
|
"eval_runtime": 116.0858, |
|
"eval_samples_per_second": 889.643, |
|
"eval_steps_per_second": 55.605, |
|
"step": 1184000 |
|
}, |
|
{ |
|
"epoch": 9.72, |
|
"eval_loss": 3.3942949771881104, |
|
"eval_runtime": 114.8922, |
|
"eval_samples_per_second": 898.886, |
|
"eval_steps_per_second": 56.183, |
|
"step": 1192000 |
|
}, |
|
{ |
|
"epoch": 9.78, |
|
"learning_rate": 5.002918369048611e-06, |
|
"loss": 3.3377, |
|
"step": 1200000 |
|
}, |
|
{ |
|
"epoch": 9.78, |
|
"eval_loss": 3.3970954418182373, |
|
"eval_runtime": 114.8942, |
|
"eval_samples_per_second": 898.871, |
|
"eval_steps_per_second": 56.182, |
|
"step": 1200000 |
|
}, |
|
{ |
|
"epoch": 9.85, |
|
"eval_loss": 3.3946433067321777, |
|
"eval_runtime": 114.9828, |
|
"eval_samples_per_second": 898.178, |
|
"eval_steps_per_second": 56.139, |
|
"step": 1208000 |
|
}, |
|
{ |
|
"epoch": 9.92, |
|
"learning_rate": 4.936212790794631e-06, |
|
"loss": 3.3486, |
|
"step": 1216000 |
|
}, |
|
{ |
|
"epoch": 9.92, |
|
"eval_loss": 3.392373561859131, |
|
"eval_runtime": 115.6846, |
|
"eval_samples_per_second": 892.729, |
|
"eval_steps_per_second": 55.798, |
|
"step": 1216000 |
|
}, |
|
{ |
|
"epoch": 9.98, |
|
"eval_loss": 3.398346424102783, |
|
"eval_runtime": 115.4236, |
|
"eval_samples_per_second": 894.747, |
|
"eval_steps_per_second": 55.924, |
|
"step": 1224000 |
|
}, |
|
{ |
|
"epoch": 10.05, |
|
"learning_rate": 4.869507212540649e-06, |
|
"loss": 3.3471, |
|
"step": 1232000 |
|
}, |
|
{ |
|
"epoch": 10.05, |
|
"eval_loss": 3.414100408554077, |
|
"eval_runtime": 115.0455, |
|
"eval_samples_per_second": 897.689, |
|
"eval_steps_per_second": 56.108, |
|
"step": 1232000 |
|
}, |
|
{ |
|
"epoch": 10.11, |
|
"eval_loss": 3.4220006465911865, |
|
"eval_runtime": 115.4764, |
|
"eval_samples_per_second": 894.339, |
|
"eval_steps_per_second": 55.899, |
|
"step": 1240000 |
|
}, |
|
{ |
|
"epoch": 10.18, |
|
"learning_rate": 4.802801634286667e-06, |
|
"loss": 3.3457, |
|
"step": 1248000 |
|
}, |
|
{ |
|
"epoch": 10.18, |
|
"eval_loss": 3.4085357189178467, |
|
"eval_runtime": 115.0154, |
|
"eval_samples_per_second": 897.923, |
|
"eval_steps_per_second": 56.123, |
|
"step": 1248000 |
|
}, |
|
{ |
|
"epoch": 10.24, |
|
"eval_loss": 3.424273729324341, |
|
"eval_runtime": 114.96, |
|
"eval_samples_per_second": 898.356, |
|
"eval_steps_per_second": 56.15, |
|
"step": 1256000 |
|
}, |
|
{ |
|
"epoch": 10.31, |
|
"learning_rate": 4.7360960560326865e-06, |
|
"loss": 3.3278, |
|
"step": 1264000 |
|
}, |
|
{ |
|
"epoch": 10.31, |
|
"eval_loss": 3.4058358669281006, |
|
"eval_runtime": 115.4303, |
|
"eval_samples_per_second": 894.696, |
|
"eval_steps_per_second": 55.921, |
|
"step": 1264000 |
|
}, |
|
{ |
|
"epoch": 10.37, |
|
"eval_loss": 3.403254985809326, |
|
"eval_runtime": 114.783, |
|
"eval_samples_per_second": 899.741, |
|
"eval_steps_per_second": 56.237, |
|
"step": 1272000 |
|
}, |
|
{ |
|
"epoch": 10.44, |
|
"learning_rate": 4.669390477778704e-06, |
|
"loss": 3.325, |
|
"step": 1280000 |
|
}, |
|
{ |
|
"epoch": 10.44, |
|
"eval_loss": 3.3866589069366455, |
|
"eval_runtime": 115.6771, |
|
"eval_samples_per_second": 892.787, |
|
"eval_steps_per_second": 55.802, |
|
"step": 1280000 |
|
}, |
|
{ |
|
"epoch": 10.5, |
|
"eval_loss": 3.3878674507141113, |
|
"eval_runtime": 114.7924, |
|
"eval_samples_per_second": 899.667, |
|
"eval_steps_per_second": 56.232, |
|
"step": 1288000 |
|
}, |
|
{ |
|
"epoch": 10.57, |
|
"learning_rate": 4.602684899524723e-06, |
|
"loss": 3.3248, |
|
"step": 1296000 |
|
}, |
|
{ |
|
"epoch": 10.57, |
|
"eval_loss": 3.380067825317383, |
|
"eval_runtime": 115.2061, |
|
"eval_samples_per_second": 896.437, |
|
"eval_steps_per_second": 56.03, |
|
"step": 1296000 |
|
}, |
|
{ |
|
"epoch": 10.63, |
|
"eval_loss": 3.4026682376861572, |
|
"eval_runtime": 117.5473, |
|
"eval_samples_per_second": 878.583, |
|
"eval_steps_per_second": 54.914, |
|
"step": 1304000 |
|
}, |
|
{ |
|
"epoch": 10.7, |
|
"learning_rate": 4.5359793212707415e-06, |
|
"loss": 3.3217, |
|
"step": 1312000 |
|
}, |
|
{ |
|
"epoch": 10.7, |
|
"eval_loss": 3.3781392574310303, |
|
"eval_runtime": 116.9837, |
|
"eval_samples_per_second": 882.816, |
|
"eval_steps_per_second": 55.179, |
|
"step": 1312000 |
|
}, |
|
{ |
|
"epoch": 10.76, |
|
"eval_loss": 3.38712477684021, |
|
"eval_runtime": 116.1554, |
|
"eval_samples_per_second": 889.111, |
|
"eval_steps_per_second": 55.572, |
|
"step": 1320000 |
|
}, |
|
{ |
|
"epoch": 10.83, |
|
"learning_rate": 4.46927374301676e-06, |
|
"loss": 3.3227, |
|
"step": 1328000 |
|
}, |
|
{ |
|
"epoch": 10.83, |
|
"eval_loss": 3.386099338531494, |
|
"eval_runtime": 116.8959, |
|
"eval_samples_per_second": 883.478, |
|
"eval_steps_per_second": 55.22, |
|
"step": 1328000 |
|
}, |
|
{ |
|
"epoch": 10.89, |
|
"eval_loss": 3.378852605819702, |
|
"eval_runtime": 116.5746, |
|
"eval_samples_per_second": 885.913, |
|
"eval_steps_per_second": 55.372, |
|
"step": 1336000 |
|
}, |
|
{ |
|
"epoch": 10.96, |
|
"learning_rate": 4.402568164762779e-06, |
|
"loss": 3.3259, |
|
"step": 1344000 |
|
}, |
|
{ |
|
"epoch": 10.96, |
|
"eval_loss": 3.386458158493042, |
|
"eval_runtime": 116.5428, |
|
"eval_samples_per_second": 886.155, |
|
"eval_steps_per_second": 55.387, |
|
"step": 1344000 |
|
}, |
|
{ |
|
"epoch": 11.02, |
|
"eval_loss": 3.386268377304077, |
|
"eval_runtime": 115.7105, |
|
"eval_samples_per_second": 892.529, |
|
"eval_steps_per_second": 55.786, |
|
"step": 1352000 |
|
}, |
|
{ |
|
"epoch": 11.09, |
|
"learning_rate": 4.335862586508797e-06, |
|
"loss": 3.3094, |
|
"step": 1360000 |
|
}, |
|
{ |
|
"epoch": 11.09, |
|
"eval_loss": 3.3826916217803955, |
|
"eval_runtime": 118.0068, |
|
"eval_samples_per_second": 875.161, |
|
"eval_steps_per_second": 54.7, |
|
"step": 1360000 |
|
}, |
|
{ |
|
"epoch": 11.15, |
|
"eval_loss": 3.3880295753479004, |
|
"eval_runtime": 115.413, |
|
"eval_samples_per_second": 894.83, |
|
"eval_steps_per_second": 55.93, |
|
"step": 1368000 |
|
}, |
|
{ |
|
"epoch": 11.22, |
|
"learning_rate": 4.269157008254816e-06, |
|
"loss": 3.3128, |
|
"step": 1376000 |
|
}, |
|
{ |
|
"epoch": 11.22, |
|
"eval_loss": 3.365227460861206, |
|
"eval_runtime": 116.1062, |
|
"eval_samples_per_second": 889.487, |
|
"eval_steps_per_second": 55.596, |
|
"step": 1376000 |
|
}, |
|
{ |
|
"epoch": 11.29, |
|
"eval_loss": 3.381347179412842, |
|
"eval_runtime": 119.0899, |
|
"eval_samples_per_second": 867.202, |
|
"eval_steps_per_second": 54.203, |
|
"step": 1384000 |
|
}, |
|
{ |
|
"epoch": 11.35, |
|
"learning_rate": 4.202451430000834e-06, |
|
"loss": 3.3088, |
|
"step": 1392000 |
|
}, |
|
{ |
|
"epoch": 11.35, |
|
"eval_loss": 3.385295867919922, |
|
"eval_runtime": 115.9391, |
|
"eval_samples_per_second": 890.769, |
|
"eval_steps_per_second": 55.676, |
|
"step": 1392000 |
|
}, |
|
{ |
|
"epoch": 11.42, |
|
"eval_loss": 3.3708653450012207, |
|
"eval_runtime": 116.9766, |
|
"eval_samples_per_second": 882.869, |
|
"eval_steps_per_second": 55.182, |
|
"step": 1400000 |
|
}, |
|
{ |
|
"epoch": 11.48, |
|
"learning_rate": 4.135745851746852e-06, |
|
"loss": 3.3067, |
|
"step": 1408000 |
|
}, |
|
{ |
|
"epoch": 11.48, |
|
"eval_loss": 3.3830504417419434, |
|
"eval_runtime": 115.9272, |
|
"eval_samples_per_second": 890.861, |
|
"eval_steps_per_second": 55.682, |
|
"step": 1408000 |
|
}, |
|
{ |
|
"epoch": 11.55, |
|
"eval_loss": 3.370314598083496, |
|
"eval_runtime": 117.2105, |
|
"eval_samples_per_second": 881.107, |
|
"eval_steps_per_second": 55.072, |
|
"step": 1416000 |
|
}, |
|
{ |
|
"epoch": 11.61, |
|
"learning_rate": 4.069040273492872e-06, |
|
"loss": 3.311, |
|
"step": 1424000 |
|
}, |
|
{ |
|
"epoch": 11.61, |
|
"eval_loss": 3.369617223739624, |
|
"eval_runtime": 116.4339, |
|
"eval_samples_per_second": 886.984, |
|
"eval_steps_per_second": 55.439, |
|
"step": 1424000 |
|
}, |
|
{ |
|
"epoch": 11.68, |
|
"eval_loss": 3.3768646717071533, |
|
"eval_runtime": 118.1326, |
|
"eval_samples_per_second": 874.23, |
|
"eval_steps_per_second": 54.642, |
|
"step": 1432000 |
|
}, |
|
{ |
|
"epoch": 11.74, |
|
"learning_rate": 4.0023346952388895e-06, |
|
"loss": 3.3048, |
|
"step": 1440000 |
|
}, |
|
{ |
|
"epoch": 11.74, |
|
"eval_loss": 3.373983860015869, |
|
"eval_runtime": 118.2179, |
|
"eval_samples_per_second": 873.598, |
|
"eval_steps_per_second": 54.603, |
|
"step": 1440000 |
|
}, |
|
{ |
|
"epoch": 11.81, |
|
"eval_loss": 3.3731493949890137, |
|
"eval_runtime": 116.9055, |
|
"eval_samples_per_second": 883.406, |
|
"eval_steps_per_second": 55.216, |
|
"step": 1448000 |
|
}, |
|
{ |
|
"epoch": 11.87, |
|
"learning_rate": 3.935629116984908e-06, |
|
"loss": 3.3055, |
|
"step": 1456000 |
|
}, |
|
{ |
|
"epoch": 11.87, |
|
"eval_loss": 3.365483283996582, |
|
"eval_runtime": 117.1876, |
|
"eval_samples_per_second": 881.279, |
|
"eval_steps_per_second": 55.083, |
|
"step": 1456000 |
|
}, |
|
{ |
|
"epoch": 11.94, |
|
"eval_loss": 3.3697094917297363, |
|
"eval_runtime": 117.1788, |
|
"eval_samples_per_second": 881.346, |
|
"eval_steps_per_second": 55.087, |
|
"step": 1464000 |
|
}, |
|
{ |
|
"epoch": 12.0, |
|
"learning_rate": 3.868923538730927e-06, |
|
"loss": 3.3105, |
|
"step": 1472000 |
|
}, |
|
{ |
|
"epoch": 12.0, |
|
"eval_loss": 3.3741800785064697, |
|
"eval_runtime": 116.7081, |
|
"eval_samples_per_second": 884.9, |
|
"eval_steps_per_second": 55.309, |
|
"step": 1472000 |
|
}, |
|
{ |
|
"epoch": 12.07, |
|
"eval_loss": 3.3614203929901123, |
|
"eval_runtime": 118.1522, |
|
"eval_samples_per_second": 874.084, |
|
"eval_steps_per_second": 54.633, |
|
"step": 1480000 |
|
}, |
|
{ |
|
"epoch": 12.13, |
|
"learning_rate": 3.8022179604769453e-06, |
|
"loss": 3.2977, |
|
"step": 1488000 |
|
}, |
|
{ |
|
"epoch": 12.13, |
|
"eval_loss": 3.370495319366455, |
|
"eval_runtime": 117.0737, |
|
"eval_samples_per_second": 882.137, |
|
"eval_steps_per_second": 55.136, |
|
"step": 1488000 |
|
}, |
|
{ |
|
"epoch": 12.2, |
|
"eval_loss": 3.3746001720428467, |
|
"eval_runtime": 117.4262, |
|
"eval_samples_per_second": 879.489, |
|
"eval_steps_per_second": 54.971, |
|
"step": 1496000 |
|
}, |
|
{ |
|
"epoch": 12.26, |
|
"learning_rate": 3.735512382222964e-06, |
|
"loss": 3.2999, |
|
"step": 1504000 |
|
}, |
|
{ |
|
"epoch": 12.26, |
|
"eval_loss": 3.3690757751464844, |
|
"eval_runtime": 114.9601, |
|
"eval_samples_per_second": 898.355, |
|
"eval_steps_per_second": 56.15, |
|
"step": 1504000 |
|
}, |
|
{ |
|
"epoch": 12.33, |
|
"eval_loss": 3.374530792236328, |
|
"eval_runtime": 115.3595, |
|
"eval_samples_per_second": 895.245, |
|
"eval_steps_per_second": 55.955, |
|
"step": 1512000 |
|
}, |
|
{ |
|
"epoch": 12.39, |
|
"learning_rate": 3.668806803968982e-06, |
|
"loss": 3.2983, |
|
"step": 1520000 |
|
}, |
|
{ |
|
"epoch": 12.39, |
|
"eval_loss": 3.3717198371887207, |
|
"eval_runtime": 114.9666, |
|
"eval_samples_per_second": 898.304, |
|
"eval_steps_per_second": 56.147, |
|
"step": 1520000 |
|
}, |
|
{ |
|
"epoch": 12.46, |
|
"eval_loss": 3.368246555328369, |
|
"eval_runtime": 115.591, |
|
"eval_samples_per_second": 893.452, |
|
"eval_steps_per_second": 55.843, |
|
"step": 1528000 |
|
}, |
|
{ |
|
"epoch": 12.52, |
|
"learning_rate": 3.6021012257150007e-06, |
|
"loss": 3.2957, |
|
"step": 1536000 |
|
}, |
|
{ |
|
"epoch": 12.52, |
|
"eval_loss": 3.369278907775879, |
|
"eval_runtime": 116.1156, |
|
"eval_samples_per_second": 889.416, |
|
"eval_steps_per_second": 55.591, |
|
"step": 1536000 |
|
}, |
|
{ |
|
"epoch": 12.59, |
|
"eval_loss": 3.376443386077881, |
|
"eval_runtime": 114.7209, |
|
"eval_samples_per_second": 900.228, |
|
"eval_steps_per_second": 56.267, |
|
"step": 1544000 |
|
}, |
|
{ |
|
"epoch": 12.65, |
|
"learning_rate": 3.535395647461019e-06, |
|
"loss": 3.293, |
|
"step": 1552000 |
|
}, |
|
{ |
|
"epoch": 12.65, |
|
"eval_loss": 3.3690662384033203, |
|
"eval_runtime": 114.9457, |
|
"eval_samples_per_second": 898.468, |
|
"eval_steps_per_second": 56.157, |
|
"step": 1552000 |
|
}, |
|
{ |
|
"epoch": 12.72, |
|
"eval_loss": 3.380187511444092, |
|
"eval_runtime": 115.2975, |
|
"eval_samples_per_second": 895.726, |
|
"eval_steps_per_second": 55.986, |
|
"step": 1560000 |
|
}, |
|
{ |
|
"epoch": 12.79, |
|
"learning_rate": 3.468690069207038e-06, |
|
"loss": 3.2919, |
|
"step": 1568000 |
|
}, |
|
{ |
|
"epoch": 12.79, |
|
"eval_loss": 3.3626480102539062, |
|
"eval_runtime": 115.0018, |
|
"eval_samples_per_second": 898.03, |
|
"eval_steps_per_second": 56.13, |
|
"step": 1568000 |
|
}, |
|
{ |
|
"epoch": 12.85, |
|
"eval_loss": 3.3604438304901123, |
|
"eval_runtime": 116.2394, |
|
"eval_samples_per_second": 888.468, |
|
"eval_steps_per_second": 55.532, |
|
"step": 1576000 |
|
}, |
|
{ |
|
"epoch": 12.92, |
|
"learning_rate": 3.4019844909530565e-06, |
|
"loss": 3.3023, |
|
"step": 1584000 |
|
}, |
|
{ |
|
"epoch": 12.92, |
|
"eval_loss": 3.374943971633911, |
|
"eval_runtime": 115.4828, |
|
"eval_samples_per_second": 894.289, |
|
"eval_steps_per_second": 55.896, |
|
"step": 1584000 |
|
}, |
|
{ |
|
"epoch": 12.98, |
|
"eval_loss": 3.368828773498535, |
|
"eval_runtime": 114.8626, |
|
"eval_samples_per_second": 899.118, |
|
"eval_steps_per_second": 56.198, |
|
"step": 1592000 |
|
}, |
|
{ |
|
"epoch": 13.05, |
|
"learning_rate": 3.3352789126990747e-06, |
|
"loss": 3.2988, |
|
"step": 1600000 |
|
}, |
|
{ |
|
"epoch": 13.05, |
|
"eval_loss": 3.3666255474090576, |
|
"eval_runtime": 115.7226, |
|
"eval_samples_per_second": 892.436, |
|
"eval_steps_per_second": 55.78, |
|
"step": 1600000 |
|
}, |
|
{ |
|
"epoch": 13.11, |
|
"eval_loss": 3.369481325149536, |
|
"eval_runtime": 116.2492, |
|
"eval_samples_per_second": 888.393, |
|
"eval_steps_per_second": 55.527, |
|
"step": 1608000 |
|
}, |
|
{ |
|
"epoch": 13.18, |
|
"learning_rate": 3.2685733344450933e-06, |
|
"loss": 3.2924, |
|
"step": 1616000 |
|
}, |
|
{ |
|
"epoch": 13.18, |
|
"eval_loss": 3.364980697631836, |
|
"eval_runtime": 114.892, |
|
"eval_samples_per_second": 898.887, |
|
"eval_steps_per_second": 56.183, |
|
"step": 1616000 |
|
}, |
|
{ |
|
"epoch": 13.24, |
|
"eval_loss": 3.3651351928710938, |
|
"eval_runtime": 114.7414, |
|
"eval_samples_per_second": 900.068, |
|
"eval_steps_per_second": 56.257, |
|
"step": 1624000 |
|
}, |
|
{ |
|
"epoch": 13.31, |
|
"learning_rate": 3.2018677561911115e-06, |
|
"loss": 3.2958, |
|
"step": 1632000 |
|
}, |
|
{ |
|
"epoch": 13.31, |
|
"eval_loss": 3.369225263595581, |
|
"eval_runtime": 115.9526, |
|
"eval_samples_per_second": 890.666, |
|
"eval_steps_per_second": 55.669, |
|
"step": 1632000 |
|
}, |
|
{ |
|
"epoch": 13.37, |
|
"eval_loss": 3.3855459690093994, |
|
"eval_runtime": 114.8307, |
|
"eval_samples_per_second": 899.367, |
|
"eval_steps_per_second": 56.213, |
|
"step": 1640000 |
|
}, |
|
{ |
|
"epoch": 13.44, |
|
"learning_rate": 3.1351621779371306e-06, |
|
"loss": 3.2918, |
|
"step": 1648000 |
|
}, |
|
{ |
|
"epoch": 13.44, |
|
"eval_loss": 3.3706300258636475, |
|
"eval_runtime": 115.344, |
|
"eval_samples_per_second": 895.365, |
|
"eval_steps_per_second": 55.963, |
|
"step": 1648000 |
|
}, |
|
{ |
|
"epoch": 13.5, |
|
"eval_loss": 3.3680288791656494, |
|
"eval_runtime": 114.7321, |
|
"eval_samples_per_second": 900.14, |
|
"eval_steps_per_second": 56.261, |
|
"step": 1656000 |
|
}, |
|
{ |
|
"epoch": 13.57, |
|
"learning_rate": 3.0684565996831487e-06, |
|
"loss": 3.2948, |
|
"step": 1664000 |
|
}, |
|
{ |
|
"epoch": 13.57, |
|
"eval_loss": 3.353415012359619, |
|
"eval_runtime": 116.4266, |
|
"eval_samples_per_second": 887.039, |
|
"eval_steps_per_second": 55.443, |
|
"step": 1664000 |
|
}, |
|
{ |
|
"epoch": 13.63, |
|
"eval_loss": 3.369929790496826, |
|
"eval_runtime": 114.8306, |
|
"eval_samples_per_second": 899.369, |
|
"eval_steps_per_second": 56.213, |
|
"step": 1672000 |
|
}, |
|
{ |
|
"epoch": 13.7, |
|
"learning_rate": 3.0017510214291673e-06, |
|
"loss": 3.2996, |
|
"step": 1680000 |
|
}, |
|
{ |
|
"epoch": 13.7, |
|
"eval_loss": 3.3732664585113525, |
|
"eval_runtime": 115.7005, |
|
"eval_samples_per_second": 892.607, |
|
"eval_steps_per_second": 55.791, |
|
"step": 1680000 |
|
}, |
|
{ |
|
"epoch": 13.76, |
|
"eval_loss": 3.3764214515686035, |
|
"eval_runtime": 115.4981, |
|
"eval_samples_per_second": 894.171, |
|
"eval_steps_per_second": 55.888, |
|
"step": 1688000 |
|
}, |
|
{ |
|
"epoch": 13.83, |
|
"learning_rate": 2.9350454431751855e-06, |
|
"loss": 3.2999, |
|
"step": 1696000 |
|
}, |
|
{ |
|
"epoch": 13.83, |
|
"eval_loss": 3.3792943954467773, |
|
"eval_runtime": 116.0913, |
|
"eval_samples_per_second": 889.602, |
|
"eval_steps_per_second": 55.603, |
|
"step": 1696000 |
|
}, |
|
{ |
|
"epoch": 13.89, |
|
"eval_loss": 3.368272304534912, |
|
"eval_runtime": 116.0753, |
|
"eval_samples_per_second": 889.724, |
|
"eval_steps_per_second": 55.61, |
|
"step": 1704000 |
|
}, |
|
{ |
|
"epoch": 13.96, |
|
"learning_rate": 2.868339864921204e-06, |
|
"loss": 3.291, |
|
"step": 1712000 |
|
}, |
|
{ |
|
"epoch": 13.96, |
|
"eval_loss": 3.3653597831726074, |
|
"eval_runtime": 115.5031, |
|
"eval_samples_per_second": 894.132, |
|
"eval_steps_per_second": 55.886, |
|
"step": 1712000 |
|
}, |
|
{ |
|
"epoch": 14.02, |
|
"eval_loss": 3.372131109237671, |
|
"eval_runtime": 115.6199, |
|
"eval_samples_per_second": 893.228, |
|
"eval_steps_per_second": 55.829, |
|
"step": 1720000 |
|
}, |
|
{ |
|
"epoch": 14.09, |
|
"learning_rate": 2.801634286667223e-06, |
|
"loss": 3.2952, |
|
"step": 1728000 |
|
}, |
|
{ |
|
"epoch": 14.09, |
|
"eval_loss": 3.367438316345215, |
|
"eval_runtime": 115.0009, |
|
"eval_samples_per_second": 898.037, |
|
"eval_steps_per_second": 56.13, |
|
"step": 1728000 |
|
}, |
|
{ |
|
"epoch": 14.16, |
|
"eval_loss": 3.3762009143829346, |
|
"eval_runtime": 115.4616, |
|
"eval_samples_per_second": 894.453, |
|
"eval_steps_per_second": 55.906, |
|
"step": 1736000 |
|
}, |
|
{ |
|
"epoch": 14.22, |
|
"learning_rate": 2.7349287084132413e-06, |
|
"loss": 3.2866, |
|
"step": 1744000 |
|
}, |
|
{ |
|
"epoch": 14.22, |
|
"eval_loss": 3.3699355125427246, |
|
"eval_runtime": 114.9346, |
|
"eval_samples_per_second": 898.554, |
|
"eval_steps_per_second": 56.162, |
|
"step": 1744000 |
|
}, |
|
{ |
|
"epoch": 14.29, |
|
"eval_loss": 3.3690149784088135, |
|
"eval_runtime": 115.9293, |
|
"eval_samples_per_second": 890.845, |
|
"eval_steps_per_second": 55.681, |
|
"step": 1752000 |
|
}, |
|
{ |
|
"epoch": 14.35, |
|
"learning_rate": 2.66822313015926e-06, |
|
"loss": 3.2825, |
|
"step": 1760000 |
|
}, |
|
{ |
|
"epoch": 14.35, |
|
"eval_loss": 3.365321636199951, |
|
"eval_runtime": 114.9037, |
|
"eval_samples_per_second": 898.796, |
|
"eval_steps_per_second": 56.177, |
|
"step": 1760000 |
|
}, |
|
{ |
|
"epoch": 14.42, |
|
"eval_loss": 3.368727207183838, |
|
"eval_runtime": 115.3436, |
|
"eval_samples_per_second": 895.369, |
|
"eval_steps_per_second": 55.963, |
|
"step": 1768000 |
|
}, |
|
{ |
|
"epoch": 14.48, |
|
"learning_rate": 2.601517551905278e-06, |
|
"loss": 3.2825, |
|
"step": 1776000 |
|
}, |
|
{ |
|
"epoch": 14.48, |
|
"eval_loss": 3.3617701530456543, |
|
"eval_runtime": 115.7714, |
|
"eval_samples_per_second": 892.06, |
|
"eval_steps_per_second": 55.756, |
|
"step": 1776000 |
|
}, |
|
{ |
|
"epoch": 14.55, |
|
"eval_loss": 3.3609282970428467, |
|
"eval_runtime": 114.879, |
|
"eval_samples_per_second": 898.989, |
|
"eval_steps_per_second": 56.19, |
|
"step": 1784000 |
|
}, |
|
{ |
|
"epoch": 14.61, |
|
"learning_rate": 2.5348119736512967e-06, |
|
"loss": 3.2744, |
|
"step": 1792000 |
|
}, |
|
{ |
|
"epoch": 14.61, |
|
"eval_loss": 3.3552184104919434, |
|
"eval_runtime": 114.6789, |
|
"eval_samples_per_second": 900.558, |
|
"eval_steps_per_second": 56.288, |
|
"step": 1792000 |
|
}, |
|
{ |
|
"epoch": 14.68, |
|
"eval_loss": 3.3549087047576904, |
|
"eval_runtime": 116.3921, |
|
"eval_samples_per_second": 887.303, |
|
"eval_steps_per_second": 55.459, |
|
"step": 1800000 |
|
}, |
|
{ |
|
"epoch": 14.74, |
|
"learning_rate": 2.4681063953973154e-06, |
|
"loss": 3.2811, |
|
"step": 1808000 |
|
}, |
|
{ |
|
"epoch": 14.74, |
|
"eval_loss": 3.3504152297973633, |
|
"eval_runtime": 115.0014, |
|
"eval_samples_per_second": 898.032, |
|
"eval_steps_per_second": 56.13, |
|
"step": 1808000 |
|
}, |
|
{ |
|
"epoch": 14.81, |
|
"eval_loss": 3.3574647903442383, |
|
"eval_runtime": 115.1236, |
|
"eval_samples_per_second": 897.079, |
|
"eval_steps_per_second": 56.07, |
|
"step": 1816000 |
|
}, |
|
{ |
|
"epoch": 14.87, |
|
"learning_rate": 2.4014008171433335e-06, |
|
"loss": 3.2672, |
|
"step": 1824000 |
|
}, |
|
{ |
|
"epoch": 14.87, |
|
"eval_loss": 3.3587796688079834, |
|
"eval_runtime": 116.6416, |
|
"eval_samples_per_second": 885.404, |
|
"eval_steps_per_second": 55.34, |
|
"step": 1824000 |
|
}, |
|
{ |
|
"epoch": 14.94, |
|
"eval_loss": 3.3559627532958984, |
|
"eval_runtime": 116.2457, |
|
"eval_samples_per_second": 888.42, |
|
"eval_steps_per_second": 55.529, |
|
"step": 1832000 |
|
}, |
|
{ |
|
"epoch": 15.0, |
|
"learning_rate": 2.334695238889352e-06, |
|
"loss": 3.2919, |
|
"step": 1840000 |
|
}, |
|
{ |
|
"epoch": 15.0, |
|
"eval_loss": 3.359805107116699, |
|
"eval_runtime": 115.5497, |
|
"eval_samples_per_second": 893.771, |
|
"eval_steps_per_second": 55.863, |
|
"step": 1840000 |
|
}, |
|
{ |
|
"epoch": 15.07, |
|
"eval_loss": 3.344524383544922, |
|
"eval_runtime": 115.5133, |
|
"eval_samples_per_second": 894.053, |
|
"eval_steps_per_second": 55.881, |
|
"step": 1848000 |
|
}, |
|
{ |
|
"epoch": 15.13, |
|
"learning_rate": 2.2679896606353707e-06, |
|
"loss": 3.2724, |
|
"step": 1856000 |
|
}, |
|
{ |
|
"epoch": 15.13, |
|
"eval_loss": 3.3516576290130615, |
|
"eval_runtime": 115.2664, |
|
"eval_samples_per_second": 895.968, |
|
"eval_steps_per_second": 56.001, |
|
"step": 1856000 |
|
}, |
|
{ |
|
"epoch": 15.2, |
|
"eval_loss": 3.359280824661255, |
|
"eval_runtime": 116.0103, |
|
"eval_samples_per_second": 890.223, |
|
"eval_steps_per_second": 55.642, |
|
"step": 1864000 |
|
}, |
|
{ |
|
"epoch": 15.26, |
|
"learning_rate": 2.2012840823813894e-06, |
|
"loss": 3.277, |
|
"step": 1872000 |
|
}, |
|
{ |
|
"epoch": 15.26, |
|
"eval_loss": 3.3597874641418457, |
|
"eval_runtime": 114.9804, |
|
"eval_samples_per_second": 898.197, |
|
"eval_steps_per_second": 56.14, |
|
"step": 1872000 |
|
}, |
|
{ |
|
"epoch": 15.33, |
|
"eval_loss": 3.345801591873169, |
|
"eval_runtime": 116.1901, |
|
"eval_samples_per_second": 888.845, |
|
"eval_steps_per_second": 55.555, |
|
"step": 1880000 |
|
}, |
|
{ |
|
"epoch": 15.39, |
|
"learning_rate": 2.134578504127408e-06, |
|
"loss": 3.2842, |
|
"step": 1888000 |
|
}, |
|
{ |
|
"epoch": 15.39, |
|
"eval_loss": 3.3583106994628906, |
|
"eval_runtime": 114.8266, |
|
"eval_samples_per_second": 899.399, |
|
"eval_steps_per_second": 56.215, |
|
"step": 1888000 |
|
}, |
|
{ |
|
"epoch": 15.46, |
|
"eval_loss": 3.3447749614715576, |
|
"eval_runtime": 114.9801, |
|
"eval_samples_per_second": 898.199, |
|
"eval_steps_per_second": 56.14, |
|
"step": 1896000 |
|
}, |
|
{ |
|
"epoch": 15.53, |
|
"learning_rate": 2.067872925873426e-06, |
|
"loss": 3.2758, |
|
"step": 1904000 |
|
}, |
|
{ |
|
"epoch": 15.53, |
|
"eval_loss": 3.3593051433563232, |
|
"eval_runtime": 114.9092, |
|
"eval_samples_per_second": 898.753, |
|
"eval_steps_per_second": 56.175, |
|
"step": 1904000 |
|
}, |
|
{ |
|
"epoch": 15.59, |
|
"eval_loss": 3.3551743030548096, |
|
"eval_runtime": 115.5179, |
|
"eval_samples_per_second": 894.017, |
|
"eval_steps_per_second": 55.879, |
|
"step": 1912000 |
|
}, |
|
{ |
|
"epoch": 15.66, |
|
"learning_rate": 2.0011673476194448e-06, |
|
"loss": 3.2684, |
|
"step": 1920000 |
|
}, |
|
{ |
|
"epoch": 15.66, |
|
"eval_loss": 3.371454954147339, |
|
"eval_runtime": 114.8944, |
|
"eval_samples_per_second": 898.869, |
|
"eval_steps_per_second": 56.182, |
|
"step": 1920000 |
|
}, |
|
{ |
|
"epoch": 15.72, |
|
"eval_loss": 3.3543806076049805, |
|
"eval_runtime": 115.4862, |
|
"eval_samples_per_second": 894.263, |
|
"eval_steps_per_second": 55.894, |
|
"step": 1928000 |
|
}, |
|
{ |
|
"epoch": 15.79, |
|
"learning_rate": 1.9344617693654634e-06, |
|
"loss": 3.2924, |
|
"step": 1936000 |
|
}, |
|
{ |
|
"epoch": 15.79, |
|
"eval_loss": 3.3514981269836426, |
|
"eval_runtime": 115.0356, |
|
"eval_samples_per_second": 897.766, |
|
"eval_steps_per_second": 56.113, |
|
"step": 1936000 |
|
}, |
|
{ |
|
"epoch": 15.85, |
|
"eval_loss": 3.36460018157959, |
|
"eval_runtime": 115.4242, |
|
"eval_samples_per_second": 894.743, |
|
"eval_steps_per_second": 55.924, |
|
"step": 1944000 |
|
}, |
|
{ |
|
"epoch": 15.92, |
|
"learning_rate": 1.867756191111482e-06, |
|
"loss": 3.2673, |
|
"step": 1952000 |
|
}, |
|
{ |
|
"epoch": 15.92, |
|
"eval_loss": 3.353806495666504, |
|
"eval_runtime": 115.3905, |
|
"eval_samples_per_second": 895.004, |
|
"eval_steps_per_second": 55.94, |
|
"step": 1952000 |
|
}, |
|
{ |
|
"epoch": 15.98, |
|
"eval_loss": 3.3436896800994873, |
|
"eval_runtime": 114.7945, |
|
"eval_samples_per_second": 899.651, |
|
"eval_steps_per_second": 56.231, |
|
"step": 1960000 |
|
}, |
|
{ |
|
"epoch": 16.05, |
|
"learning_rate": 1.8010506128575004e-06, |
|
"loss": 3.2833, |
|
"step": 1968000 |
|
}, |
|
{ |
|
"epoch": 16.05, |
|
"eval_loss": 3.3442821502685547, |
|
"eval_runtime": 116.1629, |
|
"eval_samples_per_second": 889.053, |
|
"eval_steps_per_second": 55.569, |
|
"step": 1968000 |
|
}, |
|
{ |
|
"epoch": 16.11, |
|
"eval_loss": 3.361924886703491, |
|
"eval_runtime": 116.4426, |
|
"eval_samples_per_second": 886.917, |
|
"eval_steps_per_second": 55.435, |
|
"step": 1976000 |
|
}, |
|
{ |
|
"epoch": 16.18, |
|
"learning_rate": 1.734345034603519e-06, |
|
"loss": 3.2636, |
|
"step": 1984000 |
|
}, |
|
{ |
|
"epoch": 16.18, |
|
"eval_loss": 3.3510515689849854, |
|
"eval_runtime": 115.8529, |
|
"eval_samples_per_second": 891.432, |
|
"eval_steps_per_second": 55.717, |
|
"step": 1984000 |
|
}, |
|
{ |
|
"epoch": 16.24, |
|
"eval_loss": 3.3447539806365967, |
|
"eval_runtime": 114.926, |
|
"eval_samples_per_second": 898.622, |
|
"eval_steps_per_second": 56.167, |
|
"step": 1992000 |
|
}, |
|
{ |
|
"epoch": 16.31, |
|
"learning_rate": 1.6676394563495374e-06, |
|
"loss": 3.2753, |
|
"step": 2000000 |
|
}, |
|
{ |
|
"epoch": 16.31, |
|
"eval_loss": 3.355980396270752, |
|
"eval_runtime": 115.4649, |
|
"eval_samples_per_second": 894.427, |
|
"eval_steps_per_second": 55.904, |
|
"step": 2000000 |
|
}, |
|
{ |
|
"epoch": 16.37, |
|
"eval_loss": 3.3524882793426514, |
|
"eval_runtime": 118.2786, |
|
"eval_samples_per_second": 873.151, |
|
"eval_steps_per_second": 54.575, |
|
"step": 2008000 |
|
}, |
|
{ |
|
"epoch": 16.44, |
|
"learning_rate": 1.6009338780955558e-06, |
|
"loss": 3.2701, |
|
"step": 2016000 |
|
}, |
|
{ |
|
"epoch": 16.44, |
|
"eval_loss": 3.355792760848999, |
|
"eval_runtime": 115.0046, |
|
"eval_samples_per_second": 898.008, |
|
"eval_steps_per_second": 56.128, |
|
"step": 2016000 |
|
}, |
|
{ |
|
"epoch": 16.5, |
|
"eval_loss": 3.3558590412139893, |
|
"eval_runtime": 115.5093, |
|
"eval_samples_per_second": 894.084, |
|
"eval_steps_per_second": 55.883, |
|
"step": 2024000 |
|
}, |
|
{ |
|
"epoch": 16.57, |
|
"learning_rate": 1.5342282998415744e-06, |
|
"loss": 3.2761, |
|
"step": 2032000 |
|
}, |
|
{ |
|
"epoch": 16.57, |
|
"eval_loss": 3.3439648151397705, |
|
"eval_runtime": 114.8803, |
|
"eval_samples_per_second": 898.979, |
|
"eval_steps_per_second": 56.189, |
|
"step": 2032000 |
|
}, |
|
{ |
|
"epoch": 16.63, |
|
"eval_loss": 3.3505825996398926, |
|
"eval_runtime": 115.5177, |
|
"eval_samples_per_second": 894.019, |
|
"eval_steps_per_second": 55.879, |
|
"step": 2040000 |
|
}, |
|
{ |
|
"epoch": 16.7, |
|
"learning_rate": 1.4675227215875928e-06, |
|
"loss": 3.2677, |
|
"step": 2048000 |
|
}, |
|
{ |
|
"epoch": 16.7, |
|
"eval_loss": 3.3473587036132812, |
|
"eval_runtime": 115.2604, |
|
"eval_samples_per_second": 896.014, |
|
"eval_steps_per_second": 56.004, |
|
"step": 2048000 |
|
}, |
|
{ |
|
"epoch": 16.76, |
|
"eval_loss": 3.3614845275878906, |
|
"eval_runtime": 114.7851, |
|
"eval_samples_per_second": 899.724, |
|
"eval_steps_per_second": 56.236, |
|
"step": 2056000 |
|
}, |
|
{ |
|
"epoch": 16.83, |
|
"learning_rate": 1.4008171433336116e-06, |
|
"loss": 3.2614, |
|
"step": 2064000 |
|
}, |
|
{ |
|
"epoch": 16.83, |
|
"eval_loss": 3.350660562515259, |
|
"eval_runtime": 116.1258, |
|
"eval_samples_per_second": 889.337, |
|
"eval_steps_per_second": 55.586, |
|
"step": 2064000 |
|
}, |
|
{ |
|
"epoch": 16.89, |
|
"eval_loss": 3.34436297416687, |
|
"eval_runtime": 114.7641, |
|
"eval_samples_per_second": 899.89, |
|
"eval_steps_per_second": 56.246, |
|
"step": 2072000 |
|
}, |
|
{ |
|
"epoch": 16.96, |
|
"learning_rate": 1.33411156507963e-06, |
|
"loss": 3.2608, |
|
"step": 2080000 |
|
}, |
|
{ |
|
"epoch": 16.96, |
|
"eval_loss": 3.352665901184082, |
|
"eval_runtime": 114.9595, |
|
"eval_samples_per_second": 898.36, |
|
"eval_steps_per_second": 56.15, |
|
"step": 2080000 |
|
}, |
|
{ |
|
"epoch": 17.03, |
|
"eval_loss": 3.3398256301879883, |
|
"eval_runtime": 114.8716, |
|
"eval_samples_per_second": 899.047, |
|
"eval_steps_per_second": 56.193, |
|
"step": 2088000 |
|
}, |
|
{ |
|
"epoch": 17.09, |
|
"learning_rate": 1.2674059868256484e-06, |
|
"loss": 3.2643, |
|
"step": 2096000 |
|
}, |
|
{ |
|
"epoch": 17.09, |
|
"eval_loss": 3.3497581481933594, |
|
"eval_runtime": 115.3741, |
|
"eval_samples_per_second": 895.132, |
|
"eval_steps_per_second": 55.948, |
|
"step": 2096000 |
|
}, |
|
{ |
|
"epoch": 17.16, |
|
"eval_loss": 3.3348639011383057, |
|
"eval_runtime": 114.8223, |
|
"eval_samples_per_second": 899.434, |
|
"eval_steps_per_second": 56.217, |
|
"step": 2104000 |
|
}, |
|
{ |
|
"epoch": 17.22, |
|
"learning_rate": 1.2007004085716668e-06, |
|
"loss": 3.2721, |
|
"step": 2112000 |
|
}, |
|
{ |
|
"epoch": 17.22, |
|
"eval_loss": 3.356008291244507, |
|
"eval_runtime": 115.5116, |
|
"eval_samples_per_second": 894.066, |
|
"eval_steps_per_second": 55.882, |
|
"step": 2112000 |
|
}, |
|
{ |
|
"epoch": 17.29, |
|
"eval_loss": 3.3421435356140137, |
|
"eval_runtime": 115.5912, |
|
"eval_samples_per_second": 893.45, |
|
"eval_steps_per_second": 55.843, |
|
"step": 2120000 |
|
}, |
|
{ |
|
"epoch": 17.35, |
|
"learning_rate": 1.1339948303176854e-06, |
|
"loss": 3.266, |
|
"step": 2128000 |
|
}, |
|
{ |
|
"epoch": 17.35, |
|
"eval_loss": 3.342872142791748, |
|
"eval_runtime": 115.0319, |
|
"eval_samples_per_second": 897.794, |
|
"eval_steps_per_second": 56.115, |
|
"step": 2128000 |
|
}, |
|
{ |
|
"epoch": 17.42, |
|
"eval_loss": 3.337078809738159, |
|
"eval_runtime": 114.7057, |
|
"eval_samples_per_second": 900.347, |
|
"eval_steps_per_second": 56.274, |
|
"step": 2136000 |
|
}, |
|
{ |
|
"epoch": 17.48, |
|
"learning_rate": 1.067289252063704e-06, |
|
"loss": 3.2551, |
|
"step": 2144000 |
|
}, |
|
{ |
|
"epoch": 17.48, |
|
"eval_loss": 3.340388774871826, |
|
"eval_runtime": 115.5719, |
|
"eval_samples_per_second": 893.599, |
|
"eval_steps_per_second": 55.853, |
|
"step": 2144000 |
|
}, |
|
{ |
|
"epoch": 17.55, |
|
"eval_loss": 3.349374771118164, |
|
"eval_runtime": 116.2218, |
|
"eval_samples_per_second": 888.603, |
|
"eval_steps_per_second": 55.54, |
|
"step": 2152000 |
|
}, |
|
{ |
|
"epoch": 17.61, |
|
"learning_rate": 1.0005836738097224e-06, |
|
"loss": 3.26, |
|
"step": 2160000 |
|
}, |
|
{ |
|
"epoch": 17.61, |
|
"eval_loss": 3.3389031887054443, |
|
"eval_runtime": 115.0165, |
|
"eval_samples_per_second": 897.915, |
|
"eval_steps_per_second": 56.122, |
|
"step": 2160000 |
|
}, |
|
{ |
|
"epoch": 17.68, |
|
"eval_loss": 3.345613718032837, |
|
"eval_runtime": 114.2481, |
|
"eval_samples_per_second": 903.954, |
|
"eval_steps_per_second": 56.5, |
|
"step": 2168000 |
|
}, |
|
{ |
|
"epoch": 17.74, |
|
"learning_rate": 9.33878095555741e-07, |
|
"loss": 3.2528, |
|
"step": 2176000 |
|
}, |
|
{ |
|
"epoch": 17.74, |
|
"eval_loss": 3.3248987197875977, |
|
"eval_runtime": 115.0558, |
|
"eval_samples_per_second": 897.608, |
|
"eval_steps_per_second": 56.103, |
|
"step": 2176000 |
|
}, |
|
{ |
|
"epoch": 17.81, |
|
"eval_loss": 3.3452157974243164, |
|
"eval_runtime": 116.2164, |
|
"eval_samples_per_second": 888.644, |
|
"eval_steps_per_second": 55.543, |
|
"step": 2184000 |
|
}, |
|
{ |
|
"epoch": 17.87, |
|
"learning_rate": 8.671725173017595e-07, |
|
"loss": 3.2602, |
|
"step": 2192000 |
|
}, |
|
{ |
|
"epoch": 17.87, |
|
"eval_loss": 3.33760929107666, |
|
"eval_runtime": 116.1157, |
|
"eval_samples_per_second": 889.414, |
|
"eval_steps_per_second": 55.591, |
|
"step": 2192000 |
|
}, |
|
{ |
|
"epoch": 17.94, |
|
"eval_loss": 3.351128101348877, |
|
"eval_runtime": 114.6575, |
|
"eval_samples_per_second": 900.726, |
|
"eval_steps_per_second": 56.298, |
|
"step": 2200000 |
|
}, |
|
{ |
|
"epoch": 18.0, |
|
"learning_rate": 8.004669390477779e-07, |
|
"loss": 3.2492, |
|
"step": 2208000 |
|
}, |
|
{ |
|
"epoch": 18.0, |
|
"eval_loss": 3.347473621368408, |
|
"eval_runtime": 115.2092, |
|
"eval_samples_per_second": 896.413, |
|
"eval_steps_per_second": 56.029, |
|
"step": 2208000 |
|
}, |
|
{ |
|
"epoch": 18.07, |
|
"eval_loss": 3.349674940109253, |
|
"eval_runtime": 115.6497, |
|
"eval_samples_per_second": 892.998, |
|
"eval_steps_per_second": 55.815, |
|
"step": 2216000 |
|
}, |
|
{ |
|
"epoch": 18.13, |
|
"learning_rate": 7.337613607937964e-07, |
|
"loss": 3.2469, |
|
"step": 2224000 |
|
}, |
|
{ |
|
"epoch": 18.13, |
|
"eval_loss": 3.3378491401672363, |
|
"eval_runtime": 114.9296, |
|
"eval_samples_per_second": 898.594, |
|
"eval_steps_per_second": 56.165, |
|
"step": 2224000 |
|
}, |
|
{ |
|
"epoch": 18.2, |
|
"eval_loss": 3.332571029663086, |
|
"eval_runtime": 115.4244, |
|
"eval_samples_per_second": 894.742, |
|
"eval_steps_per_second": 55.924, |
|
"step": 2232000 |
|
}, |
|
{ |
|
"epoch": 18.26, |
|
"learning_rate": 6.67055782539815e-07, |
|
"loss": 3.2589, |
|
"step": 2240000 |
|
}, |
|
{ |
|
"epoch": 18.26, |
|
"eval_loss": 3.3277342319488525, |
|
"eval_runtime": 114.9762, |
|
"eval_samples_per_second": 898.229, |
|
"eval_steps_per_second": 56.142, |
|
"step": 2240000 |
|
}, |
|
{ |
|
"epoch": 18.33, |
|
"eval_loss": 3.3456978797912598, |
|
"eval_runtime": 116.0675, |
|
"eval_samples_per_second": 889.784, |
|
"eval_steps_per_second": 55.614, |
|
"step": 2248000 |
|
}, |
|
{ |
|
"epoch": 18.4, |
|
"learning_rate": 6.003502042858334e-07, |
|
"loss": 3.2548, |
|
"step": 2256000 |
|
}, |
|
{ |
|
"epoch": 18.4, |
|
"eval_loss": 3.334270715713501, |
|
"eval_runtime": 115.7666, |
|
"eval_samples_per_second": 892.097, |
|
"eval_steps_per_second": 55.759, |
|
"step": 2256000 |
|
}, |
|
{ |
|
"epoch": 18.46, |
|
"eval_loss": 3.3362197875976562, |
|
"eval_runtime": 115.5031, |
|
"eval_samples_per_second": 894.132, |
|
"eval_steps_per_second": 55.886, |
|
"step": 2264000 |
|
}, |
|
{ |
|
"epoch": 18.53, |
|
"learning_rate": 5.33644626031852e-07, |
|
"loss": 3.2589, |
|
"step": 2272000 |
|
}, |
|
{ |
|
"epoch": 18.53, |
|
"eval_loss": 3.343080997467041, |
|
"eval_runtime": 115.3187, |
|
"eval_samples_per_second": 895.561, |
|
"eval_steps_per_second": 55.975, |
|
"step": 2272000 |
|
}, |
|
{ |
|
"epoch": 18.59, |
|
"eval_loss": 3.3428003787994385, |
|
"eval_runtime": 115.3186, |
|
"eval_samples_per_second": 895.563, |
|
"eval_steps_per_second": 55.975, |
|
"step": 2280000 |
|
}, |
|
{ |
|
"epoch": 18.66, |
|
"learning_rate": 4.669390477778705e-07, |
|
"loss": 3.2674, |
|
"step": 2288000 |
|
}, |
|
{ |
|
"epoch": 18.66, |
|
"eval_loss": 3.3400795459747314, |
|
"eval_runtime": 114.7905, |
|
"eval_samples_per_second": 899.682, |
|
"eval_steps_per_second": 56.233, |
|
"step": 2288000 |
|
}, |
|
{ |
|
"epoch": 18.72, |
|
"eval_loss": 3.337498903274536, |
|
"eval_runtime": 114.9489, |
|
"eval_samples_per_second": 898.443, |
|
"eval_steps_per_second": 56.155, |
|
"step": 2296000 |
|
}, |
|
{ |
|
"epoch": 18.79, |
|
"learning_rate": 4.0023346952388894e-07, |
|
"loss": 3.2561, |
|
"step": 2304000 |
|
}, |
|
{ |
|
"epoch": 18.79, |
|
"eval_loss": 3.3333868980407715, |
|
"eval_runtime": 114.8393, |
|
"eval_samples_per_second": 899.3, |
|
"eval_steps_per_second": 56.209, |
|
"step": 2304000 |
|
}, |
|
{ |
|
"epoch": 18.85, |
|
"eval_loss": 3.3320717811584473, |
|
"eval_runtime": 115.0159, |
|
"eval_samples_per_second": 897.919, |
|
"eval_steps_per_second": 56.123, |
|
"step": 2312000 |
|
}, |
|
{ |
|
"epoch": 18.92, |
|
"learning_rate": 3.335278912699075e-07, |
|
"loss": 3.2452, |
|
"step": 2320000 |
|
}, |
|
{ |
|
"epoch": 18.92, |
|
"eval_loss": 3.3445632457733154, |
|
"eval_runtime": 114.9617, |
|
"eval_samples_per_second": 898.342, |
|
"eval_steps_per_second": 56.149, |
|
"step": 2320000 |
|
}, |
|
{ |
|
"epoch": 18.98, |
|
"eval_loss": 3.3525032997131348, |
|
"eval_runtime": 116.2145, |
|
"eval_samples_per_second": 888.659, |
|
"eval_steps_per_second": 55.544, |
|
"step": 2328000 |
|
}, |
|
{ |
|
"epoch": 19.05, |
|
"learning_rate": 2.66822313015926e-07, |
|
"loss": 3.259, |
|
"step": 2336000 |
|
}, |
|
{ |
|
"epoch": 19.05, |
|
"eval_loss": 3.331772804260254, |
|
"eval_runtime": 115.4929, |
|
"eval_samples_per_second": 894.211, |
|
"eval_steps_per_second": 55.891, |
|
"step": 2336000 |
|
}, |
|
{ |
|
"epoch": 19.11, |
|
"eval_loss": 3.3451852798461914, |
|
"eval_runtime": 115.1546, |
|
"eval_samples_per_second": 896.838, |
|
"eval_steps_per_second": 56.055, |
|
"step": 2344000 |
|
}, |
|
{ |
|
"epoch": 19.18, |
|
"learning_rate": 2.0011673476194447e-07, |
|
"loss": 3.2494, |
|
"step": 2352000 |
|
}, |
|
{ |
|
"epoch": 19.18, |
|
"eval_loss": 3.335479497909546, |
|
"eval_runtime": 114.4583, |
|
"eval_samples_per_second": 902.293, |
|
"eval_steps_per_second": 56.396, |
|
"step": 2352000 |
|
}, |
|
{ |
|
"epoch": 19.24, |
|
"eval_loss": 3.3322434425354004, |
|
"eval_runtime": 116.1476, |
|
"eval_samples_per_second": 889.17, |
|
"eval_steps_per_second": 55.576, |
|
"step": 2360000 |
|
}, |
|
{ |
|
"epoch": 19.31, |
|
"learning_rate": 1.33411156507963e-07, |
|
"loss": 3.2558, |
|
"step": 2368000 |
|
}, |
|
{ |
|
"epoch": 19.31, |
|
"eval_loss": 3.325453281402588, |
|
"eval_runtime": 114.8662, |
|
"eval_samples_per_second": 899.089, |
|
"eval_steps_per_second": 56.196, |
|
"step": 2368000 |
|
}, |
|
{ |
|
"epoch": 19.37, |
|
"eval_loss": 3.3329989910125732, |
|
"eval_runtime": 117.9929, |
|
"eval_samples_per_second": 875.265, |
|
"eval_steps_per_second": 54.707, |
|
"step": 2376000 |
|
}, |
|
{ |
|
"epoch": 19.44, |
|
"learning_rate": 6.67055782539815e-08, |
|
"loss": 3.2436, |
|
"step": 2384000 |
|
}, |
|
{ |
|
"epoch": 19.44, |
|
"eval_loss": 3.3357789516448975, |
|
"eval_runtime": 117.7235, |
|
"eval_samples_per_second": 877.268, |
|
"eval_steps_per_second": 54.832, |
|
"step": 2384000 |
|
}, |
|
{ |
|
"epoch": 19.5, |
|
"eval_loss": 3.3287487030029297, |
|
"eval_runtime": 115.6745, |
|
"eval_samples_per_second": 892.807, |
|
"eval_steps_per_second": 55.803, |
|
"step": 2392000 |
|
}, |
|
{ |
|
"epoch": 19.57, |
|
"learning_rate": 0.0, |
|
"loss": 3.2545, |
|
"step": 2400000 |
|
}, |
|
{ |
|
"epoch": 19.57, |
|
"eval_loss": 3.3321266174316406, |
|
"eval_runtime": 115.8716, |
|
"eval_samples_per_second": 891.289, |
|
"eval_steps_per_second": 55.708, |
|
"step": 2400000 |
|
}, |
|
{ |
|
"epoch": 19.57, |
|
"step": 2400000, |
|
"total_flos": 6.9600759359113e+17, |
|
"train_loss": 3.268406458333333, |
|
"train_runtime": 194422.9949, |
|
"train_samples_per_second": 197.508, |
|
"train_steps_per_second": 12.344 |
|
} |
|
], |
|
"logging_steps": 16000, |
|
"max_steps": 2400000, |
|
"num_train_epochs": 20, |
|
"save_steps": 32000, |
|
"total_flos": 6.9600759359113e+17, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|