|
{ |
|
"best_metric": 2.7100777626037598, |
|
"best_model_checkpoint": "./model_tweets_2020_Q2/checkpoint-192000", |
|
"epoch": 19.569471624266146, |
|
"eval_steps": 8000, |
|
"global_step": 2400000, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.07, |
|
"eval_loss": 2.977957248687744, |
|
"eval_runtime": 125.5213, |
|
"eval_samples_per_second": 822.769, |
|
"eval_steps_per_second": 51.426, |
|
"step": 8000 |
|
}, |
|
{ |
|
"epoch": 0.13, |
|
"learning_rate": 9.939131159843243e-06, |
|
"loss": 3.1296, |
|
"step": 16000 |
|
}, |
|
{ |
|
"epoch": 0.13, |
|
"eval_loss": 2.894831418991089, |
|
"eval_runtime": 126.0129, |
|
"eval_samples_per_second": 819.559, |
|
"eval_steps_per_second": 51.225, |
|
"step": 16000 |
|
}, |
|
{ |
|
"epoch": 0.2, |
|
"eval_loss": 2.8589611053466797, |
|
"eval_runtime": 125.8909, |
|
"eval_samples_per_second": 820.353, |
|
"eval_steps_per_second": 51.275, |
|
"step": 24000 |
|
}, |
|
{ |
|
"epoch": 0.26, |
|
"learning_rate": 9.872425581589261e-06, |
|
"loss": 2.9018, |
|
"step": 32000 |
|
}, |
|
{ |
|
"epoch": 0.26, |
|
"eval_loss": 2.8033480644226074, |
|
"eval_runtime": 125.7264, |
|
"eval_samples_per_second": 821.427, |
|
"eval_steps_per_second": 51.342, |
|
"step": 32000 |
|
}, |
|
{ |
|
"epoch": 0.33, |
|
"eval_loss": 2.7938032150268555, |
|
"eval_runtime": 125.7192, |
|
"eval_samples_per_second": 821.474, |
|
"eval_steps_per_second": 51.345, |
|
"step": 40000 |
|
}, |
|
{ |
|
"epoch": 0.39, |
|
"learning_rate": 9.80572000333528e-06, |
|
"loss": 2.8331, |
|
"step": 48000 |
|
}, |
|
{ |
|
"epoch": 0.39, |
|
"eval_loss": 2.7694976329803467, |
|
"eval_runtime": 127.0405, |
|
"eval_samples_per_second": 812.93, |
|
"eval_steps_per_second": 50.811, |
|
"step": 48000 |
|
}, |
|
{ |
|
"epoch": 0.46, |
|
"eval_loss": 2.7614457607269287, |
|
"eval_runtime": 125.7185, |
|
"eval_samples_per_second": 821.478, |
|
"eval_steps_per_second": 51.345, |
|
"step": 56000 |
|
}, |
|
{ |
|
"epoch": 0.52, |
|
"learning_rate": 9.739014425081299e-06, |
|
"loss": 2.7723, |
|
"step": 64000 |
|
}, |
|
{ |
|
"epoch": 0.52, |
|
"eval_loss": 2.7416625022888184, |
|
"eval_runtime": 126.1624, |
|
"eval_samples_per_second": 818.588, |
|
"eval_steps_per_second": 51.164, |
|
"step": 64000 |
|
}, |
|
{ |
|
"epoch": 0.59, |
|
"eval_loss": 2.7248806953430176, |
|
"eval_runtime": 126.0454, |
|
"eval_samples_per_second": 819.348, |
|
"eval_steps_per_second": 51.212, |
|
"step": 72000 |
|
}, |
|
{ |
|
"epoch": 0.65, |
|
"learning_rate": 9.672308846827316e-06, |
|
"loss": 2.75, |
|
"step": 80000 |
|
}, |
|
{ |
|
"epoch": 0.65, |
|
"eval_loss": 2.7202229499816895, |
|
"eval_runtime": 126.948, |
|
"eval_samples_per_second": 813.522, |
|
"eval_steps_per_second": 50.848, |
|
"step": 80000 |
|
}, |
|
{ |
|
"epoch": 0.72, |
|
"eval_loss": 2.7112038135528564, |
|
"eval_runtime": 126.8524, |
|
"eval_samples_per_second": 814.135, |
|
"eval_steps_per_second": 50.886, |
|
"step": 88000 |
|
}, |
|
{ |
|
"epoch": 0.78, |
|
"learning_rate": 9.605603268573334e-06, |
|
"loss": 2.735, |
|
"step": 96000 |
|
}, |
|
{ |
|
"epoch": 0.78, |
|
"eval_loss": 2.7228710651397705, |
|
"eval_runtime": 126.981, |
|
"eval_samples_per_second": 813.311, |
|
"eval_steps_per_second": 50.834, |
|
"step": 96000 |
|
}, |
|
{ |
|
"epoch": 0.85, |
|
"eval_loss": 2.7370951175689697, |
|
"eval_runtime": 126.6893, |
|
"eval_samples_per_second": 815.183, |
|
"eval_steps_per_second": 50.951, |
|
"step": 104000 |
|
}, |
|
{ |
|
"epoch": 0.91, |
|
"learning_rate": 9.538897690319354e-06, |
|
"loss": 2.7137, |
|
"step": 112000 |
|
}, |
|
{ |
|
"epoch": 0.91, |
|
"eval_loss": 2.7059037685394287, |
|
"eval_runtime": 126.3306, |
|
"eval_samples_per_second": 817.498, |
|
"eval_steps_per_second": 51.096, |
|
"step": 112000 |
|
}, |
|
{ |
|
"epoch": 0.98, |
|
"eval_loss": 2.7120730876922607, |
|
"eval_runtime": 126.2744, |
|
"eval_samples_per_second": 817.862, |
|
"eval_steps_per_second": 51.119, |
|
"step": 120000 |
|
}, |
|
{ |
|
"epoch": 1.04, |
|
"learning_rate": 9.472192112065373e-06, |
|
"loss": 2.7155, |
|
"step": 128000 |
|
}, |
|
{ |
|
"epoch": 1.04, |
|
"eval_loss": 2.7248668670654297, |
|
"eval_runtime": 126.8126, |
|
"eval_samples_per_second": 814.391, |
|
"eval_steps_per_second": 50.902, |
|
"step": 128000 |
|
}, |
|
{ |
|
"epoch": 1.11, |
|
"eval_loss": 2.7130985260009766, |
|
"eval_runtime": 126.6262, |
|
"eval_samples_per_second": 815.589, |
|
"eval_steps_per_second": 50.977, |
|
"step": 136000 |
|
}, |
|
{ |
|
"epoch": 1.17, |
|
"learning_rate": 9.405486533811392e-06, |
|
"loss": 2.7152, |
|
"step": 144000 |
|
}, |
|
{ |
|
"epoch": 1.17, |
|
"eval_loss": 2.6999881267547607, |
|
"eval_runtime": 126.4279, |
|
"eval_samples_per_second": 816.868, |
|
"eval_steps_per_second": 51.057, |
|
"step": 144000 |
|
}, |
|
{ |
|
"epoch": 1.24, |
|
"eval_loss": 2.703012704849243, |
|
"eval_runtime": 126.2932, |
|
"eval_samples_per_second": 817.74, |
|
"eval_steps_per_second": 51.111, |
|
"step": 152000 |
|
}, |
|
{ |
|
"epoch": 1.3, |
|
"learning_rate": 9.338780955557409e-06, |
|
"loss": 2.7151, |
|
"step": 160000 |
|
}, |
|
{ |
|
"epoch": 1.3, |
|
"eval_loss": 2.721385955810547, |
|
"eval_runtime": 126.566, |
|
"eval_samples_per_second": 815.977, |
|
"eval_steps_per_second": 51.001, |
|
"step": 160000 |
|
}, |
|
{ |
|
"epoch": 1.37, |
|
"eval_loss": 2.707641839981079, |
|
"eval_runtime": 126.5896, |
|
"eval_samples_per_second": 815.826, |
|
"eval_steps_per_second": 50.992, |
|
"step": 168000 |
|
}, |
|
{ |
|
"epoch": 1.44, |
|
"learning_rate": 9.272075377303427e-06, |
|
"loss": 2.7166, |
|
"step": 176000 |
|
}, |
|
{ |
|
"epoch": 1.44, |
|
"eval_loss": 2.7106387615203857, |
|
"eval_runtime": 126.9356, |
|
"eval_samples_per_second": 813.602, |
|
"eval_steps_per_second": 50.853, |
|
"step": 176000 |
|
}, |
|
{ |
|
"epoch": 1.5, |
|
"eval_loss": 2.719717025756836, |
|
"eval_runtime": 127.5317, |
|
"eval_samples_per_second": 809.798, |
|
"eval_steps_per_second": 50.615, |
|
"step": 184000 |
|
}, |
|
{ |
|
"epoch": 1.57, |
|
"learning_rate": 9.205369799049446e-06, |
|
"loss": 2.7144, |
|
"step": 192000 |
|
}, |
|
{ |
|
"epoch": 1.57, |
|
"eval_loss": 2.7100777626037598, |
|
"eval_runtime": 126.3318, |
|
"eval_samples_per_second": 817.49, |
|
"eval_steps_per_second": 51.096, |
|
"step": 192000 |
|
}, |
|
{ |
|
"epoch": 1.63, |
|
"eval_loss": 2.723472833633423, |
|
"eval_runtime": 127.1568, |
|
"eval_samples_per_second": 812.186, |
|
"eval_steps_per_second": 50.764, |
|
"step": 200000 |
|
}, |
|
{ |
|
"epoch": 1.7, |
|
"learning_rate": 9.138664220795464e-06, |
|
"loss": 2.7179, |
|
"step": 208000 |
|
}, |
|
{ |
|
"epoch": 1.7, |
|
"eval_loss": 2.706564426422119, |
|
"eval_runtime": 127.089, |
|
"eval_samples_per_second": 812.62, |
|
"eval_steps_per_second": 50.791, |
|
"step": 208000 |
|
}, |
|
{ |
|
"epoch": 1.76, |
|
"eval_loss": 2.7282984256744385, |
|
"eval_runtime": 127.4927, |
|
"eval_samples_per_second": 810.047, |
|
"eval_steps_per_second": 50.63, |
|
"step": 216000 |
|
}, |
|
{ |
|
"epoch": 1.83, |
|
"learning_rate": 9.071958642541483e-06, |
|
"loss": 2.7231, |
|
"step": 224000 |
|
}, |
|
{ |
|
"epoch": 1.83, |
|
"eval_loss": 2.7203216552734375, |
|
"eval_runtime": 127.4298, |
|
"eval_samples_per_second": 810.446, |
|
"eval_steps_per_second": 50.655, |
|
"step": 224000 |
|
}, |
|
{ |
|
"epoch": 1.89, |
|
"eval_loss": 2.711085319519043, |
|
"eval_runtime": 126.4739, |
|
"eval_samples_per_second": 816.571, |
|
"eval_steps_per_second": 51.038, |
|
"step": 232000 |
|
}, |
|
{ |
|
"epoch": 1.96, |
|
"learning_rate": 9.005253064287502e-06, |
|
"loss": 2.7284, |
|
"step": 240000 |
|
}, |
|
{ |
|
"epoch": 1.96, |
|
"eval_loss": 2.721714973449707, |
|
"eval_runtime": 126.401, |
|
"eval_samples_per_second": 817.043, |
|
"eval_steps_per_second": 51.068, |
|
"step": 240000 |
|
}, |
|
{ |
|
"epoch": 2.02, |
|
"eval_loss": 2.725090265274048, |
|
"eval_runtime": 127.0199, |
|
"eval_samples_per_second": 813.061, |
|
"eval_steps_per_second": 50.819, |
|
"step": 248000 |
|
}, |
|
{ |
|
"epoch": 2.09, |
|
"learning_rate": 8.93854748603352e-06, |
|
"loss": 2.7242, |
|
"step": 256000 |
|
}, |
|
{ |
|
"epoch": 2.09, |
|
"eval_loss": 2.718090057373047, |
|
"eval_runtime": 127.9402, |
|
"eval_samples_per_second": 807.213, |
|
"eval_steps_per_second": 50.453, |
|
"step": 256000 |
|
}, |
|
{ |
|
"epoch": 2.15, |
|
"eval_loss": 2.723750591278076, |
|
"eval_runtime": 127.1287, |
|
"eval_samples_per_second": 812.366, |
|
"eval_steps_per_second": 50.775, |
|
"step": 264000 |
|
}, |
|
{ |
|
"epoch": 2.22, |
|
"learning_rate": 8.871841907779539e-06, |
|
"loss": 2.7171, |
|
"step": 272000 |
|
}, |
|
{ |
|
"epoch": 2.22, |
|
"eval_loss": 2.748772144317627, |
|
"eval_runtime": 128.6406, |
|
"eval_samples_per_second": 802.818, |
|
"eval_steps_per_second": 50.179, |
|
"step": 272000 |
|
}, |
|
{ |
|
"epoch": 2.28, |
|
"eval_loss": 2.731541633605957, |
|
"eval_runtime": 127.5368, |
|
"eval_samples_per_second": 809.766, |
|
"eval_steps_per_second": 50.613, |
|
"step": 280000 |
|
}, |
|
{ |
|
"epoch": 2.35, |
|
"learning_rate": 8.805136329525557e-06, |
|
"loss": 2.7312, |
|
"step": 288000 |
|
}, |
|
{ |
|
"epoch": 2.35, |
|
"eval_loss": 2.746854305267334, |
|
"eval_runtime": 127.6128, |
|
"eval_samples_per_second": 809.284, |
|
"eval_steps_per_second": 50.583, |
|
"step": 288000 |
|
}, |
|
{ |
|
"epoch": 2.41, |
|
"eval_loss": 2.7363078594207764, |
|
"eval_runtime": 127.9259, |
|
"eval_samples_per_second": 807.303, |
|
"eval_steps_per_second": 50.459, |
|
"step": 296000 |
|
}, |
|
{ |
|
"epoch": 2.48, |
|
"learning_rate": 8.738430751271576e-06, |
|
"loss": 2.7386, |
|
"step": 304000 |
|
}, |
|
{ |
|
"epoch": 2.48, |
|
"eval_loss": 2.7398250102996826, |
|
"eval_runtime": 127.3013, |
|
"eval_samples_per_second": 811.264, |
|
"eval_steps_per_second": 50.706, |
|
"step": 304000 |
|
}, |
|
{ |
|
"epoch": 2.54, |
|
"eval_loss": 2.747743844985962, |
|
"eval_runtime": 127.9865, |
|
"eval_samples_per_second": 806.921, |
|
"eval_steps_per_second": 50.435, |
|
"step": 312000 |
|
}, |
|
{ |
|
"epoch": 2.61, |
|
"learning_rate": 8.671725173017595e-06, |
|
"loss": 2.7457, |
|
"step": 320000 |
|
}, |
|
{ |
|
"epoch": 2.61, |
|
"eval_loss": 2.753558397293091, |
|
"eval_runtime": 128.9208, |
|
"eval_samples_per_second": 801.073, |
|
"eval_steps_per_second": 50.069, |
|
"step": 320000 |
|
}, |
|
{ |
|
"epoch": 2.67, |
|
"eval_loss": 2.748337984085083, |
|
"eval_runtime": 128.758, |
|
"eval_samples_per_second": 802.086, |
|
"eval_steps_per_second": 50.133, |
|
"step": 328000 |
|
}, |
|
{ |
|
"epoch": 2.74, |
|
"learning_rate": 8.605019594763613e-06, |
|
"loss": 2.7496, |
|
"step": 336000 |
|
}, |
|
{ |
|
"epoch": 2.74, |
|
"eval_loss": 2.752856969833374, |
|
"eval_runtime": 128.3684, |
|
"eval_samples_per_second": 804.521, |
|
"eval_steps_per_second": 50.285, |
|
"step": 336000 |
|
}, |
|
{ |
|
"epoch": 2.8, |
|
"eval_loss": 2.749178171157837, |
|
"eval_runtime": 129.8422, |
|
"eval_samples_per_second": 795.388, |
|
"eval_steps_per_second": 49.714, |
|
"step": 344000 |
|
}, |
|
{ |
|
"epoch": 2.87, |
|
"learning_rate": 8.538314016509632e-06, |
|
"loss": 2.7521, |
|
"step": 352000 |
|
}, |
|
{ |
|
"epoch": 2.87, |
|
"eval_loss": 2.761200189590454, |
|
"eval_runtime": 127.6309, |
|
"eval_samples_per_second": 809.169, |
|
"eval_steps_per_second": 50.576, |
|
"step": 352000 |
|
}, |
|
{ |
|
"epoch": 2.94, |
|
"eval_loss": 2.7700963020324707, |
|
"eval_runtime": 128.3946, |
|
"eval_samples_per_second": 804.356, |
|
"eval_steps_per_second": 50.275, |
|
"step": 360000 |
|
}, |
|
{ |
|
"epoch": 3.0, |
|
"learning_rate": 8.471608438255649e-06, |
|
"loss": 2.7649, |
|
"step": 368000 |
|
}, |
|
{ |
|
"epoch": 3.0, |
|
"eval_loss": 2.7705161571502686, |
|
"eval_runtime": 128.8577, |
|
"eval_samples_per_second": 801.466, |
|
"eval_steps_per_second": 50.094, |
|
"step": 368000 |
|
}, |
|
{ |
|
"epoch": 3.07, |
|
"eval_loss": 2.782761335372925, |
|
"eval_runtime": 129.17, |
|
"eval_samples_per_second": 799.528, |
|
"eval_steps_per_second": 49.973, |
|
"step": 376000 |
|
}, |
|
{ |
|
"epoch": 3.13, |
|
"learning_rate": 8.404902860001667e-06, |
|
"loss": 2.7516, |
|
"step": 384000 |
|
}, |
|
{ |
|
"epoch": 3.13, |
|
"eval_loss": 2.7680482864379883, |
|
"eval_runtime": 128.9028, |
|
"eval_samples_per_second": 801.185, |
|
"eval_steps_per_second": 50.077, |
|
"step": 384000 |
|
}, |
|
{ |
|
"epoch": 3.2, |
|
"eval_loss": 2.784294605255127, |
|
"eval_runtime": 128.4737, |
|
"eval_samples_per_second": 803.861, |
|
"eval_steps_per_second": 50.244, |
|
"step": 392000 |
|
}, |
|
{ |
|
"epoch": 3.26, |
|
"learning_rate": 8.338197281747686e-06, |
|
"loss": 2.762, |
|
"step": 400000 |
|
}, |
|
{ |
|
"epoch": 3.26, |
|
"eval_loss": 2.7915961742401123, |
|
"eval_runtime": 128.2651, |
|
"eval_samples_per_second": 805.168, |
|
"eval_steps_per_second": 50.325, |
|
"step": 400000 |
|
}, |
|
{ |
|
"epoch": 3.33, |
|
"eval_loss": 2.7691826820373535, |
|
"eval_runtime": 128.6705, |
|
"eval_samples_per_second": 802.632, |
|
"eval_steps_per_second": 50.167, |
|
"step": 408000 |
|
}, |
|
{ |
|
"epoch": 3.39, |
|
"learning_rate": 8.271491703493705e-06, |
|
"loss": 2.7789, |
|
"step": 416000 |
|
}, |
|
{ |
|
"epoch": 3.39, |
|
"eval_loss": 2.783369302749634, |
|
"eval_runtime": 128.6603, |
|
"eval_samples_per_second": 802.695, |
|
"eval_steps_per_second": 50.171, |
|
"step": 416000 |
|
}, |
|
{ |
|
"epoch": 3.46, |
|
"eval_loss": 2.7788405418395996, |
|
"eval_runtime": 129.7209, |
|
"eval_samples_per_second": 796.132, |
|
"eval_steps_per_second": 49.761, |
|
"step": 424000 |
|
}, |
|
{ |
|
"epoch": 3.52, |
|
"learning_rate": 8.204786125239725e-06, |
|
"loss": 2.7879, |
|
"step": 432000 |
|
}, |
|
{ |
|
"epoch": 3.52, |
|
"eval_loss": 2.803699493408203, |
|
"eval_runtime": 128.2575, |
|
"eval_samples_per_second": 805.216, |
|
"eval_steps_per_second": 50.328, |
|
"step": 432000 |
|
}, |
|
{ |
|
"epoch": 3.59, |
|
"eval_loss": 2.791905403137207, |
|
"eval_runtime": 129.4159, |
|
"eval_samples_per_second": 798.009, |
|
"eval_steps_per_second": 49.878, |
|
"step": 440000 |
|
}, |
|
{ |
|
"epoch": 3.65, |
|
"learning_rate": 8.138080546985743e-06, |
|
"loss": 2.7853, |
|
"step": 448000 |
|
}, |
|
{ |
|
"epoch": 3.65, |
|
"eval_loss": 2.8077127933502197, |
|
"eval_runtime": 127.9753, |
|
"eval_samples_per_second": 806.992, |
|
"eval_steps_per_second": 50.439, |
|
"step": 448000 |
|
}, |
|
{ |
|
"epoch": 3.72, |
|
"eval_loss": 2.7903032302856445, |
|
"eval_runtime": 128.9005, |
|
"eval_samples_per_second": 801.2, |
|
"eval_steps_per_second": 50.077, |
|
"step": 456000 |
|
}, |
|
{ |
|
"epoch": 3.78, |
|
"learning_rate": 8.07137496873176e-06, |
|
"loss": 2.7976, |
|
"step": 464000 |
|
}, |
|
{ |
|
"epoch": 3.78, |
|
"eval_loss": 2.810896158218384, |
|
"eval_runtime": 129.0626, |
|
"eval_samples_per_second": 800.193, |
|
"eval_steps_per_second": 50.014, |
|
"step": 464000 |
|
}, |
|
{ |
|
"epoch": 3.85, |
|
"eval_loss": 2.795713424682617, |
|
"eval_runtime": 128.0638, |
|
"eval_samples_per_second": 806.434, |
|
"eval_steps_per_second": 50.405, |
|
"step": 472000 |
|
}, |
|
{ |
|
"epoch": 3.91, |
|
"learning_rate": 8.004669390477779e-06, |
|
"loss": 2.789, |
|
"step": 480000 |
|
}, |
|
{ |
|
"epoch": 3.91, |
|
"eval_loss": 2.8023178577423096, |
|
"eval_runtime": 128.1962, |
|
"eval_samples_per_second": 805.601, |
|
"eval_steps_per_second": 50.353, |
|
"step": 480000 |
|
}, |
|
{ |
|
"epoch": 3.98, |
|
"eval_loss": 2.8125839233398438, |
|
"eval_runtime": 128.7992, |
|
"eval_samples_per_second": 801.83, |
|
"eval_steps_per_second": 50.117, |
|
"step": 488000 |
|
}, |
|
{ |
|
"epoch": 4.04, |
|
"learning_rate": 7.937963812223798e-06, |
|
"loss": 2.8089, |
|
"step": 496000 |
|
}, |
|
{ |
|
"epoch": 4.04, |
|
"eval_loss": 2.815424919128418, |
|
"eval_runtime": 128.7985, |
|
"eval_samples_per_second": 801.834, |
|
"eval_steps_per_second": 50.117, |
|
"step": 496000 |
|
}, |
|
{ |
|
"epoch": 4.11, |
|
"eval_loss": 2.8122923374176025, |
|
"eval_runtime": 127.4092, |
|
"eval_samples_per_second": 810.577, |
|
"eval_steps_per_second": 50.664, |
|
"step": 504000 |
|
}, |
|
{ |
|
"epoch": 4.17, |
|
"learning_rate": 7.871258233969816e-06, |
|
"loss": 2.7915, |
|
"step": 512000 |
|
}, |
|
{ |
|
"epoch": 4.17, |
|
"eval_loss": 2.8145976066589355, |
|
"eval_runtime": 128.9266, |
|
"eval_samples_per_second": 801.037, |
|
"eval_steps_per_second": 50.067, |
|
"step": 512000 |
|
}, |
|
{ |
|
"epoch": 4.24, |
|
"eval_loss": 2.8249683380126953, |
|
"eval_runtime": 129.1348, |
|
"eval_samples_per_second": 799.746, |
|
"eval_steps_per_second": 49.987, |
|
"step": 520000 |
|
}, |
|
{ |
|
"epoch": 4.31, |
|
"learning_rate": 7.804552655715835e-06, |
|
"loss": 2.8094, |
|
"step": 528000 |
|
}, |
|
{ |
|
"epoch": 4.31, |
|
"eval_loss": 2.820560932159424, |
|
"eval_runtime": 129.6096, |
|
"eval_samples_per_second": 796.816, |
|
"eval_steps_per_second": 49.803, |
|
"step": 528000 |
|
}, |
|
{ |
|
"epoch": 4.37, |
|
"eval_loss": 2.818159341812134, |
|
"eval_runtime": 128.5096, |
|
"eval_samples_per_second": 803.637, |
|
"eval_steps_per_second": 50.23, |
|
"step": 536000 |
|
}, |
|
{ |
|
"epoch": 4.44, |
|
"learning_rate": 7.737847077461853e-06, |
|
"loss": 2.8196, |
|
"step": 544000 |
|
}, |
|
{ |
|
"epoch": 4.44, |
|
"eval_loss": 2.8351361751556396, |
|
"eval_runtime": 129.1287, |
|
"eval_samples_per_second": 799.783, |
|
"eval_steps_per_second": 49.989, |
|
"step": 544000 |
|
}, |
|
{ |
|
"epoch": 4.5, |
|
"eval_loss": 2.839430570602417, |
|
"eval_runtime": 129.5203, |
|
"eval_samples_per_second": 797.365, |
|
"eval_steps_per_second": 49.838, |
|
"step": 552000 |
|
}, |
|
{ |
|
"epoch": 4.57, |
|
"learning_rate": 7.671141499207872e-06, |
|
"loss": 2.8316, |
|
"step": 560000 |
|
}, |
|
{ |
|
"epoch": 4.57, |
|
"eval_loss": 2.8396623134613037, |
|
"eval_runtime": 128.6713, |
|
"eval_samples_per_second": 802.627, |
|
"eval_steps_per_second": 50.167, |
|
"step": 560000 |
|
}, |
|
{ |
|
"epoch": 4.63, |
|
"eval_loss": 2.8402562141418457, |
|
"eval_runtime": 128.654, |
|
"eval_samples_per_second": 802.735, |
|
"eval_steps_per_second": 50.173, |
|
"step": 568000 |
|
}, |
|
{ |
|
"epoch": 4.7, |
|
"learning_rate": 7.604435920953891e-06, |
|
"loss": 2.8444, |
|
"step": 576000 |
|
}, |
|
{ |
|
"epoch": 4.7, |
|
"eval_loss": 2.8350980281829834, |
|
"eval_runtime": 129.3424, |
|
"eval_samples_per_second": 798.462, |
|
"eval_steps_per_second": 49.906, |
|
"step": 576000 |
|
}, |
|
{ |
|
"epoch": 4.76, |
|
"eval_loss": 2.8574254512786865, |
|
"eval_runtime": 129.6206, |
|
"eval_samples_per_second": 796.748, |
|
"eval_steps_per_second": 49.799, |
|
"step": 584000 |
|
}, |
|
{ |
|
"epoch": 4.83, |
|
"learning_rate": 7.537730342699909e-06, |
|
"loss": 2.833, |
|
"step": 592000 |
|
}, |
|
{ |
|
"epoch": 4.83, |
|
"eval_loss": 2.86171293258667, |
|
"eval_runtime": 129.2684, |
|
"eval_samples_per_second": 798.919, |
|
"eval_steps_per_second": 49.935, |
|
"step": 592000 |
|
}, |
|
{ |
|
"epoch": 4.89, |
|
"eval_loss": 2.857750654220581, |
|
"eval_runtime": 128.5027, |
|
"eval_samples_per_second": 803.679, |
|
"eval_steps_per_second": 50.232, |
|
"step": 600000 |
|
}, |
|
{ |
|
"epoch": 4.96, |
|
"learning_rate": 7.471024764445928e-06, |
|
"loss": 2.839, |
|
"step": 608000 |
|
}, |
|
{ |
|
"epoch": 4.96, |
|
"eval_loss": 2.8577184677124023, |
|
"eval_runtime": 128.7081, |
|
"eval_samples_per_second": 802.397, |
|
"eval_steps_per_second": 50.152, |
|
"step": 608000 |
|
}, |
|
{ |
|
"epoch": 5.02, |
|
"eval_loss": 2.8726649284362793, |
|
"eval_runtime": 128.6474, |
|
"eval_samples_per_second": 802.776, |
|
"eval_steps_per_second": 50.176, |
|
"step": 616000 |
|
}, |
|
{ |
|
"epoch": 5.09, |
|
"learning_rate": 7.4043191861919465e-06, |
|
"loss": 2.8427, |
|
"step": 624000 |
|
}, |
|
{ |
|
"epoch": 5.09, |
|
"eval_loss": 2.858550786972046, |
|
"eval_runtime": 129.0947, |
|
"eval_samples_per_second": 799.994, |
|
"eval_steps_per_second": 50.002, |
|
"step": 624000 |
|
}, |
|
{ |
|
"epoch": 5.15, |
|
"eval_loss": 2.880849599838257, |
|
"eval_runtime": 128.221, |
|
"eval_samples_per_second": 805.445, |
|
"eval_steps_per_second": 50.343, |
|
"step": 632000 |
|
}, |
|
{ |
|
"epoch": 5.22, |
|
"learning_rate": 7.337613607937964e-06, |
|
"loss": 2.8599, |
|
"step": 640000 |
|
}, |
|
{ |
|
"epoch": 5.22, |
|
"eval_loss": 2.8959789276123047, |
|
"eval_runtime": 129.9831, |
|
"eval_samples_per_second": 794.527, |
|
"eval_steps_per_second": 49.66, |
|
"step": 640000 |
|
}, |
|
{ |
|
"epoch": 5.28, |
|
"eval_loss": 2.8883421421051025, |
|
"eval_runtime": 129.4941, |
|
"eval_samples_per_second": 797.527, |
|
"eval_steps_per_second": 49.848, |
|
"step": 648000 |
|
}, |
|
{ |
|
"epoch": 5.35, |
|
"learning_rate": 7.270908029683983e-06, |
|
"loss": 2.8694, |
|
"step": 656000 |
|
}, |
|
{ |
|
"epoch": 5.35, |
|
"eval_loss": 2.8884825706481934, |
|
"eval_runtime": 129.3172, |
|
"eval_samples_per_second": 798.618, |
|
"eval_steps_per_second": 49.916, |
|
"step": 656000 |
|
}, |
|
{ |
|
"epoch": 5.41, |
|
"eval_loss": 2.887291431427002, |
|
"eval_runtime": 129.1298, |
|
"eval_samples_per_second": 799.777, |
|
"eval_steps_per_second": 49.988, |
|
"step": 664000 |
|
}, |
|
{ |
|
"epoch": 5.48, |
|
"learning_rate": 7.2042024514300015e-06, |
|
"loss": 2.8626, |
|
"step": 672000 |
|
}, |
|
{ |
|
"epoch": 5.48, |
|
"eval_loss": 2.8929550647735596, |
|
"eval_runtime": 129.4886, |
|
"eval_samples_per_second": 797.56, |
|
"eval_steps_per_second": 49.85, |
|
"step": 672000 |
|
}, |
|
{ |
|
"epoch": 5.54, |
|
"eval_loss": 2.8987772464752197, |
|
"eval_runtime": 129.8683, |
|
"eval_samples_per_second": 795.229, |
|
"eval_steps_per_second": 49.704, |
|
"step": 680000 |
|
}, |
|
{ |
|
"epoch": 5.61, |
|
"learning_rate": 7.13749687317602e-06, |
|
"loss": 2.8921, |
|
"step": 688000 |
|
}, |
|
{ |
|
"epoch": 5.61, |
|
"eval_loss": 2.9117259979248047, |
|
"eval_runtime": 128.3205, |
|
"eval_samples_per_second": 804.821, |
|
"eval_steps_per_second": 50.304, |
|
"step": 688000 |
|
}, |
|
{ |
|
"epoch": 5.68, |
|
"eval_loss": 2.912231206893921, |
|
"eval_runtime": 128.7871, |
|
"eval_samples_per_second": 801.905, |
|
"eval_steps_per_second": 50.121, |
|
"step": 696000 |
|
}, |
|
{ |
|
"epoch": 5.74, |
|
"learning_rate": 7.070791294922038e-06, |
|
"loss": 2.8884, |
|
"step": 704000 |
|
}, |
|
{ |
|
"epoch": 5.74, |
|
"eval_loss": 2.900118827819824, |
|
"eval_runtime": 130.1834, |
|
"eval_samples_per_second": 793.304, |
|
"eval_steps_per_second": 49.584, |
|
"step": 704000 |
|
}, |
|
{ |
|
"epoch": 5.81, |
|
"eval_loss": 2.9093644618988037, |
|
"eval_runtime": 129.4918, |
|
"eval_samples_per_second": 797.541, |
|
"eval_steps_per_second": 49.849, |
|
"step": 712000 |
|
}, |
|
{ |
|
"epoch": 5.87, |
|
"learning_rate": 7.0040857166680564e-06, |
|
"loss": 2.8974, |
|
"step": 720000 |
|
}, |
|
{ |
|
"epoch": 5.87, |
|
"eval_loss": 2.9110264778137207, |
|
"eval_runtime": 129.9051, |
|
"eval_samples_per_second": 795.003, |
|
"eval_steps_per_second": 49.69, |
|
"step": 720000 |
|
}, |
|
{ |
|
"epoch": 5.94, |
|
"eval_loss": 2.9044594764709473, |
|
"eval_runtime": 129.2324, |
|
"eval_samples_per_second": 799.141, |
|
"eval_steps_per_second": 49.949, |
|
"step": 728000 |
|
}, |
|
{ |
|
"epoch": 6.0, |
|
"learning_rate": 6.937380138414076e-06, |
|
"loss": 2.903, |
|
"step": 736000 |
|
}, |
|
{ |
|
"epoch": 6.0, |
|
"eval_loss": 2.933678388595581, |
|
"eval_runtime": 130.3644, |
|
"eval_samples_per_second": 792.202, |
|
"eval_steps_per_second": 49.515, |
|
"step": 736000 |
|
}, |
|
{ |
|
"epoch": 6.07, |
|
"eval_loss": 2.931581735610962, |
|
"eval_runtime": 128.3976, |
|
"eval_samples_per_second": 804.337, |
|
"eval_steps_per_second": 50.274, |
|
"step": 744000 |
|
}, |
|
{ |
|
"epoch": 6.13, |
|
"learning_rate": 6.8706745601600945e-06, |
|
"loss": 2.9057, |
|
"step": 752000 |
|
}, |
|
{ |
|
"epoch": 6.13, |
|
"eval_loss": 2.944746971130371, |
|
"eval_runtime": 128.9912, |
|
"eval_samples_per_second": 800.636, |
|
"eval_steps_per_second": 50.042, |
|
"step": 752000 |
|
}, |
|
{ |
|
"epoch": 6.2, |
|
"eval_loss": 2.936281681060791, |
|
"eval_runtime": 129.9533, |
|
"eval_samples_per_second": 794.709, |
|
"eval_steps_per_second": 49.672, |
|
"step": 760000 |
|
}, |
|
{ |
|
"epoch": 6.26, |
|
"learning_rate": 6.803968981906113e-06, |
|
"loss": 2.9146, |
|
"step": 768000 |
|
}, |
|
{ |
|
"epoch": 6.26, |
|
"eval_loss": 2.943751096725464, |
|
"eval_runtime": 129.9494, |
|
"eval_samples_per_second": 794.732, |
|
"eval_steps_per_second": 49.673, |
|
"step": 768000 |
|
}, |
|
{ |
|
"epoch": 6.33, |
|
"eval_loss": 2.9474806785583496, |
|
"eval_runtime": 130.0993, |
|
"eval_samples_per_second": 793.817, |
|
"eval_steps_per_second": 49.616, |
|
"step": 776000 |
|
}, |
|
{ |
|
"epoch": 6.39, |
|
"learning_rate": 6.737263403652131e-06, |
|
"loss": 2.9221, |
|
"step": 784000 |
|
}, |
|
{ |
|
"epoch": 6.39, |
|
"eval_loss": 2.9394171237945557, |
|
"eval_runtime": 129.1928, |
|
"eval_samples_per_second": 799.387, |
|
"eval_steps_per_second": 49.964, |
|
"step": 784000 |
|
}, |
|
{ |
|
"epoch": 6.46, |
|
"eval_loss": 2.937087297439575, |
|
"eval_runtime": 129.9118, |
|
"eval_samples_per_second": 794.963, |
|
"eval_steps_per_second": 49.688, |
|
"step": 792000 |
|
}, |
|
{ |
|
"epoch": 6.52, |
|
"learning_rate": 6.6705578253981495e-06, |
|
"loss": 2.9316, |
|
"step": 800000 |
|
}, |
|
{ |
|
"epoch": 6.52, |
|
"eval_loss": 2.949429512023926, |
|
"eval_runtime": 129.8602, |
|
"eval_samples_per_second": 795.278, |
|
"eval_steps_per_second": 49.707, |
|
"step": 800000 |
|
}, |
|
{ |
|
"epoch": 6.59, |
|
"eval_loss": 2.9727399349212646, |
|
"eval_runtime": 130.9441, |
|
"eval_samples_per_second": 788.695, |
|
"eval_steps_per_second": 49.296, |
|
"step": 808000 |
|
}, |
|
{ |
|
"epoch": 6.65, |
|
"learning_rate": 6.603852247144168e-06, |
|
"loss": 2.9421, |
|
"step": 816000 |
|
}, |
|
{ |
|
"epoch": 6.65, |
|
"eval_loss": 2.9758830070495605, |
|
"eval_runtime": 129.8861, |
|
"eval_samples_per_second": 795.12, |
|
"eval_steps_per_second": 49.697, |
|
"step": 816000 |
|
}, |
|
{ |
|
"epoch": 6.72, |
|
"eval_loss": 2.966480016708374, |
|
"eval_runtime": 129.44, |
|
"eval_samples_per_second": 797.86, |
|
"eval_steps_per_second": 49.869, |
|
"step": 824000 |
|
}, |
|
{ |
|
"epoch": 6.78, |
|
"learning_rate": 6.537146668890187e-06, |
|
"loss": 2.9538, |
|
"step": 832000 |
|
}, |
|
{ |
|
"epoch": 6.78, |
|
"eval_loss": 2.9650251865386963, |
|
"eval_runtime": 129.4919, |
|
"eval_samples_per_second": 797.54, |
|
"eval_steps_per_second": 49.849, |
|
"step": 832000 |
|
}, |
|
{ |
|
"epoch": 6.85, |
|
"eval_loss": 2.976144313812256, |
|
"eval_runtime": 129.8294, |
|
"eval_samples_per_second": 795.467, |
|
"eval_steps_per_second": 49.719, |
|
"step": 840000 |
|
}, |
|
{ |
|
"epoch": 6.91, |
|
"learning_rate": 6.4704410906362044e-06, |
|
"loss": 2.9594, |
|
"step": 848000 |
|
}, |
|
{ |
|
"epoch": 6.91, |
|
"eval_loss": 2.990086317062378, |
|
"eval_runtime": 129.827, |
|
"eval_samples_per_second": 795.482, |
|
"eval_steps_per_second": 49.72, |
|
"step": 848000 |
|
}, |
|
{ |
|
"epoch": 6.98, |
|
"eval_loss": 2.973181962966919, |
|
"eval_runtime": 131.5126, |
|
"eval_samples_per_second": 785.286, |
|
"eval_steps_per_second": 49.083, |
|
"step": 856000 |
|
}, |
|
{ |
|
"epoch": 7.05, |
|
"learning_rate": 6.403735512382223e-06, |
|
"loss": 2.9564, |
|
"step": 864000 |
|
}, |
|
{ |
|
"epoch": 7.05, |
|
"eval_loss": 2.9896528720855713, |
|
"eval_runtime": 129.878, |
|
"eval_samples_per_second": 795.169, |
|
"eval_steps_per_second": 49.7, |
|
"step": 864000 |
|
}, |
|
{ |
|
"epoch": 7.11, |
|
"eval_loss": 2.980059862136841, |
|
"eval_runtime": 129.5351, |
|
"eval_samples_per_second": 797.274, |
|
"eval_steps_per_second": 49.832, |
|
"step": 872000 |
|
}, |
|
{ |
|
"epoch": 7.18, |
|
"learning_rate": 6.337029934128242e-06, |
|
"loss": 2.9561, |
|
"step": 880000 |
|
}, |
|
{ |
|
"epoch": 7.18, |
|
"eval_loss": 2.983869791030884, |
|
"eval_runtime": 130.0357, |
|
"eval_samples_per_second": 794.205, |
|
"eval_steps_per_second": 49.64, |
|
"step": 880000 |
|
}, |
|
{ |
|
"epoch": 7.24, |
|
"eval_loss": 2.9887585639953613, |
|
"eval_runtime": 130.015, |
|
"eval_samples_per_second": 794.331, |
|
"eval_steps_per_second": 49.648, |
|
"step": 888000 |
|
}, |
|
{ |
|
"epoch": 7.31, |
|
"learning_rate": 6.270324355874261e-06, |
|
"loss": 2.9669, |
|
"step": 896000 |
|
}, |
|
{ |
|
"epoch": 7.31, |
|
"eval_loss": 2.99999737739563, |
|
"eval_runtime": 130.6345, |
|
"eval_samples_per_second": 790.564, |
|
"eval_steps_per_second": 49.413, |
|
"step": 896000 |
|
}, |
|
{ |
|
"epoch": 7.37, |
|
"eval_loss": 2.9786183834075928, |
|
"eval_runtime": 129.9739, |
|
"eval_samples_per_second": 794.582, |
|
"eval_steps_per_second": 49.664, |
|
"step": 904000 |
|
}, |
|
{ |
|
"epoch": 7.44, |
|
"learning_rate": 6.20361877762028e-06, |
|
"loss": 2.9649, |
|
"step": 912000 |
|
}, |
|
{ |
|
"epoch": 7.44, |
|
"eval_loss": 2.994581460952759, |
|
"eval_runtime": 131.0156, |
|
"eval_samples_per_second": 788.265, |
|
"eval_steps_per_second": 49.269, |
|
"step": 912000 |
|
}, |
|
{ |
|
"epoch": 7.5, |
|
"eval_loss": 3.0002031326293945, |
|
"eval_runtime": 131.7355, |
|
"eval_samples_per_second": 783.957, |
|
"eval_steps_per_second": 49.0, |
|
"step": 920000 |
|
}, |
|
{ |
|
"epoch": 7.57, |
|
"learning_rate": 6.1369131993662975e-06, |
|
"loss": 2.9665, |
|
"step": 928000 |
|
}, |
|
{ |
|
"epoch": 7.57, |
|
"eval_loss": 2.9960474967956543, |
|
"eval_runtime": 131.6559, |
|
"eval_samples_per_second": 784.431, |
|
"eval_steps_per_second": 49.029, |
|
"step": 928000 |
|
}, |
|
{ |
|
"epoch": 7.63, |
|
"eval_loss": 3.0067989826202393, |
|
"eval_runtime": 131.8152, |
|
"eval_samples_per_second": 783.483, |
|
"eval_steps_per_second": 48.97, |
|
"step": 936000 |
|
}, |
|
{ |
|
"epoch": 7.7, |
|
"learning_rate": 6.070207621112316e-06, |
|
"loss": 2.9708, |
|
"step": 944000 |
|
}, |
|
{ |
|
"epoch": 7.7, |
|
"eval_loss": 2.993788242340088, |
|
"eval_runtime": 130.3799, |
|
"eval_samples_per_second": 792.108, |
|
"eval_steps_per_second": 49.509, |
|
"step": 944000 |
|
}, |
|
{ |
|
"epoch": 7.76, |
|
"eval_loss": 3.0126230716705322, |
|
"eval_runtime": 130.4447, |
|
"eval_samples_per_second": 791.715, |
|
"eval_steps_per_second": 49.485, |
|
"step": 952000 |
|
}, |
|
{ |
|
"epoch": 7.83, |
|
"learning_rate": 6.003502042858335e-06, |
|
"loss": 2.981, |
|
"step": 960000 |
|
}, |
|
{ |
|
"epoch": 7.83, |
|
"eval_loss": 2.9959194660186768, |
|
"eval_runtime": 132.0738, |
|
"eval_samples_per_second": 781.949, |
|
"eval_steps_per_second": 48.874, |
|
"step": 960000 |
|
}, |
|
{ |
|
"epoch": 7.89, |
|
"eval_loss": 2.995976448059082, |
|
"eval_runtime": 130.9412, |
|
"eval_samples_per_second": 788.713, |
|
"eval_steps_per_second": 49.297, |
|
"step": 968000 |
|
}, |
|
{ |
|
"epoch": 7.96, |
|
"learning_rate": 5.936796464604353e-06, |
|
"loss": 2.9805, |
|
"step": 976000 |
|
}, |
|
{ |
|
"epoch": 7.96, |
|
"eval_loss": 2.991947889328003, |
|
"eval_runtime": 130.0819, |
|
"eval_samples_per_second": 793.923, |
|
"eval_steps_per_second": 49.623, |
|
"step": 976000 |
|
}, |
|
{ |
|
"epoch": 8.02, |
|
"eval_loss": 3.0058255195617676, |
|
"eval_runtime": 130.7007, |
|
"eval_samples_per_second": 790.164, |
|
"eval_steps_per_second": 49.388, |
|
"step": 984000 |
|
}, |
|
{ |
|
"epoch": 8.09, |
|
"learning_rate": 5.870090886350371e-06, |
|
"loss": 2.9705, |
|
"step": 992000 |
|
}, |
|
{ |
|
"epoch": 8.09, |
|
"eval_loss": 3.0232017040252686, |
|
"eval_runtime": 129.9163, |
|
"eval_samples_per_second": 794.935, |
|
"eval_steps_per_second": 49.686, |
|
"step": 992000 |
|
}, |
|
{ |
|
"epoch": 8.15, |
|
"eval_loss": 3.0046939849853516, |
|
"eval_runtime": 130.7903, |
|
"eval_samples_per_second": 789.623, |
|
"eval_steps_per_second": 49.354, |
|
"step": 1000000 |
|
}, |
|
{ |
|
"epoch": 8.22, |
|
"learning_rate": 5.80338530809639e-06, |
|
"loss": 2.9715, |
|
"step": 1008000 |
|
}, |
|
{ |
|
"epoch": 8.22, |
|
"eval_loss": 3.0068600177764893, |
|
"eval_runtime": 131.6119, |
|
"eval_samples_per_second": 784.693, |
|
"eval_steps_per_second": 49.046, |
|
"step": 1008000 |
|
}, |
|
{ |
|
"epoch": 8.28, |
|
"eval_loss": 3.0018742084503174, |
|
"eval_runtime": 131.7567, |
|
"eval_samples_per_second": 783.831, |
|
"eval_steps_per_second": 48.992, |
|
"step": 1016000 |
|
}, |
|
{ |
|
"epoch": 8.35, |
|
"learning_rate": 5.736679729842408e-06, |
|
"loss": 2.9695, |
|
"step": 1024000 |
|
}, |
|
{ |
|
"epoch": 8.35, |
|
"eval_loss": 3.021596670150757, |
|
"eval_runtime": 131.2334, |
|
"eval_samples_per_second": 786.956, |
|
"eval_steps_per_second": 49.187, |
|
"step": 1024000 |
|
}, |
|
{ |
|
"epoch": 8.41, |
|
"eval_loss": 3.0219063758850098, |
|
"eval_runtime": 131.6228, |
|
"eval_samples_per_second": 784.629, |
|
"eval_steps_per_second": 49.042, |
|
"step": 1032000 |
|
}, |
|
{ |
|
"epoch": 8.48, |
|
"learning_rate": 5.669974151588427e-06, |
|
"loss": 2.9762, |
|
"step": 1040000 |
|
}, |
|
{ |
|
"epoch": 8.48, |
|
"eval_loss": 3.018242597579956, |
|
"eval_runtime": 131.898, |
|
"eval_samples_per_second": 782.991, |
|
"eval_steps_per_second": 48.939, |
|
"step": 1040000 |
|
}, |
|
{ |
|
"epoch": 8.55, |
|
"eval_loss": 3.0332210063934326, |
|
"eval_runtime": 132.3771, |
|
"eval_samples_per_second": 780.158, |
|
"eval_steps_per_second": 48.762, |
|
"step": 1048000 |
|
}, |
|
{ |
|
"epoch": 8.61, |
|
"learning_rate": 5.603268573334446e-06, |
|
"loss": 2.9786, |
|
"step": 1056000 |
|
}, |
|
{ |
|
"epoch": 8.61, |
|
"eval_loss": 3.001666307449341, |
|
"eval_runtime": 131.4368, |
|
"eval_samples_per_second": 785.739, |
|
"eval_steps_per_second": 49.111, |
|
"step": 1056000 |
|
}, |
|
{ |
|
"epoch": 8.68, |
|
"eval_loss": 3.0236458778381348, |
|
"eval_runtime": 130.9562, |
|
"eval_samples_per_second": 788.622, |
|
"eval_steps_per_second": 49.291, |
|
"step": 1064000 |
|
}, |
|
{ |
|
"epoch": 8.74, |
|
"learning_rate": 5.536562995080464e-06, |
|
"loss": 2.9889, |
|
"step": 1072000 |
|
}, |
|
{ |
|
"epoch": 8.74, |
|
"eval_loss": 3.0273077487945557, |
|
"eval_runtime": 131.9047, |
|
"eval_samples_per_second": 782.952, |
|
"eval_steps_per_second": 48.937, |
|
"step": 1072000 |
|
}, |
|
{ |
|
"epoch": 8.81, |
|
"eval_loss": 3.01967191696167, |
|
"eval_runtime": 131.9615, |
|
"eval_samples_per_second": 782.615, |
|
"eval_steps_per_second": 48.916, |
|
"step": 1080000 |
|
}, |
|
{ |
|
"epoch": 8.87, |
|
"learning_rate": 5.469857416826483e-06, |
|
"loss": 2.9842, |
|
"step": 1088000 |
|
}, |
|
{ |
|
"epoch": 8.87, |
|
"eval_loss": 3.037600040435791, |
|
"eval_runtime": 131.9507, |
|
"eval_samples_per_second": 782.679, |
|
"eval_steps_per_second": 48.92, |
|
"step": 1088000 |
|
}, |
|
{ |
|
"epoch": 8.94, |
|
"eval_loss": 3.032285213470459, |
|
"eval_runtime": 131.7234, |
|
"eval_samples_per_second": 784.029, |
|
"eval_steps_per_second": 49.004, |
|
"step": 1096000 |
|
}, |
|
{ |
|
"epoch": 9.0, |
|
"learning_rate": 5.403151838572501e-06, |
|
"loss": 2.9912, |
|
"step": 1104000 |
|
}, |
|
{ |
|
"epoch": 9.0, |
|
"eval_loss": 3.031731367111206, |
|
"eval_runtime": 131.8868, |
|
"eval_samples_per_second": 783.058, |
|
"eval_steps_per_second": 48.944, |
|
"step": 1104000 |
|
}, |
|
{ |
|
"epoch": 9.07, |
|
"eval_loss": 3.022475481033325, |
|
"eval_runtime": 131.0568, |
|
"eval_samples_per_second": 788.017, |
|
"eval_steps_per_second": 49.253, |
|
"step": 1112000 |
|
}, |
|
{ |
|
"epoch": 9.13, |
|
"learning_rate": 5.33644626031852e-06, |
|
"loss": 2.9919, |
|
"step": 1120000 |
|
}, |
|
{ |
|
"epoch": 9.13, |
|
"eval_loss": 3.036106824874878, |
|
"eval_runtime": 132.2182, |
|
"eval_samples_per_second": 781.095, |
|
"eval_steps_per_second": 48.821, |
|
"step": 1120000 |
|
}, |
|
{ |
|
"epoch": 9.2, |
|
"eval_loss": 3.0432300567626953, |
|
"eval_runtime": 131.9088, |
|
"eval_samples_per_second": 782.927, |
|
"eval_steps_per_second": 48.935, |
|
"step": 1128000 |
|
}, |
|
{ |
|
"epoch": 9.26, |
|
"learning_rate": 5.269740682064538e-06, |
|
"loss": 2.9872, |
|
"step": 1136000 |
|
}, |
|
{ |
|
"epoch": 9.26, |
|
"eval_loss": 3.0306613445281982, |
|
"eval_runtime": 131.2348, |
|
"eval_samples_per_second": 786.948, |
|
"eval_steps_per_second": 49.187, |
|
"step": 1136000 |
|
}, |
|
{ |
|
"epoch": 9.33, |
|
"eval_loss": 3.0481879711151123, |
|
"eval_runtime": 131.7205, |
|
"eval_samples_per_second": 784.046, |
|
"eval_steps_per_second": 49.005, |
|
"step": 1144000 |
|
}, |
|
{ |
|
"epoch": 9.39, |
|
"learning_rate": 5.203035103810556e-06, |
|
"loss": 2.9823, |
|
"step": 1152000 |
|
}, |
|
{ |
|
"epoch": 9.39, |
|
"eval_loss": 3.035399913787842, |
|
"eval_runtime": 131.2188, |
|
"eval_samples_per_second": 787.044, |
|
"eval_steps_per_second": 49.193, |
|
"step": 1152000 |
|
}, |
|
{ |
|
"epoch": 9.46, |
|
"eval_loss": 3.0419015884399414, |
|
"eval_runtime": 131.8024, |
|
"eval_samples_per_second": 783.559, |
|
"eval_steps_per_second": 48.975, |
|
"step": 1160000 |
|
}, |
|
{ |
|
"epoch": 9.52, |
|
"learning_rate": 5.136329525556575e-06, |
|
"loss": 2.9882, |
|
"step": 1168000 |
|
}, |
|
{ |
|
"epoch": 9.52, |
|
"eval_loss": 3.0567431449890137, |
|
"eval_runtime": 132.7773, |
|
"eval_samples_per_second": 777.806, |
|
"eval_steps_per_second": 48.615, |
|
"step": 1168000 |
|
}, |
|
{ |
|
"epoch": 9.59, |
|
"eval_loss": 3.0395400524139404, |
|
"eval_runtime": 131.6554, |
|
"eval_samples_per_second": 784.434, |
|
"eval_steps_per_second": 49.03, |
|
"step": 1176000 |
|
}, |
|
{ |
|
"epoch": 9.65, |
|
"learning_rate": 5.0696239473025935e-06, |
|
"loss": 3.0079, |
|
"step": 1184000 |
|
}, |
|
{ |
|
"epoch": 9.65, |
|
"eval_loss": 3.0572261810302734, |
|
"eval_runtime": 132.0184, |
|
"eval_samples_per_second": 782.278, |
|
"eval_steps_per_second": 48.895, |
|
"step": 1184000 |
|
}, |
|
{ |
|
"epoch": 9.72, |
|
"eval_loss": 3.04028058052063, |
|
"eval_runtime": 131.8056, |
|
"eval_samples_per_second": 783.54, |
|
"eval_steps_per_second": 48.974, |
|
"step": 1192000 |
|
}, |
|
{ |
|
"epoch": 9.78, |
|
"learning_rate": 5.002918369048611e-06, |
|
"loss": 3.0243, |
|
"step": 1200000 |
|
}, |
|
{ |
|
"epoch": 9.78, |
|
"eval_loss": 3.047227621078491, |
|
"eval_runtime": 131.9863, |
|
"eval_samples_per_second": 782.467, |
|
"eval_steps_per_second": 48.907, |
|
"step": 1200000 |
|
}, |
|
{ |
|
"epoch": 9.85, |
|
"eval_loss": 3.052279472351074, |
|
"eval_runtime": 132.2017, |
|
"eval_samples_per_second": 781.193, |
|
"eval_steps_per_second": 48.827, |
|
"step": 1208000 |
|
}, |
|
{ |
|
"epoch": 9.92, |
|
"learning_rate": 4.936212790794631e-06, |
|
"loss": 3.0127, |
|
"step": 1216000 |
|
}, |
|
{ |
|
"epoch": 9.92, |
|
"eval_loss": 3.053439140319824, |
|
"eval_runtime": 131.3363, |
|
"eval_samples_per_second": 786.34, |
|
"eval_steps_per_second": 49.149, |
|
"step": 1216000 |
|
}, |
|
{ |
|
"epoch": 9.98, |
|
"eval_loss": 3.0434141159057617, |
|
"eval_runtime": 131.7363, |
|
"eval_samples_per_second": 783.952, |
|
"eval_steps_per_second": 48.999, |
|
"step": 1224000 |
|
}, |
|
{ |
|
"epoch": 10.05, |
|
"learning_rate": 4.869507212540649e-06, |
|
"loss": 3.0106, |
|
"step": 1232000 |
|
}, |
|
{ |
|
"epoch": 10.05, |
|
"eval_loss": 3.0687036514282227, |
|
"eval_runtime": 131.4287, |
|
"eval_samples_per_second": 785.788, |
|
"eval_steps_per_second": 49.114, |
|
"step": 1232000 |
|
}, |
|
{ |
|
"epoch": 10.11, |
|
"eval_loss": 3.0677733421325684, |
|
"eval_runtime": 132.6312, |
|
"eval_samples_per_second": 778.663, |
|
"eval_steps_per_second": 48.669, |
|
"step": 1240000 |
|
}, |
|
{ |
|
"epoch": 10.18, |
|
"learning_rate": 4.802801634286667e-06, |
|
"loss": 3.0063, |
|
"step": 1248000 |
|
}, |
|
{ |
|
"epoch": 10.18, |
|
"eval_loss": 3.0652401447296143, |
|
"eval_runtime": 132.5035, |
|
"eval_samples_per_second": 779.413, |
|
"eval_steps_per_second": 48.716, |
|
"step": 1248000 |
|
}, |
|
{ |
|
"epoch": 10.24, |
|
"eval_loss": 3.0768234729766846, |
|
"eval_runtime": 131.7104, |
|
"eval_samples_per_second": 784.107, |
|
"eval_steps_per_second": 49.009, |
|
"step": 1256000 |
|
}, |
|
{ |
|
"epoch": 10.31, |
|
"learning_rate": 4.7360960560326865e-06, |
|
"loss": 3.0187, |
|
"step": 1264000 |
|
}, |
|
{ |
|
"epoch": 10.31, |
|
"eval_loss": 3.069179058074951, |
|
"eval_runtime": 132.7895, |
|
"eval_samples_per_second": 777.735, |
|
"eval_steps_per_second": 48.611, |
|
"step": 1264000 |
|
}, |
|
{ |
|
"epoch": 10.37, |
|
"eval_loss": 3.0621213912963867, |
|
"eval_runtime": 132.041, |
|
"eval_samples_per_second": 782.144, |
|
"eval_steps_per_second": 48.886, |
|
"step": 1272000 |
|
}, |
|
{ |
|
"epoch": 10.44, |
|
"learning_rate": 4.669390477778704e-06, |
|
"loss": 3.0202, |
|
"step": 1280000 |
|
}, |
|
{ |
|
"epoch": 10.44, |
|
"eval_loss": 3.0663187503814697, |
|
"eval_runtime": 132.2635, |
|
"eval_samples_per_second": 780.828, |
|
"eval_steps_per_second": 48.804, |
|
"step": 1280000 |
|
}, |
|
{ |
|
"epoch": 10.5, |
|
"eval_loss": 3.0537171363830566, |
|
"eval_runtime": 132.2536, |
|
"eval_samples_per_second": 780.886, |
|
"eval_steps_per_second": 48.808, |
|
"step": 1288000 |
|
}, |
|
{ |
|
"epoch": 10.57, |
|
"learning_rate": 4.602684899524723e-06, |
|
"loss": 3.0219, |
|
"step": 1296000 |
|
}, |
|
{ |
|
"epoch": 10.57, |
|
"eval_loss": 3.072500705718994, |
|
"eval_runtime": 132.0295, |
|
"eval_samples_per_second": 782.212, |
|
"eval_steps_per_second": 48.891, |
|
"step": 1296000 |
|
}, |
|
{ |
|
"epoch": 10.63, |
|
"eval_loss": 3.0664169788360596, |
|
"eval_runtime": 131.9651, |
|
"eval_samples_per_second": 782.593, |
|
"eval_steps_per_second": 48.914, |
|
"step": 1304000 |
|
}, |
|
{ |
|
"epoch": 10.7, |
|
"learning_rate": 4.5359793212707415e-06, |
|
"loss": 3.0232, |
|
"step": 1312000 |
|
}, |
|
{ |
|
"epoch": 10.7, |
|
"eval_loss": 3.0724074840545654, |
|
"eval_runtime": 133.2104, |
|
"eval_samples_per_second": 775.277, |
|
"eval_steps_per_second": 48.457, |
|
"step": 1312000 |
|
}, |
|
{ |
|
"epoch": 10.76, |
|
"eval_loss": 3.0476126670837402, |
|
"eval_runtime": 132.7171, |
|
"eval_samples_per_second": 778.159, |
|
"eval_steps_per_second": 48.637, |
|
"step": 1320000 |
|
}, |
|
{ |
|
"epoch": 10.83, |
|
"learning_rate": 4.46927374301676e-06, |
|
"loss": 3.0247, |
|
"step": 1328000 |
|
}, |
|
{ |
|
"epoch": 10.83, |
|
"eval_loss": 3.0729353427886963, |
|
"eval_runtime": 132.4018, |
|
"eval_samples_per_second": 780.012, |
|
"eval_steps_per_second": 48.753, |
|
"step": 1328000 |
|
}, |
|
{ |
|
"epoch": 10.89, |
|
"eval_loss": 3.0645902156829834, |
|
"eval_runtime": 133.3334, |
|
"eval_samples_per_second": 774.562, |
|
"eval_steps_per_second": 48.412, |
|
"step": 1336000 |
|
}, |
|
{ |
|
"epoch": 10.96, |
|
"learning_rate": 4.402568164762779e-06, |
|
"loss": 3.0335, |
|
"step": 1344000 |
|
}, |
|
{ |
|
"epoch": 10.96, |
|
"eval_loss": 3.0603559017181396, |
|
"eval_runtime": 131.9232, |
|
"eval_samples_per_second": 782.842, |
|
"eval_steps_per_second": 48.93, |
|
"step": 1344000 |
|
}, |
|
{ |
|
"epoch": 11.02, |
|
"eval_loss": 3.0630509853363037, |
|
"eval_runtime": 132.4502, |
|
"eval_samples_per_second": 779.727, |
|
"eval_steps_per_second": 48.735, |
|
"step": 1352000 |
|
}, |
|
{ |
|
"epoch": 11.09, |
|
"learning_rate": 4.335862586508797e-06, |
|
"loss": 3.0182, |
|
"step": 1360000 |
|
}, |
|
{ |
|
"epoch": 11.09, |
|
"eval_loss": 3.0669026374816895, |
|
"eval_runtime": 133.3499, |
|
"eval_samples_per_second": 774.466, |
|
"eval_steps_per_second": 48.406, |
|
"step": 1360000 |
|
}, |
|
{ |
|
"epoch": 11.15, |
|
"eval_loss": 3.0626471042633057, |
|
"eval_runtime": 133.0041, |
|
"eval_samples_per_second": 776.48, |
|
"eval_steps_per_second": 48.532, |
|
"step": 1368000 |
|
}, |
|
{ |
|
"epoch": 11.22, |
|
"learning_rate": 4.269157008254816e-06, |
|
"loss": 3.0124, |
|
"step": 1376000 |
|
}, |
|
{ |
|
"epoch": 11.22, |
|
"eval_loss": 3.053469181060791, |
|
"eval_runtime": 133.5969, |
|
"eval_samples_per_second": 773.034, |
|
"eval_steps_per_second": 48.317, |
|
"step": 1376000 |
|
}, |
|
{ |
|
"epoch": 11.29, |
|
"eval_loss": 3.076792001724243, |
|
"eval_runtime": 133.0672, |
|
"eval_samples_per_second": 776.112, |
|
"eval_steps_per_second": 48.509, |
|
"step": 1384000 |
|
}, |
|
{ |
|
"epoch": 11.35, |
|
"learning_rate": 4.202451430000834e-06, |
|
"loss": 3.016, |
|
"step": 1392000 |
|
}, |
|
{ |
|
"epoch": 11.35, |
|
"eval_loss": 3.0615081787109375, |
|
"eval_runtime": 133.9693, |
|
"eval_samples_per_second": 770.886, |
|
"eval_steps_per_second": 48.183, |
|
"step": 1392000 |
|
}, |
|
{ |
|
"epoch": 11.42, |
|
"eval_loss": 3.0689148902893066, |
|
"eval_runtime": 134.418, |
|
"eval_samples_per_second": 768.312, |
|
"eval_steps_per_second": 48.022, |
|
"step": 1400000 |
|
}, |
|
{ |
|
"epoch": 11.48, |
|
"learning_rate": 4.135745851746852e-06, |
|
"loss": 3.0133, |
|
"step": 1408000 |
|
}, |
|
{ |
|
"epoch": 11.48, |
|
"eval_loss": 3.069943428039551, |
|
"eval_runtime": 133.7409, |
|
"eval_samples_per_second": 772.202, |
|
"eval_steps_per_second": 48.265, |
|
"step": 1408000 |
|
}, |
|
{ |
|
"epoch": 11.55, |
|
"eval_loss": 3.0647213459014893, |
|
"eval_runtime": 134.5422, |
|
"eval_samples_per_second": 767.603, |
|
"eval_steps_per_second": 47.977, |
|
"step": 1416000 |
|
}, |
|
{ |
|
"epoch": 11.61, |
|
"learning_rate": 4.069040273492872e-06, |
|
"loss": 3.0227, |
|
"step": 1424000 |
|
}, |
|
{ |
|
"epoch": 11.61, |
|
"eval_loss": 3.0704684257507324, |
|
"eval_runtime": 135.8934, |
|
"eval_samples_per_second": 759.97, |
|
"eval_steps_per_second": 47.5, |
|
"step": 1424000 |
|
}, |
|
{ |
|
"epoch": 11.68, |
|
"eval_loss": 3.0705504417419434, |
|
"eval_runtime": 133.4155, |
|
"eval_samples_per_second": 774.086, |
|
"eval_steps_per_second": 48.383, |
|
"step": 1432000 |
|
}, |
|
{ |
|
"epoch": 11.74, |
|
"learning_rate": 4.0023346952388895e-06, |
|
"loss": 3.0267, |
|
"step": 1440000 |
|
}, |
|
{ |
|
"epoch": 11.74, |
|
"eval_loss": 3.069384813308716, |
|
"eval_runtime": 133.2021, |
|
"eval_samples_per_second": 775.326, |
|
"eval_steps_per_second": 48.46, |
|
"step": 1440000 |
|
}, |
|
{ |
|
"epoch": 11.81, |
|
"eval_loss": 3.0720527172088623, |
|
"eval_runtime": 133.9349, |
|
"eval_samples_per_second": 771.083, |
|
"eval_steps_per_second": 48.195, |
|
"step": 1448000 |
|
}, |
|
{ |
|
"epoch": 11.87, |
|
"learning_rate": 3.935629116984908e-06, |
|
"loss": 3.021, |
|
"step": 1456000 |
|
}, |
|
{ |
|
"epoch": 11.87, |
|
"eval_loss": 3.068966865539551, |
|
"eval_runtime": 132.597, |
|
"eval_samples_per_second": 778.864, |
|
"eval_steps_per_second": 48.681, |
|
"step": 1456000 |
|
}, |
|
{ |
|
"epoch": 11.94, |
|
"eval_loss": 3.060349702835083, |
|
"eval_runtime": 134.1972, |
|
"eval_samples_per_second": 769.576, |
|
"eval_steps_per_second": 48.101, |
|
"step": 1464000 |
|
}, |
|
{ |
|
"epoch": 12.0, |
|
"learning_rate": 3.868923538730927e-06, |
|
"loss": 3.0144, |
|
"step": 1472000 |
|
}, |
|
{ |
|
"epoch": 12.0, |
|
"eval_loss": 3.065760374069214, |
|
"eval_runtime": 134.4544, |
|
"eval_samples_per_second": 768.104, |
|
"eval_steps_per_second": 48.009, |
|
"step": 1472000 |
|
}, |
|
{ |
|
"epoch": 12.07, |
|
"eval_loss": 3.0719916820526123, |
|
"eval_runtime": 133.6199, |
|
"eval_samples_per_second": 772.902, |
|
"eval_steps_per_second": 48.309, |
|
"step": 1480000 |
|
}, |
|
{ |
|
"epoch": 12.13, |
|
"learning_rate": 3.8022179604769453e-06, |
|
"loss": 3.0204, |
|
"step": 1488000 |
|
}, |
|
{ |
|
"epoch": 12.13, |
|
"eval_loss": 3.066779851913452, |
|
"eval_runtime": 133.3793, |
|
"eval_samples_per_second": 774.296, |
|
"eval_steps_per_second": 48.396, |
|
"step": 1488000 |
|
}, |
|
{ |
|
"epoch": 12.2, |
|
"eval_loss": 3.0773117542266846, |
|
"eval_runtime": 135.2249, |
|
"eval_samples_per_second": 763.728, |
|
"eval_steps_per_second": 47.735, |
|
"step": 1496000 |
|
}, |
|
{ |
|
"epoch": 12.26, |
|
"learning_rate": 3.735512382222964e-06, |
|
"loss": 3.0085, |
|
"step": 1504000 |
|
}, |
|
{ |
|
"epoch": 12.26, |
|
"eval_loss": 3.0847675800323486, |
|
"eval_runtime": 133.4406, |
|
"eval_samples_per_second": 773.94, |
|
"eval_steps_per_second": 48.374, |
|
"step": 1504000 |
|
}, |
|
{ |
|
"epoch": 12.33, |
|
"eval_loss": 3.0567853450775146, |
|
"eval_runtime": 136.5184, |
|
"eval_samples_per_second": 756.492, |
|
"eval_steps_per_second": 47.283, |
|
"step": 1512000 |
|
}, |
|
{ |
|
"epoch": 12.39, |
|
"learning_rate": 3.668806803968982e-06, |
|
"loss": 3.0146, |
|
"step": 1520000 |
|
}, |
|
{ |
|
"epoch": 12.39, |
|
"eval_loss": 3.0783281326293945, |
|
"eval_runtime": 134.8805, |
|
"eval_samples_per_second": 765.678, |
|
"eval_steps_per_second": 47.857, |
|
"step": 1520000 |
|
}, |
|
{ |
|
"epoch": 12.46, |
|
"eval_loss": 3.073552370071411, |
|
"eval_runtime": 133.8542, |
|
"eval_samples_per_second": 771.549, |
|
"eval_steps_per_second": 48.224, |
|
"step": 1528000 |
|
}, |
|
{ |
|
"epoch": 12.52, |
|
"learning_rate": 3.6021012257150007e-06, |
|
"loss": 3.02, |
|
"step": 1536000 |
|
}, |
|
{ |
|
"epoch": 12.52, |
|
"eval_loss": 3.0533952713012695, |
|
"eval_runtime": 133.5934, |
|
"eval_samples_per_second": 773.055, |
|
"eval_steps_per_second": 48.318, |
|
"step": 1536000 |
|
}, |
|
{ |
|
"epoch": 12.59, |
|
"eval_loss": 3.0684494972229004, |
|
"eval_runtime": 133.2901, |
|
"eval_samples_per_second": 774.814, |
|
"eval_steps_per_second": 48.428, |
|
"step": 1544000 |
|
}, |
|
{ |
|
"epoch": 12.65, |
|
"learning_rate": 3.535395647461019e-06, |
|
"loss": 3.0229, |
|
"step": 1552000 |
|
}, |
|
{ |
|
"epoch": 12.65, |
|
"eval_loss": 3.07673978805542, |
|
"eval_runtime": 134.115, |
|
"eval_samples_per_second": 770.048, |
|
"eval_steps_per_second": 48.13, |
|
"step": 1552000 |
|
}, |
|
{ |
|
"epoch": 12.72, |
|
"eval_loss": 3.0568747520446777, |
|
"eval_runtime": 134.3484, |
|
"eval_samples_per_second": 768.71, |
|
"eval_steps_per_second": 48.047, |
|
"step": 1560000 |
|
}, |
|
{ |
|
"epoch": 12.79, |
|
"learning_rate": 3.468690069207038e-06, |
|
"loss": 3.0152, |
|
"step": 1568000 |
|
}, |
|
{ |
|
"epoch": 12.79, |
|
"eval_loss": 3.0787863731384277, |
|
"eval_runtime": 133.764, |
|
"eval_samples_per_second": 772.068, |
|
"eval_steps_per_second": 48.257, |
|
"step": 1568000 |
|
}, |
|
{ |
|
"epoch": 12.85, |
|
"eval_loss": 3.066344738006592, |
|
"eval_runtime": 133.9216, |
|
"eval_samples_per_second": 771.16, |
|
"eval_steps_per_second": 48.2, |
|
"step": 1576000 |
|
}, |
|
{ |
|
"epoch": 12.92, |
|
"learning_rate": 3.4019844909530565e-06, |
|
"loss": 3.02, |
|
"step": 1584000 |
|
}, |
|
{ |
|
"epoch": 12.92, |
|
"eval_loss": 3.067016839981079, |
|
"eval_runtime": 133.9971, |
|
"eval_samples_per_second": 770.725, |
|
"eval_steps_per_second": 48.173, |
|
"step": 1584000 |
|
}, |
|
{ |
|
"epoch": 12.98, |
|
"eval_loss": 3.0683343410491943, |
|
"eval_runtime": 134.2208, |
|
"eval_samples_per_second": 769.441, |
|
"eval_steps_per_second": 48.092, |
|
"step": 1592000 |
|
}, |
|
{ |
|
"epoch": 13.05, |
|
"learning_rate": 3.3352789126990747e-06, |
|
"loss": 3.0128, |
|
"step": 1600000 |
|
}, |
|
{ |
|
"epoch": 13.05, |
|
"eval_loss": 3.071779489517212, |
|
"eval_runtime": 134.2033, |
|
"eval_samples_per_second": 769.541, |
|
"eval_steps_per_second": 48.099, |
|
"step": 1600000 |
|
}, |
|
{ |
|
"epoch": 13.11, |
|
"eval_loss": 3.0846707820892334, |
|
"eval_runtime": 134.6625, |
|
"eval_samples_per_second": 766.917, |
|
"eval_steps_per_second": 47.935, |
|
"step": 1608000 |
|
}, |
|
{ |
|
"epoch": 13.18, |
|
"learning_rate": 3.2685733344450933e-06, |
|
"loss": 3.016, |
|
"step": 1616000 |
|
}, |
|
{ |
|
"epoch": 13.18, |
|
"eval_loss": 3.066356897354126, |
|
"eval_runtime": 134.4556, |
|
"eval_samples_per_second": 768.097, |
|
"eval_steps_per_second": 48.008, |
|
"step": 1616000 |
|
}, |
|
{ |
|
"epoch": 13.24, |
|
"eval_loss": 3.0687520503997803, |
|
"eval_runtime": 134.3299, |
|
"eval_samples_per_second": 768.816, |
|
"eval_steps_per_second": 48.053, |
|
"step": 1624000 |
|
}, |
|
{ |
|
"epoch": 13.31, |
|
"learning_rate": 3.2018677561911115e-06, |
|
"loss": 3.0007, |
|
"step": 1632000 |
|
}, |
|
{ |
|
"epoch": 13.31, |
|
"eval_loss": 3.0740671157836914, |
|
"eval_runtime": 134.4424, |
|
"eval_samples_per_second": 768.173, |
|
"eval_steps_per_second": 48.013, |
|
"step": 1632000 |
|
}, |
|
{ |
|
"epoch": 13.37, |
|
"eval_loss": 3.0663323402404785, |
|
"eval_runtime": 134.2383, |
|
"eval_samples_per_second": 769.341, |
|
"eval_steps_per_second": 48.086, |
|
"step": 1640000 |
|
}, |
|
{ |
|
"epoch": 13.44, |
|
"learning_rate": 3.1351621779371306e-06, |
|
"loss": 3.0241, |
|
"step": 1648000 |
|
}, |
|
{ |
|
"epoch": 13.44, |
|
"eval_loss": 3.0607213973999023, |
|
"eval_runtime": 134.0502, |
|
"eval_samples_per_second": 770.42, |
|
"eval_steps_per_second": 48.154, |
|
"step": 1648000 |
|
}, |
|
{ |
|
"epoch": 13.5, |
|
"eval_loss": 3.0635085105895996, |
|
"eval_runtime": 133.9453, |
|
"eval_samples_per_second": 771.024, |
|
"eval_steps_per_second": 48.191, |
|
"step": 1656000 |
|
}, |
|
{ |
|
"epoch": 13.57, |
|
"learning_rate": 3.0684565996831487e-06, |
|
"loss": 3.0103, |
|
"step": 1664000 |
|
}, |
|
{ |
|
"epoch": 13.57, |
|
"eval_loss": 3.0730724334716797, |
|
"eval_runtime": 135.0683, |
|
"eval_samples_per_second": 764.613, |
|
"eval_steps_per_second": 47.791, |
|
"step": 1664000 |
|
}, |
|
{ |
|
"epoch": 13.63, |
|
"eval_loss": 3.0649466514587402, |
|
"eval_runtime": 134.138, |
|
"eval_samples_per_second": 769.916, |
|
"eval_steps_per_second": 48.122, |
|
"step": 1672000 |
|
}, |
|
{ |
|
"epoch": 13.7, |
|
"learning_rate": 3.0017510214291673e-06, |
|
"loss": 3.0188, |
|
"step": 1680000 |
|
}, |
|
{ |
|
"epoch": 13.7, |
|
"eval_loss": 3.058675765991211, |
|
"eval_runtime": 134.4659, |
|
"eval_samples_per_second": 768.039, |
|
"eval_steps_per_second": 48.005, |
|
"step": 1680000 |
|
}, |
|
{ |
|
"epoch": 13.76, |
|
"eval_loss": 3.0703861713409424, |
|
"eval_runtime": 134.1628, |
|
"eval_samples_per_second": 769.773, |
|
"eval_steps_per_second": 48.113, |
|
"step": 1688000 |
|
}, |
|
{ |
|
"epoch": 13.83, |
|
"learning_rate": 2.9350454431751855e-06, |
|
"loss": 3.0217, |
|
"step": 1696000 |
|
}, |
|
{ |
|
"epoch": 13.83, |
|
"eval_loss": 3.066443920135498, |
|
"eval_runtime": 135.8944, |
|
"eval_samples_per_second": 759.965, |
|
"eval_steps_per_second": 47.5, |
|
"step": 1696000 |
|
}, |
|
{ |
|
"epoch": 13.89, |
|
"eval_loss": 3.0626626014709473, |
|
"eval_runtime": 135.45, |
|
"eval_samples_per_second": 762.458, |
|
"eval_steps_per_second": 47.656, |
|
"step": 1704000 |
|
}, |
|
{ |
|
"epoch": 13.96, |
|
"learning_rate": 2.868339864921204e-06, |
|
"loss": 3.0282, |
|
"step": 1712000 |
|
}, |
|
{ |
|
"epoch": 13.96, |
|
"eval_loss": 3.071357488632202, |
|
"eval_runtime": 134.3182, |
|
"eval_samples_per_second": 768.883, |
|
"eval_steps_per_second": 48.058, |
|
"step": 1712000 |
|
}, |
|
{ |
|
"epoch": 14.02, |
|
"eval_loss": 3.0688371658325195, |
|
"eval_runtime": 135.2782, |
|
"eval_samples_per_second": 763.427, |
|
"eval_steps_per_second": 47.716, |
|
"step": 1720000 |
|
}, |
|
{ |
|
"epoch": 14.09, |
|
"learning_rate": 2.801634286667223e-06, |
|
"loss": 3.0166, |
|
"step": 1728000 |
|
}, |
|
{ |
|
"epoch": 14.09, |
|
"eval_loss": 3.05212664604187, |
|
"eval_runtime": 135.0648, |
|
"eval_samples_per_second": 764.633, |
|
"eval_steps_per_second": 47.792, |
|
"step": 1728000 |
|
}, |
|
{ |
|
"epoch": 14.16, |
|
"eval_loss": 3.0538179874420166, |
|
"eval_runtime": 134.2844, |
|
"eval_samples_per_second": 769.076, |
|
"eval_steps_per_second": 48.07, |
|
"step": 1736000 |
|
}, |
|
{ |
|
"epoch": 14.22, |
|
"learning_rate": 2.7349287084132413e-06, |
|
"loss": 3.0134, |
|
"step": 1744000 |
|
}, |
|
{ |
|
"epoch": 14.22, |
|
"eval_loss": 3.064086437225342, |
|
"eval_runtime": 135.4053, |
|
"eval_samples_per_second": 762.71, |
|
"eval_steps_per_second": 47.672, |
|
"step": 1744000 |
|
}, |
|
{ |
|
"epoch": 14.29, |
|
"eval_loss": 3.063884735107422, |
|
"eval_runtime": 134.2537, |
|
"eval_samples_per_second": 769.253, |
|
"eval_steps_per_second": 48.081, |
|
"step": 1752000 |
|
}, |
|
{ |
|
"epoch": 14.35, |
|
"learning_rate": 2.66822313015926e-06, |
|
"loss": 3.0032, |
|
"step": 1760000 |
|
}, |
|
{ |
|
"epoch": 14.35, |
|
"eval_loss": 3.0587823390960693, |
|
"eval_runtime": 135.0451, |
|
"eval_samples_per_second": 764.745, |
|
"eval_steps_per_second": 47.799, |
|
"step": 1760000 |
|
}, |
|
{ |
|
"epoch": 14.42, |
|
"eval_loss": 3.064620018005371, |
|
"eval_runtime": 134.9837, |
|
"eval_samples_per_second": 765.092, |
|
"eval_steps_per_second": 47.821, |
|
"step": 1768000 |
|
}, |
|
{ |
|
"epoch": 14.48, |
|
"learning_rate": 2.601517551905278e-06, |
|
"loss": 3.0136, |
|
"step": 1776000 |
|
}, |
|
{ |
|
"epoch": 14.48, |
|
"eval_loss": 3.062889337539673, |
|
"eval_runtime": 134.9119, |
|
"eval_samples_per_second": 765.499, |
|
"eval_steps_per_second": 47.846, |
|
"step": 1776000 |
|
}, |
|
{ |
|
"epoch": 14.55, |
|
"eval_loss": 3.0578110218048096, |
|
"eval_runtime": 136.5221, |
|
"eval_samples_per_second": 756.471, |
|
"eval_steps_per_second": 47.282, |
|
"step": 1784000 |
|
}, |
|
{ |
|
"epoch": 14.61, |
|
"learning_rate": 2.5348119736512967e-06, |
|
"loss": 3.0086, |
|
"step": 1792000 |
|
}, |
|
{ |
|
"epoch": 14.61, |
|
"eval_loss": 3.0528934001922607, |
|
"eval_runtime": 135.6145, |
|
"eval_samples_per_second": 761.534, |
|
"eval_steps_per_second": 47.598, |
|
"step": 1792000 |
|
}, |
|
{ |
|
"epoch": 14.68, |
|
"eval_loss": 3.0615251064300537, |
|
"eval_runtime": 135.3281, |
|
"eval_samples_per_second": 763.145, |
|
"eval_steps_per_second": 47.699, |
|
"step": 1800000 |
|
}, |
|
{ |
|
"epoch": 14.74, |
|
"learning_rate": 2.4681063953973154e-06, |
|
"loss": 3.019, |
|
"step": 1808000 |
|
}, |
|
{ |
|
"epoch": 14.74, |
|
"eval_loss": 3.0565857887268066, |
|
"eval_runtime": 134.9377, |
|
"eval_samples_per_second": 765.353, |
|
"eval_steps_per_second": 47.837, |
|
"step": 1808000 |
|
}, |
|
{ |
|
"epoch": 14.81, |
|
"eval_loss": 3.0658679008483887, |
|
"eval_runtime": 135.2159, |
|
"eval_samples_per_second": 763.778, |
|
"eval_steps_per_second": 47.738, |
|
"step": 1816000 |
|
}, |
|
{ |
|
"epoch": 14.87, |
|
"learning_rate": 2.4014008171433335e-06, |
|
"loss": 3.024, |
|
"step": 1824000 |
|
}, |
|
{ |
|
"epoch": 14.87, |
|
"eval_loss": 3.061464786529541, |
|
"eval_runtime": 135.2789, |
|
"eval_samples_per_second": 763.423, |
|
"eval_steps_per_second": 47.716, |
|
"step": 1824000 |
|
}, |
|
{ |
|
"epoch": 14.94, |
|
"eval_loss": 3.0530033111572266, |
|
"eval_runtime": 135.9081, |
|
"eval_samples_per_second": 759.889, |
|
"eval_steps_per_second": 47.495, |
|
"step": 1832000 |
|
}, |
|
{ |
|
"epoch": 15.0, |
|
"learning_rate": 2.334695238889352e-06, |
|
"loss": 3.0089, |
|
"step": 1840000 |
|
}, |
|
{ |
|
"epoch": 15.0, |
|
"eval_loss": 3.0796985626220703, |
|
"eval_runtime": 135.2715, |
|
"eval_samples_per_second": 763.465, |
|
"eval_steps_per_second": 47.719, |
|
"step": 1840000 |
|
}, |
|
{ |
|
"epoch": 15.07, |
|
"eval_loss": 3.0700411796569824, |
|
"eval_runtime": 136.6273, |
|
"eval_samples_per_second": 755.888, |
|
"eval_steps_per_second": 47.245, |
|
"step": 1848000 |
|
}, |
|
{ |
|
"epoch": 15.13, |
|
"learning_rate": 2.2679896606353707e-06, |
|
"loss": 3.0174, |
|
"step": 1856000 |
|
}, |
|
{ |
|
"epoch": 15.13, |
|
"eval_loss": 3.0748071670532227, |
|
"eval_runtime": 136.44, |
|
"eval_samples_per_second": 756.926, |
|
"eval_steps_per_second": 47.31, |
|
"step": 1856000 |
|
}, |
|
{ |
|
"epoch": 15.2, |
|
"eval_loss": 3.064267635345459, |
|
"eval_runtime": 135.3728, |
|
"eval_samples_per_second": 762.894, |
|
"eval_steps_per_second": 47.683, |
|
"step": 1864000 |
|
}, |
|
{ |
|
"epoch": 15.26, |
|
"learning_rate": 2.2012840823813894e-06, |
|
"loss": 3.0176, |
|
"step": 1872000 |
|
}, |
|
{ |
|
"epoch": 15.26, |
|
"eval_loss": 3.0627517700195312, |
|
"eval_runtime": 135.5713, |
|
"eval_samples_per_second": 761.776, |
|
"eval_steps_per_second": 47.613, |
|
"step": 1872000 |
|
}, |
|
{ |
|
"epoch": 15.33, |
|
"eval_loss": 3.0629563331604004, |
|
"eval_runtime": 135.894, |
|
"eval_samples_per_second": 759.967, |
|
"eval_steps_per_second": 47.5, |
|
"step": 1880000 |
|
}, |
|
{ |
|
"epoch": 15.39, |
|
"learning_rate": 2.134578504127408e-06, |
|
"loss": 3.0164, |
|
"step": 1888000 |
|
}, |
|
{ |
|
"epoch": 15.39, |
|
"eval_loss": 3.0721538066864014, |
|
"eval_runtime": 135.9329, |
|
"eval_samples_per_second": 759.75, |
|
"eval_steps_per_second": 47.487, |
|
"step": 1888000 |
|
}, |
|
{ |
|
"epoch": 15.46, |
|
"eval_loss": 3.0744197368621826, |
|
"eval_runtime": 135.4506, |
|
"eval_samples_per_second": 762.455, |
|
"eval_steps_per_second": 47.656, |
|
"step": 1896000 |
|
}, |
|
{ |
|
"epoch": 15.53, |
|
"learning_rate": 2.067872925873426e-06, |
|
"loss": 3.0302, |
|
"step": 1904000 |
|
}, |
|
{ |
|
"epoch": 15.53, |
|
"eval_loss": 3.0739452838897705, |
|
"eval_runtime": 135.8281, |
|
"eval_samples_per_second": 760.336, |
|
"eval_steps_per_second": 47.523, |
|
"step": 1904000 |
|
}, |
|
{ |
|
"epoch": 15.59, |
|
"eval_loss": 3.0700225830078125, |
|
"eval_runtime": 136.0724, |
|
"eval_samples_per_second": 758.971, |
|
"eval_steps_per_second": 47.438, |
|
"step": 1912000 |
|
}, |
|
{ |
|
"epoch": 15.66, |
|
"learning_rate": 2.0011673476194448e-06, |
|
"loss": 3.0204, |
|
"step": 1920000 |
|
}, |
|
{ |
|
"epoch": 15.66, |
|
"eval_loss": 3.0751476287841797, |
|
"eval_runtime": 136.119, |
|
"eval_samples_per_second": 758.711, |
|
"eval_steps_per_second": 47.422, |
|
"step": 1920000 |
|
}, |
|
{ |
|
"epoch": 15.72, |
|
"eval_loss": 3.0597870349884033, |
|
"eval_runtime": 136.3427, |
|
"eval_samples_per_second": 757.466, |
|
"eval_steps_per_second": 47.344, |
|
"step": 1928000 |
|
}, |
|
{ |
|
"epoch": 15.79, |
|
"learning_rate": 1.9344617693654634e-06, |
|
"loss": 3.0147, |
|
"step": 1936000 |
|
}, |
|
{ |
|
"epoch": 15.79, |
|
"eval_loss": 3.0522122383117676, |
|
"eval_runtime": 136.0082, |
|
"eval_samples_per_second": 759.329, |
|
"eval_steps_per_second": 47.46, |
|
"step": 1936000 |
|
}, |
|
{ |
|
"epoch": 15.85, |
|
"eval_loss": 3.065509557723999, |
|
"eval_runtime": 136.1421, |
|
"eval_samples_per_second": 758.582, |
|
"eval_steps_per_second": 47.414, |
|
"step": 1944000 |
|
}, |
|
{ |
|
"epoch": 15.92, |
|
"learning_rate": 1.867756191111482e-06, |
|
"loss": 3.0245, |
|
"step": 1952000 |
|
}, |
|
{ |
|
"epoch": 15.92, |
|
"eval_loss": 3.0568597316741943, |
|
"eval_runtime": 136.6776, |
|
"eval_samples_per_second": 755.61, |
|
"eval_steps_per_second": 47.228, |
|
"step": 1952000 |
|
}, |
|
{ |
|
"epoch": 15.98, |
|
"eval_loss": 3.062300205230713, |
|
"eval_runtime": 136.0258, |
|
"eval_samples_per_second": 759.231, |
|
"eval_steps_per_second": 47.454, |
|
"step": 1960000 |
|
}, |
|
{ |
|
"epoch": 16.05, |
|
"learning_rate": 1.8010506128575004e-06, |
|
"loss": 3.0069, |
|
"step": 1968000 |
|
}, |
|
{ |
|
"epoch": 16.05, |
|
"eval_loss": 3.059983730316162, |
|
"eval_runtime": 136.4638, |
|
"eval_samples_per_second": 756.794, |
|
"eval_steps_per_second": 47.302, |
|
"step": 1968000 |
|
}, |
|
{ |
|
"epoch": 16.11, |
|
"eval_loss": 3.0638678073883057, |
|
"eval_runtime": 137.569, |
|
"eval_samples_per_second": 750.714, |
|
"eval_steps_per_second": 46.922, |
|
"step": 1976000 |
|
}, |
|
{ |
|
"epoch": 16.18, |
|
"learning_rate": 1.734345034603519e-06, |
|
"loss": 3.0068, |
|
"step": 1984000 |
|
}, |
|
{ |
|
"epoch": 16.18, |
|
"eval_loss": 3.077465534210205, |
|
"eval_runtime": 136.0507, |
|
"eval_samples_per_second": 759.092, |
|
"eval_steps_per_second": 47.446, |
|
"step": 1984000 |
|
}, |
|
{ |
|
"epoch": 16.24, |
|
"eval_loss": 3.0668864250183105, |
|
"eval_runtime": 136.2552, |
|
"eval_samples_per_second": 757.953, |
|
"eval_steps_per_second": 47.374, |
|
"step": 1992000 |
|
}, |
|
{ |
|
"epoch": 16.31, |
|
"learning_rate": 1.6676394563495374e-06, |
|
"loss": 3.0275, |
|
"step": 2000000 |
|
}, |
|
{ |
|
"epoch": 16.31, |
|
"eval_loss": 3.062725782394409, |
|
"eval_runtime": 136.3436, |
|
"eval_samples_per_second": 757.461, |
|
"eval_steps_per_second": 47.344, |
|
"step": 2000000 |
|
}, |
|
{ |
|
"epoch": 16.37, |
|
"eval_loss": 3.0644514560699463, |
|
"eval_runtime": 137.752, |
|
"eval_samples_per_second": 749.717, |
|
"eval_steps_per_second": 46.86, |
|
"step": 2008000 |
|
}, |
|
{ |
|
"epoch": 16.44, |
|
"learning_rate": 1.6009338780955558e-06, |
|
"loss": 3.0164, |
|
"step": 2016000 |
|
}, |
|
{ |
|
"epoch": 16.44, |
|
"eval_loss": 3.0666866302490234, |
|
"eval_runtime": 135.9171, |
|
"eval_samples_per_second": 759.838, |
|
"eval_steps_per_second": 47.492, |
|
"step": 2016000 |
|
}, |
|
{ |
|
"epoch": 16.5, |
|
"eval_loss": 3.048987627029419, |
|
"eval_runtime": 136.0156, |
|
"eval_samples_per_second": 759.288, |
|
"eval_steps_per_second": 47.458, |
|
"step": 2024000 |
|
}, |
|
{ |
|
"epoch": 16.57, |
|
"learning_rate": 1.5342282998415744e-06, |
|
"loss": 3.0148, |
|
"step": 2032000 |
|
}, |
|
{ |
|
"epoch": 16.57, |
|
"eval_loss": 3.061800003051758, |
|
"eval_runtime": 137.187, |
|
"eval_samples_per_second": 752.805, |
|
"eval_steps_per_second": 47.053, |
|
"step": 2032000 |
|
}, |
|
{ |
|
"epoch": 16.63, |
|
"eval_loss": 3.0544731616973877, |
|
"eval_runtime": 137.5014, |
|
"eval_samples_per_second": 751.083, |
|
"eval_steps_per_second": 46.945, |
|
"step": 2040000 |
|
}, |
|
{ |
|
"epoch": 16.7, |
|
"learning_rate": 1.4675227215875928e-06, |
|
"loss": 3.022, |
|
"step": 2048000 |
|
}, |
|
{ |
|
"epoch": 16.7, |
|
"eval_loss": 3.0651352405548096, |
|
"eval_runtime": 137.0124, |
|
"eval_samples_per_second": 753.764, |
|
"eval_steps_per_second": 47.113, |
|
"step": 2048000 |
|
}, |
|
{ |
|
"epoch": 16.76, |
|
"eval_loss": 3.068650484085083, |
|
"eval_runtime": 137.324, |
|
"eval_samples_per_second": 752.053, |
|
"eval_steps_per_second": 47.006, |
|
"step": 2056000 |
|
}, |
|
{ |
|
"epoch": 16.83, |
|
"learning_rate": 1.4008171433336116e-06, |
|
"loss": 3.0235, |
|
"step": 2064000 |
|
}, |
|
{ |
|
"epoch": 16.83, |
|
"eval_loss": 3.0515873432159424, |
|
"eval_runtime": 137.8405, |
|
"eval_samples_per_second": 749.235, |
|
"eval_steps_per_second": 46.829, |
|
"step": 2064000 |
|
}, |
|
{ |
|
"epoch": 16.89, |
|
"eval_loss": 3.0761473178863525, |
|
"eval_runtime": 137.435, |
|
"eval_samples_per_second": 751.446, |
|
"eval_steps_per_second": 46.968, |
|
"step": 2072000 |
|
}, |
|
{ |
|
"epoch": 16.96, |
|
"learning_rate": 1.33411156507963e-06, |
|
"loss": 3.0194, |
|
"step": 2080000 |
|
}, |
|
{ |
|
"epoch": 16.96, |
|
"eval_loss": 3.0807414054870605, |
|
"eval_runtime": 136.8928, |
|
"eval_samples_per_second": 754.423, |
|
"eval_steps_per_second": 47.154, |
|
"step": 2080000 |
|
}, |
|
{ |
|
"epoch": 17.03, |
|
"eval_loss": 3.060075283050537, |
|
"eval_runtime": 136.6441, |
|
"eval_samples_per_second": 755.796, |
|
"eval_steps_per_second": 47.24, |
|
"step": 2088000 |
|
}, |
|
{ |
|
"epoch": 17.09, |
|
"learning_rate": 1.2674059868256484e-06, |
|
"loss": 3.0142, |
|
"step": 2096000 |
|
}, |
|
{ |
|
"epoch": 17.09, |
|
"eval_loss": 3.0721395015716553, |
|
"eval_runtime": 136.5201, |
|
"eval_samples_per_second": 756.482, |
|
"eval_steps_per_second": 47.282, |
|
"step": 2096000 |
|
}, |
|
{ |
|
"epoch": 17.16, |
|
"eval_loss": 3.0653316974639893, |
|
"eval_runtime": 138.2812, |
|
"eval_samples_per_second": 746.848, |
|
"eval_steps_per_second": 46.68, |
|
"step": 2104000 |
|
}, |
|
{ |
|
"epoch": 17.22, |
|
"learning_rate": 1.2007004085716668e-06, |
|
"loss": 3.0183, |
|
"step": 2112000 |
|
}, |
|
{ |
|
"epoch": 17.22, |
|
"eval_loss": 3.061683416366577, |
|
"eval_runtime": 136.6654, |
|
"eval_samples_per_second": 755.678, |
|
"eval_steps_per_second": 47.232, |
|
"step": 2112000 |
|
}, |
|
{ |
|
"epoch": 17.29, |
|
"eval_loss": 3.062178373336792, |
|
"eval_runtime": 137.9621, |
|
"eval_samples_per_second": 748.575, |
|
"eval_steps_per_second": 46.788, |
|
"step": 2120000 |
|
}, |
|
{ |
|
"epoch": 17.35, |
|
"learning_rate": 1.1339948303176854e-06, |
|
"loss": 3.0092, |
|
"step": 2128000 |
|
}, |
|
{ |
|
"epoch": 17.35, |
|
"eval_loss": 3.068242311477661, |
|
"eval_runtime": 137.4752, |
|
"eval_samples_per_second": 751.227, |
|
"eval_steps_per_second": 46.954, |
|
"step": 2128000 |
|
}, |
|
{ |
|
"epoch": 17.42, |
|
"eval_loss": 3.073157787322998, |
|
"eval_runtime": 136.5003, |
|
"eval_samples_per_second": 756.592, |
|
"eval_steps_per_second": 47.289, |
|
"step": 2136000 |
|
}, |
|
{ |
|
"epoch": 17.48, |
|
"learning_rate": 1.067289252063704e-06, |
|
"loss": 3.0071, |
|
"step": 2144000 |
|
}, |
|
{ |
|
"epoch": 17.48, |
|
"eval_loss": 3.0763022899627686, |
|
"eval_runtime": 137.95, |
|
"eval_samples_per_second": 748.641, |
|
"eval_steps_per_second": 46.792, |
|
"step": 2144000 |
|
}, |
|
{ |
|
"epoch": 17.55, |
|
"eval_loss": 3.0674524307250977, |
|
"eval_runtime": 137.3106, |
|
"eval_samples_per_second": 752.127, |
|
"eval_steps_per_second": 47.01, |
|
"step": 2152000 |
|
}, |
|
{ |
|
"epoch": 17.61, |
|
"learning_rate": 1.0005836738097224e-06, |
|
"loss": 3.0272, |
|
"step": 2160000 |
|
}, |
|
{ |
|
"epoch": 17.61, |
|
"eval_loss": 3.0671498775482178, |
|
"eval_runtime": 138.0717, |
|
"eval_samples_per_second": 747.981, |
|
"eval_steps_per_second": 46.751, |
|
"step": 2160000 |
|
}, |
|
{ |
|
"epoch": 17.68, |
|
"eval_loss": 3.062239170074463, |
|
"eval_runtime": 138.0499, |
|
"eval_samples_per_second": 748.099, |
|
"eval_steps_per_second": 46.758, |
|
"step": 2168000 |
|
}, |
|
{ |
|
"epoch": 17.74, |
|
"learning_rate": 9.33878095555741e-07, |
|
"loss": 3.0235, |
|
"step": 2176000 |
|
}, |
|
{ |
|
"epoch": 17.74, |
|
"eval_loss": 3.0789263248443604, |
|
"eval_runtime": 137.5626, |
|
"eval_samples_per_second": 750.749, |
|
"eval_steps_per_second": 46.924, |
|
"step": 2176000 |
|
}, |
|
{ |
|
"epoch": 17.81, |
|
"eval_loss": 3.062295436859131, |
|
"eval_runtime": 138.8694, |
|
"eval_samples_per_second": 743.684, |
|
"eval_steps_per_second": 46.483, |
|
"step": 2184000 |
|
}, |
|
{ |
|
"epoch": 17.87, |
|
"learning_rate": 8.671725173017595e-07, |
|
"loss": 3.0179, |
|
"step": 2192000 |
|
}, |
|
{ |
|
"epoch": 17.87, |
|
"eval_loss": 3.078376054763794, |
|
"eval_runtime": 136.985, |
|
"eval_samples_per_second": 753.914, |
|
"eval_steps_per_second": 47.122, |
|
"step": 2192000 |
|
}, |
|
{ |
|
"epoch": 17.94, |
|
"eval_loss": 3.062905788421631, |
|
"eval_runtime": 137.7472, |
|
"eval_samples_per_second": 749.743, |
|
"eval_steps_per_second": 46.861, |
|
"step": 2200000 |
|
}, |
|
{ |
|
"epoch": 18.0, |
|
"learning_rate": 8.004669390477779e-07, |
|
"loss": 3.0209, |
|
"step": 2208000 |
|
}, |
|
{ |
|
"epoch": 18.0, |
|
"eval_loss": 3.0731070041656494, |
|
"eval_runtime": 138.0906, |
|
"eval_samples_per_second": 747.878, |
|
"eval_steps_per_second": 46.745, |
|
"step": 2208000 |
|
}, |
|
{ |
|
"epoch": 18.07, |
|
"eval_loss": 3.0945563316345215, |
|
"eval_runtime": 137.4959, |
|
"eval_samples_per_second": 751.113, |
|
"eval_steps_per_second": 46.947, |
|
"step": 2216000 |
|
}, |
|
{ |
|
"epoch": 18.13, |
|
"learning_rate": 7.337613607937964e-07, |
|
"loss": 3.0237, |
|
"step": 2224000 |
|
}, |
|
{ |
|
"epoch": 18.13, |
|
"eval_loss": 3.065315008163452, |
|
"eval_runtime": 138.0159, |
|
"eval_samples_per_second": 748.283, |
|
"eval_steps_per_second": 46.77, |
|
"step": 2224000 |
|
}, |
|
{ |
|
"epoch": 18.2, |
|
"eval_loss": 3.0589962005615234, |
|
"eval_runtime": 137.6387, |
|
"eval_samples_per_second": 750.334, |
|
"eval_steps_per_second": 46.898, |
|
"step": 2232000 |
|
}, |
|
{ |
|
"epoch": 18.26, |
|
"learning_rate": 6.67055782539815e-07, |
|
"loss": 3.0164, |
|
"step": 2240000 |
|
}, |
|
{ |
|
"epoch": 18.26, |
|
"eval_loss": 3.070741891860962, |
|
"eval_runtime": 138.4523, |
|
"eval_samples_per_second": 745.925, |
|
"eval_steps_per_second": 46.623, |
|
"step": 2240000 |
|
}, |
|
{ |
|
"epoch": 18.33, |
|
"eval_loss": 3.0545763969421387, |
|
"eval_runtime": 138.1194, |
|
"eval_samples_per_second": 747.723, |
|
"eval_steps_per_second": 46.735, |
|
"step": 2248000 |
|
}, |
|
{ |
|
"epoch": 18.4, |
|
"learning_rate": 6.003502042858334e-07, |
|
"loss": 3.0206, |
|
"step": 2256000 |
|
}, |
|
{ |
|
"epoch": 18.4, |
|
"eval_loss": 3.0741806030273438, |
|
"eval_runtime": 138.8634, |
|
"eval_samples_per_second": 743.717, |
|
"eval_steps_per_second": 46.485, |
|
"step": 2256000 |
|
}, |
|
{ |
|
"epoch": 18.46, |
|
"eval_loss": 3.0793333053588867, |
|
"eval_runtime": 138.6181, |
|
"eval_samples_per_second": 745.032, |
|
"eval_steps_per_second": 46.567, |
|
"step": 2264000 |
|
}, |
|
{ |
|
"epoch": 18.53, |
|
"learning_rate": 5.33644626031852e-07, |
|
"loss": 3.0138, |
|
"step": 2272000 |
|
}, |
|
{ |
|
"epoch": 18.53, |
|
"eval_loss": 3.05604887008667, |
|
"eval_runtime": 139.1325, |
|
"eval_samples_per_second": 742.278, |
|
"eval_steps_per_second": 46.395, |
|
"step": 2272000 |
|
}, |
|
{ |
|
"epoch": 18.59, |
|
"eval_loss": 3.086977958679199, |
|
"eval_runtime": 137.8163, |
|
"eval_samples_per_second": 749.367, |
|
"eval_steps_per_second": 46.838, |
|
"step": 2280000 |
|
}, |
|
{ |
|
"epoch": 18.66, |
|
"learning_rate": 4.669390477778705e-07, |
|
"loss": 3.0377, |
|
"step": 2288000 |
|
}, |
|
{ |
|
"epoch": 18.66, |
|
"eval_loss": 3.07423996925354, |
|
"eval_runtime": 137.4738, |
|
"eval_samples_per_second": 751.234, |
|
"eval_steps_per_second": 46.954, |
|
"step": 2288000 |
|
}, |
|
{ |
|
"epoch": 18.72, |
|
"eval_loss": 3.0675508975982666, |
|
"eval_runtime": 138.0596, |
|
"eval_samples_per_second": 748.046, |
|
"eval_steps_per_second": 46.755, |
|
"step": 2296000 |
|
}, |
|
{ |
|
"epoch": 18.79, |
|
"learning_rate": 4.0023346952388894e-07, |
|
"loss": 3.0227, |
|
"step": 2304000 |
|
}, |
|
{ |
|
"epoch": 18.79, |
|
"eval_loss": 3.06254506111145, |
|
"eval_runtime": 139.3504, |
|
"eval_samples_per_second": 741.117, |
|
"eval_steps_per_second": 46.322, |
|
"step": 2304000 |
|
}, |
|
{ |
|
"epoch": 18.85, |
|
"eval_loss": 3.0736207962036133, |
|
"eval_runtime": 139.5433, |
|
"eval_samples_per_second": 740.093, |
|
"eval_steps_per_second": 46.258, |
|
"step": 2312000 |
|
}, |
|
{ |
|
"epoch": 18.92, |
|
"learning_rate": 3.335278912699075e-07, |
|
"loss": 3.0359, |
|
"step": 2320000 |
|
}, |
|
{ |
|
"epoch": 18.92, |
|
"eval_loss": 3.0800607204437256, |
|
"eval_runtime": 138.2846, |
|
"eval_samples_per_second": 746.829, |
|
"eval_steps_per_second": 46.679, |
|
"step": 2320000 |
|
}, |
|
{ |
|
"epoch": 18.98, |
|
"eval_loss": 3.0709972381591797, |
|
"eval_runtime": 139.0505, |
|
"eval_samples_per_second": 742.716, |
|
"eval_steps_per_second": 46.422, |
|
"step": 2328000 |
|
}, |
|
{ |
|
"epoch": 19.05, |
|
"learning_rate": 2.66822313015926e-07, |
|
"loss": 3.0248, |
|
"step": 2336000 |
|
}, |
|
{ |
|
"epoch": 19.05, |
|
"eval_loss": 3.069218158721924, |
|
"eval_runtime": 138.9779, |
|
"eval_samples_per_second": 743.104, |
|
"eval_steps_per_second": 46.446, |
|
"step": 2336000 |
|
}, |
|
{ |
|
"epoch": 19.11, |
|
"eval_loss": 3.067660331726074, |
|
"eval_runtime": 138.4099, |
|
"eval_samples_per_second": 746.154, |
|
"eval_steps_per_second": 46.637, |
|
"step": 2344000 |
|
}, |
|
{ |
|
"epoch": 19.18, |
|
"learning_rate": 2.0011673476194447e-07, |
|
"loss": 3.0235, |
|
"step": 2352000 |
|
}, |
|
{ |
|
"epoch": 19.18, |
|
"eval_loss": 3.089552879333496, |
|
"eval_runtime": 138.573, |
|
"eval_samples_per_second": 745.275, |
|
"eval_steps_per_second": 46.582, |
|
"step": 2352000 |
|
}, |
|
{ |
|
"epoch": 19.24, |
|
"eval_loss": 3.0777699947357178, |
|
"eval_runtime": 140.4362, |
|
"eval_samples_per_second": 735.388, |
|
"eval_steps_per_second": 45.964, |
|
"step": 2360000 |
|
}, |
|
{ |
|
"epoch": 19.31, |
|
"learning_rate": 1.33411156507963e-07, |
|
"loss": 3.0187, |
|
"step": 2368000 |
|
}, |
|
{ |
|
"epoch": 19.31, |
|
"eval_loss": 3.069951295852661, |
|
"eval_runtime": 140.0319, |
|
"eval_samples_per_second": 737.511, |
|
"eval_steps_per_second": 46.097, |
|
"step": 2368000 |
|
}, |
|
{ |
|
"epoch": 19.37, |
|
"eval_loss": 3.0742506980895996, |
|
"eval_runtime": 139.3192, |
|
"eval_samples_per_second": 741.283, |
|
"eval_steps_per_second": 46.332, |
|
"step": 2376000 |
|
}, |
|
{ |
|
"epoch": 19.44, |
|
"learning_rate": 6.67055782539815e-08, |
|
"loss": 3.0189, |
|
"step": 2384000 |
|
}, |
|
{ |
|
"epoch": 19.44, |
|
"eval_loss": 3.0780065059661865, |
|
"eval_runtime": 138.4114, |
|
"eval_samples_per_second": 746.145, |
|
"eval_steps_per_second": 46.636, |
|
"step": 2384000 |
|
}, |
|
{ |
|
"epoch": 19.5, |
|
"eval_loss": 3.0866599082946777, |
|
"eval_runtime": 138.3665, |
|
"eval_samples_per_second": 746.387, |
|
"eval_steps_per_second": 46.651, |
|
"step": 2392000 |
|
}, |
|
{ |
|
"epoch": 19.57, |
|
"learning_rate": 0.0, |
|
"loss": 3.0184, |
|
"step": 2400000 |
|
}, |
|
{ |
|
"epoch": 19.57, |
|
"eval_loss": 3.079288959503174, |
|
"eval_runtime": 138.4519, |
|
"eval_samples_per_second": 745.927, |
|
"eval_steps_per_second": 46.623, |
|
"step": 2400000 |
|
}, |
|
{ |
|
"epoch": 19.57, |
|
"step": 2400000, |
|
"total_flos": 7.178820925216543e+17, |
|
"train_loss": 2.9400340771484377, |
|
"train_runtime": 198144.865, |
|
"train_samples_per_second": 193.798, |
|
"train_steps_per_second": 12.112 |
|
} |
|
], |
|
"logging_steps": 16000, |
|
"max_steps": 2400000, |
|
"num_train_epochs": 20, |
|
"save_steps": 32000, |
|
"total_flos": 7.178820925216543e+17, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|