|
{ |
|
"best_metric": 2.2240703105926514, |
|
"best_model_checkpoint": "./model_tweets_2020_Q2_75/checkpoint-2368000", |
|
"epoch": 6.7372770733268394, |
|
"eval_steps": 8000, |
|
"global_step": 2400000, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.02, |
|
"eval_loss": 2.486480474472046, |
|
"eval_runtime": 326.396, |
|
"eval_samples_per_second": 919.068, |
|
"eval_steps_per_second": 57.442, |
|
"step": 8000 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"learning_rate": 4.0726666666666665e-07, |
|
"loss": 2.6592, |
|
"step": 16000 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"eval_loss": 2.459786891937256, |
|
"eval_runtime": 328.9344, |
|
"eval_samples_per_second": 911.975, |
|
"eval_steps_per_second": 56.999, |
|
"step": 16000 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"eval_loss": 2.447173595428467, |
|
"eval_runtime": 326.9299, |
|
"eval_samples_per_second": 917.567, |
|
"eval_steps_per_second": 57.349, |
|
"step": 24000 |
|
}, |
|
{ |
|
"epoch": 0.09, |
|
"learning_rate": 4.0453333333333336e-07, |
|
"loss": 2.6211, |
|
"step": 32000 |
|
}, |
|
{ |
|
"epoch": 0.09, |
|
"eval_loss": 2.4340574741363525, |
|
"eval_runtime": 331.0965, |
|
"eval_samples_per_second": 906.02, |
|
"eval_steps_per_second": 56.627, |
|
"step": 32000 |
|
}, |
|
{ |
|
"epoch": 0.11, |
|
"eval_loss": 2.4222826957702637, |
|
"eval_runtime": 330.5401, |
|
"eval_samples_per_second": 907.545, |
|
"eval_steps_per_second": 56.722, |
|
"step": 40000 |
|
}, |
|
{ |
|
"epoch": 0.13, |
|
"learning_rate": 4.018e-07, |
|
"loss": 2.6048, |
|
"step": 48000 |
|
}, |
|
{ |
|
"epoch": 0.13, |
|
"eval_loss": 2.4217443466186523, |
|
"eval_runtime": 328.8317, |
|
"eval_samples_per_second": 912.26, |
|
"eval_steps_per_second": 57.017, |
|
"step": 48000 |
|
}, |
|
{ |
|
"epoch": 0.16, |
|
"eval_loss": 2.418403387069702, |
|
"eval_runtime": 328.052, |
|
"eval_samples_per_second": 914.428, |
|
"eval_steps_per_second": 57.153, |
|
"step": 56000 |
|
}, |
|
{ |
|
"epoch": 0.18, |
|
"learning_rate": 3.9906666666666667e-07, |
|
"loss": 2.5861, |
|
"step": 64000 |
|
}, |
|
{ |
|
"epoch": 0.18, |
|
"eval_loss": 2.4061949253082275, |
|
"eval_runtime": 330.9569, |
|
"eval_samples_per_second": 906.402, |
|
"eval_steps_per_second": 56.651, |
|
"step": 64000 |
|
}, |
|
{ |
|
"epoch": 0.2, |
|
"eval_loss": 2.3918895721435547, |
|
"eval_runtime": 327.7147, |
|
"eval_samples_per_second": 915.369, |
|
"eval_steps_per_second": 57.211, |
|
"step": 72000 |
|
}, |
|
{ |
|
"epoch": 0.22, |
|
"learning_rate": 3.963333333333333e-07, |
|
"loss": 2.5736, |
|
"step": 80000 |
|
}, |
|
{ |
|
"epoch": 0.22, |
|
"eval_loss": 2.3896288871765137, |
|
"eval_runtime": 329.9795, |
|
"eval_samples_per_second": 909.087, |
|
"eval_steps_per_second": 56.819, |
|
"step": 80000 |
|
}, |
|
{ |
|
"epoch": 0.25, |
|
"eval_loss": 2.3951096534729004, |
|
"eval_runtime": 328.5879, |
|
"eval_samples_per_second": 912.937, |
|
"eval_steps_per_second": 57.059, |
|
"step": 88000 |
|
}, |
|
{ |
|
"epoch": 0.27, |
|
"learning_rate": 3.936e-07, |
|
"loss": 2.5559, |
|
"step": 96000 |
|
}, |
|
{ |
|
"epoch": 0.27, |
|
"eval_loss": 2.3903470039367676, |
|
"eval_runtime": 328.446, |
|
"eval_samples_per_second": 913.331, |
|
"eval_steps_per_second": 57.084, |
|
"step": 96000 |
|
}, |
|
{ |
|
"epoch": 0.29, |
|
"eval_loss": 2.3835983276367188, |
|
"eval_runtime": 330.3118, |
|
"eval_samples_per_second": 908.172, |
|
"eval_steps_per_second": 56.762, |
|
"step": 104000 |
|
}, |
|
{ |
|
"epoch": 0.31, |
|
"learning_rate": 3.908666666666667e-07, |
|
"loss": 2.5551, |
|
"step": 112000 |
|
}, |
|
{ |
|
"epoch": 0.31, |
|
"eval_loss": 2.374908685684204, |
|
"eval_runtime": 328.8458, |
|
"eval_samples_per_second": 912.221, |
|
"eval_steps_per_second": 57.015, |
|
"step": 112000 |
|
}, |
|
{ |
|
"epoch": 0.34, |
|
"eval_loss": 2.3793506622314453, |
|
"eval_runtime": 329.1311, |
|
"eval_samples_per_second": 911.43, |
|
"eval_steps_per_second": 56.965, |
|
"step": 120000 |
|
}, |
|
{ |
|
"epoch": 0.36, |
|
"learning_rate": 3.8813333333333334e-07, |
|
"loss": 2.5371, |
|
"step": 128000 |
|
}, |
|
{ |
|
"epoch": 0.36, |
|
"eval_loss": 2.3733017444610596, |
|
"eval_runtime": 328.0343, |
|
"eval_samples_per_second": 914.477, |
|
"eval_steps_per_second": 57.156, |
|
"step": 128000 |
|
}, |
|
{ |
|
"epoch": 0.38, |
|
"eval_loss": 2.3703365325927734, |
|
"eval_runtime": 328.4858, |
|
"eval_samples_per_second": 913.221, |
|
"eval_steps_per_second": 57.077, |
|
"step": 136000 |
|
}, |
|
{ |
|
"epoch": 0.4, |
|
"learning_rate": 3.854e-07, |
|
"loss": 2.5417, |
|
"step": 144000 |
|
}, |
|
{ |
|
"epoch": 0.4, |
|
"eval_loss": 2.366170883178711, |
|
"eval_runtime": 328.5536, |
|
"eval_samples_per_second": 913.032, |
|
"eval_steps_per_second": 57.065, |
|
"step": 144000 |
|
}, |
|
{ |
|
"epoch": 0.43, |
|
"eval_loss": 2.372772216796875, |
|
"eval_runtime": 330.4279, |
|
"eval_samples_per_second": 907.853, |
|
"eval_steps_per_second": 56.742, |
|
"step": 152000 |
|
}, |
|
{ |
|
"epoch": 0.45, |
|
"learning_rate": 3.8266666666666665e-07, |
|
"loss": 2.5316, |
|
"step": 160000 |
|
}, |
|
{ |
|
"epoch": 0.45, |
|
"eval_loss": 2.364302158355713, |
|
"eval_runtime": 328.3485, |
|
"eval_samples_per_second": 913.603, |
|
"eval_steps_per_second": 57.101, |
|
"step": 160000 |
|
}, |
|
{ |
|
"epoch": 0.47, |
|
"eval_loss": 2.3567655086517334, |
|
"eval_runtime": 329.7531, |
|
"eval_samples_per_second": 909.711, |
|
"eval_steps_per_second": 56.858, |
|
"step": 168000 |
|
}, |
|
{ |
|
"epoch": 0.49, |
|
"learning_rate": 3.799333333333333e-07, |
|
"loss": 2.5296, |
|
"step": 176000 |
|
}, |
|
{ |
|
"epoch": 0.49, |
|
"eval_loss": 2.3554866313934326, |
|
"eval_runtime": 329.7554, |
|
"eval_samples_per_second": 909.705, |
|
"eval_steps_per_second": 56.857, |
|
"step": 176000 |
|
}, |
|
{ |
|
"epoch": 0.52, |
|
"eval_loss": 2.3506195545196533, |
|
"eval_runtime": 331.0345, |
|
"eval_samples_per_second": 906.19, |
|
"eval_steps_per_second": 56.638, |
|
"step": 184000 |
|
}, |
|
{ |
|
"epoch": 0.54, |
|
"learning_rate": 3.772e-07, |
|
"loss": 2.5215, |
|
"step": 192000 |
|
}, |
|
{ |
|
"epoch": 0.54, |
|
"eval_loss": 2.348207473754883, |
|
"eval_runtime": 329.4713, |
|
"eval_samples_per_second": 910.489, |
|
"eval_steps_per_second": 56.906, |
|
"step": 192000 |
|
}, |
|
{ |
|
"epoch": 0.56, |
|
"eval_loss": 2.351372480392456, |
|
"eval_runtime": 329.1084, |
|
"eval_samples_per_second": 911.493, |
|
"eval_steps_per_second": 56.969, |
|
"step": 200000 |
|
}, |
|
{ |
|
"epoch": 0.58, |
|
"learning_rate": 3.7446666666666667e-07, |
|
"loss": 2.5274, |
|
"step": 208000 |
|
}, |
|
{ |
|
"epoch": 0.58, |
|
"eval_loss": 2.3531200885772705, |
|
"eval_runtime": 330.4502, |
|
"eval_samples_per_second": 907.792, |
|
"eval_steps_per_second": 56.738, |
|
"step": 208000 |
|
}, |
|
{ |
|
"epoch": 0.61, |
|
"eval_loss": 2.3463432788848877, |
|
"eval_runtime": 331.5879, |
|
"eval_samples_per_second": 904.677, |
|
"eval_steps_per_second": 56.543, |
|
"step": 216000 |
|
}, |
|
{ |
|
"epoch": 0.63, |
|
"learning_rate": 3.7173333333333333e-07, |
|
"loss": 2.5215, |
|
"step": 224000 |
|
}, |
|
{ |
|
"epoch": 0.63, |
|
"eval_loss": 2.346996784210205, |
|
"eval_runtime": 330.4016, |
|
"eval_samples_per_second": 907.925, |
|
"eval_steps_per_second": 56.746, |
|
"step": 224000 |
|
}, |
|
{ |
|
"epoch": 0.65, |
|
"eval_loss": 2.3407442569732666, |
|
"eval_runtime": 331.6282, |
|
"eval_samples_per_second": 904.567, |
|
"eval_steps_per_second": 56.536, |
|
"step": 232000 |
|
}, |
|
{ |
|
"epoch": 0.67, |
|
"learning_rate": 3.69e-07, |
|
"loss": 2.5096, |
|
"step": 240000 |
|
}, |
|
{ |
|
"epoch": 0.67, |
|
"eval_loss": 2.340013265609741, |
|
"eval_runtime": 330.2111, |
|
"eval_samples_per_second": 908.449, |
|
"eval_steps_per_second": 56.779, |
|
"step": 240000 |
|
}, |
|
{ |
|
"epoch": 0.7, |
|
"eval_loss": 2.340172290802002, |
|
"eval_runtime": 330.4785, |
|
"eval_samples_per_second": 907.714, |
|
"eval_steps_per_second": 56.733, |
|
"step": 248000 |
|
}, |
|
{ |
|
"epoch": 0.72, |
|
"learning_rate": 3.6626666666666664e-07, |
|
"loss": 2.5176, |
|
"step": 256000 |
|
}, |
|
{ |
|
"epoch": 0.72, |
|
"eval_loss": 2.330843210220337, |
|
"eval_runtime": 329.4037, |
|
"eval_samples_per_second": 910.676, |
|
"eval_steps_per_second": 56.918, |
|
"step": 256000 |
|
}, |
|
{ |
|
"epoch": 0.74, |
|
"eval_loss": 2.3342106342315674, |
|
"eval_runtime": 329.9573, |
|
"eval_samples_per_second": 909.148, |
|
"eval_steps_per_second": 56.823, |
|
"step": 264000 |
|
}, |
|
{ |
|
"epoch": 0.76, |
|
"learning_rate": 3.6353333333333335e-07, |
|
"loss": 2.5048, |
|
"step": 272000 |
|
}, |
|
{ |
|
"epoch": 0.76, |
|
"eval_loss": 2.333300828933716, |
|
"eval_runtime": 330.8736, |
|
"eval_samples_per_second": 906.63, |
|
"eval_steps_per_second": 56.665, |
|
"step": 272000 |
|
}, |
|
{ |
|
"epoch": 0.79, |
|
"eval_loss": 2.3288071155548096, |
|
"eval_runtime": 329.6491, |
|
"eval_samples_per_second": 909.998, |
|
"eval_steps_per_second": 56.876, |
|
"step": 280000 |
|
}, |
|
{ |
|
"epoch": 0.81, |
|
"learning_rate": 3.608e-07, |
|
"loss": 2.4979, |
|
"step": 288000 |
|
}, |
|
{ |
|
"epoch": 0.81, |
|
"eval_loss": 2.329832077026367, |
|
"eval_runtime": 329.6289, |
|
"eval_samples_per_second": 910.054, |
|
"eval_steps_per_second": 56.879, |
|
"step": 288000 |
|
}, |
|
{ |
|
"epoch": 0.83, |
|
"eval_loss": 2.323723554611206, |
|
"eval_runtime": 330.3451, |
|
"eval_samples_per_second": 908.081, |
|
"eval_steps_per_second": 56.756, |
|
"step": 296000 |
|
}, |
|
{ |
|
"epoch": 0.85, |
|
"learning_rate": 3.5806666666666666e-07, |
|
"loss": 2.4963, |
|
"step": 304000 |
|
}, |
|
{ |
|
"epoch": 0.85, |
|
"eval_loss": 2.326643943786621, |
|
"eval_runtime": 331.0075, |
|
"eval_samples_per_second": 906.263, |
|
"eval_steps_per_second": 56.642, |
|
"step": 304000 |
|
}, |
|
{ |
|
"epoch": 0.88, |
|
"eval_loss": 2.3196959495544434, |
|
"eval_runtime": 329.7349, |
|
"eval_samples_per_second": 909.761, |
|
"eval_steps_per_second": 56.861, |
|
"step": 312000 |
|
}, |
|
{ |
|
"epoch": 0.9, |
|
"learning_rate": 3.553333333333333e-07, |
|
"loss": 2.4972, |
|
"step": 320000 |
|
}, |
|
{ |
|
"epoch": 0.9, |
|
"eval_loss": 2.327077627182007, |
|
"eval_runtime": 329.8959, |
|
"eval_samples_per_second": 909.317, |
|
"eval_steps_per_second": 56.833, |
|
"step": 320000 |
|
}, |
|
{ |
|
"epoch": 0.92, |
|
"eval_loss": 2.327465534210205, |
|
"eval_runtime": 329.8835, |
|
"eval_samples_per_second": 909.351, |
|
"eval_steps_per_second": 56.835, |
|
"step": 328000 |
|
}, |
|
{ |
|
"epoch": 0.94, |
|
"learning_rate": 3.5259999999999997e-07, |
|
"loss": 2.4969, |
|
"step": 336000 |
|
}, |
|
{ |
|
"epoch": 0.94, |
|
"eval_loss": 2.3209738731384277, |
|
"eval_runtime": 330.0001, |
|
"eval_samples_per_second": 909.03, |
|
"eval_steps_per_second": 56.815, |
|
"step": 336000 |
|
}, |
|
{ |
|
"epoch": 0.97, |
|
"eval_loss": 2.3222391605377197, |
|
"eval_runtime": 331.2832, |
|
"eval_samples_per_second": 905.509, |
|
"eval_steps_per_second": 56.595, |
|
"step": 344000 |
|
}, |
|
{ |
|
"epoch": 0.99, |
|
"learning_rate": 3.498666666666667e-07, |
|
"loss": 2.4961, |
|
"step": 352000 |
|
}, |
|
{ |
|
"epoch": 0.99, |
|
"eval_loss": 2.324232339859009, |
|
"eval_runtime": 329.8848, |
|
"eval_samples_per_second": 909.348, |
|
"eval_steps_per_second": 56.835, |
|
"step": 352000 |
|
}, |
|
{ |
|
"epoch": 1.01, |
|
"eval_loss": 2.3154807090759277, |
|
"eval_runtime": 330.051, |
|
"eval_samples_per_second": 908.89, |
|
"eval_steps_per_second": 56.806, |
|
"step": 360000 |
|
}, |
|
{ |
|
"epoch": 1.03, |
|
"learning_rate": 3.4713333333333333e-07, |
|
"loss": 2.49, |
|
"step": 368000 |
|
}, |
|
{ |
|
"epoch": 1.03, |
|
"eval_loss": 2.3175013065338135, |
|
"eval_runtime": 331.4361, |
|
"eval_samples_per_second": 905.092, |
|
"eval_steps_per_second": 56.569, |
|
"step": 368000 |
|
}, |
|
{ |
|
"epoch": 1.06, |
|
"eval_loss": 2.307647228240967, |
|
"eval_runtime": 332.1323, |
|
"eval_samples_per_second": 903.194, |
|
"eval_steps_per_second": 56.45, |
|
"step": 376000 |
|
}, |
|
{ |
|
"epoch": 1.08, |
|
"learning_rate": 3.444e-07, |
|
"loss": 2.4847, |
|
"step": 384000 |
|
}, |
|
{ |
|
"epoch": 1.08, |
|
"eval_loss": 2.313831090927124, |
|
"eval_runtime": 330.4544, |
|
"eval_samples_per_second": 907.78, |
|
"eval_steps_per_second": 56.737, |
|
"step": 384000 |
|
}, |
|
{ |
|
"epoch": 1.1, |
|
"eval_loss": 2.3183014392852783, |
|
"eval_runtime": 331.0864, |
|
"eval_samples_per_second": 906.047, |
|
"eval_steps_per_second": 56.629, |
|
"step": 392000 |
|
}, |
|
{ |
|
"epoch": 1.12, |
|
"learning_rate": 3.416666666666667e-07, |
|
"loss": 2.4767, |
|
"step": 400000 |
|
}, |
|
{ |
|
"epoch": 1.12, |
|
"eval_loss": 2.3118338584899902, |
|
"eval_runtime": 330.5298, |
|
"eval_samples_per_second": 907.573, |
|
"eval_steps_per_second": 56.724, |
|
"step": 400000 |
|
}, |
|
{ |
|
"epoch": 1.15, |
|
"eval_loss": 2.3151934146881104, |
|
"eval_runtime": 334.1069, |
|
"eval_samples_per_second": 897.856, |
|
"eval_steps_per_second": 56.117, |
|
"step": 408000 |
|
}, |
|
{ |
|
"epoch": 1.17, |
|
"learning_rate": 3.3893333333333335e-07, |
|
"loss": 2.4788, |
|
"step": 416000 |
|
}, |
|
{ |
|
"epoch": 1.17, |
|
"eval_loss": 2.3089170455932617, |
|
"eval_runtime": 330.2914, |
|
"eval_samples_per_second": 908.228, |
|
"eval_steps_per_second": 56.765, |
|
"step": 416000 |
|
}, |
|
{ |
|
"epoch": 1.19, |
|
"eval_loss": 2.3051483631134033, |
|
"eval_runtime": 330.8266, |
|
"eval_samples_per_second": 906.759, |
|
"eval_steps_per_second": 56.673, |
|
"step": 424000 |
|
}, |
|
{ |
|
"epoch": 1.21, |
|
"learning_rate": 3.3619999999999995e-07, |
|
"loss": 2.4738, |
|
"step": 432000 |
|
}, |
|
{ |
|
"epoch": 1.21, |
|
"eval_loss": 2.310180425643921, |
|
"eval_runtime": 329.7325, |
|
"eval_samples_per_second": 909.768, |
|
"eval_steps_per_second": 56.861, |
|
"step": 432000 |
|
}, |
|
{ |
|
"epoch": 1.24, |
|
"eval_loss": 2.3069398403167725, |
|
"eval_runtime": 330.3228, |
|
"eval_samples_per_second": 908.142, |
|
"eval_steps_per_second": 56.76, |
|
"step": 440000 |
|
}, |
|
{ |
|
"epoch": 1.26, |
|
"learning_rate": 3.3346666666666666e-07, |
|
"loss": 2.4635, |
|
"step": 448000 |
|
}, |
|
{ |
|
"epoch": 1.26, |
|
"eval_loss": 2.3003976345062256, |
|
"eval_runtime": 331.7126, |
|
"eval_samples_per_second": 904.337, |
|
"eval_steps_per_second": 56.522, |
|
"step": 448000 |
|
}, |
|
{ |
|
"epoch": 1.28, |
|
"eval_loss": 2.3066189289093018, |
|
"eval_runtime": 331.273, |
|
"eval_samples_per_second": 905.537, |
|
"eval_steps_per_second": 56.597, |
|
"step": 456000 |
|
}, |
|
{ |
|
"epoch": 1.3, |
|
"learning_rate": 3.307333333333333e-07, |
|
"loss": 2.4828, |
|
"step": 464000 |
|
}, |
|
{ |
|
"epoch": 1.3, |
|
"eval_loss": 2.307849168777466, |
|
"eval_runtime": 333.5774, |
|
"eval_samples_per_second": 899.282, |
|
"eval_steps_per_second": 56.206, |
|
"step": 464000 |
|
}, |
|
{ |
|
"epoch": 1.32, |
|
"eval_loss": 2.3072116374969482, |
|
"eval_runtime": 330.1882, |
|
"eval_samples_per_second": 908.512, |
|
"eval_steps_per_second": 56.783, |
|
"step": 472000 |
|
}, |
|
{ |
|
"epoch": 1.35, |
|
"learning_rate": 3.28e-07, |
|
"loss": 2.4675, |
|
"step": 480000 |
|
}, |
|
{ |
|
"epoch": 1.35, |
|
"eval_loss": 2.3072662353515625, |
|
"eval_runtime": 330.6229, |
|
"eval_samples_per_second": 907.318, |
|
"eval_steps_per_second": 56.708, |
|
"step": 480000 |
|
}, |
|
{ |
|
"epoch": 1.37, |
|
"eval_loss": 2.3013877868652344, |
|
"eval_runtime": 332.2733, |
|
"eval_samples_per_second": 902.811, |
|
"eval_steps_per_second": 56.426, |
|
"step": 488000 |
|
}, |
|
{ |
|
"epoch": 1.39, |
|
"learning_rate": 3.252666666666667e-07, |
|
"loss": 2.4676, |
|
"step": 496000 |
|
}, |
|
{ |
|
"epoch": 1.39, |
|
"eval_loss": 2.298736095428467, |
|
"eval_runtime": 330.3572, |
|
"eval_samples_per_second": 908.047, |
|
"eval_steps_per_second": 56.754, |
|
"step": 496000 |
|
}, |
|
{ |
|
"epoch": 1.41, |
|
"eval_loss": 2.2987987995147705, |
|
"eval_runtime": 330.8652, |
|
"eval_samples_per_second": 906.653, |
|
"eval_steps_per_second": 56.667, |
|
"step": 504000 |
|
}, |
|
{ |
|
"epoch": 1.44, |
|
"learning_rate": 3.2253333333333334e-07, |
|
"loss": 2.4678, |
|
"step": 512000 |
|
}, |
|
{ |
|
"epoch": 1.44, |
|
"eval_loss": 2.2971158027648926, |
|
"eval_runtime": 333.0983, |
|
"eval_samples_per_second": 900.575, |
|
"eval_steps_per_second": 56.287, |
|
"step": 512000 |
|
}, |
|
{ |
|
"epoch": 1.46, |
|
"eval_loss": 2.2968783378601074, |
|
"eval_runtime": 331.5018, |
|
"eval_samples_per_second": 904.912, |
|
"eval_steps_per_second": 56.558, |
|
"step": 520000 |
|
}, |
|
{ |
|
"epoch": 1.48, |
|
"learning_rate": 3.198e-07, |
|
"loss": 2.4634, |
|
"step": 528000 |
|
}, |
|
{ |
|
"epoch": 1.48, |
|
"eval_loss": 2.2989814281463623, |
|
"eval_runtime": 333.5333, |
|
"eval_samples_per_second": 899.4, |
|
"eval_steps_per_second": 56.213, |
|
"step": 528000 |
|
}, |
|
{ |
|
"epoch": 1.5, |
|
"eval_loss": 2.2869136333465576, |
|
"eval_runtime": 332.2841, |
|
"eval_samples_per_second": 902.782, |
|
"eval_steps_per_second": 56.425, |
|
"step": 536000 |
|
}, |
|
{ |
|
"epoch": 1.53, |
|
"learning_rate": 3.1706666666666665e-07, |
|
"loss": 2.4657, |
|
"step": 544000 |
|
}, |
|
{ |
|
"epoch": 1.53, |
|
"eval_loss": 2.293611526489258, |
|
"eval_runtime": 331.5364, |
|
"eval_samples_per_second": 904.818, |
|
"eval_steps_per_second": 56.552, |
|
"step": 544000 |
|
}, |
|
{ |
|
"epoch": 1.55, |
|
"eval_loss": 2.291510581970215, |
|
"eval_runtime": 331.7602, |
|
"eval_samples_per_second": 904.207, |
|
"eval_steps_per_second": 56.514, |
|
"step": 552000 |
|
}, |
|
{ |
|
"epoch": 1.57, |
|
"learning_rate": 3.1433333333333336e-07, |
|
"loss": 2.4607, |
|
"step": 560000 |
|
}, |
|
{ |
|
"epoch": 1.57, |
|
"eval_loss": 2.290339469909668, |
|
"eval_runtime": 331.9545, |
|
"eval_samples_per_second": 903.678, |
|
"eval_steps_per_second": 56.481, |
|
"step": 560000 |
|
}, |
|
{ |
|
"epoch": 1.59, |
|
"eval_loss": 2.2934372425079346, |
|
"eval_runtime": 334.4873, |
|
"eval_samples_per_second": 896.835, |
|
"eval_steps_per_second": 56.053, |
|
"step": 568000 |
|
}, |
|
{ |
|
"epoch": 1.62, |
|
"learning_rate": 3.116e-07, |
|
"loss": 2.4558, |
|
"step": 576000 |
|
}, |
|
{ |
|
"epoch": 1.62, |
|
"eval_loss": 2.284529447555542, |
|
"eval_runtime": 334.3226, |
|
"eval_samples_per_second": 897.277, |
|
"eval_steps_per_second": 56.081, |
|
"step": 576000 |
|
}, |
|
{ |
|
"epoch": 1.64, |
|
"eval_loss": 2.289668083190918, |
|
"eval_runtime": 335.2625, |
|
"eval_samples_per_second": 894.761, |
|
"eval_steps_per_second": 55.923, |
|
"step": 584000 |
|
}, |
|
{ |
|
"epoch": 1.66, |
|
"learning_rate": 3.0886666666666667e-07, |
|
"loss": 2.4662, |
|
"step": 592000 |
|
}, |
|
{ |
|
"epoch": 1.66, |
|
"eval_loss": 2.2928452491760254, |
|
"eval_runtime": 330.9741, |
|
"eval_samples_per_second": 906.355, |
|
"eval_steps_per_second": 56.648, |
|
"step": 592000 |
|
}, |
|
{ |
|
"epoch": 1.68, |
|
"eval_loss": 2.286137580871582, |
|
"eval_runtime": 332.2239, |
|
"eval_samples_per_second": 902.945, |
|
"eval_steps_per_second": 56.435, |
|
"step": 600000 |
|
}, |
|
{ |
|
"epoch": 1.71, |
|
"learning_rate": 3.061333333333333e-07, |
|
"loss": 2.4658, |
|
"step": 608000 |
|
}, |
|
{ |
|
"epoch": 1.71, |
|
"eval_loss": 2.2883219718933105, |
|
"eval_runtime": 334.2582, |
|
"eval_samples_per_second": 897.45, |
|
"eval_steps_per_second": 56.091, |
|
"step": 608000 |
|
}, |
|
{ |
|
"epoch": 1.73, |
|
"eval_loss": 2.287848472595215, |
|
"eval_runtime": 332.6298, |
|
"eval_samples_per_second": 901.843, |
|
"eval_steps_per_second": 56.366, |
|
"step": 616000 |
|
}, |
|
{ |
|
"epoch": 1.75, |
|
"learning_rate": 3.034e-07, |
|
"loss": 2.4533, |
|
"step": 624000 |
|
}, |
|
{ |
|
"epoch": 1.75, |
|
"eval_loss": 2.2891786098480225, |
|
"eval_runtime": 333.5921, |
|
"eval_samples_per_second": 899.242, |
|
"eval_steps_per_second": 56.203, |
|
"step": 624000 |
|
}, |
|
{ |
|
"epoch": 1.77, |
|
"eval_loss": 2.2885706424713135, |
|
"eval_runtime": 332.9432, |
|
"eval_samples_per_second": 900.995, |
|
"eval_steps_per_second": 56.313, |
|
"step": 632000 |
|
}, |
|
{ |
|
"epoch": 1.8, |
|
"learning_rate": 3.0066666666666663e-07, |
|
"loss": 2.4575, |
|
"step": 640000 |
|
}, |
|
{ |
|
"epoch": 1.8, |
|
"eval_loss": 2.2894177436828613, |
|
"eval_runtime": 334.1499, |
|
"eval_samples_per_second": 897.741, |
|
"eval_steps_per_second": 56.11, |
|
"step": 640000 |
|
}, |
|
{ |
|
"epoch": 1.82, |
|
"eval_loss": 2.2870869636535645, |
|
"eval_runtime": 332.2509, |
|
"eval_samples_per_second": 902.872, |
|
"eval_steps_per_second": 56.43, |
|
"step": 648000 |
|
}, |
|
{ |
|
"epoch": 1.84, |
|
"learning_rate": 2.9793333333333334e-07, |
|
"loss": 2.4565, |
|
"step": 656000 |
|
}, |
|
{ |
|
"epoch": 1.84, |
|
"eval_loss": 2.2797837257385254, |
|
"eval_runtime": 332.3564, |
|
"eval_samples_per_second": 902.585, |
|
"eval_steps_per_second": 56.412, |
|
"step": 656000 |
|
}, |
|
{ |
|
"epoch": 1.86, |
|
"eval_loss": 2.2877373695373535, |
|
"eval_runtime": 332.7462, |
|
"eval_samples_per_second": 901.528, |
|
"eval_steps_per_second": 56.346, |
|
"step": 664000 |
|
}, |
|
{ |
|
"epoch": 1.89, |
|
"learning_rate": 2.952e-07, |
|
"loss": 2.4548, |
|
"step": 672000 |
|
}, |
|
{ |
|
"epoch": 1.89, |
|
"eval_loss": 2.2859256267547607, |
|
"eval_runtime": 333.4649, |
|
"eval_samples_per_second": 899.585, |
|
"eval_steps_per_second": 56.225, |
|
"step": 672000 |
|
}, |
|
{ |
|
"epoch": 1.91, |
|
"eval_loss": 2.2786755561828613, |
|
"eval_runtime": 331.6465, |
|
"eval_samples_per_second": 904.517, |
|
"eval_steps_per_second": 56.533, |
|
"step": 680000 |
|
}, |
|
{ |
|
"epoch": 1.93, |
|
"learning_rate": 2.9246666666666665e-07, |
|
"loss": 2.4507, |
|
"step": 688000 |
|
}, |
|
{ |
|
"epoch": 1.93, |
|
"eval_loss": 2.277973175048828, |
|
"eval_runtime": 332.624, |
|
"eval_samples_per_second": 901.859, |
|
"eval_steps_per_second": 56.367, |
|
"step": 688000 |
|
}, |
|
{ |
|
"epoch": 1.95, |
|
"eval_loss": 2.2825992107391357, |
|
"eval_runtime": 332.2329, |
|
"eval_samples_per_second": 902.921, |
|
"eval_steps_per_second": 56.433, |
|
"step": 696000 |
|
}, |
|
{ |
|
"epoch": 1.98, |
|
"learning_rate": 2.897333333333333e-07, |
|
"loss": 2.4455, |
|
"step": 704000 |
|
}, |
|
{ |
|
"epoch": 1.98, |
|
"eval_loss": 2.283816337585449, |
|
"eval_runtime": 332.7513, |
|
"eval_samples_per_second": 901.514, |
|
"eval_steps_per_second": 56.345, |
|
"step": 704000 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"eval_loss": 2.2763917446136475, |
|
"eval_runtime": 331.7671, |
|
"eval_samples_per_second": 904.188, |
|
"eval_steps_per_second": 56.513, |
|
"step": 712000 |
|
}, |
|
{ |
|
"epoch": 2.02, |
|
"learning_rate": 2.8699999999999996e-07, |
|
"loss": 2.4516, |
|
"step": 720000 |
|
}, |
|
{ |
|
"epoch": 2.02, |
|
"eval_loss": 2.281355381011963, |
|
"eval_runtime": 331.7857, |
|
"eval_samples_per_second": 904.138, |
|
"eval_steps_per_second": 56.509, |
|
"step": 720000 |
|
}, |
|
{ |
|
"epoch": 2.04, |
|
"eval_loss": 2.2807059288024902, |
|
"eval_runtime": 332.6438, |
|
"eval_samples_per_second": 901.805, |
|
"eval_steps_per_second": 56.364, |
|
"step": 728000 |
|
}, |
|
{ |
|
"epoch": 2.07, |
|
"learning_rate": 2.8426666666666667e-07, |
|
"loss": 2.445, |
|
"step": 736000 |
|
}, |
|
{ |
|
"epoch": 2.07, |
|
"eval_loss": 2.2740111351013184, |
|
"eval_runtime": 332.4045, |
|
"eval_samples_per_second": 902.455, |
|
"eval_steps_per_second": 56.404, |
|
"step": 736000 |
|
}, |
|
{ |
|
"epoch": 2.09, |
|
"eval_loss": 2.277953624725342, |
|
"eval_runtime": 331.8291, |
|
"eval_samples_per_second": 904.02, |
|
"eval_steps_per_second": 56.502, |
|
"step": 744000 |
|
}, |
|
{ |
|
"epoch": 2.11, |
|
"learning_rate": 2.815333333333333e-07, |
|
"loss": 2.4466, |
|
"step": 752000 |
|
}, |
|
{ |
|
"epoch": 2.11, |
|
"eval_loss": 2.2774717807769775, |
|
"eval_runtime": 331.8071, |
|
"eval_samples_per_second": 904.079, |
|
"eval_steps_per_second": 56.506, |
|
"step": 752000 |
|
}, |
|
{ |
|
"epoch": 2.13, |
|
"eval_loss": 2.2783188819885254, |
|
"eval_runtime": 333.2568, |
|
"eval_samples_per_second": 900.147, |
|
"eval_steps_per_second": 56.26, |
|
"step": 760000 |
|
}, |
|
{ |
|
"epoch": 2.16, |
|
"learning_rate": 2.7880000000000003e-07, |
|
"loss": 2.4476, |
|
"step": 768000 |
|
}, |
|
{ |
|
"epoch": 2.16, |
|
"eval_loss": 2.2762770652770996, |
|
"eval_runtime": 331.8887, |
|
"eval_samples_per_second": 903.857, |
|
"eval_steps_per_second": 56.492, |
|
"step": 768000 |
|
}, |
|
{ |
|
"epoch": 2.18, |
|
"eval_loss": 2.2737369537353516, |
|
"eval_runtime": 331.8743, |
|
"eval_samples_per_second": 903.896, |
|
"eval_steps_per_second": 56.494, |
|
"step": 776000 |
|
}, |
|
{ |
|
"epoch": 2.2, |
|
"learning_rate": 2.7606666666666664e-07, |
|
"loss": 2.4449, |
|
"step": 784000 |
|
}, |
|
{ |
|
"epoch": 2.2, |
|
"eval_loss": 2.2752888202667236, |
|
"eval_runtime": 334.1528, |
|
"eval_samples_per_second": 897.733, |
|
"eval_steps_per_second": 56.109, |
|
"step": 784000 |
|
}, |
|
{ |
|
"epoch": 2.22, |
|
"eval_loss": 2.276200532913208, |
|
"eval_runtime": 332.3689, |
|
"eval_samples_per_second": 902.551, |
|
"eval_steps_per_second": 56.41, |
|
"step": 792000 |
|
}, |
|
{ |
|
"epoch": 2.25, |
|
"learning_rate": 2.733333333333333e-07, |
|
"loss": 2.4424, |
|
"step": 800000 |
|
}, |
|
{ |
|
"epoch": 2.25, |
|
"eval_loss": 2.276653528213501, |
|
"eval_runtime": 332.4217, |
|
"eval_samples_per_second": 902.408, |
|
"eval_steps_per_second": 56.401, |
|
"step": 800000 |
|
}, |
|
{ |
|
"epoch": 2.27, |
|
"eval_loss": 2.2701988220214844, |
|
"eval_runtime": 332.7419, |
|
"eval_samples_per_second": 901.54, |
|
"eval_steps_per_second": 56.347, |
|
"step": 808000 |
|
}, |
|
{ |
|
"epoch": 2.29, |
|
"learning_rate": 2.706e-07, |
|
"loss": 2.4528, |
|
"step": 816000 |
|
}, |
|
{ |
|
"epoch": 2.29, |
|
"eval_loss": 2.26547908782959, |
|
"eval_runtime": 332.284, |
|
"eval_samples_per_second": 902.782, |
|
"eval_steps_per_second": 56.425, |
|
"step": 816000 |
|
}, |
|
{ |
|
"epoch": 2.31, |
|
"eval_loss": 2.272664785385132, |
|
"eval_runtime": 332.94, |
|
"eval_samples_per_second": 901.003, |
|
"eval_steps_per_second": 56.313, |
|
"step": 824000 |
|
}, |
|
{ |
|
"epoch": 2.34, |
|
"learning_rate": 2.6786666666666666e-07, |
|
"loss": 2.4523, |
|
"step": 832000 |
|
}, |
|
{ |
|
"epoch": 2.34, |
|
"eval_loss": 2.2732608318328857, |
|
"eval_runtime": 332.6487, |
|
"eval_samples_per_second": 901.792, |
|
"eval_steps_per_second": 56.363, |
|
"step": 832000 |
|
}, |
|
{ |
|
"epoch": 2.36, |
|
"eval_loss": 2.2654263973236084, |
|
"eval_runtime": 332.7531, |
|
"eval_samples_per_second": 901.509, |
|
"eval_steps_per_second": 56.345, |
|
"step": 840000 |
|
}, |
|
{ |
|
"epoch": 2.38, |
|
"learning_rate": 2.651333333333333e-07, |
|
"loss": 2.4395, |
|
"step": 848000 |
|
}, |
|
{ |
|
"epoch": 2.38, |
|
"eval_loss": 2.2673776149749756, |
|
"eval_runtime": 332.3327, |
|
"eval_samples_per_second": 902.65, |
|
"eval_steps_per_second": 56.416, |
|
"step": 848000 |
|
}, |
|
{ |
|
"epoch": 2.4, |
|
"eval_loss": 2.275400161743164, |
|
"eval_runtime": 333.0968, |
|
"eval_samples_per_second": 900.579, |
|
"eval_steps_per_second": 56.287, |
|
"step": 856000 |
|
}, |
|
{ |
|
"epoch": 2.43, |
|
"learning_rate": 2.624e-07, |
|
"loss": 2.434, |
|
"step": 864000 |
|
}, |
|
{ |
|
"epoch": 2.43, |
|
"eval_loss": 2.2722461223602295, |
|
"eval_runtime": 333.3836, |
|
"eval_samples_per_second": 899.804, |
|
"eval_steps_per_second": 56.239, |
|
"step": 864000 |
|
}, |
|
{ |
|
"epoch": 2.45, |
|
"eval_loss": 2.266554117202759, |
|
"eval_runtime": 332.9633, |
|
"eval_samples_per_second": 900.94, |
|
"eval_steps_per_second": 56.31, |
|
"step": 872000 |
|
}, |
|
{ |
|
"epoch": 2.47, |
|
"learning_rate": 2.596666666666667e-07, |
|
"loss": 2.4407, |
|
"step": 880000 |
|
}, |
|
{ |
|
"epoch": 2.47, |
|
"eval_loss": 2.265575647354126, |
|
"eval_runtime": 334.6536, |
|
"eval_samples_per_second": 896.39, |
|
"eval_steps_per_second": 56.025, |
|
"step": 880000 |
|
}, |
|
{ |
|
"epoch": 2.49, |
|
"eval_loss": 2.265437602996826, |
|
"eval_runtime": 333.1051, |
|
"eval_samples_per_second": 900.556, |
|
"eval_steps_per_second": 56.286, |
|
"step": 888000 |
|
}, |
|
{ |
|
"epoch": 2.52, |
|
"learning_rate": 2.5693333333333333e-07, |
|
"loss": 2.4352, |
|
"step": 896000 |
|
}, |
|
{ |
|
"epoch": 2.52, |
|
"eval_loss": 2.263028383255005, |
|
"eval_runtime": 333.7641, |
|
"eval_samples_per_second": 898.778, |
|
"eval_steps_per_second": 56.174, |
|
"step": 896000 |
|
}, |
|
{ |
|
"epoch": 2.54, |
|
"eval_loss": 2.2662160396575928, |
|
"eval_runtime": 333.089, |
|
"eval_samples_per_second": 900.6, |
|
"eval_steps_per_second": 56.288, |
|
"step": 904000 |
|
}, |
|
{ |
|
"epoch": 2.56, |
|
"learning_rate": 2.542e-07, |
|
"loss": 2.4393, |
|
"step": 912000 |
|
}, |
|
{ |
|
"epoch": 2.56, |
|
"eval_loss": 2.2692363262176514, |
|
"eval_runtime": 333.5532, |
|
"eval_samples_per_second": 899.347, |
|
"eval_steps_per_second": 56.21, |
|
"step": 912000 |
|
}, |
|
{ |
|
"epoch": 2.58, |
|
"eval_loss": 2.2558484077453613, |
|
"eval_runtime": 335.5892, |
|
"eval_samples_per_second": 893.891, |
|
"eval_steps_per_second": 55.869, |
|
"step": 920000 |
|
}, |
|
{ |
|
"epoch": 2.61, |
|
"learning_rate": 2.5146666666666664e-07, |
|
"loss": 2.4378, |
|
"step": 928000 |
|
}, |
|
{ |
|
"epoch": 2.61, |
|
"eval_loss": 2.2619380950927734, |
|
"eval_runtime": 333.9818, |
|
"eval_samples_per_second": 898.193, |
|
"eval_steps_per_second": 56.138, |
|
"step": 928000 |
|
}, |
|
{ |
|
"epoch": 2.63, |
|
"eval_loss": 2.261375665664673, |
|
"eval_runtime": 333.299, |
|
"eval_samples_per_second": 900.033, |
|
"eval_steps_per_second": 56.253, |
|
"step": 936000 |
|
}, |
|
{ |
|
"epoch": 2.65, |
|
"learning_rate": 2.4873333333333335e-07, |
|
"loss": 2.4392, |
|
"step": 944000 |
|
}, |
|
{ |
|
"epoch": 2.65, |
|
"eval_loss": 2.2577741146087646, |
|
"eval_runtime": 332.5892, |
|
"eval_samples_per_second": 901.954, |
|
"eval_steps_per_second": 56.373, |
|
"step": 944000 |
|
}, |
|
{ |
|
"epoch": 2.67, |
|
"eval_loss": 2.267181873321533, |
|
"eval_runtime": 333.717, |
|
"eval_samples_per_second": 898.905, |
|
"eval_steps_per_second": 56.182, |
|
"step": 952000 |
|
}, |
|
{ |
|
"epoch": 2.69, |
|
"learning_rate": 2.46e-07, |
|
"loss": 2.437, |
|
"step": 960000 |
|
}, |
|
{ |
|
"epoch": 2.69, |
|
"eval_loss": 2.2597758769989014, |
|
"eval_runtime": 334.1825, |
|
"eval_samples_per_second": 897.653, |
|
"eval_steps_per_second": 56.104, |
|
"step": 960000 |
|
}, |
|
{ |
|
"epoch": 2.72, |
|
"eval_loss": 2.263289451599121, |
|
"eval_runtime": 333.6576, |
|
"eval_samples_per_second": 899.065, |
|
"eval_steps_per_second": 56.192, |
|
"step": 968000 |
|
}, |
|
{ |
|
"epoch": 2.74, |
|
"learning_rate": 2.4326666666666666e-07, |
|
"loss": 2.4388, |
|
"step": 976000 |
|
}, |
|
{ |
|
"epoch": 2.74, |
|
"eval_loss": 2.256582260131836, |
|
"eval_runtime": 335.1086, |
|
"eval_samples_per_second": 895.172, |
|
"eval_steps_per_second": 55.949, |
|
"step": 976000 |
|
}, |
|
{ |
|
"epoch": 2.76, |
|
"eval_loss": 2.255068778991699, |
|
"eval_runtime": 334.1259, |
|
"eval_samples_per_second": 897.805, |
|
"eval_steps_per_second": 56.114, |
|
"step": 984000 |
|
}, |
|
{ |
|
"epoch": 2.78, |
|
"learning_rate": 2.405333333333333e-07, |
|
"loss": 2.4386, |
|
"step": 992000 |
|
}, |
|
{ |
|
"epoch": 2.78, |
|
"eval_loss": 2.2605791091918945, |
|
"eval_runtime": 334.7883, |
|
"eval_samples_per_second": 896.029, |
|
"eval_steps_per_second": 56.003, |
|
"step": 992000 |
|
}, |
|
{ |
|
"epoch": 2.81, |
|
"eval_loss": 2.263402223587036, |
|
"eval_runtime": 334.1108, |
|
"eval_samples_per_second": 897.846, |
|
"eval_steps_per_second": 56.116, |
|
"step": 1000000 |
|
}, |
|
{ |
|
"epoch": 2.83, |
|
"learning_rate": 2.3779999999999997e-07, |
|
"loss": 2.4402, |
|
"step": 1008000 |
|
}, |
|
{ |
|
"epoch": 2.83, |
|
"eval_loss": 2.264103889465332, |
|
"eval_runtime": 334.5974, |
|
"eval_samples_per_second": 896.54, |
|
"eval_steps_per_second": 56.035, |
|
"step": 1008000 |
|
}, |
|
{ |
|
"epoch": 2.85, |
|
"eval_loss": 2.2618744373321533, |
|
"eval_runtime": 335.396, |
|
"eval_samples_per_second": 894.405, |
|
"eval_steps_per_second": 55.901, |
|
"step": 1016000 |
|
}, |
|
{ |
|
"epoch": 2.87, |
|
"learning_rate": 2.3506666666666668e-07, |
|
"loss": 2.4442, |
|
"step": 1024000 |
|
}, |
|
{ |
|
"epoch": 2.87, |
|
"eval_loss": 2.258431911468506, |
|
"eval_runtime": 334.1391, |
|
"eval_samples_per_second": 897.77, |
|
"eval_steps_per_second": 56.111, |
|
"step": 1024000 |
|
}, |
|
{ |
|
"epoch": 2.9, |
|
"eval_loss": 2.257888078689575, |
|
"eval_runtime": 337.4777, |
|
"eval_samples_per_second": 888.888, |
|
"eval_steps_per_second": 55.556, |
|
"step": 1032000 |
|
}, |
|
{ |
|
"epoch": 2.92, |
|
"learning_rate": 2.3233333333333334e-07, |
|
"loss": 2.4327, |
|
"step": 1040000 |
|
}, |
|
{ |
|
"epoch": 2.92, |
|
"eval_loss": 2.252260684967041, |
|
"eval_runtime": 335.916, |
|
"eval_samples_per_second": 893.021, |
|
"eval_steps_per_second": 55.815, |
|
"step": 1040000 |
|
}, |
|
{ |
|
"epoch": 2.94, |
|
"eval_loss": 2.2561793327331543, |
|
"eval_runtime": 335.381, |
|
"eval_samples_per_second": 894.446, |
|
"eval_steps_per_second": 55.904, |
|
"step": 1048000 |
|
}, |
|
{ |
|
"epoch": 2.96, |
|
"learning_rate": 2.2960000000000002e-07, |
|
"loss": 2.4289, |
|
"step": 1056000 |
|
}, |
|
{ |
|
"epoch": 2.96, |
|
"eval_loss": 2.259270191192627, |
|
"eval_runtime": 338.94, |
|
"eval_samples_per_second": 885.053, |
|
"eval_steps_per_second": 55.317, |
|
"step": 1056000 |
|
}, |
|
{ |
|
"epoch": 2.99, |
|
"eval_loss": 2.256190776824951, |
|
"eval_runtime": 337.0761, |
|
"eval_samples_per_second": 889.947, |
|
"eval_steps_per_second": 55.622, |
|
"step": 1064000 |
|
}, |
|
{ |
|
"epoch": 3.01, |
|
"learning_rate": 2.2686666666666667e-07, |
|
"loss": 2.4319, |
|
"step": 1072000 |
|
}, |
|
{ |
|
"epoch": 3.01, |
|
"eval_loss": 2.253577709197998, |
|
"eval_runtime": 337.3724, |
|
"eval_samples_per_second": 889.166, |
|
"eval_steps_per_second": 55.574, |
|
"step": 1072000 |
|
}, |
|
{ |
|
"epoch": 3.03, |
|
"eval_loss": 2.260322332382202, |
|
"eval_runtime": 335.9181, |
|
"eval_samples_per_second": 893.015, |
|
"eval_steps_per_second": 55.814, |
|
"step": 1080000 |
|
}, |
|
{ |
|
"epoch": 3.05, |
|
"learning_rate": 2.2413333333333333e-07, |
|
"loss": 2.4174, |
|
"step": 1088000 |
|
}, |
|
{ |
|
"epoch": 3.05, |
|
"eval_loss": 2.2548863887786865, |
|
"eval_runtime": 336.1593, |
|
"eval_samples_per_second": 892.374, |
|
"eval_steps_per_second": 55.774, |
|
"step": 1088000 |
|
}, |
|
{ |
|
"epoch": 3.08, |
|
"eval_loss": 2.2595221996307373, |
|
"eval_runtime": 338.2665, |
|
"eval_samples_per_second": 886.816, |
|
"eval_steps_per_second": 55.427, |
|
"step": 1096000 |
|
}, |
|
{ |
|
"epoch": 3.1, |
|
"learning_rate": 2.214e-07, |
|
"loss": 2.4155, |
|
"step": 1104000 |
|
}, |
|
{ |
|
"epoch": 3.1, |
|
"eval_loss": 2.255467176437378, |
|
"eval_runtime": 335.0383, |
|
"eval_samples_per_second": 895.36, |
|
"eval_steps_per_second": 55.961, |
|
"step": 1104000 |
|
}, |
|
{ |
|
"epoch": 3.12, |
|
"eval_loss": 2.250143527984619, |
|
"eval_runtime": 337.3147, |
|
"eval_samples_per_second": 889.318, |
|
"eval_steps_per_second": 55.583, |
|
"step": 1112000 |
|
}, |
|
{ |
|
"epoch": 3.14, |
|
"learning_rate": 2.1866666666666667e-07, |
|
"loss": 2.427, |
|
"step": 1120000 |
|
}, |
|
{ |
|
"epoch": 3.14, |
|
"eval_loss": 2.2528042793273926, |
|
"eval_runtime": 335.8317, |
|
"eval_samples_per_second": 893.245, |
|
"eval_steps_per_second": 55.829, |
|
"step": 1120000 |
|
}, |
|
{ |
|
"epoch": 3.17, |
|
"eval_loss": 2.252933979034424, |
|
"eval_runtime": 335.79, |
|
"eval_samples_per_second": 893.356, |
|
"eval_steps_per_second": 55.835, |
|
"step": 1128000 |
|
}, |
|
{ |
|
"epoch": 3.19, |
|
"learning_rate": 2.1593333333333332e-07, |
|
"loss": 2.4222, |
|
"step": 1136000 |
|
}, |
|
{ |
|
"epoch": 3.19, |
|
"eval_loss": 2.253556251525879, |
|
"eval_runtime": 336.7473, |
|
"eval_samples_per_second": 890.816, |
|
"eval_steps_per_second": 55.677, |
|
"step": 1136000 |
|
}, |
|
{ |
|
"epoch": 3.21, |
|
"eval_loss": 2.258152723312378, |
|
"eval_runtime": 337.5276, |
|
"eval_samples_per_second": 888.757, |
|
"eval_steps_per_second": 55.548, |
|
"step": 1144000 |
|
}, |
|
{ |
|
"epoch": 3.23, |
|
"learning_rate": 2.132e-07, |
|
"loss": 2.4232, |
|
"step": 1152000 |
|
}, |
|
{ |
|
"epoch": 3.23, |
|
"eval_loss": 2.2522146701812744, |
|
"eval_runtime": 335.4197, |
|
"eval_samples_per_second": 894.342, |
|
"eval_steps_per_second": 55.897, |
|
"step": 1152000 |
|
}, |
|
{ |
|
"epoch": 3.26, |
|
"eval_loss": 2.2524819374084473, |
|
"eval_runtime": 337.4419, |
|
"eval_samples_per_second": 888.983, |
|
"eval_steps_per_second": 55.562, |
|
"step": 1160000 |
|
}, |
|
{ |
|
"epoch": 3.28, |
|
"learning_rate": 2.1046666666666666e-07, |
|
"loss": 2.4252, |
|
"step": 1168000 |
|
}, |
|
{ |
|
"epoch": 3.28, |
|
"eval_loss": 2.2537834644317627, |
|
"eval_runtime": 336.2053, |
|
"eval_samples_per_second": 892.252, |
|
"eval_steps_per_second": 55.767, |
|
"step": 1168000 |
|
}, |
|
{ |
|
"epoch": 3.3, |
|
"eval_loss": 2.2512009143829346, |
|
"eval_runtime": 335.734, |
|
"eval_samples_per_second": 893.505, |
|
"eval_steps_per_second": 55.845, |
|
"step": 1176000 |
|
}, |
|
{ |
|
"epoch": 3.32, |
|
"learning_rate": 2.0773333333333334e-07, |
|
"loss": 2.4209, |
|
"step": 1184000 |
|
}, |
|
{ |
|
"epoch": 3.32, |
|
"eval_loss": 2.255702018737793, |
|
"eval_runtime": 337.398, |
|
"eval_samples_per_second": 889.098, |
|
"eval_steps_per_second": 55.569, |
|
"step": 1184000 |
|
}, |
|
{ |
|
"epoch": 3.35, |
|
"eval_loss": 2.2445454597473145, |
|
"eval_runtime": 338.3834, |
|
"eval_samples_per_second": 886.509, |
|
"eval_steps_per_second": 55.408, |
|
"step": 1192000 |
|
}, |
|
{ |
|
"epoch": 3.37, |
|
"learning_rate": 2.05e-07, |
|
"loss": 2.4243, |
|
"step": 1200000 |
|
}, |
|
{ |
|
"epoch": 3.37, |
|
"eval_loss": 2.257007122039795, |
|
"eval_runtime": 336.8153, |
|
"eval_samples_per_second": 890.636, |
|
"eval_steps_per_second": 55.666, |
|
"step": 1200000 |
|
}, |
|
{ |
|
"epoch": 3.39, |
|
"eval_loss": 2.25388240814209, |
|
"eval_runtime": 339.0365, |
|
"eval_samples_per_second": 884.801, |
|
"eval_steps_per_second": 55.301, |
|
"step": 1208000 |
|
}, |
|
{ |
|
"epoch": 3.41, |
|
"learning_rate": 2.0226666666666668e-07, |
|
"loss": 2.4278, |
|
"step": 1216000 |
|
}, |
|
{ |
|
"epoch": 3.41, |
|
"eval_loss": 2.2514150142669678, |
|
"eval_runtime": 340.5571, |
|
"eval_samples_per_second": 880.851, |
|
"eval_steps_per_second": 55.054, |
|
"step": 1216000 |
|
}, |
|
{ |
|
"epoch": 3.44, |
|
"eval_loss": 2.2454025745391846, |
|
"eval_runtime": 337.7515, |
|
"eval_samples_per_second": 888.168, |
|
"eval_steps_per_second": 55.511, |
|
"step": 1224000 |
|
}, |
|
{ |
|
"epoch": 3.46, |
|
"learning_rate": 1.9953333333333333e-07, |
|
"loss": 2.4286, |
|
"step": 1232000 |
|
}, |
|
{ |
|
"epoch": 3.46, |
|
"eval_loss": 2.246293306350708, |
|
"eval_runtime": 339.7018, |
|
"eval_samples_per_second": 883.069, |
|
"eval_steps_per_second": 55.193, |
|
"step": 1232000 |
|
}, |
|
{ |
|
"epoch": 3.48, |
|
"eval_loss": 2.25063157081604, |
|
"eval_runtime": 336.5454, |
|
"eval_samples_per_second": 891.351, |
|
"eval_steps_per_second": 55.71, |
|
"step": 1240000 |
|
}, |
|
{ |
|
"epoch": 3.5, |
|
"learning_rate": 1.968e-07, |
|
"loss": 2.4274, |
|
"step": 1248000 |
|
}, |
|
{ |
|
"epoch": 3.5, |
|
"eval_loss": 2.2426698207855225, |
|
"eval_runtime": 339.3399, |
|
"eval_samples_per_second": 884.01, |
|
"eval_steps_per_second": 55.251, |
|
"step": 1248000 |
|
}, |
|
{ |
|
"epoch": 3.53, |
|
"eval_loss": 2.2535440921783447, |
|
"eval_runtime": 339.1007, |
|
"eval_samples_per_second": 884.634, |
|
"eval_steps_per_second": 55.29, |
|
"step": 1256000 |
|
}, |
|
{ |
|
"epoch": 3.55, |
|
"learning_rate": 1.9406666666666667e-07, |
|
"loss": 2.4201, |
|
"step": 1264000 |
|
}, |
|
{ |
|
"epoch": 3.55, |
|
"eval_loss": 2.2516891956329346, |
|
"eval_runtime": 337.2905, |
|
"eval_samples_per_second": 889.382, |
|
"eval_steps_per_second": 55.587, |
|
"step": 1264000 |
|
}, |
|
{ |
|
"epoch": 3.57, |
|
"eval_loss": 2.2436001300811768, |
|
"eval_runtime": 340.5027, |
|
"eval_samples_per_second": 880.992, |
|
"eval_steps_per_second": 55.063, |
|
"step": 1272000 |
|
}, |
|
{ |
|
"epoch": 3.59, |
|
"learning_rate": 1.9133333333333333e-07, |
|
"loss": 2.4233, |
|
"step": 1280000 |
|
}, |
|
{ |
|
"epoch": 3.59, |
|
"eval_loss": 2.242955446243286, |
|
"eval_runtime": 338.961, |
|
"eval_samples_per_second": 884.999, |
|
"eval_steps_per_second": 55.313, |
|
"step": 1280000 |
|
}, |
|
{ |
|
"epoch": 3.62, |
|
"eval_loss": 2.247040271759033, |
|
"eval_runtime": 336.8862, |
|
"eval_samples_per_second": 890.449, |
|
"eval_steps_per_second": 55.654, |
|
"step": 1288000 |
|
}, |
|
{ |
|
"epoch": 3.64, |
|
"learning_rate": 1.886e-07, |
|
"loss": 2.4183, |
|
"step": 1296000 |
|
}, |
|
{ |
|
"epoch": 3.64, |
|
"eval_loss": 2.244565963745117, |
|
"eval_runtime": 337.986, |
|
"eval_samples_per_second": 887.552, |
|
"eval_steps_per_second": 55.473, |
|
"step": 1296000 |
|
}, |
|
{ |
|
"epoch": 3.66, |
|
"eval_loss": 2.2539021968841553, |
|
"eval_runtime": 340.0782, |
|
"eval_samples_per_second": 882.091, |
|
"eval_steps_per_second": 55.131, |
|
"step": 1304000 |
|
}, |
|
{ |
|
"epoch": 3.68, |
|
"learning_rate": 1.8586666666666666e-07, |
|
"loss": 2.428, |
|
"step": 1312000 |
|
}, |
|
{ |
|
"epoch": 3.68, |
|
"eval_loss": 2.249154806137085, |
|
"eval_runtime": 337.5652, |
|
"eval_samples_per_second": 888.658, |
|
"eval_steps_per_second": 55.542, |
|
"step": 1312000 |
|
}, |
|
{ |
|
"epoch": 3.71, |
|
"eval_loss": 2.2543509006500244, |
|
"eval_runtime": 337.3598, |
|
"eval_samples_per_second": 889.199, |
|
"eval_steps_per_second": 55.576, |
|
"step": 1320000 |
|
}, |
|
{ |
|
"epoch": 3.73, |
|
"learning_rate": 1.8313333333333332e-07, |
|
"loss": 2.4206, |
|
"step": 1328000 |
|
}, |
|
{ |
|
"epoch": 3.73, |
|
"eval_loss": 2.2478220462799072, |
|
"eval_runtime": 339.2392, |
|
"eval_samples_per_second": 884.273, |
|
"eval_steps_per_second": 55.268, |
|
"step": 1328000 |
|
}, |
|
{ |
|
"epoch": 3.75, |
|
"eval_loss": 2.2420246601104736, |
|
"eval_runtime": 337.9033, |
|
"eval_samples_per_second": 887.769, |
|
"eval_steps_per_second": 55.486, |
|
"step": 1336000 |
|
}, |
|
{ |
|
"epoch": 3.77, |
|
"learning_rate": 1.804e-07, |
|
"loss": 2.4287, |
|
"step": 1344000 |
|
}, |
|
{ |
|
"epoch": 3.77, |
|
"eval_loss": 2.244210958480835, |
|
"eval_runtime": 337.3268, |
|
"eval_samples_per_second": 889.286, |
|
"eval_steps_per_second": 55.581, |
|
"step": 1344000 |
|
}, |
|
{ |
|
"epoch": 3.8, |
|
"eval_loss": 2.2426180839538574, |
|
"eval_runtime": 339.5586, |
|
"eval_samples_per_second": 883.441, |
|
"eval_steps_per_second": 55.216, |
|
"step": 1352000 |
|
}, |
|
{ |
|
"epoch": 3.82, |
|
"learning_rate": 1.7766666666666666e-07, |
|
"loss": 2.4297, |
|
"step": 1360000 |
|
}, |
|
{ |
|
"epoch": 3.82, |
|
"eval_loss": 2.242596387863159, |
|
"eval_runtime": 337.7343, |
|
"eval_samples_per_second": 888.213, |
|
"eval_steps_per_second": 55.514, |
|
"step": 1360000 |
|
}, |
|
{ |
|
"epoch": 3.84, |
|
"eval_loss": 2.2480640411376953, |
|
"eval_runtime": 337.5382, |
|
"eval_samples_per_second": 888.729, |
|
"eval_steps_per_second": 55.546, |
|
"step": 1368000 |
|
}, |
|
{ |
|
"epoch": 3.86, |
|
"learning_rate": 1.7493333333333334e-07, |
|
"loss": 2.4185, |
|
"step": 1376000 |
|
}, |
|
{ |
|
"epoch": 3.86, |
|
"eval_loss": 2.2448768615722656, |
|
"eval_runtime": 339.0271, |
|
"eval_samples_per_second": 884.826, |
|
"eval_steps_per_second": 55.302, |
|
"step": 1376000 |
|
}, |
|
{ |
|
"epoch": 3.89, |
|
"eval_loss": 2.246758222579956, |
|
"eval_runtime": 338.8022, |
|
"eval_samples_per_second": 885.413, |
|
"eval_steps_per_second": 55.339, |
|
"step": 1384000 |
|
}, |
|
{ |
|
"epoch": 3.91, |
|
"learning_rate": 1.722e-07, |
|
"loss": 2.4217, |
|
"step": 1392000 |
|
}, |
|
{ |
|
"epoch": 3.91, |
|
"eval_loss": 2.2466745376586914, |
|
"eval_runtime": 341.1017, |
|
"eval_samples_per_second": 879.444, |
|
"eval_steps_per_second": 54.966, |
|
"step": 1392000 |
|
}, |
|
{ |
|
"epoch": 3.93, |
|
"eval_loss": 2.2463412284851074, |
|
"eval_runtime": 340.0034, |
|
"eval_samples_per_second": 882.285, |
|
"eval_steps_per_second": 55.144, |
|
"step": 1400000 |
|
}, |
|
{ |
|
"epoch": 3.95, |
|
"learning_rate": 1.6946666666666668e-07, |
|
"loss": 2.4144, |
|
"step": 1408000 |
|
}, |
|
{ |
|
"epoch": 3.95, |
|
"eval_loss": 2.2481906414031982, |
|
"eval_runtime": 338.7844, |
|
"eval_samples_per_second": 885.46, |
|
"eval_steps_per_second": 55.342, |
|
"step": 1408000 |
|
}, |
|
{ |
|
"epoch": 3.97, |
|
"eval_loss": 2.242440938949585, |
|
"eval_runtime": 339.569, |
|
"eval_samples_per_second": 883.414, |
|
"eval_steps_per_second": 55.214, |
|
"step": 1416000 |
|
}, |
|
{ |
|
"epoch": 4.0, |
|
"learning_rate": 1.6673333333333333e-07, |
|
"loss": 2.4175, |
|
"step": 1424000 |
|
}, |
|
{ |
|
"epoch": 4.0, |
|
"eval_loss": 2.2414705753326416, |
|
"eval_runtime": 339.2204, |
|
"eval_samples_per_second": 884.322, |
|
"eval_steps_per_second": 55.271, |
|
"step": 1424000 |
|
}, |
|
{ |
|
"epoch": 4.02, |
|
"eval_loss": 2.2450637817382812, |
|
"eval_runtime": 338.8494, |
|
"eval_samples_per_second": 885.29, |
|
"eval_steps_per_second": 55.331, |
|
"step": 1432000 |
|
}, |
|
{ |
|
"epoch": 4.04, |
|
"learning_rate": 1.64e-07, |
|
"loss": 2.4169, |
|
"step": 1440000 |
|
}, |
|
{ |
|
"epoch": 4.04, |
|
"eval_loss": 2.244276285171509, |
|
"eval_runtime": 338.3144, |
|
"eval_samples_per_second": 886.69, |
|
"eval_steps_per_second": 55.419, |
|
"step": 1440000 |
|
}, |
|
{ |
|
"epoch": 4.06, |
|
"eval_loss": 2.2389209270477295, |
|
"eval_runtime": 343.9025, |
|
"eval_samples_per_second": 872.282, |
|
"eval_steps_per_second": 54.518, |
|
"step": 1448000 |
|
}, |
|
{ |
|
"epoch": 4.09, |
|
"learning_rate": 1.6126666666666667e-07, |
|
"loss": 2.4142, |
|
"step": 1456000 |
|
}, |
|
{ |
|
"epoch": 4.09, |
|
"eval_loss": 2.2376506328582764, |
|
"eval_runtime": 338.9552, |
|
"eval_samples_per_second": 885.014, |
|
"eval_steps_per_second": 55.314, |
|
"step": 1456000 |
|
}, |
|
{ |
|
"epoch": 4.11, |
|
"eval_loss": 2.239941358566284, |
|
"eval_runtime": 342.029, |
|
"eval_samples_per_second": 877.06, |
|
"eval_steps_per_second": 54.817, |
|
"step": 1464000 |
|
}, |
|
{ |
|
"epoch": 4.13, |
|
"learning_rate": 1.5853333333333332e-07, |
|
"loss": 2.4122, |
|
"step": 1472000 |
|
}, |
|
{ |
|
"epoch": 4.13, |
|
"eval_loss": 2.24470853805542, |
|
"eval_runtime": 338.988, |
|
"eval_samples_per_second": 884.928, |
|
"eval_steps_per_second": 55.309, |
|
"step": 1472000 |
|
}, |
|
{ |
|
"epoch": 4.15, |
|
"eval_loss": 2.24562931060791, |
|
"eval_runtime": 341.3425, |
|
"eval_samples_per_second": 878.824, |
|
"eval_steps_per_second": 54.927, |
|
"step": 1480000 |
|
}, |
|
{ |
|
"epoch": 4.18, |
|
"learning_rate": 1.558e-07, |
|
"loss": 2.4166, |
|
"step": 1488000 |
|
}, |
|
{ |
|
"epoch": 4.18, |
|
"eval_loss": 2.245072364807129, |
|
"eval_runtime": 339.7578, |
|
"eval_samples_per_second": 882.923, |
|
"eval_steps_per_second": 55.183, |
|
"step": 1488000 |
|
}, |
|
{ |
|
"epoch": 4.2, |
|
"eval_loss": 2.2368929386138916, |
|
"eval_runtime": 340.9662, |
|
"eval_samples_per_second": 879.794, |
|
"eval_steps_per_second": 54.988, |
|
"step": 1496000 |
|
}, |
|
{ |
|
"epoch": 4.22, |
|
"learning_rate": 1.5306666666666666e-07, |
|
"loss": 2.4165, |
|
"step": 1504000 |
|
}, |
|
{ |
|
"epoch": 4.22, |
|
"eval_loss": 2.2426319122314453, |
|
"eval_runtime": 339.1777, |
|
"eval_samples_per_second": 884.433, |
|
"eval_steps_per_second": 55.278, |
|
"step": 1504000 |
|
}, |
|
{ |
|
"epoch": 4.24, |
|
"eval_loss": 2.238410472869873, |
|
"eval_runtime": 340.071, |
|
"eval_samples_per_second": 882.11, |
|
"eval_steps_per_second": 55.133, |
|
"step": 1512000 |
|
}, |
|
{ |
|
"epoch": 4.27, |
|
"learning_rate": 1.5033333333333332e-07, |
|
"loss": 2.4204, |
|
"step": 1520000 |
|
}, |
|
{ |
|
"epoch": 4.27, |
|
"eval_loss": 2.245389461517334, |
|
"eval_runtime": 339.4968, |
|
"eval_samples_per_second": 883.602, |
|
"eval_steps_per_second": 55.226, |
|
"step": 1520000 |
|
}, |
|
{ |
|
"epoch": 4.29, |
|
"eval_loss": 2.242230176925659, |
|
"eval_runtime": 341.1938, |
|
"eval_samples_per_second": 879.207, |
|
"eval_steps_per_second": 54.951, |
|
"step": 1528000 |
|
}, |
|
{ |
|
"epoch": 4.31, |
|
"learning_rate": 1.476e-07, |
|
"loss": 2.4192, |
|
"step": 1536000 |
|
}, |
|
{ |
|
"epoch": 4.31, |
|
"eval_loss": 2.2423222064971924, |
|
"eval_runtime": 341.7051, |
|
"eval_samples_per_second": 877.892, |
|
"eval_steps_per_second": 54.869, |
|
"step": 1536000 |
|
}, |
|
{ |
|
"epoch": 4.33, |
|
"eval_loss": 2.2434957027435303, |
|
"eval_runtime": 344.5773, |
|
"eval_samples_per_second": 870.574, |
|
"eval_steps_per_second": 54.412, |
|
"step": 1544000 |
|
}, |
|
{ |
|
"epoch": 4.36, |
|
"learning_rate": 1.4486666666666665e-07, |
|
"loss": 2.4167, |
|
"step": 1552000 |
|
}, |
|
{ |
|
"epoch": 4.36, |
|
"eval_loss": 2.2450661659240723, |
|
"eval_runtime": 342.2307, |
|
"eval_samples_per_second": 876.543, |
|
"eval_steps_per_second": 54.785, |
|
"step": 1552000 |
|
}, |
|
{ |
|
"epoch": 4.38, |
|
"eval_loss": 2.2442915439605713, |
|
"eval_runtime": 339.3897, |
|
"eval_samples_per_second": 883.881, |
|
"eval_steps_per_second": 55.243, |
|
"step": 1560000 |
|
}, |
|
{ |
|
"epoch": 4.4, |
|
"learning_rate": 1.4213333333333334e-07, |
|
"loss": 2.4124, |
|
"step": 1568000 |
|
}, |
|
{ |
|
"epoch": 4.4, |
|
"eval_loss": 2.243011713027954, |
|
"eval_runtime": 339.9044, |
|
"eval_samples_per_second": 882.542, |
|
"eval_steps_per_second": 55.16, |
|
"step": 1568000 |
|
}, |
|
{ |
|
"epoch": 4.42, |
|
"eval_loss": 2.2422168254852295, |
|
"eval_runtime": 340.0517, |
|
"eval_samples_per_second": 882.16, |
|
"eval_steps_per_second": 55.136, |
|
"step": 1576000 |
|
}, |
|
{ |
|
"epoch": 4.45, |
|
"learning_rate": 1.3940000000000002e-07, |
|
"loss": 2.406, |
|
"step": 1584000 |
|
}, |
|
{ |
|
"epoch": 4.45, |
|
"eval_loss": 2.2356574535369873, |
|
"eval_runtime": 339.8299, |
|
"eval_samples_per_second": 882.736, |
|
"eval_steps_per_second": 55.172, |
|
"step": 1584000 |
|
}, |
|
{ |
|
"epoch": 4.47, |
|
"eval_loss": 2.2395410537719727, |
|
"eval_runtime": 340.769, |
|
"eval_samples_per_second": 880.303, |
|
"eval_steps_per_second": 55.02, |
|
"step": 1592000 |
|
}, |
|
{ |
|
"epoch": 4.49, |
|
"learning_rate": 1.3666666666666665e-07, |
|
"loss": 2.4166, |
|
"step": 1600000 |
|
}, |
|
{ |
|
"epoch": 4.49, |
|
"eval_loss": 2.2377548217773438, |
|
"eval_runtime": 341.8287, |
|
"eval_samples_per_second": 877.574, |
|
"eval_steps_per_second": 54.849, |
|
"step": 1600000 |
|
}, |
|
{ |
|
"epoch": 4.51, |
|
"eval_loss": 2.2419931888580322, |
|
"eval_runtime": 341.3154, |
|
"eval_samples_per_second": 878.894, |
|
"eval_steps_per_second": 54.932, |
|
"step": 1608000 |
|
}, |
|
{ |
|
"epoch": 4.54, |
|
"learning_rate": 1.3393333333333333e-07, |
|
"loss": 2.4144, |
|
"step": 1616000 |
|
}, |
|
{ |
|
"epoch": 4.54, |
|
"eval_loss": 2.2401504516601562, |
|
"eval_runtime": 341.1783, |
|
"eval_samples_per_second": 879.247, |
|
"eval_steps_per_second": 54.954, |
|
"step": 1616000 |
|
}, |
|
{ |
|
"epoch": 4.56, |
|
"eval_loss": 2.238373041152954, |
|
"eval_runtime": 340.7212, |
|
"eval_samples_per_second": 880.427, |
|
"eval_steps_per_second": 55.027, |
|
"step": 1624000 |
|
}, |
|
{ |
|
"epoch": 4.58, |
|
"learning_rate": 1.312e-07, |
|
"loss": 2.4219, |
|
"step": 1632000 |
|
}, |
|
{ |
|
"epoch": 4.58, |
|
"eval_loss": 2.2437572479248047, |
|
"eval_runtime": 342.9314, |
|
"eval_samples_per_second": 874.752, |
|
"eval_steps_per_second": 54.673, |
|
"step": 1632000 |
|
}, |
|
{ |
|
"epoch": 4.6, |
|
"eval_loss": 2.2455334663391113, |
|
"eval_runtime": 340.3903, |
|
"eval_samples_per_second": 881.282, |
|
"eval_steps_per_second": 55.081, |
|
"step": 1640000 |
|
}, |
|
{ |
|
"epoch": 4.63, |
|
"learning_rate": 1.2846666666666667e-07, |
|
"loss": 2.4061, |
|
"step": 1648000 |
|
}, |
|
{ |
|
"epoch": 4.63, |
|
"eval_loss": 2.2396554946899414, |
|
"eval_runtime": 342.4586, |
|
"eval_samples_per_second": 875.96, |
|
"eval_steps_per_second": 54.748, |
|
"step": 1648000 |
|
}, |
|
{ |
|
"epoch": 4.65, |
|
"eval_loss": 2.23541522026062, |
|
"eval_runtime": 341.3616, |
|
"eval_samples_per_second": 878.775, |
|
"eval_steps_per_second": 54.924, |
|
"step": 1656000 |
|
}, |
|
{ |
|
"epoch": 4.67, |
|
"learning_rate": 1.2573333333333332e-07, |
|
"loss": 2.411, |
|
"step": 1664000 |
|
}, |
|
{ |
|
"epoch": 4.67, |
|
"eval_loss": 2.2392566204071045, |
|
"eval_runtime": 340.724, |
|
"eval_samples_per_second": 880.419, |
|
"eval_steps_per_second": 55.027, |
|
"step": 1664000 |
|
}, |
|
{ |
|
"epoch": 4.69, |
|
"eval_loss": 2.238832473754883, |
|
"eval_runtime": 342.3701, |
|
"eval_samples_per_second": 876.186, |
|
"eval_steps_per_second": 54.762, |
|
"step": 1672000 |
|
}, |
|
{ |
|
"epoch": 4.72, |
|
"learning_rate": 1.23e-07, |
|
"loss": 2.4125, |
|
"step": 1680000 |
|
}, |
|
{ |
|
"epoch": 4.72, |
|
"eval_loss": 2.2406108379364014, |
|
"eval_runtime": 343.1331, |
|
"eval_samples_per_second": 874.238, |
|
"eval_steps_per_second": 54.641, |
|
"step": 1680000 |
|
}, |
|
{ |
|
"epoch": 4.74, |
|
"eval_loss": 2.2330496311187744, |
|
"eval_runtime": 341.3886, |
|
"eval_samples_per_second": 878.705, |
|
"eval_steps_per_second": 54.92, |
|
"step": 1688000 |
|
}, |
|
{ |
|
"epoch": 4.76, |
|
"learning_rate": 1.2026666666666666e-07, |
|
"loss": 2.4092, |
|
"step": 1696000 |
|
}, |
|
{ |
|
"epoch": 4.76, |
|
"eval_loss": 2.2335941791534424, |
|
"eval_runtime": 341.2812, |
|
"eval_samples_per_second": 878.982, |
|
"eval_steps_per_second": 54.937, |
|
"step": 1696000 |
|
}, |
|
{ |
|
"epoch": 4.78, |
|
"eval_loss": 2.239811658859253, |
|
"eval_runtime": 341.3993, |
|
"eval_samples_per_second": 878.678, |
|
"eval_steps_per_second": 54.918, |
|
"step": 1704000 |
|
}, |
|
{ |
|
"epoch": 4.81, |
|
"learning_rate": 1.1753333333333334e-07, |
|
"loss": 2.4078, |
|
"step": 1712000 |
|
}, |
|
{ |
|
"epoch": 4.81, |
|
"eval_loss": 2.2368171215057373, |
|
"eval_runtime": 342.0299, |
|
"eval_samples_per_second": 877.058, |
|
"eval_steps_per_second": 54.817, |
|
"step": 1712000 |
|
}, |
|
{ |
|
"epoch": 4.83, |
|
"eval_loss": 2.236109495162964, |
|
"eval_runtime": 341.4467, |
|
"eval_samples_per_second": 878.556, |
|
"eval_steps_per_second": 54.91, |
|
"step": 1720000 |
|
}, |
|
{ |
|
"epoch": 4.85, |
|
"learning_rate": 1.1480000000000001e-07, |
|
"loss": 2.4185, |
|
"step": 1728000 |
|
}, |
|
{ |
|
"epoch": 4.85, |
|
"eval_loss": 2.2378110885620117, |
|
"eval_runtime": 342.0137, |
|
"eval_samples_per_second": 877.099, |
|
"eval_steps_per_second": 54.819, |
|
"step": 1728000 |
|
}, |
|
{ |
|
"epoch": 4.87, |
|
"eval_loss": 2.2338638305664062, |
|
"eval_runtime": 341.8702, |
|
"eval_samples_per_second": 877.467, |
|
"eval_steps_per_second": 54.842, |
|
"step": 1736000 |
|
}, |
|
{ |
|
"epoch": 4.9, |
|
"learning_rate": 1.1206666666666666e-07, |
|
"loss": 2.4088, |
|
"step": 1744000 |
|
}, |
|
{ |
|
"epoch": 4.9, |
|
"eval_loss": 2.2365610599517822, |
|
"eval_runtime": 343.0203, |
|
"eval_samples_per_second": 874.526, |
|
"eval_steps_per_second": 54.659, |
|
"step": 1744000 |
|
}, |
|
{ |
|
"epoch": 4.92, |
|
"eval_loss": 2.238463878631592, |
|
"eval_runtime": 342.0664, |
|
"eval_samples_per_second": 876.964, |
|
"eval_steps_per_second": 54.811, |
|
"step": 1752000 |
|
}, |
|
{ |
|
"epoch": 4.94, |
|
"learning_rate": 1.0933333333333333e-07, |
|
"loss": 2.4095, |
|
"step": 1760000 |
|
}, |
|
{ |
|
"epoch": 4.94, |
|
"eval_loss": 2.2336812019348145, |
|
"eval_runtime": 343.3628, |
|
"eval_samples_per_second": 873.653, |
|
"eval_steps_per_second": 54.604, |
|
"step": 1760000 |
|
}, |
|
{ |
|
"epoch": 4.96, |
|
"eval_loss": 2.2413289546966553, |
|
"eval_runtime": 342.3332, |
|
"eval_samples_per_second": 876.281, |
|
"eval_steps_per_second": 54.768, |
|
"step": 1768000 |
|
}, |
|
{ |
|
"epoch": 4.99, |
|
"learning_rate": 1.066e-07, |
|
"loss": 2.4078, |
|
"step": 1776000 |
|
}, |
|
{ |
|
"epoch": 4.99, |
|
"eval_loss": 2.237656593322754, |
|
"eval_runtime": 342.7683, |
|
"eval_samples_per_second": 875.168, |
|
"eval_steps_per_second": 54.699, |
|
"step": 1776000 |
|
}, |
|
{ |
|
"epoch": 5.01, |
|
"eval_loss": 2.2302229404449463, |
|
"eval_runtime": 342.0809, |
|
"eval_samples_per_second": 876.927, |
|
"eval_steps_per_second": 54.809, |
|
"step": 1784000 |
|
}, |
|
{ |
|
"epoch": 5.03, |
|
"learning_rate": 1.0386666666666667e-07, |
|
"loss": 2.4073, |
|
"step": 1792000 |
|
}, |
|
{ |
|
"epoch": 5.03, |
|
"eval_loss": 2.2356677055358887, |
|
"eval_runtime": 342.5577, |
|
"eval_samples_per_second": 875.707, |
|
"eval_steps_per_second": 54.732, |
|
"step": 1792000 |
|
}, |
|
{ |
|
"epoch": 5.05, |
|
"eval_loss": 2.2384088039398193, |
|
"eval_runtime": 342.428, |
|
"eval_samples_per_second": 876.038, |
|
"eval_steps_per_second": 54.753, |
|
"step": 1800000 |
|
}, |
|
{ |
|
"epoch": 5.08, |
|
"learning_rate": 1.0113333333333334e-07, |
|
"loss": 2.4073, |
|
"step": 1808000 |
|
}, |
|
{ |
|
"epoch": 5.08, |
|
"eval_loss": 2.2321836948394775, |
|
"eval_runtime": 342.2614, |
|
"eval_samples_per_second": 876.465, |
|
"eval_steps_per_second": 54.78, |
|
"step": 1808000 |
|
}, |
|
{ |
|
"epoch": 5.1, |
|
"eval_loss": 2.234363317489624, |
|
"eval_runtime": 344.4478, |
|
"eval_samples_per_second": 870.901, |
|
"eval_steps_per_second": 54.432, |
|
"step": 1816000 |
|
}, |
|
{ |
|
"epoch": 5.12, |
|
"learning_rate": 9.84e-08, |
|
"loss": 2.4043, |
|
"step": 1824000 |
|
}, |
|
{ |
|
"epoch": 5.12, |
|
"eval_loss": 2.232731819152832, |
|
"eval_runtime": 342.8187, |
|
"eval_samples_per_second": 875.04, |
|
"eval_steps_per_second": 54.691, |
|
"step": 1824000 |
|
}, |
|
{ |
|
"epoch": 5.14, |
|
"eval_loss": 2.234955072402954, |
|
"eval_runtime": 343.4884, |
|
"eval_samples_per_second": 873.334, |
|
"eval_steps_per_second": 54.584, |
|
"step": 1832000 |
|
}, |
|
{ |
|
"epoch": 5.17, |
|
"learning_rate": 9.566666666666666e-08, |
|
"loss": 2.4082, |
|
"step": 1840000 |
|
}, |
|
{ |
|
"epoch": 5.17, |
|
"eval_loss": 2.2375595569610596, |
|
"eval_runtime": 343.7508, |
|
"eval_samples_per_second": 872.667, |
|
"eval_steps_per_second": 54.542, |
|
"step": 1840000 |
|
}, |
|
{ |
|
"epoch": 5.19, |
|
"eval_loss": 2.2363414764404297, |
|
"eval_runtime": 343.8693, |
|
"eval_samples_per_second": 872.366, |
|
"eval_steps_per_second": 54.524, |
|
"step": 1848000 |
|
}, |
|
{ |
|
"epoch": 5.21, |
|
"learning_rate": 9.293333333333333e-08, |
|
"loss": 2.4073, |
|
"step": 1856000 |
|
}, |
|
{ |
|
"epoch": 5.21, |
|
"eval_loss": 2.23234224319458, |
|
"eval_runtime": 342.8958, |
|
"eval_samples_per_second": 874.843, |
|
"eval_steps_per_second": 54.678, |
|
"step": 1856000 |
|
}, |
|
{ |
|
"epoch": 5.23, |
|
"eval_loss": 2.2419273853302, |
|
"eval_runtime": 343.3653, |
|
"eval_samples_per_second": 873.647, |
|
"eval_steps_per_second": 54.604, |
|
"step": 1864000 |
|
}, |
|
{ |
|
"epoch": 5.26, |
|
"learning_rate": 9.02e-08, |
|
"loss": 2.4148, |
|
"step": 1872000 |
|
}, |
|
{ |
|
"epoch": 5.26, |
|
"eval_loss": 2.2292640209198, |
|
"eval_runtime": 344.1756, |
|
"eval_samples_per_second": 871.59, |
|
"eval_steps_per_second": 54.475, |
|
"step": 1872000 |
|
}, |
|
{ |
|
"epoch": 5.28, |
|
"eval_loss": 2.2345802783966064, |
|
"eval_runtime": 346.0562, |
|
"eval_samples_per_second": 866.854, |
|
"eval_steps_per_second": 54.179, |
|
"step": 1880000 |
|
}, |
|
{ |
|
"epoch": 5.3, |
|
"learning_rate": 8.746666666666667e-08, |
|
"loss": 2.4098, |
|
"step": 1888000 |
|
}, |
|
{ |
|
"epoch": 5.3, |
|
"eval_loss": 2.237226963043213, |
|
"eval_runtime": 345.4208, |
|
"eval_samples_per_second": 868.448, |
|
"eval_steps_per_second": 54.279, |
|
"step": 1888000 |
|
}, |
|
{ |
|
"epoch": 5.32, |
|
"eval_loss": 2.237149953842163, |
|
"eval_runtime": 343.8922, |
|
"eval_samples_per_second": 872.308, |
|
"eval_steps_per_second": 54.52, |
|
"step": 1896000 |
|
}, |
|
{ |
|
"epoch": 5.34, |
|
"learning_rate": 8.473333333333334e-08, |
|
"loss": 2.407, |
|
"step": 1904000 |
|
}, |
|
{ |
|
"epoch": 5.34, |
|
"eval_loss": 2.2396621704101562, |
|
"eval_runtime": 346.177, |
|
"eval_samples_per_second": 866.551, |
|
"eval_steps_per_second": 54.16, |
|
"step": 1904000 |
|
}, |
|
{ |
|
"epoch": 5.37, |
|
"eval_loss": 2.2300214767456055, |
|
"eval_runtime": 345.6113, |
|
"eval_samples_per_second": 867.969, |
|
"eval_steps_per_second": 54.249, |
|
"step": 1912000 |
|
}, |
|
{ |
|
"epoch": 5.39, |
|
"learning_rate": 8.2e-08, |
|
"loss": 2.4108, |
|
"step": 1920000 |
|
}, |
|
{ |
|
"epoch": 5.39, |
|
"eval_loss": 2.2317283153533936, |
|
"eval_runtime": 344.6229, |
|
"eval_samples_per_second": 870.459, |
|
"eval_steps_per_second": 54.404, |
|
"step": 1920000 |
|
}, |
|
{ |
|
"epoch": 5.41, |
|
"eval_loss": 2.2349703311920166, |
|
"eval_runtime": 344.7164, |
|
"eval_samples_per_second": 870.223, |
|
"eval_steps_per_second": 54.39, |
|
"step": 1928000 |
|
}, |
|
{ |
|
"epoch": 5.43, |
|
"learning_rate": 7.926666666666666e-08, |
|
"loss": 2.4168, |
|
"step": 1936000 |
|
}, |
|
{ |
|
"epoch": 5.43, |
|
"eval_loss": 2.2343006134033203, |
|
"eval_runtime": 344.4965, |
|
"eval_samples_per_second": 870.778, |
|
"eval_steps_per_second": 54.424, |
|
"step": 1936000 |
|
}, |
|
{ |
|
"epoch": 5.46, |
|
"eval_loss": 2.232745885848999, |
|
"eval_runtime": 343.8717, |
|
"eval_samples_per_second": 872.36, |
|
"eval_steps_per_second": 54.523, |
|
"step": 1944000 |
|
}, |
|
{ |
|
"epoch": 5.48, |
|
"learning_rate": 7.653333333333333e-08, |
|
"loss": 2.4113, |
|
"step": 1952000 |
|
}, |
|
{ |
|
"epoch": 5.48, |
|
"eval_loss": 2.2363381385803223, |
|
"eval_runtime": 343.8179, |
|
"eval_samples_per_second": 872.497, |
|
"eval_steps_per_second": 54.532, |
|
"step": 1952000 |
|
}, |
|
{ |
|
"epoch": 5.5, |
|
"eval_loss": 2.231372833251953, |
|
"eval_runtime": 345.6256, |
|
"eval_samples_per_second": 867.933, |
|
"eval_steps_per_second": 54.247, |
|
"step": 1960000 |
|
}, |
|
{ |
|
"epoch": 5.52, |
|
"learning_rate": 7.38e-08, |
|
"loss": 2.4131, |
|
"step": 1968000 |
|
}, |
|
{ |
|
"epoch": 5.52, |
|
"eval_loss": 2.23030686378479, |
|
"eval_runtime": 344.554, |
|
"eval_samples_per_second": 870.633, |
|
"eval_steps_per_second": 54.415, |
|
"step": 1968000 |
|
}, |
|
{ |
|
"epoch": 5.55, |
|
"eval_loss": 2.2353336811065674, |
|
"eval_runtime": 345.2222, |
|
"eval_samples_per_second": 868.948, |
|
"eval_steps_per_second": 54.31, |
|
"step": 1976000 |
|
}, |
|
{ |
|
"epoch": 5.57, |
|
"learning_rate": 7.106666666666667e-08, |
|
"loss": 2.4129, |
|
"step": 1984000 |
|
}, |
|
{ |
|
"epoch": 5.57, |
|
"eval_loss": 2.235344886779785, |
|
"eval_runtime": 344.2446, |
|
"eval_samples_per_second": 871.415, |
|
"eval_steps_per_second": 54.464, |
|
"step": 1984000 |
|
}, |
|
{ |
|
"epoch": 5.59, |
|
"eval_loss": 2.2295796871185303, |
|
"eval_runtime": 344.0878, |
|
"eval_samples_per_second": 871.812, |
|
"eval_steps_per_second": 54.489, |
|
"step": 1992000 |
|
}, |
|
{ |
|
"epoch": 5.61, |
|
"learning_rate": 6.833333333333332e-08, |
|
"loss": 2.4129, |
|
"step": 2000000 |
|
}, |
|
{ |
|
"epoch": 5.61, |
|
"eval_loss": 2.2313883304595947, |
|
"eval_runtime": 344.0986, |
|
"eval_samples_per_second": 871.785, |
|
"eval_steps_per_second": 54.487, |
|
"step": 2000000 |
|
}, |
|
{ |
|
"epoch": 5.64, |
|
"eval_loss": 2.2287940979003906, |
|
"eval_runtime": 343.9635, |
|
"eval_samples_per_second": 872.127, |
|
"eval_steps_per_second": 54.509, |
|
"step": 2008000 |
|
}, |
|
{ |
|
"epoch": 5.66, |
|
"learning_rate": 6.56e-08, |
|
"loss": 2.4045, |
|
"step": 2016000 |
|
}, |
|
{ |
|
"epoch": 5.66, |
|
"eval_loss": 2.2346994876861572, |
|
"eval_runtime": 344.4276, |
|
"eval_samples_per_second": 870.952, |
|
"eval_steps_per_second": 54.435, |
|
"step": 2016000 |
|
}, |
|
{ |
|
"epoch": 5.68, |
|
"eval_loss": 2.2348926067352295, |
|
"eval_runtime": 344.0715, |
|
"eval_samples_per_second": 871.854, |
|
"eval_steps_per_second": 54.492, |
|
"step": 2024000 |
|
}, |
|
{ |
|
"epoch": 5.7, |
|
"learning_rate": 6.286666666666666e-08, |
|
"loss": 2.4089, |
|
"step": 2032000 |
|
}, |
|
{ |
|
"epoch": 5.7, |
|
"eval_loss": 2.231017589569092, |
|
"eval_runtime": 344.7006, |
|
"eval_samples_per_second": 870.262, |
|
"eval_steps_per_second": 54.392, |
|
"step": 2032000 |
|
}, |
|
{ |
|
"epoch": 5.73, |
|
"eval_loss": 2.2342352867126465, |
|
"eval_runtime": 344.3635, |
|
"eval_samples_per_second": 871.114, |
|
"eval_steps_per_second": 54.445, |
|
"step": 2040000 |
|
}, |
|
{ |
|
"epoch": 5.75, |
|
"learning_rate": 6.013333333333333e-08, |
|
"loss": 2.4091, |
|
"step": 2048000 |
|
}, |
|
{ |
|
"epoch": 5.75, |
|
"eval_loss": 2.2319512367248535, |
|
"eval_runtime": 345.0718, |
|
"eval_samples_per_second": 869.326, |
|
"eval_steps_per_second": 54.334, |
|
"step": 2048000 |
|
}, |
|
{ |
|
"epoch": 5.77, |
|
"eval_loss": 2.231105327606201, |
|
"eval_runtime": 345.8677, |
|
"eval_samples_per_second": 867.326, |
|
"eval_steps_per_second": 54.209, |
|
"step": 2056000 |
|
}, |
|
{ |
|
"epoch": 5.79, |
|
"learning_rate": 5.7400000000000004e-08, |
|
"loss": 2.4137, |
|
"step": 2064000 |
|
}, |
|
{ |
|
"epoch": 5.79, |
|
"eval_loss": 2.2278153896331787, |
|
"eval_runtime": 345.3364, |
|
"eval_samples_per_second": 868.66, |
|
"eval_steps_per_second": 54.292, |
|
"step": 2064000 |
|
}, |
|
{ |
|
"epoch": 5.82, |
|
"eval_loss": 2.2343814373016357, |
|
"eval_runtime": 344.9277, |
|
"eval_samples_per_second": 869.69, |
|
"eval_steps_per_second": 54.356, |
|
"step": 2072000 |
|
}, |
|
{ |
|
"epoch": 5.84, |
|
"learning_rate": 5.4666666666666666e-08, |
|
"loss": 2.4063, |
|
"step": 2080000 |
|
}, |
|
{ |
|
"epoch": 5.84, |
|
"eval_loss": 2.233853340148926, |
|
"eval_runtime": 346.3645, |
|
"eval_samples_per_second": 866.082, |
|
"eval_steps_per_second": 54.131, |
|
"step": 2080000 |
|
}, |
|
{ |
|
"epoch": 5.86, |
|
"eval_loss": 2.22705078125, |
|
"eval_runtime": 344.9123, |
|
"eval_samples_per_second": 869.728, |
|
"eval_steps_per_second": 54.359, |
|
"step": 2088000 |
|
}, |
|
{ |
|
"epoch": 5.88, |
|
"learning_rate": 5.1933333333333335e-08, |
|
"loss": 2.4046, |
|
"step": 2096000 |
|
}, |
|
{ |
|
"epoch": 5.88, |
|
"eval_loss": 2.22632098197937, |
|
"eval_runtime": 346.6997, |
|
"eval_samples_per_second": 865.244, |
|
"eval_steps_per_second": 54.078, |
|
"step": 2096000 |
|
}, |
|
{ |
|
"epoch": 5.91, |
|
"eval_loss": 2.236851453781128, |
|
"eval_runtime": 346.071, |
|
"eval_samples_per_second": 866.816, |
|
"eval_steps_per_second": 54.177, |
|
"step": 2104000 |
|
}, |
|
{ |
|
"epoch": 5.93, |
|
"learning_rate": 4.92e-08, |
|
"loss": 2.4105, |
|
"step": 2112000 |
|
}, |
|
{ |
|
"epoch": 5.93, |
|
"eval_loss": 2.2329680919647217, |
|
"eval_runtime": 347.6747, |
|
"eval_samples_per_second": 862.818, |
|
"eval_steps_per_second": 53.927, |
|
"step": 2112000 |
|
}, |
|
{ |
|
"epoch": 5.95, |
|
"eval_loss": 2.236093521118164, |
|
"eval_runtime": 346.0239, |
|
"eval_samples_per_second": 866.934, |
|
"eval_steps_per_second": 54.184, |
|
"step": 2120000 |
|
}, |
|
{ |
|
"epoch": 5.97, |
|
"learning_rate": 4.6466666666666666e-08, |
|
"loss": 2.4045, |
|
"step": 2128000 |
|
}, |
|
{ |
|
"epoch": 5.97, |
|
"eval_loss": 2.231955051422119, |
|
"eval_runtime": 345.4242, |
|
"eval_samples_per_second": 868.439, |
|
"eval_steps_per_second": 54.278, |
|
"step": 2128000 |
|
}, |
|
{ |
|
"epoch": 6.0, |
|
"eval_loss": 2.2282557487487793, |
|
"eval_runtime": 345.9755, |
|
"eval_samples_per_second": 867.056, |
|
"eval_steps_per_second": 54.192, |
|
"step": 2136000 |
|
}, |
|
{ |
|
"epoch": 6.02, |
|
"learning_rate": 4.3733333333333335e-08, |
|
"loss": 2.4093, |
|
"step": 2144000 |
|
}, |
|
{ |
|
"epoch": 6.02, |
|
"eval_loss": 2.22622013092041, |
|
"eval_runtime": 347.3733, |
|
"eval_samples_per_second": 863.567, |
|
"eval_steps_per_second": 53.974, |
|
"step": 2144000 |
|
}, |
|
{ |
|
"epoch": 6.04, |
|
"eval_loss": 2.229443311691284, |
|
"eval_runtime": 346.1833, |
|
"eval_samples_per_second": 866.535, |
|
"eval_steps_per_second": 54.159, |
|
"step": 2152000 |
|
}, |
|
{ |
|
"epoch": 6.06, |
|
"learning_rate": 4.1e-08, |
|
"loss": 2.4109, |
|
"step": 2160000 |
|
}, |
|
{ |
|
"epoch": 6.06, |
|
"eval_loss": 2.233405351638794, |
|
"eval_runtime": 346.0615, |
|
"eval_samples_per_second": 866.84, |
|
"eval_steps_per_second": 54.178, |
|
"step": 2160000 |
|
}, |
|
{ |
|
"epoch": 6.09, |
|
"eval_loss": 2.236346483230591, |
|
"eval_runtime": 345.3461, |
|
"eval_samples_per_second": 868.636, |
|
"eval_steps_per_second": 54.29, |
|
"step": 2168000 |
|
}, |
|
{ |
|
"epoch": 6.11, |
|
"learning_rate": 3.8266666666666665e-08, |
|
"loss": 2.4061, |
|
"step": 2176000 |
|
}, |
|
{ |
|
"epoch": 6.11, |
|
"eval_loss": 2.2308871746063232, |
|
"eval_runtime": 347.2972, |
|
"eval_samples_per_second": 863.756, |
|
"eval_steps_per_second": 53.985, |
|
"step": 2176000 |
|
}, |
|
{ |
|
"epoch": 6.13, |
|
"eval_loss": 2.2269339561462402, |
|
"eval_runtime": 347.1506, |
|
"eval_samples_per_second": 864.121, |
|
"eval_steps_per_second": 54.008, |
|
"step": 2184000 |
|
}, |
|
{ |
|
"epoch": 6.15, |
|
"learning_rate": 3.5533333333333334e-08, |
|
"loss": 2.4007, |
|
"step": 2192000 |
|
}, |
|
{ |
|
"epoch": 6.15, |
|
"eval_loss": 2.236927032470703, |
|
"eval_runtime": 347.4425, |
|
"eval_samples_per_second": 863.395, |
|
"eval_steps_per_second": 53.963, |
|
"step": 2192000 |
|
}, |
|
{ |
|
"epoch": 6.18, |
|
"eval_loss": 2.229724168777466, |
|
"eval_runtime": 345.6431, |
|
"eval_samples_per_second": 867.889, |
|
"eval_steps_per_second": 54.244, |
|
"step": 2200000 |
|
}, |
|
{ |
|
"epoch": 6.2, |
|
"learning_rate": 3.28e-08, |
|
"loss": 2.4034, |
|
"step": 2208000 |
|
}, |
|
{ |
|
"epoch": 6.2, |
|
"eval_loss": 2.2266740798950195, |
|
"eval_runtime": 346.5718, |
|
"eval_samples_per_second": 865.564, |
|
"eval_steps_per_second": 54.098, |
|
"step": 2208000 |
|
}, |
|
{ |
|
"epoch": 6.22, |
|
"eval_loss": 2.2310221195220947, |
|
"eval_runtime": 346.6214, |
|
"eval_samples_per_second": 865.44, |
|
"eval_steps_per_second": 54.091, |
|
"step": 2216000 |
|
}, |
|
{ |
|
"epoch": 6.24, |
|
"learning_rate": 3.0066666666666665e-08, |
|
"loss": 2.4049, |
|
"step": 2224000 |
|
}, |
|
{ |
|
"epoch": 6.24, |
|
"eval_loss": 2.236175060272217, |
|
"eval_runtime": 348.3819, |
|
"eval_samples_per_second": 861.067, |
|
"eval_steps_per_second": 53.817, |
|
"step": 2224000 |
|
}, |
|
{ |
|
"epoch": 6.27, |
|
"eval_loss": 2.231903076171875, |
|
"eval_runtime": 347.5521, |
|
"eval_samples_per_second": 863.122, |
|
"eval_steps_per_second": 53.946, |
|
"step": 2232000 |
|
}, |
|
{ |
|
"epoch": 6.29, |
|
"learning_rate": 2.7333333333333333e-08, |
|
"loss": 2.4052, |
|
"step": 2240000 |
|
}, |
|
{ |
|
"epoch": 6.29, |
|
"eval_loss": 2.2307627201080322, |
|
"eval_runtime": 347.1934, |
|
"eval_samples_per_second": 864.014, |
|
"eval_steps_per_second": 54.002, |
|
"step": 2240000 |
|
}, |
|
{ |
|
"epoch": 6.31, |
|
"eval_loss": 2.2225306034088135, |
|
"eval_runtime": 347.1345, |
|
"eval_samples_per_second": 864.161, |
|
"eval_steps_per_second": 54.011, |
|
"step": 2248000 |
|
}, |
|
{ |
|
"epoch": 6.33, |
|
"learning_rate": 2.46e-08, |
|
"loss": 2.4102, |
|
"step": 2256000 |
|
}, |
|
{ |
|
"epoch": 6.33, |
|
"eval_loss": 2.2365851402282715, |
|
"eval_runtime": 347.8217, |
|
"eval_samples_per_second": 862.453, |
|
"eval_steps_per_second": 53.904, |
|
"step": 2256000 |
|
}, |
|
{ |
|
"epoch": 6.36, |
|
"eval_loss": 2.232743740081787, |
|
"eval_runtime": 347.6812, |
|
"eval_samples_per_second": 862.802, |
|
"eval_steps_per_second": 53.926, |
|
"step": 2264000 |
|
}, |
|
{ |
|
"epoch": 6.38, |
|
"learning_rate": 2.1866666666666667e-08, |
|
"loss": 2.4046, |
|
"step": 2272000 |
|
}, |
|
{ |
|
"epoch": 6.38, |
|
"eval_loss": 2.2304911613464355, |
|
"eval_runtime": 348.0072, |
|
"eval_samples_per_second": 861.994, |
|
"eval_steps_per_second": 53.875, |
|
"step": 2272000 |
|
}, |
|
{ |
|
"epoch": 6.4, |
|
"eval_loss": 2.230863094329834, |
|
"eval_runtime": 347.1957, |
|
"eval_samples_per_second": 864.008, |
|
"eval_steps_per_second": 54.001, |
|
"step": 2280000 |
|
}, |
|
{ |
|
"epoch": 6.42, |
|
"learning_rate": 1.9133333333333333e-08, |
|
"loss": 2.4066, |
|
"step": 2288000 |
|
}, |
|
{ |
|
"epoch": 6.42, |
|
"eval_loss": 2.2291176319122314, |
|
"eval_runtime": 347.3697, |
|
"eval_samples_per_second": 863.576, |
|
"eval_steps_per_second": 53.974, |
|
"step": 2288000 |
|
}, |
|
{ |
|
"epoch": 6.45, |
|
"eval_loss": 2.2300875186920166, |
|
"eval_runtime": 348.7468, |
|
"eval_samples_per_second": 860.165, |
|
"eval_steps_per_second": 53.761, |
|
"step": 2296000 |
|
}, |
|
{ |
|
"epoch": 6.47, |
|
"learning_rate": 1.64e-08, |
|
"loss": 2.4041, |
|
"step": 2304000 |
|
}, |
|
{ |
|
"epoch": 6.47, |
|
"eval_loss": 2.237844467163086, |
|
"eval_runtime": 347.5443, |
|
"eval_samples_per_second": 863.142, |
|
"eval_steps_per_second": 53.947, |
|
"step": 2304000 |
|
}, |
|
{ |
|
"epoch": 6.49, |
|
"eval_loss": 2.2317147254943848, |
|
"eval_runtime": 348.3274, |
|
"eval_samples_per_second": 861.201, |
|
"eval_steps_per_second": 53.826, |
|
"step": 2312000 |
|
}, |
|
{ |
|
"epoch": 6.51, |
|
"learning_rate": 1.3666666666666667e-08, |
|
"loss": 2.4081, |
|
"step": 2320000 |
|
}, |
|
{ |
|
"epoch": 6.51, |
|
"eval_loss": 2.232565402984619, |
|
"eval_runtime": 349.1958, |
|
"eval_samples_per_second": 859.059, |
|
"eval_steps_per_second": 53.692, |
|
"step": 2320000 |
|
}, |
|
{ |
|
"epoch": 6.54, |
|
"eval_loss": 2.2412142753601074, |
|
"eval_runtime": 347.7133, |
|
"eval_samples_per_second": 862.722, |
|
"eval_steps_per_second": 53.921, |
|
"step": 2328000 |
|
}, |
|
{ |
|
"epoch": 6.56, |
|
"learning_rate": 1.0933333333333334e-08, |
|
"loss": 2.4147, |
|
"step": 2336000 |
|
}, |
|
{ |
|
"epoch": 6.56, |
|
"eval_loss": 2.2348580360412598, |
|
"eval_runtime": 348.9265, |
|
"eval_samples_per_second": 859.722, |
|
"eval_steps_per_second": 53.733, |
|
"step": 2336000 |
|
}, |
|
{ |
|
"epoch": 6.58, |
|
"eval_loss": 2.229579210281372, |
|
"eval_runtime": 348.1888, |
|
"eval_samples_per_second": 861.544, |
|
"eval_steps_per_second": 53.847, |
|
"step": 2344000 |
|
}, |
|
{ |
|
"epoch": 6.6, |
|
"learning_rate": 8.2e-09, |
|
"loss": 2.4105, |
|
"step": 2352000 |
|
}, |
|
{ |
|
"epoch": 6.6, |
|
"eval_loss": 2.231281280517578, |
|
"eval_runtime": 348.2564, |
|
"eval_samples_per_second": 861.377, |
|
"eval_steps_per_second": 53.837, |
|
"step": 2352000 |
|
}, |
|
{ |
|
"epoch": 6.62, |
|
"eval_loss": 2.229707717895508, |
|
"eval_runtime": 349.2314, |
|
"eval_samples_per_second": 858.972, |
|
"eval_steps_per_second": 53.686, |
|
"step": 2360000 |
|
}, |
|
{ |
|
"epoch": 6.65, |
|
"learning_rate": 5.466666666666667e-09, |
|
"loss": 2.4096, |
|
"step": 2368000 |
|
}, |
|
{ |
|
"epoch": 6.65, |
|
"eval_loss": 2.2240703105926514, |
|
"eval_runtime": 348.1981, |
|
"eval_samples_per_second": 861.521, |
|
"eval_steps_per_second": 53.846, |
|
"step": 2368000 |
|
}, |
|
{ |
|
"epoch": 6.67, |
|
"eval_loss": 2.232208490371704, |
|
"eval_runtime": 349.4311, |
|
"eval_samples_per_second": 858.481, |
|
"eval_steps_per_second": 53.656, |
|
"step": 2376000 |
|
}, |
|
{ |
|
"epoch": 6.69, |
|
"learning_rate": 2.7333333333333334e-09, |
|
"loss": 2.4089, |
|
"step": 2384000 |
|
}, |
|
{ |
|
"epoch": 6.69, |
|
"eval_loss": 2.234354019165039, |
|
"eval_runtime": 349.741, |
|
"eval_samples_per_second": 857.72, |
|
"eval_steps_per_second": 53.608, |
|
"step": 2384000 |
|
}, |
|
{ |
|
"epoch": 6.71, |
|
"eval_loss": 2.229137659072876, |
|
"eval_runtime": 349.1189, |
|
"eval_samples_per_second": 859.249, |
|
"eval_steps_per_second": 53.704, |
|
"step": 2392000 |
|
}, |
|
{ |
|
"epoch": 6.74, |
|
"learning_rate": 0.0, |
|
"loss": 2.4048, |
|
"step": 2400000 |
|
}, |
|
{ |
|
"epoch": 6.74, |
|
"eval_loss": 2.227388858795166, |
|
"eval_runtime": 348.8655, |
|
"eval_samples_per_second": 859.873, |
|
"eval_steps_per_second": 53.743, |
|
"step": 2400000 |
|
}, |
|
{ |
|
"epoch": 6.74, |
|
"step": 2400000, |
|
"total_flos": 7.587638746774346e+17, |
|
"train_loss": 2.443978935546875, |
|
"train_runtime": 257950.5102, |
|
"train_samples_per_second": 148.866, |
|
"train_steps_per_second": 9.304 |
|
} |
|
], |
|
"logging_steps": 16000, |
|
"max_steps": 2400000, |
|
"num_train_epochs": 7, |
|
"save_steps": 32000, |
|
"total_flos": 7.587638746774346e+17, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|