|
{ |
|
"best_metric": 1.8842545747756958, |
|
"best_model_checkpoint": "./model_tweets_2020_Q1_90/checkpoint-1440000", |
|
"epoch": 49.171259398881354, |
|
"eval_steps": 8000, |
|
"global_step": 2400000, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.16, |
|
"eval_loss": 2.2375540733337402, |
|
"eval_runtime": 45.4292, |
|
"eval_samples_per_second": 904.749, |
|
"eval_steps_per_second": 56.55, |
|
"step": 8000 |
|
}, |
|
{ |
|
"epoch": 0.33, |
|
"learning_rate": 4.0726666666666665e-07, |
|
"loss": 2.4394, |
|
"step": 16000 |
|
}, |
|
{ |
|
"epoch": 0.33, |
|
"eval_loss": 2.155681610107422, |
|
"eval_runtime": 44.9417, |
|
"eval_samples_per_second": 914.562, |
|
"eval_steps_per_second": 57.163, |
|
"step": 16000 |
|
}, |
|
{ |
|
"epoch": 0.49, |
|
"eval_loss": 2.0965232849121094, |
|
"eval_runtime": 45.1589, |
|
"eval_samples_per_second": 910.165, |
|
"eval_steps_per_second": 56.888, |
|
"step": 24000 |
|
}, |
|
{ |
|
"epoch": 0.66, |
|
"learning_rate": 4.0453333333333336e-07, |
|
"loss": 2.2403, |
|
"step": 32000 |
|
}, |
|
{ |
|
"epoch": 0.66, |
|
"eval_loss": 2.063685894012451, |
|
"eval_runtime": 45.4617, |
|
"eval_samples_per_second": 904.101, |
|
"eval_steps_per_second": 56.509, |
|
"step": 32000 |
|
}, |
|
{ |
|
"epoch": 0.82, |
|
"eval_loss": 2.062025308609009, |
|
"eval_runtime": 46.1664, |
|
"eval_samples_per_second": 890.301, |
|
"eval_steps_per_second": 55.647, |
|
"step": 40000 |
|
}, |
|
{ |
|
"epoch": 0.98, |
|
"learning_rate": 4.018e-07, |
|
"loss": 2.1859, |
|
"step": 48000 |
|
}, |
|
{ |
|
"epoch": 0.98, |
|
"eval_loss": 2.0426833629608154, |
|
"eval_runtime": 44.881, |
|
"eval_samples_per_second": 915.8, |
|
"eval_steps_per_second": 57.24, |
|
"step": 48000 |
|
}, |
|
{ |
|
"epoch": 1.15, |
|
"eval_loss": 2.044032335281372, |
|
"eval_runtime": 45.0876, |
|
"eval_samples_per_second": 911.602, |
|
"eval_steps_per_second": 56.978, |
|
"step": 56000 |
|
}, |
|
{ |
|
"epoch": 1.31, |
|
"learning_rate": 3.9906666666666667e-07, |
|
"loss": 2.1472, |
|
"step": 64000 |
|
}, |
|
{ |
|
"epoch": 1.31, |
|
"eval_loss": 2.017690420150757, |
|
"eval_runtime": 45.0, |
|
"eval_samples_per_second": 913.377, |
|
"eval_steps_per_second": 57.089, |
|
"step": 64000 |
|
}, |
|
{ |
|
"epoch": 1.48, |
|
"eval_loss": 1.9980425834655762, |
|
"eval_runtime": 44.953, |
|
"eval_samples_per_second": 914.333, |
|
"eval_steps_per_second": 57.149, |
|
"step": 72000 |
|
}, |
|
{ |
|
"epoch": 1.64, |
|
"learning_rate": 3.963333333333333e-07, |
|
"loss": 2.1334, |
|
"step": 80000 |
|
}, |
|
{ |
|
"epoch": 1.64, |
|
"eval_loss": 2.0021114349365234, |
|
"eval_runtime": 45.3203, |
|
"eval_samples_per_second": 906.923, |
|
"eval_steps_per_second": 56.685, |
|
"step": 80000 |
|
}, |
|
{ |
|
"epoch": 1.8, |
|
"eval_loss": 1.9963111877441406, |
|
"eval_runtime": 44.8632, |
|
"eval_samples_per_second": 916.163, |
|
"eval_steps_per_second": 57.263, |
|
"step": 88000 |
|
}, |
|
{ |
|
"epoch": 1.97, |
|
"learning_rate": 3.936e-07, |
|
"loss": 2.1271, |
|
"step": 96000 |
|
}, |
|
{ |
|
"epoch": 1.97, |
|
"eval_loss": 1.9917570352554321, |
|
"eval_runtime": 45.4669, |
|
"eval_samples_per_second": 903.999, |
|
"eval_steps_per_second": 56.503, |
|
"step": 96000 |
|
}, |
|
{ |
|
"epoch": 2.13, |
|
"eval_loss": 1.988930344581604, |
|
"eval_runtime": 45.0716, |
|
"eval_samples_per_second": 911.927, |
|
"eval_steps_per_second": 56.998, |
|
"step": 104000 |
|
}, |
|
{ |
|
"epoch": 2.29, |
|
"learning_rate": 3.908666666666667e-07, |
|
"loss": 2.1065, |
|
"step": 112000 |
|
}, |
|
{ |
|
"epoch": 2.29, |
|
"eval_loss": 1.9688993692398071, |
|
"eval_runtime": 44.5304, |
|
"eval_samples_per_second": 923.009, |
|
"eval_steps_per_second": 57.691, |
|
"step": 112000 |
|
}, |
|
{ |
|
"epoch": 2.46, |
|
"eval_loss": 1.991935133934021, |
|
"eval_runtime": 45.9679, |
|
"eval_samples_per_second": 894.146, |
|
"eval_steps_per_second": 55.887, |
|
"step": 120000 |
|
}, |
|
{ |
|
"epoch": 2.62, |
|
"learning_rate": 3.8813333333333334e-07, |
|
"loss": 2.105, |
|
"step": 128000 |
|
}, |
|
{ |
|
"epoch": 2.62, |
|
"eval_loss": 1.9705941677093506, |
|
"eval_runtime": 45.4865, |
|
"eval_samples_per_second": 903.609, |
|
"eval_steps_per_second": 56.478, |
|
"step": 128000 |
|
}, |
|
{ |
|
"epoch": 2.79, |
|
"eval_loss": 1.9724736213684082, |
|
"eval_runtime": 44.9361, |
|
"eval_samples_per_second": 914.676, |
|
"eval_steps_per_second": 57.17, |
|
"step": 136000 |
|
}, |
|
{ |
|
"epoch": 2.95, |
|
"learning_rate": 3.854e-07, |
|
"loss": 2.1033, |
|
"step": 144000 |
|
}, |
|
{ |
|
"epoch": 2.95, |
|
"eval_loss": 2.000924825668335, |
|
"eval_runtime": 45.1849, |
|
"eval_samples_per_second": 909.641, |
|
"eval_steps_per_second": 56.855, |
|
"step": 144000 |
|
}, |
|
{ |
|
"epoch": 3.11, |
|
"eval_loss": 1.9660991430282593, |
|
"eval_runtime": 45.122, |
|
"eval_samples_per_second": 910.909, |
|
"eval_steps_per_second": 56.935, |
|
"step": 152000 |
|
}, |
|
{ |
|
"epoch": 3.28, |
|
"learning_rate": 3.8266666666666665e-07, |
|
"loss": 2.0934, |
|
"step": 160000 |
|
}, |
|
{ |
|
"epoch": 3.28, |
|
"eval_loss": 1.964065432548523, |
|
"eval_runtime": 45.5781, |
|
"eval_samples_per_second": 901.792, |
|
"eval_steps_per_second": 56.365, |
|
"step": 160000 |
|
}, |
|
{ |
|
"epoch": 3.44, |
|
"eval_loss": 1.973306655883789, |
|
"eval_runtime": 45.0146, |
|
"eval_samples_per_second": 913.081, |
|
"eval_steps_per_second": 57.07, |
|
"step": 168000 |
|
}, |
|
{ |
|
"epoch": 3.61, |
|
"learning_rate": 3.799333333333333e-07, |
|
"loss": 2.0899, |
|
"step": 176000 |
|
}, |
|
{ |
|
"epoch": 3.61, |
|
"eval_loss": 1.9746649265289307, |
|
"eval_runtime": 45.1942, |
|
"eval_samples_per_second": 909.454, |
|
"eval_steps_per_second": 56.844, |
|
"step": 176000 |
|
}, |
|
{ |
|
"epoch": 3.77, |
|
"eval_loss": 1.9442167282104492, |
|
"eval_runtime": 44.9807, |
|
"eval_samples_per_second": 913.769, |
|
"eval_steps_per_second": 57.113, |
|
"step": 184000 |
|
}, |
|
{ |
|
"epoch": 3.93, |
|
"learning_rate": 3.772e-07, |
|
"loss": 2.0903, |
|
"step": 192000 |
|
}, |
|
{ |
|
"epoch": 3.93, |
|
"eval_loss": 1.9585589170455933, |
|
"eval_runtime": 45.3512, |
|
"eval_samples_per_second": 906.304, |
|
"eval_steps_per_second": 56.647, |
|
"step": 192000 |
|
}, |
|
{ |
|
"epoch": 4.1, |
|
"eval_loss": 1.9585614204406738, |
|
"eval_runtime": 44.9754, |
|
"eval_samples_per_second": 913.877, |
|
"eval_steps_per_second": 57.12, |
|
"step": 200000 |
|
}, |
|
{ |
|
"epoch": 4.26, |
|
"learning_rate": 3.7446666666666667e-07, |
|
"loss": 2.0842, |
|
"step": 208000 |
|
}, |
|
{ |
|
"epoch": 4.26, |
|
"eval_loss": 1.9401671886444092, |
|
"eval_runtime": 45.4042, |
|
"eval_samples_per_second": 905.247, |
|
"eval_steps_per_second": 56.581, |
|
"step": 208000 |
|
}, |
|
{ |
|
"epoch": 4.43, |
|
"eval_loss": 1.9483397006988525, |
|
"eval_runtime": 45.1835, |
|
"eval_samples_per_second": 909.668, |
|
"eval_steps_per_second": 56.857, |
|
"step": 216000 |
|
}, |
|
{ |
|
"epoch": 4.59, |
|
"learning_rate": 3.7173333333333333e-07, |
|
"loss": 2.0761, |
|
"step": 224000 |
|
}, |
|
{ |
|
"epoch": 4.59, |
|
"eval_loss": 1.9532350301742554, |
|
"eval_runtime": 45.6838, |
|
"eval_samples_per_second": 899.706, |
|
"eval_steps_per_second": 56.234, |
|
"step": 224000 |
|
}, |
|
{ |
|
"epoch": 4.75, |
|
"eval_loss": 1.945618748664856, |
|
"eval_runtime": 44.9815, |
|
"eval_samples_per_second": 913.754, |
|
"eval_steps_per_second": 57.112, |
|
"step": 232000 |
|
}, |
|
{ |
|
"epoch": 4.92, |
|
"learning_rate": 3.69e-07, |
|
"loss": 2.0799, |
|
"step": 240000 |
|
}, |
|
{ |
|
"epoch": 4.92, |
|
"eval_loss": 1.9322354793548584, |
|
"eval_runtime": 45.7219, |
|
"eval_samples_per_second": 898.957, |
|
"eval_steps_per_second": 56.188, |
|
"step": 240000 |
|
}, |
|
{ |
|
"epoch": 5.08, |
|
"eval_loss": 1.9460111856460571, |
|
"eval_runtime": 44.9207, |
|
"eval_samples_per_second": 914.991, |
|
"eval_steps_per_second": 57.19, |
|
"step": 248000 |
|
}, |
|
{ |
|
"epoch": 5.24, |
|
"learning_rate": 3.6626666666666664e-07, |
|
"loss": 2.0704, |
|
"step": 256000 |
|
}, |
|
{ |
|
"epoch": 5.24, |
|
"eval_loss": 1.9477649927139282, |
|
"eval_runtime": 45.4934, |
|
"eval_samples_per_second": 903.472, |
|
"eval_steps_per_second": 56.47, |
|
"step": 256000 |
|
}, |
|
{ |
|
"epoch": 5.41, |
|
"eval_loss": 1.943530559539795, |
|
"eval_runtime": 45.5332, |
|
"eval_samples_per_second": 902.683, |
|
"eval_steps_per_second": 56.42, |
|
"step": 264000 |
|
}, |
|
{ |
|
"epoch": 5.57, |
|
"learning_rate": 3.6353333333333335e-07, |
|
"loss": 2.0727, |
|
"step": 272000 |
|
}, |
|
{ |
|
"epoch": 5.57, |
|
"eval_loss": 1.9356465339660645, |
|
"eval_runtime": 45.2443, |
|
"eval_samples_per_second": 908.445, |
|
"eval_steps_per_second": 56.781, |
|
"step": 272000 |
|
}, |
|
{ |
|
"epoch": 5.74, |
|
"eval_loss": 1.9543371200561523, |
|
"eval_runtime": 44.9666, |
|
"eval_samples_per_second": 914.056, |
|
"eval_steps_per_second": 57.131, |
|
"step": 280000 |
|
}, |
|
{ |
|
"epoch": 5.9, |
|
"learning_rate": 3.608e-07, |
|
"loss": 2.073, |
|
"step": 288000 |
|
}, |
|
{ |
|
"epoch": 5.9, |
|
"eval_loss": 1.9541795253753662, |
|
"eval_runtime": 45.5545, |
|
"eval_samples_per_second": 902.26, |
|
"eval_steps_per_second": 56.394, |
|
"step": 288000 |
|
}, |
|
{ |
|
"epoch": 6.06, |
|
"eval_loss": 1.9503259658813477, |
|
"eval_runtime": 45.0063, |
|
"eval_samples_per_second": 913.25, |
|
"eval_steps_per_second": 57.081, |
|
"step": 296000 |
|
}, |
|
{ |
|
"epoch": 6.23, |
|
"learning_rate": 3.5806666666666666e-07, |
|
"loss": 2.0647, |
|
"step": 304000 |
|
}, |
|
{ |
|
"epoch": 6.23, |
|
"eval_loss": 1.9437284469604492, |
|
"eval_runtime": 45.0801, |
|
"eval_samples_per_second": 911.755, |
|
"eval_steps_per_second": 56.987, |
|
"step": 304000 |
|
}, |
|
{ |
|
"epoch": 6.39, |
|
"eval_loss": 1.9450260400772095, |
|
"eval_runtime": 45.0125, |
|
"eval_samples_per_second": 913.125, |
|
"eval_steps_per_second": 57.073, |
|
"step": 312000 |
|
}, |
|
{ |
|
"epoch": 6.56, |
|
"learning_rate": 3.553333333333333e-07, |
|
"loss": 2.0668, |
|
"step": 320000 |
|
}, |
|
{ |
|
"epoch": 6.56, |
|
"eval_loss": 1.9220808744430542, |
|
"eval_runtime": 45.7281, |
|
"eval_samples_per_second": 898.834, |
|
"eval_steps_per_second": 56.18, |
|
"step": 320000 |
|
}, |
|
{ |
|
"epoch": 6.72, |
|
"eval_loss": 1.927699089050293, |
|
"eval_runtime": 45.3402, |
|
"eval_samples_per_second": 906.525, |
|
"eval_steps_per_second": 56.661, |
|
"step": 328000 |
|
}, |
|
{ |
|
"epoch": 6.88, |
|
"learning_rate": 3.5259999999999997e-07, |
|
"loss": 2.0695, |
|
"step": 336000 |
|
}, |
|
{ |
|
"epoch": 6.88, |
|
"eval_loss": 1.935697317123413, |
|
"eval_runtime": 45.1353, |
|
"eval_samples_per_second": 910.64, |
|
"eval_steps_per_second": 56.918, |
|
"step": 336000 |
|
}, |
|
{ |
|
"epoch": 7.05, |
|
"eval_loss": 1.924439549446106, |
|
"eval_runtime": 45.0671, |
|
"eval_samples_per_second": 912.017, |
|
"eval_steps_per_second": 57.004, |
|
"step": 344000 |
|
}, |
|
{ |
|
"epoch": 7.21, |
|
"learning_rate": 3.498666666666667e-07, |
|
"loss": 2.0652, |
|
"step": 352000 |
|
}, |
|
{ |
|
"epoch": 7.21, |
|
"eval_loss": 1.938655138015747, |
|
"eval_runtime": 45.305, |
|
"eval_samples_per_second": 907.228, |
|
"eval_steps_per_second": 56.704, |
|
"step": 352000 |
|
}, |
|
{ |
|
"epoch": 7.38, |
|
"eval_loss": 1.9354636669158936, |
|
"eval_runtime": 45.6689, |
|
"eval_samples_per_second": 900.0, |
|
"eval_steps_per_second": 56.253, |
|
"step": 360000 |
|
}, |
|
{ |
|
"epoch": 7.54, |
|
"learning_rate": 3.4713333333333333e-07, |
|
"loss": 2.0607, |
|
"step": 368000 |
|
}, |
|
{ |
|
"epoch": 7.54, |
|
"eval_loss": 1.9389930963516235, |
|
"eval_runtime": 45.2501, |
|
"eval_samples_per_second": 908.33, |
|
"eval_steps_per_second": 56.773, |
|
"step": 368000 |
|
}, |
|
{ |
|
"epoch": 7.7, |
|
"eval_loss": 1.9358594417572021, |
|
"eval_runtime": 45.5644, |
|
"eval_samples_per_second": 902.064, |
|
"eval_steps_per_second": 56.382, |
|
"step": 376000 |
|
}, |
|
{ |
|
"epoch": 7.87, |
|
"learning_rate": 3.444e-07, |
|
"loss": 2.0707, |
|
"step": 384000 |
|
}, |
|
{ |
|
"epoch": 7.87, |
|
"eval_loss": 1.9393250942230225, |
|
"eval_runtime": 45.0739, |
|
"eval_samples_per_second": 911.881, |
|
"eval_steps_per_second": 56.995, |
|
"step": 384000 |
|
}, |
|
{ |
|
"epoch": 8.03, |
|
"eval_loss": 1.9217476844787598, |
|
"eval_runtime": 45.0711, |
|
"eval_samples_per_second": 911.938, |
|
"eval_steps_per_second": 56.999, |
|
"step": 392000 |
|
}, |
|
{ |
|
"epoch": 8.2, |
|
"learning_rate": 3.416666666666667e-07, |
|
"loss": 2.0621, |
|
"step": 400000 |
|
}, |
|
{ |
|
"epoch": 8.2, |
|
"eval_loss": 1.9284311532974243, |
|
"eval_runtime": 45.6293, |
|
"eval_samples_per_second": 900.781, |
|
"eval_steps_per_second": 56.302, |
|
"step": 400000 |
|
}, |
|
{ |
|
"epoch": 8.36, |
|
"eval_loss": 1.9383472204208374, |
|
"eval_runtime": 46.4801, |
|
"eval_samples_per_second": 884.292, |
|
"eval_steps_per_second": 55.271, |
|
"step": 408000 |
|
}, |
|
{ |
|
"epoch": 8.52, |
|
"learning_rate": 3.3893333333333335e-07, |
|
"loss": 2.0643, |
|
"step": 416000 |
|
}, |
|
{ |
|
"epoch": 8.52, |
|
"eval_loss": 1.922420859336853, |
|
"eval_runtime": 45.4784, |
|
"eval_samples_per_second": 903.77, |
|
"eval_steps_per_second": 56.488, |
|
"step": 416000 |
|
}, |
|
{ |
|
"epoch": 8.69, |
|
"eval_loss": 1.928429365158081, |
|
"eval_runtime": 45.9472, |
|
"eval_samples_per_second": 894.549, |
|
"eval_steps_per_second": 55.912, |
|
"step": 424000 |
|
}, |
|
{ |
|
"epoch": 8.85, |
|
"learning_rate": 3.3619999999999995e-07, |
|
"loss": 2.0498, |
|
"step": 432000 |
|
}, |
|
{ |
|
"epoch": 8.85, |
|
"eval_loss": 1.9399133920669556, |
|
"eval_runtime": 46.3003, |
|
"eval_samples_per_second": 887.726, |
|
"eval_steps_per_second": 55.486, |
|
"step": 432000 |
|
}, |
|
{ |
|
"epoch": 9.01, |
|
"eval_loss": 1.9427006244659424, |
|
"eval_runtime": 46.6439, |
|
"eval_samples_per_second": 881.187, |
|
"eval_steps_per_second": 55.077, |
|
"step": 440000 |
|
}, |
|
{ |
|
"epoch": 9.18, |
|
"learning_rate": 3.3346666666666666e-07, |
|
"loss": 2.0689, |
|
"step": 448000 |
|
}, |
|
{ |
|
"epoch": 9.18, |
|
"eval_loss": 1.9351186752319336, |
|
"eval_runtime": 45.2002, |
|
"eval_samples_per_second": 909.333, |
|
"eval_steps_per_second": 56.836, |
|
"step": 448000 |
|
}, |
|
{ |
|
"epoch": 9.34, |
|
"eval_loss": 1.9312766790390015, |
|
"eval_runtime": 47.0788, |
|
"eval_samples_per_second": 873.047, |
|
"eval_steps_per_second": 54.568, |
|
"step": 456000 |
|
}, |
|
{ |
|
"epoch": 9.51, |
|
"learning_rate": 3.307333333333333e-07, |
|
"loss": 2.0511, |
|
"step": 464000 |
|
}, |
|
{ |
|
"epoch": 9.51, |
|
"eval_loss": 1.9392344951629639, |
|
"eval_runtime": 47.5719, |
|
"eval_samples_per_second": 863.997, |
|
"eval_steps_per_second": 54.002, |
|
"step": 464000 |
|
}, |
|
{ |
|
"epoch": 9.67, |
|
"eval_loss": 1.9278026819229126, |
|
"eval_runtime": 48.6246, |
|
"eval_samples_per_second": 845.292, |
|
"eval_steps_per_second": 52.833, |
|
"step": 472000 |
|
}, |
|
{ |
|
"epoch": 9.83, |
|
"learning_rate": 3.28e-07, |
|
"loss": 2.0664, |
|
"step": 480000 |
|
}, |
|
{ |
|
"epoch": 9.83, |
|
"eval_loss": 1.9384809732437134, |
|
"eval_runtime": 48.0322, |
|
"eval_samples_per_second": 855.717, |
|
"eval_steps_per_second": 53.485, |
|
"step": 480000 |
|
}, |
|
{ |
|
"epoch": 10.0, |
|
"eval_loss": 1.9471704959869385, |
|
"eval_runtime": 46.9, |
|
"eval_samples_per_second": 876.375, |
|
"eval_steps_per_second": 54.776, |
|
"step": 488000 |
|
}, |
|
{ |
|
"epoch": 10.16, |
|
"learning_rate": 3.252666666666667e-07, |
|
"loss": 2.0565, |
|
"step": 496000 |
|
}, |
|
{ |
|
"epoch": 10.16, |
|
"eval_loss": 1.937743902206421, |
|
"eval_runtime": 48.1967, |
|
"eval_samples_per_second": 852.796, |
|
"eval_steps_per_second": 53.302, |
|
"step": 496000 |
|
}, |
|
{ |
|
"epoch": 10.33, |
|
"eval_loss": 1.9480650424957275, |
|
"eval_runtime": 47.3213, |
|
"eval_samples_per_second": 868.572, |
|
"eval_steps_per_second": 54.288, |
|
"step": 504000 |
|
}, |
|
{ |
|
"epoch": 10.49, |
|
"learning_rate": 3.2253333333333334e-07, |
|
"loss": 2.0566, |
|
"step": 512000 |
|
}, |
|
{ |
|
"epoch": 10.49, |
|
"eval_loss": 1.9453697204589844, |
|
"eval_runtime": 47.4611, |
|
"eval_samples_per_second": 866.014, |
|
"eval_steps_per_second": 54.128, |
|
"step": 512000 |
|
}, |
|
{ |
|
"epoch": 10.65, |
|
"eval_loss": 1.9244284629821777, |
|
"eval_runtime": 48.2751, |
|
"eval_samples_per_second": 851.412, |
|
"eval_steps_per_second": 53.216, |
|
"step": 520000 |
|
}, |
|
{ |
|
"epoch": 10.82, |
|
"learning_rate": 3.198e-07, |
|
"loss": 2.0523, |
|
"step": 528000 |
|
}, |
|
{ |
|
"epoch": 10.82, |
|
"eval_loss": 1.935764193534851, |
|
"eval_runtime": 47.3047, |
|
"eval_samples_per_second": 868.877, |
|
"eval_steps_per_second": 54.307, |
|
"step": 528000 |
|
}, |
|
{ |
|
"epoch": 10.98, |
|
"eval_loss": 1.9176356792449951, |
|
"eval_runtime": 46.998, |
|
"eval_samples_per_second": 874.549, |
|
"eval_steps_per_second": 54.662, |
|
"step": 536000 |
|
}, |
|
{ |
|
"epoch": 11.15, |
|
"learning_rate": 3.1706666666666665e-07, |
|
"loss": 2.0554, |
|
"step": 544000 |
|
}, |
|
{ |
|
"epoch": 11.15, |
|
"eval_loss": 1.9284305572509766, |
|
"eval_runtime": 47.0847, |
|
"eval_samples_per_second": 872.937, |
|
"eval_steps_per_second": 54.561, |
|
"step": 544000 |
|
}, |
|
{ |
|
"epoch": 11.31, |
|
"eval_loss": 1.9287078380584717, |
|
"eval_runtime": 47.0031, |
|
"eval_samples_per_second": 874.453, |
|
"eval_steps_per_second": 54.656, |
|
"step": 552000 |
|
}, |
|
{ |
|
"epoch": 11.47, |
|
"learning_rate": 3.1433333333333336e-07, |
|
"loss": 2.0485, |
|
"step": 560000 |
|
}, |
|
{ |
|
"epoch": 11.47, |
|
"eval_loss": 1.923736810684204, |
|
"eval_runtime": 45.756, |
|
"eval_samples_per_second": 898.286, |
|
"eval_steps_per_second": 56.146, |
|
"step": 560000 |
|
}, |
|
{ |
|
"epoch": 11.64, |
|
"eval_loss": 1.920929193496704, |
|
"eval_runtime": 48.761, |
|
"eval_samples_per_second": 842.927, |
|
"eval_steps_per_second": 52.686, |
|
"step": 568000 |
|
}, |
|
{ |
|
"epoch": 11.8, |
|
"learning_rate": 3.116e-07, |
|
"loss": 2.0485, |
|
"step": 576000 |
|
}, |
|
{ |
|
"epoch": 11.8, |
|
"eval_loss": 1.9261997938156128, |
|
"eval_runtime": 47.8227, |
|
"eval_samples_per_second": 859.467, |
|
"eval_steps_per_second": 53.719, |
|
"step": 576000 |
|
}, |
|
{ |
|
"epoch": 11.97, |
|
"eval_loss": 1.9207805395126343, |
|
"eval_runtime": 47.2192, |
|
"eval_samples_per_second": 870.452, |
|
"eval_steps_per_second": 54.406, |
|
"step": 584000 |
|
}, |
|
{ |
|
"epoch": 12.13, |
|
"learning_rate": 3.0886666666666667e-07, |
|
"loss": 2.0542, |
|
"step": 592000 |
|
}, |
|
{ |
|
"epoch": 12.13, |
|
"eval_loss": 1.9320107698440552, |
|
"eval_runtime": 47.8604, |
|
"eval_samples_per_second": 858.789, |
|
"eval_steps_per_second": 53.677, |
|
"step": 592000 |
|
}, |
|
{ |
|
"epoch": 12.29, |
|
"eval_loss": 1.9076824188232422, |
|
"eval_runtime": 48.2616, |
|
"eval_samples_per_second": 851.651, |
|
"eval_steps_per_second": 53.231, |
|
"step": 600000 |
|
}, |
|
{ |
|
"epoch": 12.46, |
|
"learning_rate": 3.061333333333333e-07, |
|
"loss": 2.0527, |
|
"step": 608000 |
|
}, |
|
{ |
|
"epoch": 12.46, |
|
"eval_loss": 1.9248530864715576, |
|
"eval_runtime": 46.9547, |
|
"eval_samples_per_second": 875.353, |
|
"eval_steps_per_second": 54.712, |
|
"step": 608000 |
|
}, |
|
{ |
|
"epoch": 12.62, |
|
"eval_loss": 1.919195532798767, |
|
"eval_runtime": 47.8559, |
|
"eval_samples_per_second": 858.87, |
|
"eval_steps_per_second": 53.682, |
|
"step": 616000 |
|
}, |
|
{ |
|
"epoch": 12.78, |
|
"learning_rate": 3.034e-07, |
|
"loss": 2.0606, |
|
"step": 624000 |
|
}, |
|
{ |
|
"epoch": 12.78, |
|
"eval_loss": 1.9151537418365479, |
|
"eval_runtime": 47.7757, |
|
"eval_samples_per_second": 860.311, |
|
"eval_steps_per_second": 53.772, |
|
"step": 624000 |
|
}, |
|
{ |
|
"epoch": 12.95, |
|
"eval_loss": 1.9194060564041138, |
|
"eval_runtime": 46.9693, |
|
"eval_samples_per_second": 875.082, |
|
"eval_steps_per_second": 54.695, |
|
"step": 632000 |
|
}, |
|
{ |
|
"epoch": 13.11, |
|
"learning_rate": 3.0066666666666663e-07, |
|
"loss": 2.0542, |
|
"step": 640000 |
|
}, |
|
{ |
|
"epoch": 13.11, |
|
"eval_loss": 1.919755458831787, |
|
"eval_runtime": 48.4882, |
|
"eval_samples_per_second": 847.671, |
|
"eval_steps_per_second": 52.982, |
|
"step": 640000 |
|
}, |
|
{ |
|
"epoch": 13.28, |
|
"eval_loss": 1.9134645462036133, |
|
"eval_runtime": 49.7276, |
|
"eval_samples_per_second": 826.544, |
|
"eval_steps_per_second": 51.662, |
|
"step": 648000 |
|
}, |
|
{ |
|
"epoch": 13.44, |
|
"learning_rate": 2.9793333333333334e-07, |
|
"loss": 2.0593, |
|
"step": 656000 |
|
}, |
|
{ |
|
"epoch": 13.44, |
|
"eval_loss": 1.919248342514038, |
|
"eval_runtime": 46.444, |
|
"eval_samples_per_second": 884.98, |
|
"eval_steps_per_second": 55.314, |
|
"step": 656000 |
|
}, |
|
{ |
|
"epoch": 13.6, |
|
"eval_loss": 1.9256792068481445, |
|
"eval_runtime": 46.3375, |
|
"eval_samples_per_second": 887.013, |
|
"eval_steps_per_second": 55.441, |
|
"step": 664000 |
|
}, |
|
{ |
|
"epoch": 13.77, |
|
"learning_rate": 2.952e-07, |
|
"loss": 2.0467, |
|
"step": 672000 |
|
}, |
|
{ |
|
"epoch": 13.77, |
|
"eval_loss": 1.9134962558746338, |
|
"eval_runtime": 48.3494, |
|
"eval_samples_per_second": 850.104, |
|
"eval_steps_per_second": 53.134, |
|
"step": 672000 |
|
}, |
|
{ |
|
"epoch": 13.93, |
|
"eval_loss": 1.8994532823562622, |
|
"eval_runtime": 46.5628, |
|
"eval_samples_per_second": 882.721, |
|
"eval_steps_per_second": 55.173, |
|
"step": 680000 |
|
}, |
|
{ |
|
"epoch": 14.1, |
|
"learning_rate": 2.9246666666666665e-07, |
|
"loss": 2.0535, |
|
"step": 688000 |
|
}, |
|
{ |
|
"epoch": 14.1, |
|
"eval_loss": 1.9305521249771118, |
|
"eval_runtime": 46.8843, |
|
"eval_samples_per_second": 876.669, |
|
"eval_steps_per_second": 54.795, |
|
"step": 688000 |
|
}, |
|
{ |
|
"epoch": 14.26, |
|
"eval_loss": 1.927839994430542, |
|
"eval_runtime": 48.051, |
|
"eval_samples_per_second": 855.384, |
|
"eval_steps_per_second": 53.464, |
|
"step": 696000 |
|
}, |
|
{ |
|
"epoch": 14.42, |
|
"learning_rate": 2.897333333333333e-07, |
|
"loss": 2.0559, |
|
"step": 704000 |
|
}, |
|
{ |
|
"epoch": 14.42, |
|
"eval_loss": 1.9136629104614258, |
|
"eval_runtime": 47.4852, |
|
"eval_samples_per_second": 865.574, |
|
"eval_steps_per_second": 54.101, |
|
"step": 704000 |
|
}, |
|
{ |
|
"epoch": 14.59, |
|
"eval_loss": 1.9164540767669678, |
|
"eval_runtime": 46.9282, |
|
"eval_samples_per_second": 875.849, |
|
"eval_steps_per_second": 54.743, |
|
"step": 712000 |
|
}, |
|
{ |
|
"epoch": 14.75, |
|
"learning_rate": 2.8699999999999996e-07, |
|
"loss": 2.0544, |
|
"step": 720000 |
|
}, |
|
{ |
|
"epoch": 14.75, |
|
"eval_loss": 1.921919345855713, |
|
"eval_runtime": 50.0641, |
|
"eval_samples_per_second": 820.987, |
|
"eval_steps_per_second": 51.314, |
|
"step": 720000 |
|
}, |
|
{ |
|
"epoch": 14.92, |
|
"eval_loss": 1.9200448989868164, |
|
"eval_runtime": 50.8477, |
|
"eval_samples_per_second": 808.336, |
|
"eval_steps_per_second": 50.523, |
|
"step": 728000 |
|
}, |
|
{ |
|
"epoch": 15.08, |
|
"learning_rate": 2.8426666666666667e-07, |
|
"loss": 2.0493, |
|
"step": 736000 |
|
}, |
|
{ |
|
"epoch": 15.08, |
|
"eval_loss": 1.924161434173584, |
|
"eval_runtime": 46.3633, |
|
"eval_samples_per_second": 886.519, |
|
"eval_steps_per_second": 55.41, |
|
"step": 736000 |
|
}, |
|
{ |
|
"epoch": 15.24, |
|
"eval_loss": 1.9263921976089478, |
|
"eval_runtime": 48.7265, |
|
"eval_samples_per_second": 843.525, |
|
"eval_steps_per_second": 52.723, |
|
"step": 744000 |
|
}, |
|
{ |
|
"epoch": 15.41, |
|
"learning_rate": 2.815333333333333e-07, |
|
"loss": 2.0538, |
|
"step": 752000 |
|
}, |
|
{ |
|
"epoch": 15.41, |
|
"eval_loss": 1.9333122968673706, |
|
"eval_runtime": 48.0632, |
|
"eval_samples_per_second": 855.165, |
|
"eval_steps_per_second": 53.45, |
|
"step": 752000 |
|
}, |
|
{ |
|
"epoch": 15.57, |
|
"eval_loss": 1.912643551826477, |
|
"eval_runtime": 46.2548, |
|
"eval_samples_per_second": 888.599, |
|
"eval_steps_per_second": 55.54, |
|
"step": 760000 |
|
}, |
|
{ |
|
"epoch": 15.73, |
|
"learning_rate": 2.7880000000000003e-07, |
|
"loss": 2.0457, |
|
"step": 768000 |
|
}, |
|
{ |
|
"epoch": 15.73, |
|
"eval_loss": 1.9022926092147827, |
|
"eval_runtime": 48.0637, |
|
"eval_samples_per_second": 855.157, |
|
"eval_steps_per_second": 53.45, |
|
"step": 768000 |
|
}, |
|
{ |
|
"epoch": 15.9, |
|
"eval_loss": 1.9153952598571777, |
|
"eval_runtime": 47.4167, |
|
"eval_samples_per_second": 866.825, |
|
"eval_steps_per_second": 54.179, |
|
"step": 776000 |
|
}, |
|
{ |
|
"epoch": 16.06, |
|
"learning_rate": 2.7606666666666664e-07, |
|
"loss": 2.0436, |
|
"step": 784000 |
|
}, |
|
{ |
|
"epoch": 16.06, |
|
"eval_loss": 1.894668698310852, |
|
"eval_runtime": 47.3621, |
|
"eval_samples_per_second": 867.824, |
|
"eval_steps_per_second": 54.242, |
|
"step": 784000 |
|
}, |
|
{ |
|
"epoch": 16.23, |
|
"eval_loss": 1.9197973012924194, |
|
"eval_runtime": 48.7318, |
|
"eval_samples_per_second": 843.432, |
|
"eval_steps_per_second": 52.717, |
|
"step": 792000 |
|
}, |
|
{ |
|
"epoch": 16.39, |
|
"learning_rate": 2.733333333333333e-07, |
|
"loss": 2.0527, |
|
"step": 800000 |
|
}, |
|
{ |
|
"epoch": 16.39, |
|
"eval_loss": 1.912539005279541, |
|
"eval_runtime": 47.1232, |
|
"eval_samples_per_second": 872.224, |
|
"eval_steps_per_second": 54.517, |
|
"step": 800000 |
|
}, |
|
{ |
|
"epoch": 16.55, |
|
"eval_loss": 1.9129201173782349, |
|
"eval_runtime": 46.5511, |
|
"eval_samples_per_second": 882.944, |
|
"eval_steps_per_second": 55.187, |
|
"step": 808000 |
|
}, |
|
{ |
|
"epoch": 16.72, |
|
"learning_rate": 2.706e-07, |
|
"loss": 2.0484, |
|
"step": 816000 |
|
}, |
|
{ |
|
"epoch": 16.72, |
|
"eval_loss": 1.930238127708435, |
|
"eval_runtime": 48.6118, |
|
"eval_samples_per_second": 845.514, |
|
"eval_steps_per_second": 52.847, |
|
"step": 816000 |
|
}, |
|
{ |
|
"epoch": 16.88, |
|
"eval_loss": 1.915164589881897, |
|
"eval_runtime": 47.3129, |
|
"eval_samples_per_second": 868.727, |
|
"eval_steps_per_second": 54.298, |
|
"step": 824000 |
|
}, |
|
{ |
|
"epoch": 17.05, |
|
"learning_rate": 2.6786666666666666e-07, |
|
"loss": 2.0535, |
|
"step": 832000 |
|
}, |
|
{ |
|
"epoch": 17.05, |
|
"eval_loss": 1.9222667217254639, |
|
"eval_runtime": 48.8011, |
|
"eval_samples_per_second": 842.235, |
|
"eval_steps_per_second": 52.642, |
|
"step": 832000 |
|
}, |
|
{ |
|
"epoch": 17.21, |
|
"eval_loss": 1.9195363521575928, |
|
"eval_runtime": 47.9497, |
|
"eval_samples_per_second": 857.19, |
|
"eval_steps_per_second": 53.577, |
|
"step": 840000 |
|
}, |
|
{ |
|
"epoch": 17.37, |
|
"learning_rate": 2.651333333333333e-07, |
|
"loss": 2.0516, |
|
"step": 848000 |
|
}, |
|
{ |
|
"epoch": 17.37, |
|
"eval_loss": 1.9071532487869263, |
|
"eval_runtime": 46.9027, |
|
"eval_samples_per_second": 876.325, |
|
"eval_steps_per_second": 54.773, |
|
"step": 848000 |
|
}, |
|
{ |
|
"epoch": 17.54, |
|
"eval_loss": 1.9210063219070435, |
|
"eval_runtime": 48.1848, |
|
"eval_samples_per_second": 853.007, |
|
"eval_steps_per_second": 53.316, |
|
"step": 856000 |
|
}, |
|
{ |
|
"epoch": 17.7, |
|
"learning_rate": 2.624e-07, |
|
"loss": 2.0546, |
|
"step": 864000 |
|
}, |
|
{ |
|
"epoch": 17.7, |
|
"eval_loss": 1.9014463424682617, |
|
"eval_runtime": 49.087, |
|
"eval_samples_per_second": 837.329, |
|
"eval_steps_per_second": 52.336, |
|
"step": 864000 |
|
}, |
|
{ |
|
"epoch": 17.87, |
|
"eval_loss": 1.9129884243011475, |
|
"eval_runtime": 46.451, |
|
"eval_samples_per_second": 884.847, |
|
"eval_steps_per_second": 55.306, |
|
"step": 872000 |
|
}, |
|
{ |
|
"epoch": 18.03, |
|
"learning_rate": 2.596666666666667e-07, |
|
"loss": 2.0439, |
|
"step": 880000 |
|
}, |
|
{ |
|
"epoch": 18.03, |
|
"eval_loss": 1.9064290523529053, |
|
"eval_runtime": 48.2272, |
|
"eval_samples_per_second": 852.258, |
|
"eval_steps_per_second": 53.269, |
|
"step": 880000 |
|
}, |
|
{ |
|
"epoch": 18.19, |
|
"eval_loss": 1.9166321754455566, |
|
"eval_runtime": 48.6696, |
|
"eval_samples_per_second": 844.511, |
|
"eval_steps_per_second": 52.785, |
|
"step": 888000 |
|
}, |
|
{ |
|
"epoch": 18.36, |
|
"learning_rate": 2.5693333333333333e-07, |
|
"loss": 2.0391, |
|
"step": 896000 |
|
}, |
|
{ |
|
"epoch": 18.36, |
|
"eval_loss": 1.930993676185608, |
|
"eval_runtime": 47.1469, |
|
"eval_samples_per_second": 871.786, |
|
"eval_steps_per_second": 54.489, |
|
"step": 896000 |
|
}, |
|
{ |
|
"epoch": 18.52, |
|
"eval_loss": 1.9064345359802246, |
|
"eval_runtime": 45.9523, |
|
"eval_samples_per_second": 894.45, |
|
"eval_steps_per_second": 55.906, |
|
"step": 904000 |
|
}, |
|
{ |
|
"epoch": 18.69, |
|
"learning_rate": 2.542e-07, |
|
"loss": 2.0568, |
|
"step": 912000 |
|
}, |
|
{ |
|
"epoch": 18.69, |
|
"eval_loss": 1.9106640815734863, |
|
"eval_runtime": 47.4421, |
|
"eval_samples_per_second": 866.361, |
|
"eval_steps_per_second": 54.15, |
|
"step": 912000 |
|
}, |
|
{ |
|
"epoch": 18.85, |
|
"eval_loss": 1.931686520576477, |
|
"eval_runtime": 47.8842, |
|
"eval_samples_per_second": 858.363, |
|
"eval_steps_per_second": 53.65, |
|
"step": 920000 |
|
}, |
|
{ |
|
"epoch": 19.01, |
|
"learning_rate": 2.5146666666666664e-07, |
|
"loss": 2.047, |
|
"step": 928000 |
|
}, |
|
{ |
|
"epoch": 19.01, |
|
"eval_loss": 1.9234881401062012, |
|
"eval_runtime": 47.1905, |
|
"eval_samples_per_second": 870.981, |
|
"eval_steps_per_second": 54.439, |
|
"step": 928000 |
|
}, |
|
{ |
|
"epoch": 19.18, |
|
"eval_loss": 1.917264699935913, |
|
"eval_runtime": 48.2222, |
|
"eval_samples_per_second": 852.346, |
|
"eval_steps_per_second": 53.274, |
|
"step": 936000 |
|
}, |
|
{ |
|
"epoch": 19.34, |
|
"learning_rate": 2.4873333333333335e-07, |
|
"loss": 2.0431, |
|
"step": 944000 |
|
}, |
|
{ |
|
"epoch": 19.34, |
|
"eval_loss": 1.8946489095687866, |
|
"eval_runtime": 48.6788, |
|
"eval_samples_per_second": 844.352, |
|
"eval_steps_per_second": 52.775, |
|
"step": 944000 |
|
}, |
|
{ |
|
"epoch": 19.5, |
|
"eval_loss": 1.9247366189956665, |
|
"eval_runtime": 47.3908, |
|
"eval_samples_per_second": 867.3, |
|
"eval_steps_per_second": 54.209, |
|
"step": 952000 |
|
}, |
|
{ |
|
"epoch": 19.67, |
|
"learning_rate": 2.46e-07, |
|
"loss": 2.0444, |
|
"step": 960000 |
|
}, |
|
{ |
|
"epoch": 19.67, |
|
"eval_loss": 1.9105546474456787, |
|
"eval_runtime": 47.3389, |
|
"eval_samples_per_second": 868.249, |
|
"eval_steps_per_second": 54.268, |
|
"step": 960000 |
|
}, |
|
{ |
|
"epoch": 19.83, |
|
"eval_loss": 1.9022789001464844, |
|
"eval_runtime": 48.8131, |
|
"eval_samples_per_second": 842.027, |
|
"eval_steps_per_second": 52.629, |
|
"step": 968000 |
|
}, |
|
{ |
|
"epoch": 20.0, |
|
"learning_rate": 2.4326666666666666e-07, |
|
"loss": 2.0465, |
|
"step": 976000 |
|
}, |
|
{ |
|
"epoch": 20.0, |
|
"eval_loss": 1.9196343421936035, |
|
"eval_runtime": 47.9588, |
|
"eval_samples_per_second": 857.027, |
|
"eval_steps_per_second": 53.567, |
|
"step": 976000 |
|
}, |
|
{ |
|
"epoch": 20.16, |
|
"eval_loss": 1.9111627340316772, |
|
"eval_runtime": 48.5638, |
|
"eval_samples_per_second": 846.35, |
|
"eval_steps_per_second": 52.899, |
|
"step": 984000 |
|
}, |
|
{ |
|
"epoch": 20.32, |
|
"learning_rate": 2.405333333333333e-07, |
|
"loss": 2.0454, |
|
"step": 992000 |
|
}, |
|
{ |
|
"epoch": 20.32, |
|
"eval_loss": 1.9045841693878174, |
|
"eval_runtime": 47.3858, |
|
"eval_samples_per_second": 867.39, |
|
"eval_steps_per_second": 54.215, |
|
"step": 992000 |
|
}, |
|
{ |
|
"epoch": 20.49, |
|
"eval_loss": 1.9031916856765747, |
|
"eval_runtime": 47.9829, |
|
"eval_samples_per_second": 856.596, |
|
"eval_steps_per_second": 53.54, |
|
"step": 1000000 |
|
}, |
|
{ |
|
"epoch": 20.65, |
|
"learning_rate": 2.3779999999999997e-07, |
|
"loss": 2.04, |
|
"step": 1008000 |
|
}, |
|
{ |
|
"epoch": 20.65, |
|
"eval_loss": 1.9129735231399536, |
|
"eval_runtime": 48.8696, |
|
"eval_samples_per_second": 841.055, |
|
"eval_steps_per_second": 52.568, |
|
"step": 1008000 |
|
}, |
|
{ |
|
"epoch": 20.82, |
|
"eval_loss": 1.922319769859314, |
|
"eval_runtime": 48.2743, |
|
"eval_samples_per_second": 851.427, |
|
"eval_steps_per_second": 53.217, |
|
"step": 1016000 |
|
}, |
|
{ |
|
"epoch": 20.98, |
|
"learning_rate": 2.3506666666666668e-07, |
|
"loss": 2.0406, |
|
"step": 1024000 |
|
}, |
|
{ |
|
"epoch": 20.98, |
|
"eval_loss": 1.9261207580566406, |
|
"eval_runtime": 48.256, |
|
"eval_samples_per_second": 851.749, |
|
"eval_steps_per_second": 53.237, |
|
"step": 1024000 |
|
}, |
|
{ |
|
"epoch": 21.14, |
|
"eval_loss": 1.9014304876327515, |
|
"eval_runtime": 47.3123, |
|
"eval_samples_per_second": 868.737, |
|
"eval_steps_per_second": 54.299, |
|
"step": 1032000 |
|
}, |
|
{ |
|
"epoch": 21.31, |
|
"learning_rate": 2.3233333333333334e-07, |
|
"loss": 2.0401, |
|
"step": 1040000 |
|
}, |
|
{ |
|
"epoch": 21.31, |
|
"eval_loss": 1.905246615409851, |
|
"eval_runtime": 47.017, |
|
"eval_samples_per_second": 874.195, |
|
"eval_steps_per_second": 54.64, |
|
"step": 1040000 |
|
}, |
|
{ |
|
"epoch": 21.47, |
|
"eval_loss": 1.9004502296447754, |
|
"eval_runtime": 47.6152, |
|
"eval_samples_per_second": 863.213, |
|
"eval_steps_per_second": 53.953, |
|
"step": 1048000 |
|
}, |
|
{ |
|
"epoch": 21.64, |
|
"learning_rate": 2.2960000000000002e-07, |
|
"loss": 2.044, |
|
"step": 1056000 |
|
}, |
|
{ |
|
"epoch": 21.64, |
|
"eval_loss": 1.9044198989868164, |
|
"eval_runtime": 48.1051, |
|
"eval_samples_per_second": 854.421, |
|
"eval_steps_per_second": 53.404, |
|
"step": 1056000 |
|
}, |
|
{ |
|
"epoch": 21.8, |
|
"eval_loss": 1.9169893264770508, |
|
"eval_runtime": 49.4919, |
|
"eval_samples_per_second": 830.479, |
|
"eval_steps_per_second": 51.907, |
|
"step": 1064000 |
|
}, |
|
{ |
|
"epoch": 21.96, |
|
"learning_rate": 2.2686666666666667e-07, |
|
"loss": 2.0401, |
|
"step": 1072000 |
|
}, |
|
{ |
|
"epoch": 21.96, |
|
"eval_loss": 1.9103343486785889, |
|
"eval_runtime": 47.4547, |
|
"eval_samples_per_second": 866.132, |
|
"eval_steps_per_second": 54.136, |
|
"step": 1072000 |
|
}, |
|
{ |
|
"epoch": 22.13, |
|
"eval_loss": 1.8970900774002075, |
|
"eval_runtime": 46.9831, |
|
"eval_samples_per_second": 874.825, |
|
"eval_steps_per_second": 54.679, |
|
"step": 1080000 |
|
}, |
|
{ |
|
"epoch": 22.29, |
|
"learning_rate": 2.2413333333333333e-07, |
|
"loss": 2.0458, |
|
"step": 1088000 |
|
}, |
|
{ |
|
"epoch": 22.29, |
|
"eval_loss": 1.9256750345230103, |
|
"eval_runtime": 49.8527, |
|
"eval_samples_per_second": 824.47, |
|
"eval_steps_per_second": 51.532, |
|
"step": 1088000 |
|
}, |
|
{ |
|
"epoch": 22.45, |
|
"eval_loss": 1.9029202461242676, |
|
"eval_runtime": 48.7208, |
|
"eval_samples_per_second": 843.623, |
|
"eval_steps_per_second": 52.729, |
|
"step": 1096000 |
|
}, |
|
{ |
|
"epoch": 22.62, |
|
"learning_rate": 2.214e-07, |
|
"loss": 2.0414, |
|
"step": 1104000 |
|
}, |
|
{ |
|
"epoch": 22.62, |
|
"eval_loss": 1.915024995803833, |
|
"eval_runtime": 47.4381, |
|
"eval_samples_per_second": 866.434, |
|
"eval_steps_per_second": 54.155, |
|
"step": 1104000 |
|
}, |
|
{ |
|
"epoch": 22.78, |
|
"eval_loss": 1.9123761653900146, |
|
"eval_runtime": 48.794, |
|
"eval_samples_per_second": 842.358, |
|
"eval_steps_per_second": 52.65, |
|
"step": 1112000 |
|
}, |
|
{ |
|
"epoch": 22.95, |
|
"learning_rate": 2.1866666666666667e-07, |
|
"loss": 2.0419, |
|
"step": 1120000 |
|
}, |
|
{ |
|
"epoch": 22.95, |
|
"eval_loss": 1.9029767513275146, |
|
"eval_runtime": 46.4756, |
|
"eval_samples_per_second": 884.378, |
|
"eval_steps_per_second": 55.276, |
|
"step": 1120000 |
|
}, |
|
{ |
|
"epoch": 23.11, |
|
"eval_loss": 1.9144717454910278, |
|
"eval_runtime": 47.2249, |
|
"eval_samples_per_second": 870.345, |
|
"eval_steps_per_second": 54.399, |
|
"step": 1128000 |
|
}, |
|
{ |
|
"epoch": 23.27, |
|
"learning_rate": 2.1593333333333332e-07, |
|
"loss": 2.0415, |
|
"step": 1136000 |
|
}, |
|
{ |
|
"epoch": 23.27, |
|
"eval_loss": 1.9132155179977417, |
|
"eval_runtime": 49.2316, |
|
"eval_samples_per_second": 834.869, |
|
"eval_steps_per_second": 52.182, |
|
"step": 1136000 |
|
}, |
|
{ |
|
"epoch": 23.44, |
|
"eval_loss": 1.9053661823272705, |
|
"eval_runtime": 47.1942, |
|
"eval_samples_per_second": 870.913, |
|
"eval_steps_per_second": 54.435, |
|
"step": 1144000 |
|
}, |
|
{ |
|
"epoch": 23.6, |
|
"learning_rate": 2.132e-07, |
|
"loss": 2.0394, |
|
"step": 1152000 |
|
}, |
|
{ |
|
"epoch": 23.6, |
|
"eval_loss": 1.915486216545105, |
|
"eval_runtime": 47.1108, |
|
"eval_samples_per_second": 872.454, |
|
"eval_steps_per_second": 54.531, |
|
"step": 1152000 |
|
}, |
|
{ |
|
"epoch": 23.77, |
|
"eval_loss": 1.9147080183029175, |
|
"eval_runtime": 48.0401, |
|
"eval_samples_per_second": 855.578, |
|
"eval_steps_per_second": 53.476, |
|
"step": 1160000 |
|
}, |
|
{ |
|
"epoch": 23.93, |
|
"learning_rate": 2.1046666666666666e-07, |
|
"loss": 2.0414, |
|
"step": 1168000 |
|
}, |
|
{ |
|
"epoch": 23.93, |
|
"eval_loss": 1.9130446910858154, |
|
"eval_runtime": 47.5065, |
|
"eval_samples_per_second": 865.187, |
|
"eval_steps_per_second": 54.077, |
|
"step": 1168000 |
|
}, |
|
{ |
|
"epoch": 24.09, |
|
"eval_loss": 1.9002370834350586, |
|
"eval_runtime": 46.3893, |
|
"eval_samples_per_second": 886.023, |
|
"eval_steps_per_second": 55.379, |
|
"step": 1176000 |
|
}, |
|
{ |
|
"epoch": 24.26, |
|
"learning_rate": 2.0773333333333334e-07, |
|
"loss": 2.036, |
|
"step": 1184000 |
|
}, |
|
{ |
|
"epoch": 24.26, |
|
"eval_loss": 1.899147391319275, |
|
"eval_runtime": 49.4488, |
|
"eval_samples_per_second": 831.204, |
|
"eval_steps_per_second": 51.953, |
|
"step": 1184000 |
|
}, |
|
{ |
|
"epoch": 24.42, |
|
"eval_loss": 1.9202589988708496, |
|
"eval_runtime": 47.2347, |
|
"eval_samples_per_second": 870.165, |
|
"eval_steps_per_second": 54.388, |
|
"step": 1192000 |
|
}, |
|
{ |
|
"epoch": 24.59, |
|
"learning_rate": 2.05e-07, |
|
"loss": 2.0393, |
|
"step": 1200000 |
|
}, |
|
{ |
|
"epoch": 24.59, |
|
"eval_loss": 1.9327338933944702, |
|
"eval_runtime": 47.2461, |
|
"eval_samples_per_second": 869.956, |
|
"eval_steps_per_second": 54.375, |
|
"step": 1200000 |
|
}, |
|
{ |
|
"epoch": 24.75, |
|
"eval_loss": 1.9098608493804932, |
|
"eval_runtime": 49.039, |
|
"eval_samples_per_second": 838.15, |
|
"eval_steps_per_second": 52.387, |
|
"step": 1208000 |
|
}, |
|
{ |
|
"epoch": 24.91, |
|
"learning_rate": 2.0226666666666668e-07, |
|
"loss": 2.0375, |
|
"step": 1216000 |
|
}, |
|
{ |
|
"epoch": 24.91, |
|
"eval_loss": 1.9097236394882202, |
|
"eval_runtime": 46.2815, |
|
"eval_samples_per_second": 888.088, |
|
"eval_steps_per_second": 55.508, |
|
"step": 1216000 |
|
}, |
|
{ |
|
"epoch": 25.08, |
|
"eval_loss": 1.900553584098816, |
|
"eval_runtime": 47.0809, |
|
"eval_samples_per_second": 873.008, |
|
"eval_steps_per_second": 54.566, |
|
"step": 1224000 |
|
}, |
|
{ |
|
"epoch": 25.24, |
|
"learning_rate": 1.9953333333333333e-07, |
|
"loss": 2.0384, |
|
"step": 1232000 |
|
}, |
|
{ |
|
"epoch": 25.24, |
|
"eval_loss": 1.9062752723693848, |
|
"eval_runtime": 48.6722, |
|
"eval_samples_per_second": 844.466, |
|
"eval_steps_per_second": 52.782, |
|
"step": 1232000 |
|
}, |
|
{ |
|
"epoch": 25.41, |
|
"eval_loss": 1.9056702852249146, |
|
"eval_runtime": 46.3936, |
|
"eval_samples_per_second": 885.941, |
|
"eval_steps_per_second": 55.374, |
|
"step": 1240000 |
|
}, |
|
{ |
|
"epoch": 25.57, |
|
"learning_rate": 1.968e-07, |
|
"loss": 2.0392, |
|
"step": 1248000 |
|
}, |
|
{ |
|
"epoch": 25.57, |
|
"eval_loss": 1.9037301540374756, |
|
"eval_runtime": 47.9627, |
|
"eval_samples_per_second": 856.958, |
|
"eval_steps_per_second": 53.563, |
|
"step": 1248000 |
|
}, |
|
{ |
|
"epoch": 25.73, |
|
"eval_loss": 1.9013148546218872, |
|
"eval_runtime": 47.5129, |
|
"eval_samples_per_second": 865.07, |
|
"eval_steps_per_second": 54.07, |
|
"step": 1256000 |
|
}, |
|
{ |
|
"epoch": 25.9, |
|
"learning_rate": 1.9406666666666667e-07, |
|
"loss": 2.0503, |
|
"step": 1264000 |
|
}, |
|
{ |
|
"epoch": 25.9, |
|
"eval_loss": 1.903662085533142, |
|
"eval_runtime": 47.6911, |
|
"eval_samples_per_second": 861.838, |
|
"eval_steps_per_second": 53.867, |
|
"step": 1264000 |
|
}, |
|
{ |
|
"epoch": 26.06, |
|
"eval_loss": 1.904166579246521, |
|
"eval_runtime": 51.6224, |
|
"eval_samples_per_second": 796.204, |
|
"eval_steps_per_second": 49.765, |
|
"step": 1272000 |
|
}, |
|
{ |
|
"epoch": 26.22, |
|
"learning_rate": 1.9133333333333333e-07, |
|
"loss": 2.0418, |
|
"step": 1280000 |
|
}, |
|
{ |
|
"epoch": 26.22, |
|
"eval_loss": 1.896562933921814, |
|
"eval_runtime": 47.6826, |
|
"eval_samples_per_second": 861.992, |
|
"eval_steps_per_second": 53.877, |
|
"step": 1280000 |
|
}, |
|
{ |
|
"epoch": 26.39, |
|
"eval_loss": 1.9186903238296509, |
|
"eval_runtime": 49.1139, |
|
"eval_samples_per_second": 836.872, |
|
"eval_steps_per_second": 52.307, |
|
"step": 1288000 |
|
}, |
|
{ |
|
"epoch": 26.55, |
|
"learning_rate": 1.886e-07, |
|
"loss": 2.0416, |
|
"step": 1296000 |
|
}, |
|
{ |
|
"epoch": 26.55, |
|
"eval_loss": 1.9097530841827393, |
|
"eval_runtime": 48.6689, |
|
"eval_samples_per_second": 844.523, |
|
"eval_steps_per_second": 52.785, |
|
"step": 1296000 |
|
}, |
|
{ |
|
"epoch": 26.72, |
|
"eval_loss": 1.9153447151184082, |
|
"eval_runtime": 49.0115, |
|
"eval_samples_per_second": 838.62, |
|
"eval_steps_per_second": 52.416, |
|
"step": 1304000 |
|
}, |
|
{ |
|
"epoch": 26.88, |
|
"learning_rate": 1.8586666666666666e-07, |
|
"loss": 2.0396, |
|
"step": 1312000 |
|
}, |
|
{ |
|
"epoch": 26.88, |
|
"eval_loss": 1.9163771867752075, |
|
"eval_runtime": 48.0083, |
|
"eval_samples_per_second": 856.144, |
|
"eval_steps_per_second": 53.512, |
|
"step": 1312000 |
|
}, |
|
{ |
|
"epoch": 27.04, |
|
"eval_loss": 1.886674404144287, |
|
"eval_runtime": 48.4626, |
|
"eval_samples_per_second": 848.117, |
|
"eval_steps_per_second": 53.01, |
|
"step": 1320000 |
|
}, |
|
{ |
|
"epoch": 27.21, |
|
"learning_rate": 1.8313333333333332e-07, |
|
"loss": 2.0397, |
|
"step": 1328000 |
|
}, |
|
{ |
|
"epoch": 27.21, |
|
"eval_loss": 1.8968819379806519, |
|
"eval_runtime": 48.9216, |
|
"eval_samples_per_second": 840.161, |
|
"eval_steps_per_second": 52.513, |
|
"step": 1328000 |
|
}, |
|
{ |
|
"epoch": 27.37, |
|
"eval_loss": 1.915518879890442, |
|
"eval_runtime": 46.9702, |
|
"eval_samples_per_second": 875.066, |
|
"eval_steps_per_second": 54.694, |
|
"step": 1336000 |
|
}, |
|
{ |
|
"epoch": 27.54, |
|
"learning_rate": 1.804e-07, |
|
"loss": 2.0442, |
|
"step": 1344000 |
|
}, |
|
{ |
|
"epoch": 27.54, |
|
"eval_loss": 1.9004348516464233, |
|
"eval_runtime": 47.3437, |
|
"eval_samples_per_second": 868.163, |
|
"eval_steps_per_second": 54.263, |
|
"step": 1344000 |
|
}, |
|
{ |
|
"epoch": 27.7, |
|
"eval_loss": 1.9026644229888916, |
|
"eval_runtime": 47.3596, |
|
"eval_samples_per_second": 867.87, |
|
"eval_steps_per_second": 54.245, |
|
"step": 1352000 |
|
}, |
|
{ |
|
"epoch": 27.86, |
|
"learning_rate": 1.7766666666666666e-07, |
|
"loss": 2.0332, |
|
"step": 1360000 |
|
}, |
|
{ |
|
"epoch": 27.86, |
|
"eval_loss": 1.9095487594604492, |
|
"eval_runtime": 48.6586, |
|
"eval_samples_per_second": 844.702, |
|
"eval_steps_per_second": 52.796, |
|
"step": 1360000 |
|
}, |
|
{ |
|
"epoch": 28.03, |
|
"eval_loss": 1.9133695363998413, |
|
"eval_runtime": 46.4591, |
|
"eval_samples_per_second": 884.693, |
|
"eval_steps_per_second": 55.296, |
|
"step": 1368000 |
|
}, |
|
{ |
|
"epoch": 28.19, |
|
"learning_rate": 1.7493333333333334e-07, |
|
"loss": 2.0398, |
|
"step": 1376000 |
|
}, |
|
{ |
|
"epoch": 28.19, |
|
"eval_loss": 1.9082748889923096, |
|
"eval_runtime": 48.6075, |
|
"eval_samples_per_second": 845.589, |
|
"eval_steps_per_second": 52.852, |
|
"step": 1376000 |
|
}, |
|
{ |
|
"epoch": 28.36, |
|
"eval_loss": 1.9041306972503662, |
|
"eval_runtime": 47.5314, |
|
"eval_samples_per_second": 864.733, |
|
"eval_steps_per_second": 54.048, |
|
"step": 1384000 |
|
}, |
|
{ |
|
"epoch": 28.52, |
|
"learning_rate": 1.722e-07, |
|
"loss": 2.0387, |
|
"step": 1392000 |
|
}, |
|
{ |
|
"epoch": 28.52, |
|
"eval_loss": 1.8979713916778564, |
|
"eval_runtime": 47.145, |
|
"eval_samples_per_second": 871.821, |
|
"eval_steps_per_second": 54.491, |
|
"step": 1392000 |
|
}, |
|
{ |
|
"epoch": 28.68, |
|
"eval_loss": 1.9209250211715698, |
|
"eval_runtime": 48.0076, |
|
"eval_samples_per_second": 856.156, |
|
"eval_steps_per_second": 53.512, |
|
"step": 1400000 |
|
}, |
|
{ |
|
"epoch": 28.85, |
|
"learning_rate": 1.6946666666666668e-07, |
|
"loss": 2.0378, |
|
"step": 1408000 |
|
}, |
|
{ |
|
"epoch": 28.85, |
|
"eval_loss": 1.896246314048767, |
|
"eval_runtime": 47.2348, |
|
"eval_samples_per_second": 870.163, |
|
"eval_steps_per_second": 54.388, |
|
"step": 1408000 |
|
}, |
|
{ |
|
"epoch": 29.01, |
|
"eval_loss": 1.8981308937072754, |
|
"eval_runtime": 46.2446, |
|
"eval_samples_per_second": 888.796, |
|
"eval_steps_per_second": 55.552, |
|
"step": 1416000 |
|
}, |
|
{ |
|
"epoch": 29.17, |
|
"learning_rate": 1.6673333333333333e-07, |
|
"loss": 2.0359, |
|
"step": 1424000 |
|
}, |
|
{ |
|
"epoch": 29.17, |
|
"eval_loss": 1.9077860116958618, |
|
"eval_runtime": 47.7404, |
|
"eval_samples_per_second": 860.949, |
|
"eval_steps_per_second": 53.812, |
|
"step": 1424000 |
|
}, |
|
{ |
|
"epoch": 29.34, |
|
"eval_loss": 1.8961683511734009, |
|
"eval_runtime": 48.4809, |
|
"eval_samples_per_second": 847.798, |
|
"eval_steps_per_second": 52.99, |
|
"step": 1432000 |
|
}, |
|
{ |
|
"epoch": 29.5, |
|
"learning_rate": 1.64e-07, |
|
"loss": 2.0357, |
|
"step": 1440000 |
|
}, |
|
{ |
|
"epoch": 29.5, |
|
"eval_loss": 1.8842545747756958, |
|
"eval_runtime": 46.6243, |
|
"eval_samples_per_second": 881.558, |
|
"eval_steps_per_second": 55.1, |
|
"step": 1440000 |
|
}, |
|
{ |
|
"epoch": 29.67, |
|
"eval_loss": 1.9157154560089111, |
|
"eval_runtime": 47.0518, |
|
"eval_samples_per_second": 873.548, |
|
"eval_steps_per_second": 54.599, |
|
"step": 1448000 |
|
}, |
|
{ |
|
"epoch": 29.83, |
|
"learning_rate": 1.6126666666666667e-07, |
|
"loss": 2.0367, |
|
"step": 1456000 |
|
}, |
|
{ |
|
"epoch": 29.83, |
|
"eval_loss": 1.9277722835540771, |
|
"eval_runtime": 48.4635, |
|
"eval_samples_per_second": 848.102, |
|
"eval_steps_per_second": 53.009, |
|
"step": 1456000 |
|
}, |
|
{ |
|
"epoch": 29.99, |
|
"eval_loss": 1.900920033454895, |
|
"eval_runtime": 48.4837, |
|
"eval_samples_per_second": 847.75, |
|
"eval_steps_per_second": 52.987, |
|
"step": 1464000 |
|
}, |
|
{ |
|
"epoch": 30.16, |
|
"learning_rate": 1.5853333333333332e-07, |
|
"loss": 2.0442, |
|
"step": 1472000 |
|
}, |
|
{ |
|
"epoch": 30.16, |
|
"eval_loss": 1.8968621492385864, |
|
"eval_runtime": 47.5534, |
|
"eval_samples_per_second": 864.334, |
|
"eval_steps_per_second": 54.024, |
|
"step": 1472000 |
|
}, |
|
{ |
|
"epoch": 30.32, |
|
"eval_loss": 1.908553123474121, |
|
"eval_runtime": 49.61, |
|
"eval_samples_per_second": 828.502, |
|
"eval_steps_per_second": 51.784, |
|
"step": 1480000 |
|
}, |
|
{ |
|
"epoch": 30.49, |
|
"learning_rate": 1.558e-07, |
|
"loss": 2.0401, |
|
"step": 1488000 |
|
}, |
|
{ |
|
"epoch": 30.49, |
|
"eval_loss": 1.9059042930603027, |
|
"eval_runtime": 47.0678, |
|
"eval_samples_per_second": 873.252, |
|
"eval_steps_per_second": 54.581, |
|
"step": 1488000 |
|
}, |
|
{ |
|
"epoch": 30.65, |
|
"eval_loss": 1.8996624946594238, |
|
"eval_runtime": 46.6534, |
|
"eval_samples_per_second": 881.007, |
|
"eval_steps_per_second": 55.066, |
|
"step": 1496000 |
|
}, |
|
{ |
|
"epoch": 30.81, |
|
"learning_rate": 1.5306666666666666e-07, |
|
"loss": 2.0293, |
|
"step": 1504000 |
|
}, |
|
{ |
|
"epoch": 30.81, |
|
"eval_loss": 1.9013988971710205, |
|
"eval_runtime": 48.769, |
|
"eval_samples_per_second": 842.79, |
|
"eval_steps_per_second": 52.677, |
|
"step": 1504000 |
|
}, |
|
{ |
|
"epoch": 30.98, |
|
"eval_loss": 1.8971112966537476, |
|
"eval_runtime": 46.4191, |
|
"eval_samples_per_second": 885.455, |
|
"eval_steps_per_second": 55.344, |
|
"step": 1512000 |
|
}, |
|
{ |
|
"epoch": 31.14, |
|
"learning_rate": 1.5033333333333332e-07, |
|
"loss": 2.035, |
|
"step": 1520000 |
|
}, |
|
{ |
|
"epoch": 31.14, |
|
"eval_loss": 1.911407470703125, |
|
"eval_runtime": 48.2406, |
|
"eval_samples_per_second": 852.021, |
|
"eval_steps_per_second": 53.254, |
|
"step": 1520000 |
|
}, |
|
{ |
|
"epoch": 31.31, |
|
"eval_loss": 1.9107669591903687, |
|
"eval_runtime": 48.5327, |
|
"eval_samples_per_second": 846.893, |
|
"eval_steps_per_second": 52.933, |
|
"step": 1528000 |
|
}, |
|
{ |
|
"epoch": 31.47, |
|
"learning_rate": 1.476e-07, |
|
"loss": 2.0389, |
|
"step": 1536000 |
|
}, |
|
{ |
|
"epoch": 31.47, |
|
"eval_loss": 1.897054672241211, |
|
"eval_runtime": 47.3694, |
|
"eval_samples_per_second": 867.69, |
|
"eval_steps_per_second": 54.233, |
|
"step": 1536000 |
|
}, |
|
{ |
|
"epoch": 31.63, |
|
"eval_loss": 1.9081885814666748, |
|
"eval_runtime": 47.7941, |
|
"eval_samples_per_second": 859.98, |
|
"eval_steps_per_second": 53.751, |
|
"step": 1544000 |
|
}, |
|
{ |
|
"epoch": 31.8, |
|
"learning_rate": 1.4486666666666665e-07, |
|
"loss": 2.0346, |
|
"step": 1552000 |
|
}, |
|
{ |
|
"epoch": 31.8, |
|
"eval_loss": 1.9208694696426392, |
|
"eval_runtime": 47.5276, |
|
"eval_samples_per_second": 864.802, |
|
"eval_steps_per_second": 54.053, |
|
"step": 1552000 |
|
}, |
|
{ |
|
"epoch": 31.96, |
|
"eval_loss": 1.9018235206604004, |
|
"eval_runtime": 48.2326, |
|
"eval_samples_per_second": 852.162, |
|
"eval_steps_per_second": 53.263, |
|
"step": 1560000 |
|
}, |
|
{ |
|
"epoch": 32.13, |
|
"learning_rate": 1.4213333333333334e-07, |
|
"loss": 2.0428, |
|
"step": 1568000 |
|
}, |
|
{ |
|
"epoch": 32.13, |
|
"eval_loss": 1.8987597227096558, |
|
"eval_runtime": 48.8575, |
|
"eval_samples_per_second": 841.263, |
|
"eval_steps_per_second": 52.581, |
|
"step": 1568000 |
|
}, |
|
{ |
|
"epoch": 32.29, |
|
"eval_loss": 1.9089306592941284, |
|
"eval_runtime": 48.142, |
|
"eval_samples_per_second": 853.766, |
|
"eval_steps_per_second": 53.363, |
|
"step": 1576000 |
|
}, |
|
{ |
|
"epoch": 32.45, |
|
"learning_rate": 1.3940000000000002e-07, |
|
"loss": 2.0286, |
|
"step": 1584000 |
|
}, |
|
{ |
|
"epoch": 32.45, |
|
"eval_loss": 1.8982652425765991, |
|
"eval_runtime": 47.0839, |
|
"eval_samples_per_second": 872.953, |
|
"eval_steps_per_second": 54.562, |
|
"step": 1584000 |
|
}, |
|
{ |
|
"epoch": 32.62, |
|
"eval_loss": 1.9029755592346191, |
|
"eval_runtime": 47.4849, |
|
"eval_samples_per_second": 865.581, |
|
"eval_steps_per_second": 54.101, |
|
"step": 1592000 |
|
}, |
|
{ |
|
"epoch": 32.78, |
|
"learning_rate": 1.3666666666666665e-07, |
|
"loss": 2.037, |
|
"step": 1600000 |
|
}, |
|
{ |
|
"epoch": 32.78, |
|
"eval_loss": 1.9041838645935059, |
|
"eval_runtime": 47.6583, |
|
"eval_samples_per_second": 862.431, |
|
"eval_steps_per_second": 53.905, |
|
"step": 1600000 |
|
}, |
|
{ |
|
"epoch": 32.94, |
|
"eval_loss": 1.9021574258804321, |
|
"eval_runtime": 47.636, |
|
"eval_samples_per_second": 862.836, |
|
"eval_steps_per_second": 53.93, |
|
"step": 1608000 |
|
}, |
|
{ |
|
"epoch": 33.11, |
|
"learning_rate": 1.3393333333333333e-07, |
|
"loss": 2.0348, |
|
"step": 1616000 |
|
}, |
|
{ |
|
"epoch": 33.11, |
|
"eval_loss": 1.8987573385238647, |
|
"eval_runtime": 48.3458, |
|
"eval_samples_per_second": 850.167, |
|
"eval_steps_per_second": 53.138, |
|
"step": 1616000 |
|
}, |
|
{ |
|
"epoch": 33.27, |
|
"eval_loss": 1.9159427881240845, |
|
"eval_runtime": 48.9467, |
|
"eval_samples_per_second": 839.73, |
|
"eval_steps_per_second": 52.486, |
|
"step": 1624000 |
|
}, |
|
{ |
|
"epoch": 33.44, |
|
"learning_rate": 1.312e-07, |
|
"loss": 2.042, |
|
"step": 1632000 |
|
}, |
|
{ |
|
"epoch": 33.44, |
|
"eval_loss": 1.8933523893356323, |
|
"eval_runtime": 48.1661, |
|
"eval_samples_per_second": 853.339, |
|
"eval_steps_per_second": 53.336, |
|
"step": 1632000 |
|
}, |
|
{ |
|
"epoch": 33.6, |
|
"eval_loss": 1.890829086303711, |
|
"eval_runtime": 47.8234, |
|
"eval_samples_per_second": 859.454, |
|
"eval_steps_per_second": 53.719, |
|
"step": 1640000 |
|
}, |
|
{ |
|
"epoch": 33.76, |
|
"learning_rate": 1.2846666666666667e-07, |
|
"loss": 2.0426, |
|
"step": 1648000 |
|
}, |
|
{ |
|
"epoch": 33.76, |
|
"eval_loss": 1.88780677318573, |
|
"eval_runtime": 48.4427, |
|
"eval_samples_per_second": 848.466, |
|
"eval_steps_per_second": 53.032, |
|
"step": 1648000 |
|
}, |
|
{ |
|
"epoch": 33.93, |
|
"eval_loss": 1.8881806135177612, |
|
"eval_runtime": 48.0347, |
|
"eval_samples_per_second": 855.674, |
|
"eval_steps_per_second": 53.482, |
|
"step": 1656000 |
|
}, |
|
{ |
|
"epoch": 34.09, |
|
"learning_rate": 1.2573333333333332e-07, |
|
"loss": 2.0293, |
|
"step": 1664000 |
|
}, |
|
{ |
|
"epoch": 34.09, |
|
"eval_loss": 1.9031046628952026, |
|
"eval_runtime": 48.0682, |
|
"eval_samples_per_second": 855.077, |
|
"eval_steps_per_second": 53.445, |
|
"step": 1664000 |
|
}, |
|
{ |
|
"epoch": 34.26, |
|
"eval_loss": 1.9005811214447021, |
|
"eval_runtime": 48.7309, |
|
"eval_samples_per_second": 843.448, |
|
"eval_steps_per_second": 52.718, |
|
"step": 1672000 |
|
}, |
|
{ |
|
"epoch": 34.42, |
|
"learning_rate": 1.23e-07, |
|
"loss": 2.0401, |
|
"step": 1680000 |
|
}, |
|
{ |
|
"epoch": 34.42, |
|
"eval_loss": 1.9066253900527954, |
|
"eval_runtime": 52.1229, |
|
"eval_samples_per_second": 788.56, |
|
"eval_steps_per_second": 49.287, |
|
"step": 1680000 |
|
}, |
|
{ |
|
"epoch": 34.58, |
|
"eval_loss": 1.8970048427581787, |
|
"eval_runtime": 48.6499, |
|
"eval_samples_per_second": 844.853, |
|
"eval_steps_per_second": 52.806, |
|
"step": 1688000 |
|
}, |
|
{ |
|
"epoch": 34.75, |
|
"learning_rate": 1.2026666666666666e-07, |
|
"loss": 2.0315, |
|
"step": 1696000 |
|
}, |
|
{ |
|
"epoch": 34.75, |
|
"eval_loss": 1.895332932472229, |
|
"eval_runtime": 47.3522, |
|
"eval_samples_per_second": 868.006, |
|
"eval_steps_per_second": 54.253, |
|
"step": 1696000 |
|
}, |
|
{ |
|
"epoch": 34.91, |
|
"eval_loss": 1.9029524326324463, |
|
"eval_runtime": 48.1027, |
|
"eval_samples_per_second": 854.464, |
|
"eval_steps_per_second": 53.407, |
|
"step": 1704000 |
|
}, |
|
{ |
|
"epoch": 35.08, |
|
"learning_rate": 1.1753333333333334e-07, |
|
"loss": 2.0393, |
|
"step": 1712000 |
|
}, |
|
{ |
|
"epoch": 35.08, |
|
"eval_loss": 1.908921480178833, |
|
"eval_runtime": 49.5777, |
|
"eval_samples_per_second": 829.043, |
|
"eval_steps_per_second": 51.818, |
|
"step": 1712000 |
|
}, |
|
{ |
|
"epoch": 35.24, |
|
"eval_loss": 1.9037113189697266, |
|
"eval_runtime": 46.6983, |
|
"eval_samples_per_second": 880.16, |
|
"eval_steps_per_second": 55.013, |
|
"step": 1720000 |
|
}, |
|
{ |
|
"epoch": 35.4, |
|
"learning_rate": 1.1480000000000001e-07, |
|
"loss": 2.0422, |
|
"step": 1728000 |
|
}, |
|
{ |
|
"epoch": 35.4, |
|
"eval_loss": 1.8991347551345825, |
|
"eval_runtime": 50.2373, |
|
"eval_samples_per_second": 818.157, |
|
"eval_steps_per_second": 51.137, |
|
"step": 1728000 |
|
}, |
|
{ |
|
"epoch": 35.57, |
|
"eval_loss": 1.8825979232788086, |
|
"eval_runtime": 48.4051, |
|
"eval_samples_per_second": 849.125, |
|
"eval_steps_per_second": 53.073, |
|
"step": 1736000 |
|
}, |
|
{ |
|
"epoch": 35.73, |
|
"learning_rate": 1.1206666666666666e-07, |
|
"loss": 2.0307, |
|
"step": 1744000 |
|
}, |
|
{ |
|
"epoch": 35.73, |
|
"eval_loss": 1.9027307033538818, |
|
"eval_runtime": 48.1077, |
|
"eval_samples_per_second": 854.375, |
|
"eval_steps_per_second": 53.401, |
|
"step": 1744000 |
|
}, |
|
{ |
|
"epoch": 35.9, |
|
"eval_loss": 1.9172565937042236, |
|
"eval_runtime": 52.2685, |
|
"eval_samples_per_second": 786.363, |
|
"eval_steps_per_second": 49.15, |
|
"step": 1752000 |
|
}, |
|
{ |
|
"epoch": 36.06, |
|
"learning_rate": 1.0933333333333333e-07, |
|
"loss": 2.0414, |
|
"step": 1760000 |
|
}, |
|
{ |
|
"epoch": 36.06, |
|
"eval_loss": 1.9065029621124268, |
|
"eval_runtime": 46.9419, |
|
"eval_samples_per_second": 875.593, |
|
"eval_steps_per_second": 54.727, |
|
"step": 1760000 |
|
}, |
|
{ |
|
"epoch": 36.22, |
|
"eval_loss": 1.8984663486480713, |
|
"eval_runtime": 46.6191, |
|
"eval_samples_per_second": 881.656, |
|
"eval_steps_per_second": 55.106, |
|
"step": 1768000 |
|
}, |
|
{ |
|
"epoch": 36.39, |
|
"learning_rate": 1.066e-07, |
|
"loss": 2.0393, |
|
"step": 1776000 |
|
}, |
|
{ |
|
"epoch": 36.39, |
|
"eval_loss": 1.884873628616333, |
|
"eval_runtime": 48.406, |
|
"eval_samples_per_second": 849.11, |
|
"eval_steps_per_second": 53.072, |
|
"step": 1776000 |
|
}, |
|
{ |
|
"epoch": 36.55, |
|
"eval_loss": 1.8944772481918335, |
|
"eval_runtime": 47.82, |
|
"eval_samples_per_second": 859.514, |
|
"eval_steps_per_second": 53.722, |
|
"step": 1784000 |
|
}, |
|
{ |
|
"epoch": 36.71, |
|
"learning_rate": 1.0386666666666667e-07, |
|
"loss": 2.0302, |
|
"step": 1792000 |
|
}, |
|
{ |
|
"epoch": 36.71, |
|
"eval_loss": 1.8962441682815552, |
|
"eval_runtime": 47.8175, |
|
"eval_samples_per_second": 859.56, |
|
"eval_steps_per_second": 53.725, |
|
"step": 1792000 |
|
}, |
|
{ |
|
"epoch": 36.88, |
|
"eval_loss": 1.901509165763855, |
|
"eval_runtime": 48.8399, |
|
"eval_samples_per_second": 841.566, |
|
"eval_steps_per_second": 52.6, |
|
"step": 1800000 |
|
}, |
|
{ |
|
"epoch": 37.04, |
|
"learning_rate": 1.0113333333333334e-07, |
|
"loss": 2.0344, |
|
"step": 1808000 |
|
}, |
|
{ |
|
"epoch": 37.04, |
|
"eval_loss": 1.8923826217651367, |
|
"eval_runtime": 48.2438, |
|
"eval_samples_per_second": 851.964, |
|
"eval_steps_per_second": 53.25, |
|
"step": 1808000 |
|
}, |
|
{ |
|
"epoch": 37.21, |
|
"eval_loss": 1.8804473876953125, |
|
"eval_runtime": 46.9418, |
|
"eval_samples_per_second": 875.595, |
|
"eval_steps_per_second": 54.727, |
|
"step": 1816000 |
|
}, |
|
{ |
|
"epoch": 37.37, |
|
"learning_rate": 9.84e-08, |
|
"loss": 2.0303, |
|
"step": 1824000 |
|
}, |
|
{ |
|
"epoch": 37.37, |
|
"eval_loss": 1.894727349281311, |
|
"eval_runtime": 49.8927, |
|
"eval_samples_per_second": 823.808, |
|
"eval_steps_per_second": 51.491, |
|
"step": 1824000 |
|
}, |
|
{ |
|
"epoch": 37.53, |
|
"eval_loss": 1.891680121421814, |
|
"eval_runtime": 47.928, |
|
"eval_samples_per_second": 857.579, |
|
"eval_steps_per_second": 53.601, |
|
"step": 1832000 |
|
}, |
|
{ |
|
"epoch": 37.7, |
|
"learning_rate": 9.566666666666666e-08, |
|
"loss": 2.0318, |
|
"step": 1840000 |
|
}, |
|
{ |
|
"epoch": 37.7, |
|
"eval_loss": 1.8992102146148682, |
|
"eval_runtime": 46.9913, |
|
"eval_samples_per_second": 874.673, |
|
"eval_steps_per_second": 54.67, |
|
"step": 1840000 |
|
}, |
|
{ |
|
"epoch": 37.86, |
|
"eval_loss": 1.901772379875183, |
|
"eval_runtime": 48.5091, |
|
"eval_samples_per_second": 847.306, |
|
"eval_steps_per_second": 52.959, |
|
"step": 1848000 |
|
}, |
|
{ |
|
"epoch": 38.03, |
|
"learning_rate": 9.293333333333333e-08, |
|
"loss": 2.039, |
|
"step": 1856000 |
|
}, |
|
{ |
|
"epoch": 38.03, |
|
"eval_loss": 1.8903098106384277, |
|
"eval_runtime": 49.839, |
|
"eval_samples_per_second": 824.695, |
|
"eval_steps_per_second": 51.546, |
|
"step": 1856000 |
|
}, |
|
{ |
|
"epoch": 38.19, |
|
"eval_loss": 1.9088859558105469, |
|
"eval_runtime": 49.3732, |
|
"eval_samples_per_second": 832.477, |
|
"eval_steps_per_second": 52.032, |
|
"step": 1864000 |
|
}, |
|
{ |
|
"epoch": 38.35, |
|
"learning_rate": 9.02e-08, |
|
"loss": 2.0319, |
|
"step": 1872000 |
|
}, |
|
{ |
|
"epoch": 38.35, |
|
"eval_loss": 1.9092743396759033, |
|
"eval_runtime": 47.4891, |
|
"eval_samples_per_second": 865.505, |
|
"eval_steps_per_second": 54.097, |
|
"step": 1872000 |
|
}, |
|
{ |
|
"epoch": 38.52, |
|
"eval_loss": 1.895071268081665, |
|
"eval_runtime": 50.6752, |
|
"eval_samples_per_second": 811.087, |
|
"eval_steps_per_second": 50.695, |
|
"step": 1880000 |
|
}, |
|
{ |
|
"epoch": 38.68, |
|
"learning_rate": 8.746666666666667e-08, |
|
"loss": 2.0359, |
|
"step": 1888000 |
|
}, |
|
{ |
|
"epoch": 38.68, |
|
"eval_loss": 1.8970472812652588, |
|
"eval_runtime": 47.92, |
|
"eval_samples_per_second": 857.721, |
|
"eval_steps_per_second": 53.61, |
|
"step": 1888000 |
|
}, |
|
{ |
|
"epoch": 38.85, |
|
"eval_loss": 1.89948308467865, |
|
"eval_runtime": 49.1024, |
|
"eval_samples_per_second": 837.067, |
|
"eval_steps_per_second": 52.319, |
|
"step": 1896000 |
|
}, |
|
{ |
|
"epoch": 39.01, |
|
"learning_rate": 8.473333333333334e-08, |
|
"loss": 2.0353, |
|
"step": 1904000 |
|
}, |
|
{ |
|
"epoch": 39.01, |
|
"eval_loss": 1.8887970447540283, |
|
"eval_runtime": 49.8221, |
|
"eval_samples_per_second": 824.975, |
|
"eval_steps_per_second": 51.563, |
|
"step": 1904000 |
|
}, |
|
{ |
|
"epoch": 39.17, |
|
"eval_loss": 1.914867877960205, |
|
"eval_runtime": 47.1415, |
|
"eval_samples_per_second": 871.885, |
|
"eval_steps_per_second": 54.495, |
|
"step": 1912000 |
|
}, |
|
{ |
|
"epoch": 39.34, |
|
"learning_rate": 8.2e-08, |
|
"loss": 2.0343, |
|
"step": 1920000 |
|
}, |
|
{ |
|
"epoch": 39.34, |
|
"eval_loss": 1.8881230354309082, |
|
"eval_runtime": 48.2273, |
|
"eval_samples_per_second": 852.256, |
|
"eval_steps_per_second": 53.269, |
|
"step": 1920000 |
|
}, |
|
{ |
|
"epoch": 39.5, |
|
"eval_loss": 1.8935189247131348, |
|
"eval_runtime": 49.8823, |
|
"eval_samples_per_second": 823.979, |
|
"eval_steps_per_second": 51.501, |
|
"step": 1928000 |
|
}, |
|
{ |
|
"epoch": 39.66, |
|
"learning_rate": 7.926666666666666e-08, |
|
"loss": 2.0395, |
|
"step": 1936000 |
|
}, |
|
{ |
|
"epoch": 39.66, |
|
"eval_loss": 1.8938074111938477, |
|
"eval_runtime": 49.2266, |
|
"eval_samples_per_second": 834.956, |
|
"eval_steps_per_second": 52.187, |
|
"step": 1936000 |
|
}, |
|
{ |
|
"epoch": 39.83, |
|
"eval_loss": 1.8929249048233032, |
|
"eval_runtime": 47.6917, |
|
"eval_samples_per_second": 861.827, |
|
"eval_steps_per_second": 53.867, |
|
"step": 1944000 |
|
}, |
|
{ |
|
"epoch": 39.99, |
|
"learning_rate": 7.653333333333333e-08, |
|
"loss": 2.0316, |
|
"step": 1952000 |
|
}, |
|
{ |
|
"epoch": 39.99, |
|
"eval_loss": 1.9186030626296997, |
|
"eval_runtime": 49.8739, |
|
"eval_samples_per_second": 824.118, |
|
"eval_steps_per_second": 51.51, |
|
"step": 1952000 |
|
}, |
|
{ |
|
"epoch": 40.16, |
|
"eval_loss": 1.9189401865005493, |
|
"eval_runtime": 50.0489, |
|
"eval_samples_per_second": 821.238, |
|
"eval_steps_per_second": 51.33, |
|
"step": 1960000 |
|
}, |
|
{ |
|
"epoch": 40.32, |
|
"learning_rate": 7.38e-08, |
|
"loss": 2.0302, |
|
"step": 1968000 |
|
}, |
|
{ |
|
"epoch": 40.32, |
|
"eval_loss": 1.9124609231948853, |
|
"eval_runtime": 52.3286, |
|
"eval_samples_per_second": 785.459, |
|
"eval_steps_per_second": 49.094, |
|
"step": 1968000 |
|
}, |
|
{ |
|
"epoch": 40.48, |
|
"eval_loss": 1.9077569246292114, |
|
"eval_runtime": 48.6653, |
|
"eval_samples_per_second": 844.585, |
|
"eval_steps_per_second": 52.789, |
|
"step": 1976000 |
|
}, |
|
{ |
|
"epoch": 40.65, |
|
"learning_rate": 7.106666666666667e-08, |
|
"loss": 2.0355, |
|
"step": 1984000 |
|
}, |
|
{ |
|
"epoch": 40.65, |
|
"eval_loss": 1.8975083827972412, |
|
"eval_runtime": 48.5464, |
|
"eval_samples_per_second": 846.654, |
|
"eval_steps_per_second": 52.918, |
|
"step": 1984000 |
|
}, |
|
{ |
|
"epoch": 40.81, |
|
"eval_loss": 1.892892837524414, |
|
"eval_runtime": 49.649, |
|
"eval_samples_per_second": 827.852, |
|
"eval_steps_per_second": 51.743, |
|
"step": 1992000 |
|
}, |
|
{ |
|
"epoch": 40.98, |
|
"learning_rate": 6.833333333333332e-08, |
|
"loss": 2.0332, |
|
"step": 2000000 |
|
}, |
|
{ |
|
"epoch": 40.98, |
|
"eval_loss": 1.8898950815200806, |
|
"eval_runtime": 48.8626, |
|
"eval_samples_per_second": 841.175, |
|
"eval_steps_per_second": 52.576, |
|
"step": 2000000 |
|
}, |
|
{ |
|
"epoch": 41.14, |
|
"eval_loss": 1.9043201208114624, |
|
"eval_runtime": 49.3883, |
|
"eval_samples_per_second": 832.221, |
|
"eval_steps_per_second": 52.016, |
|
"step": 2008000 |
|
}, |
|
{ |
|
"epoch": 41.3, |
|
"learning_rate": 6.56e-08, |
|
"loss": 2.0327, |
|
"step": 2016000 |
|
}, |
|
{ |
|
"epoch": 41.3, |
|
"eval_loss": 1.9086461067199707, |
|
"eval_runtime": 48.958, |
|
"eval_samples_per_second": 839.535, |
|
"eval_steps_per_second": 52.474, |
|
"step": 2016000 |
|
}, |
|
{ |
|
"epoch": 41.47, |
|
"eval_loss": 1.894409418106079, |
|
"eval_runtime": 50.95, |
|
"eval_samples_per_second": 806.713, |
|
"eval_steps_per_second": 50.422, |
|
"step": 2024000 |
|
}, |
|
{ |
|
"epoch": 41.63, |
|
"learning_rate": 6.286666666666666e-08, |
|
"loss": 2.0414, |
|
"step": 2032000 |
|
}, |
|
{ |
|
"epoch": 41.63, |
|
"eval_loss": 1.9029189348220825, |
|
"eval_runtime": 48.6407, |
|
"eval_samples_per_second": 845.013, |
|
"eval_steps_per_second": 52.816, |
|
"step": 2032000 |
|
}, |
|
{ |
|
"epoch": 41.8, |
|
"eval_loss": 1.8990041017532349, |
|
"eval_runtime": 48.9601, |
|
"eval_samples_per_second": 839.5, |
|
"eval_steps_per_second": 52.471, |
|
"step": 2040000 |
|
}, |
|
{ |
|
"epoch": 41.96, |
|
"learning_rate": 6.013333333333333e-08, |
|
"loss": 2.0327, |
|
"step": 2048000 |
|
}, |
|
{ |
|
"epoch": 41.96, |
|
"eval_loss": 1.9174869060516357, |
|
"eval_runtime": 47.4446, |
|
"eval_samples_per_second": 866.315, |
|
"eval_steps_per_second": 54.147, |
|
"step": 2048000 |
|
}, |
|
{ |
|
"epoch": 42.12, |
|
"eval_loss": 1.88877534866333, |
|
"eval_runtime": 48.6939, |
|
"eval_samples_per_second": 844.09, |
|
"eval_steps_per_second": 52.758, |
|
"step": 2056000 |
|
}, |
|
{ |
|
"epoch": 42.29, |
|
"learning_rate": 5.7400000000000004e-08, |
|
"loss": 2.0428, |
|
"step": 2064000 |
|
}, |
|
{ |
|
"epoch": 42.29, |
|
"eval_loss": 1.8971478939056396, |
|
"eval_runtime": 47.9266, |
|
"eval_samples_per_second": 857.603, |
|
"eval_steps_per_second": 53.603, |
|
"step": 2064000 |
|
}, |
|
{ |
|
"epoch": 42.45, |
|
"eval_loss": 1.90713369846344, |
|
"eval_runtime": 49.0676, |
|
"eval_samples_per_second": 837.661, |
|
"eval_steps_per_second": 52.356, |
|
"step": 2072000 |
|
}, |
|
{ |
|
"epoch": 42.62, |
|
"learning_rate": 5.4666666666666666e-08, |
|
"loss": 2.0337, |
|
"step": 2080000 |
|
}, |
|
{ |
|
"epoch": 42.62, |
|
"eval_loss": 1.8967806100845337, |
|
"eval_runtime": 50.5362, |
|
"eval_samples_per_second": 813.318, |
|
"eval_steps_per_second": 50.835, |
|
"step": 2080000 |
|
}, |
|
{ |
|
"epoch": 42.78, |
|
"eval_loss": 1.9059593677520752, |
|
"eval_runtime": 48.8076, |
|
"eval_samples_per_second": 842.122, |
|
"eval_steps_per_second": 52.635, |
|
"step": 2088000 |
|
}, |
|
{ |
|
"epoch": 42.94, |
|
"learning_rate": 5.1933333333333335e-08, |
|
"loss": 2.0394, |
|
"step": 2096000 |
|
}, |
|
{ |
|
"epoch": 42.94, |
|
"eval_loss": 1.902763843536377, |
|
"eval_runtime": 48.599, |
|
"eval_samples_per_second": 845.737, |
|
"eval_steps_per_second": 52.861, |
|
"step": 2096000 |
|
}, |
|
{ |
|
"epoch": 43.11, |
|
"eval_loss": 1.8916599750518799, |
|
"eval_runtime": 50.6067, |
|
"eval_samples_per_second": 812.185, |
|
"eval_steps_per_second": 50.764, |
|
"step": 2104000 |
|
}, |
|
{ |
|
"epoch": 43.27, |
|
"learning_rate": 4.92e-08, |
|
"loss": 2.0314, |
|
"step": 2112000 |
|
}, |
|
{ |
|
"epoch": 43.27, |
|
"eval_loss": 1.8868290185928345, |
|
"eval_runtime": 50.3637, |
|
"eval_samples_per_second": 816.103, |
|
"eval_steps_per_second": 51.009, |
|
"step": 2112000 |
|
}, |
|
{ |
|
"epoch": 43.43, |
|
"eval_loss": 1.9086658954620361, |
|
"eval_runtime": 47.3253, |
|
"eval_samples_per_second": 868.499, |
|
"eval_steps_per_second": 54.284, |
|
"step": 2120000 |
|
}, |
|
{ |
|
"epoch": 43.6, |
|
"learning_rate": 4.6466666666666666e-08, |
|
"loss": 2.0368, |
|
"step": 2128000 |
|
}, |
|
{ |
|
"epoch": 43.6, |
|
"eval_loss": 1.8951292037963867, |
|
"eval_runtime": 48.361, |
|
"eval_samples_per_second": 849.9, |
|
"eval_steps_per_second": 53.121, |
|
"step": 2128000 |
|
}, |
|
{ |
|
"epoch": 43.76, |
|
"eval_loss": 1.893837809562683, |
|
"eval_runtime": 47.3135, |
|
"eval_samples_per_second": 868.717, |
|
"eval_steps_per_second": 54.297, |
|
"step": 2136000 |
|
}, |
|
{ |
|
"epoch": 43.93, |
|
"learning_rate": 4.3733333333333335e-08, |
|
"loss": 2.0298, |
|
"step": 2144000 |
|
}, |
|
{ |
|
"epoch": 43.93, |
|
"eval_loss": 1.8874099254608154, |
|
"eval_runtime": 47.3514, |
|
"eval_samples_per_second": 868.022, |
|
"eval_steps_per_second": 54.254, |
|
"step": 2144000 |
|
}, |
|
{ |
|
"epoch": 44.09, |
|
"eval_loss": 1.90646493434906, |
|
"eval_runtime": 49.5581, |
|
"eval_samples_per_second": 829.37, |
|
"eval_steps_per_second": 51.838, |
|
"step": 2152000 |
|
}, |
|
{ |
|
"epoch": 44.25, |
|
"learning_rate": 4.1e-08, |
|
"loss": 2.0353, |
|
"step": 2160000 |
|
}, |
|
{ |
|
"epoch": 44.25, |
|
"eval_loss": 1.9097111225128174, |
|
"eval_runtime": 46.9959, |
|
"eval_samples_per_second": 874.587, |
|
"eval_steps_per_second": 54.664, |
|
"step": 2160000 |
|
}, |
|
{ |
|
"epoch": 44.42, |
|
"eval_loss": 1.8985484838485718, |
|
"eval_runtime": 48.6536, |
|
"eval_samples_per_second": 844.788, |
|
"eval_steps_per_second": 52.802, |
|
"step": 2168000 |
|
}, |
|
{ |
|
"epoch": 44.58, |
|
"learning_rate": 3.8266666666666665e-08, |
|
"loss": 2.0324, |
|
"step": 2176000 |
|
}, |
|
{ |
|
"epoch": 44.58, |
|
"eval_loss": 1.9160007238388062, |
|
"eval_runtime": 49.8464, |
|
"eval_samples_per_second": 824.574, |
|
"eval_steps_per_second": 51.538, |
|
"step": 2176000 |
|
}, |
|
{ |
|
"epoch": 44.75, |
|
"eval_loss": 1.9059538841247559, |
|
"eval_runtime": 49.9186, |
|
"eval_samples_per_second": 823.38, |
|
"eval_steps_per_second": 51.464, |
|
"step": 2184000 |
|
}, |
|
{ |
|
"epoch": 44.91, |
|
"learning_rate": 3.5533333333333334e-08, |
|
"loss": 2.0316, |
|
"step": 2192000 |
|
}, |
|
{ |
|
"epoch": 44.91, |
|
"eval_loss": 1.8911948204040527, |
|
"eval_runtime": 47.2346, |
|
"eval_samples_per_second": 870.167, |
|
"eval_steps_per_second": 54.388, |
|
"step": 2192000 |
|
}, |
|
{ |
|
"epoch": 45.07, |
|
"eval_loss": 1.9013826847076416, |
|
"eval_runtime": 47.9929, |
|
"eval_samples_per_second": 856.419, |
|
"eval_steps_per_second": 53.529, |
|
"step": 2200000 |
|
}, |
|
{ |
|
"epoch": 45.24, |
|
"learning_rate": 3.28e-08, |
|
"loss": 2.0322, |
|
"step": 2208000 |
|
}, |
|
{ |
|
"epoch": 45.24, |
|
"eval_loss": 1.903083324432373, |
|
"eval_runtime": 47.6274, |
|
"eval_samples_per_second": 862.991, |
|
"eval_steps_per_second": 53.94, |
|
"step": 2208000 |
|
}, |
|
{ |
|
"epoch": 45.4, |
|
"eval_loss": 1.9086343050003052, |
|
"eval_runtime": 47.838, |
|
"eval_samples_per_second": 859.191, |
|
"eval_steps_per_second": 53.702, |
|
"step": 2216000 |
|
}, |
|
{ |
|
"epoch": 45.57, |
|
"learning_rate": 3.0066666666666665e-08, |
|
"loss": 2.035, |
|
"step": 2224000 |
|
}, |
|
{ |
|
"epoch": 45.57, |
|
"eval_loss": 1.9147051572799683, |
|
"eval_runtime": 47.9056, |
|
"eval_samples_per_second": 857.978, |
|
"eval_steps_per_second": 53.626, |
|
"step": 2224000 |
|
}, |
|
{ |
|
"epoch": 45.73, |
|
"eval_loss": 1.900397777557373, |
|
"eval_runtime": 48.7638, |
|
"eval_samples_per_second": 842.879, |
|
"eval_steps_per_second": 52.682, |
|
"step": 2232000 |
|
}, |
|
{ |
|
"epoch": 45.89, |
|
"learning_rate": 2.7333333333333333e-08, |
|
"loss": 2.0431, |
|
"step": 2240000 |
|
}, |
|
{ |
|
"epoch": 45.89, |
|
"eval_loss": 1.9027053117752075, |
|
"eval_runtime": 47.5248, |
|
"eval_samples_per_second": 864.854, |
|
"eval_steps_per_second": 54.056, |
|
"step": 2240000 |
|
}, |
|
{ |
|
"epoch": 46.06, |
|
"eval_loss": 1.891597032546997, |
|
"eval_runtime": 48.2801, |
|
"eval_samples_per_second": 851.323, |
|
"eval_steps_per_second": 53.21, |
|
"step": 2248000 |
|
}, |
|
{ |
|
"epoch": 46.22, |
|
"learning_rate": 2.46e-08, |
|
"loss": 2.0347, |
|
"step": 2256000 |
|
}, |
|
{ |
|
"epoch": 46.22, |
|
"eval_loss": 1.9048635959625244, |
|
"eval_runtime": 48.7746, |
|
"eval_samples_per_second": 842.693, |
|
"eval_steps_per_second": 52.671, |
|
"step": 2256000 |
|
}, |
|
{ |
|
"epoch": 46.38, |
|
"eval_loss": 1.8972582817077637, |
|
"eval_runtime": 48.3007, |
|
"eval_samples_per_second": 850.96, |
|
"eval_steps_per_second": 53.188, |
|
"step": 2264000 |
|
}, |
|
{ |
|
"epoch": 46.55, |
|
"learning_rate": 2.1866666666666667e-08, |
|
"loss": 2.0353, |
|
"step": 2272000 |
|
}, |
|
{ |
|
"epoch": 46.55, |
|
"eval_loss": 1.8931912183761597, |
|
"eval_runtime": 49.2437, |
|
"eval_samples_per_second": 834.665, |
|
"eval_steps_per_second": 52.169, |
|
"step": 2272000 |
|
}, |
|
{ |
|
"epoch": 46.71, |
|
"eval_loss": 1.912061333656311, |
|
"eval_runtime": 48.3695, |
|
"eval_samples_per_second": 849.75, |
|
"eval_steps_per_second": 53.112, |
|
"step": 2280000 |
|
}, |
|
{ |
|
"epoch": 46.88, |
|
"learning_rate": 1.9133333333333333e-08, |
|
"loss": 2.0309, |
|
"step": 2288000 |
|
}, |
|
{ |
|
"epoch": 46.88, |
|
"eval_loss": 1.8972575664520264, |
|
"eval_runtime": 48.673, |
|
"eval_samples_per_second": 844.452, |
|
"eval_steps_per_second": 52.781, |
|
"step": 2288000 |
|
}, |
|
{ |
|
"epoch": 47.04, |
|
"eval_loss": 1.8985596895217896, |
|
"eval_runtime": 50.0454, |
|
"eval_samples_per_second": 821.294, |
|
"eval_steps_per_second": 51.333, |
|
"step": 2296000 |
|
}, |
|
{ |
|
"epoch": 47.2, |
|
"learning_rate": 1.64e-08, |
|
"loss": 2.0359, |
|
"step": 2304000 |
|
}, |
|
{ |
|
"epoch": 47.2, |
|
"eval_loss": 1.9202120304107666, |
|
"eval_runtime": 49.0939, |
|
"eval_samples_per_second": 837.212, |
|
"eval_steps_per_second": 52.328, |
|
"step": 2304000 |
|
}, |
|
{ |
|
"epoch": 47.37, |
|
"eval_loss": 1.8778069019317627, |
|
"eval_runtime": 48.2597, |
|
"eval_samples_per_second": 851.683, |
|
"eval_steps_per_second": 53.233, |
|
"step": 2312000 |
|
}, |
|
{ |
|
"epoch": 47.53, |
|
"learning_rate": 1.3666666666666667e-08, |
|
"loss": 2.037, |
|
"step": 2320000 |
|
}, |
|
{ |
|
"epoch": 47.53, |
|
"eval_loss": 1.9136707782745361, |
|
"eval_runtime": 48.2191, |
|
"eval_samples_per_second": 852.401, |
|
"eval_steps_per_second": 53.278, |
|
"step": 2320000 |
|
}, |
|
{ |
|
"epoch": 47.7, |
|
"eval_loss": 1.9095231294631958, |
|
"eval_runtime": 49.9332, |
|
"eval_samples_per_second": 823.14, |
|
"eval_steps_per_second": 51.449, |
|
"step": 2328000 |
|
}, |
|
{ |
|
"epoch": 47.86, |
|
"learning_rate": 1.0933333333333334e-08, |
|
"loss": 2.0306, |
|
"step": 2336000 |
|
}, |
|
{ |
|
"epoch": 47.86, |
|
"eval_loss": 1.903990626335144, |
|
"eval_runtime": 47.1892, |
|
"eval_samples_per_second": 871.005, |
|
"eval_steps_per_second": 54.44, |
|
"step": 2336000 |
|
}, |
|
{ |
|
"epoch": 48.02, |
|
"eval_loss": 1.8931529521942139, |
|
"eval_runtime": 47.2904, |
|
"eval_samples_per_second": 869.141, |
|
"eval_steps_per_second": 54.324, |
|
"step": 2344000 |
|
}, |
|
{ |
|
"epoch": 48.19, |
|
"learning_rate": 8.2e-09, |
|
"loss": 2.0342, |
|
"step": 2352000 |
|
}, |
|
{ |
|
"epoch": 48.19, |
|
"eval_loss": 1.9151569604873657, |
|
"eval_runtime": 50.819, |
|
"eval_samples_per_second": 808.791, |
|
"eval_steps_per_second": 50.552, |
|
"step": 2352000 |
|
}, |
|
{ |
|
"epoch": 48.35, |
|
"eval_loss": 1.9058945178985596, |
|
"eval_runtime": 47.9047, |
|
"eval_samples_per_second": 857.995, |
|
"eval_steps_per_second": 53.627, |
|
"step": 2360000 |
|
}, |
|
{ |
|
"epoch": 48.52, |
|
"learning_rate": 5.466666666666667e-09, |
|
"loss": 2.0457, |
|
"step": 2368000 |
|
}, |
|
{ |
|
"epoch": 48.52, |
|
"eval_loss": 1.8844729661941528, |
|
"eval_runtime": 47.9335, |
|
"eval_samples_per_second": 857.479, |
|
"eval_steps_per_second": 53.595, |
|
"step": 2368000 |
|
}, |
|
{ |
|
"epoch": 48.68, |
|
"eval_loss": 1.903971791267395, |
|
"eval_runtime": 49.7448, |
|
"eval_samples_per_second": 826.257, |
|
"eval_steps_per_second": 51.644, |
|
"step": 2376000 |
|
}, |
|
{ |
|
"epoch": 48.84, |
|
"learning_rate": 2.7333333333333334e-09, |
|
"loss": 2.0349, |
|
"step": 2384000 |
|
}, |
|
{ |
|
"epoch": 48.84, |
|
"eval_loss": 1.9000164270401, |
|
"eval_runtime": 48.2699, |
|
"eval_samples_per_second": 851.504, |
|
"eval_steps_per_second": 53.222, |
|
"step": 2384000 |
|
}, |
|
{ |
|
"epoch": 49.01, |
|
"eval_loss": 1.905893087387085, |
|
"eval_runtime": 48.9997, |
|
"eval_samples_per_second": 838.822, |
|
"eval_steps_per_second": 52.429, |
|
"step": 2392000 |
|
}, |
|
{ |
|
"epoch": 49.17, |
|
"learning_rate": 0.0, |
|
"loss": 2.0322, |
|
"step": 2400000 |
|
}, |
|
{ |
|
"epoch": 49.17, |
|
"eval_loss": 1.8925877809524536, |
|
"eval_runtime": 49.4417, |
|
"eval_samples_per_second": 831.323, |
|
"eval_steps_per_second": 51.96, |
|
"step": 2400000 |
|
}, |
|
{ |
|
"epoch": 49.17, |
|
"step": 2400000, |
|
"total_flos": 7.48474678802484e+17, |
|
"train_loss": 2.0531104736328123, |
|
"train_runtime": 167230.1572, |
|
"train_samples_per_second": 229.624, |
|
"train_steps_per_second": 14.351 |
|
} |
|
], |
|
"logging_steps": 16000, |
|
"max_steps": 2400000, |
|
"num_train_epochs": 50, |
|
"save_steps": 32000, |
|
"total_flos": 7.48474678802484e+17, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|