|
{ |
|
"best_metric": 1.8842545747756958, |
|
"best_model_checkpoint": "./model_tweets_2020_Q1_90_2/checkpoint-1440000", |
|
"epoch": 49.171259398881354, |
|
"eval_steps": 8000, |
|
"global_step": 2400000, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.16, |
|
"eval_loss": 2.2375540733337402, |
|
"eval_runtime": 44.4512, |
|
"eval_samples_per_second": 924.655, |
|
"eval_steps_per_second": 57.794, |
|
"step": 8000 |
|
}, |
|
{ |
|
"epoch": 0.33, |
|
"learning_rate": 4.0726666666666665e-07, |
|
"loss": 2.4394, |
|
"step": 16000 |
|
}, |
|
{ |
|
"epoch": 0.33, |
|
"eval_loss": 2.155681610107422, |
|
"eval_runtime": 44.6165, |
|
"eval_samples_per_second": 921.229, |
|
"eval_steps_per_second": 57.58, |
|
"step": 16000 |
|
}, |
|
{ |
|
"epoch": 0.49, |
|
"eval_loss": 2.0965232849121094, |
|
"eval_runtime": 44.4096, |
|
"eval_samples_per_second": 925.521, |
|
"eval_steps_per_second": 57.848, |
|
"step": 24000 |
|
}, |
|
{ |
|
"epoch": 0.66, |
|
"learning_rate": 4.0453333333333336e-07, |
|
"loss": 2.2403, |
|
"step": 32000 |
|
}, |
|
{ |
|
"epoch": 0.66, |
|
"eval_loss": 2.063685894012451, |
|
"eval_runtime": 44.7424, |
|
"eval_samples_per_second": 918.637, |
|
"eval_steps_per_second": 57.418, |
|
"step": 32000 |
|
}, |
|
{ |
|
"epoch": 0.82, |
|
"eval_loss": 2.062025308609009, |
|
"eval_runtime": 45.5911, |
|
"eval_samples_per_second": 901.535, |
|
"eval_steps_per_second": 56.349, |
|
"step": 40000 |
|
}, |
|
{ |
|
"epoch": 0.98, |
|
"learning_rate": 4.018e-07, |
|
"loss": 2.1859, |
|
"step": 48000 |
|
}, |
|
{ |
|
"epoch": 0.98, |
|
"eval_loss": 2.0426833629608154, |
|
"eval_runtime": 44.8823, |
|
"eval_samples_per_second": 915.773, |
|
"eval_steps_per_second": 57.239, |
|
"step": 48000 |
|
}, |
|
{ |
|
"epoch": 1.15, |
|
"eval_loss": 2.044032335281372, |
|
"eval_runtime": 45.1969, |
|
"eval_samples_per_second": 909.398, |
|
"eval_steps_per_second": 56.84, |
|
"step": 56000 |
|
}, |
|
{ |
|
"epoch": 1.31, |
|
"learning_rate": 3.9906666666666667e-07, |
|
"loss": 2.1472, |
|
"step": 64000 |
|
}, |
|
{ |
|
"epoch": 1.31, |
|
"eval_loss": 2.017690420150757, |
|
"eval_runtime": 44.7902, |
|
"eval_samples_per_second": 917.655, |
|
"eval_steps_per_second": 57.356, |
|
"step": 64000 |
|
}, |
|
{ |
|
"epoch": 1.48, |
|
"eval_loss": 1.9980425834655762, |
|
"eval_runtime": 45.9383, |
|
"eval_samples_per_second": 894.721, |
|
"eval_steps_per_second": 55.923, |
|
"step": 72000 |
|
}, |
|
{ |
|
"epoch": 1.64, |
|
"learning_rate": 3.963333333333333e-07, |
|
"loss": 2.1334, |
|
"step": 80000 |
|
}, |
|
{ |
|
"epoch": 1.64, |
|
"eval_loss": 2.0021114349365234, |
|
"eval_runtime": 45.2262, |
|
"eval_samples_per_second": 908.81, |
|
"eval_steps_per_second": 56.803, |
|
"step": 80000 |
|
}, |
|
{ |
|
"epoch": 1.8, |
|
"eval_loss": 1.9963111877441406, |
|
"eval_runtime": 45.3678, |
|
"eval_samples_per_second": 905.973, |
|
"eval_steps_per_second": 56.626, |
|
"step": 88000 |
|
}, |
|
{ |
|
"epoch": 1.97, |
|
"learning_rate": 3.936e-07, |
|
"loss": 2.1271, |
|
"step": 96000 |
|
}, |
|
{ |
|
"epoch": 1.97, |
|
"eval_loss": 1.9917570352554321, |
|
"eval_runtime": 45.3467, |
|
"eval_samples_per_second": 906.395, |
|
"eval_steps_per_second": 56.652, |
|
"step": 96000 |
|
}, |
|
{ |
|
"epoch": 2.13, |
|
"eval_loss": 1.988930344581604, |
|
"eval_runtime": 44.5535, |
|
"eval_samples_per_second": 922.532, |
|
"eval_steps_per_second": 57.661, |
|
"step": 104000 |
|
}, |
|
{ |
|
"epoch": 2.29, |
|
"learning_rate": 3.908666666666667e-07, |
|
"loss": 2.1065, |
|
"step": 112000 |
|
}, |
|
{ |
|
"epoch": 2.29, |
|
"eval_loss": 1.9688993692398071, |
|
"eval_runtime": 44.9183, |
|
"eval_samples_per_second": 915.039, |
|
"eval_steps_per_second": 57.193, |
|
"step": 112000 |
|
}, |
|
{ |
|
"epoch": 2.46, |
|
"eval_loss": 1.991935133934021, |
|
"eval_runtime": 44.922, |
|
"eval_samples_per_second": 914.964, |
|
"eval_steps_per_second": 57.188, |
|
"step": 120000 |
|
}, |
|
{ |
|
"epoch": 2.62, |
|
"learning_rate": 3.8813333333333334e-07, |
|
"loss": 2.105, |
|
"step": 128000 |
|
}, |
|
{ |
|
"epoch": 2.62, |
|
"eval_loss": 1.9705941677093506, |
|
"eval_runtime": 45.3654, |
|
"eval_samples_per_second": 906.021, |
|
"eval_steps_per_second": 56.629, |
|
"step": 128000 |
|
}, |
|
{ |
|
"epoch": 2.79, |
|
"eval_loss": 1.9724736213684082, |
|
"eval_runtime": 44.6248, |
|
"eval_samples_per_second": 921.057, |
|
"eval_steps_per_second": 57.569, |
|
"step": 136000 |
|
}, |
|
{ |
|
"epoch": 2.95, |
|
"learning_rate": 3.854e-07, |
|
"loss": 2.1033, |
|
"step": 144000 |
|
}, |
|
{ |
|
"epoch": 2.95, |
|
"eval_loss": 2.000924825668335, |
|
"eval_runtime": 44.6809, |
|
"eval_samples_per_second": 919.901, |
|
"eval_steps_per_second": 57.497, |
|
"step": 144000 |
|
}, |
|
{ |
|
"epoch": 3.11, |
|
"eval_loss": 1.9660991430282593, |
|
"eval_runtime": 45.3271, |
|
"eval_samples_per_second": 906.786, |
|
"eval_steps_per_second": 56.677, |
|
"step": 152000 |
|
}, |
|
{ |
|
"epoch": 3.28, |
|
"learning_rate": 3.8266666666666665e-07, |
|
"loss": 2.0934, |
|
"step": 160000 |
|
}, |
|
{ |
|
"epoch": 3.28, |
|
"eval_loss": 1.964065432548523, |
|
"eval_runtime": 45.3105, |
|
"eval_samples_per_second": 907.118, |
|
"eval_steps_per_second": 56.698, |
|
"step": 160000 |
|
}, |
|
{ |
|
"epoch": 3.44, |
|
"eval_loss": 1.973306655883789, |
|
"eval_runtime": 45.0686, |
|
"eval_samples_per_second": 911.988, |
|
"eval_steps_per_second": 57.002, |
|
"step": 168000 |
|
}, |
|
{ |
|
"epoch": 3.61, |
|
"learning_rate": 3.799333333333333e-07, |
|
"loss": 2.0899, |
|
"step": 176000 |
|
}, |
|
{ |
|
"epoch": 3.61, |
|
"eval_loss": 1.9746649265289307, |
|
"eval_runtime": 45.3568, |
|
"eval_samples_per_second": 906.193, |
|
"eval_steps_per_second": 56.64, |
|
"step": 176000 |
|
}, |
|
{ |
|
"epoch": 3.77, |
|
"eval_loss": 1.9442167282104492, |
|
"eval_runtime": 44.7788, |
|
"eval_samples_per_second": 917.889, |
|
"eval_steps_per_second": 57.371, |
|
"step": 184000 |
|
}, |
|
{ |
|
"epoch": 3.93, |
|
"learning_rate": 3.772e-07, |
|
"loss": 2.0903, |
|
"step": 192000 |
|
}, |
|
{ |
|
"epoch": 3.93, |
|
"eval_loss": 1.9585589170455933, |
|
"eval_runtime": 44.7775, |
|
"eval_samples_per_second": 917.915, |
|
"eval_steps_per_second": 57.373, |
|
"step": 192000 |
|
}, |
|
{ |
|
"epoch": 4.1, |
|
"eval_loss": 1.9585614204406738, |
|
"eval_runtime": 44.6141, |
|
"eval_samples_per_second": 921.278, |
|
"eval_steps_per_second": 57.583, |
|
"step": 200000 |
|
}, |
|
{ |
|
"epoch": 4.26, |
|
"learning_rate": 3.7446666666666667e-07, |
|
"loss": 2.0842, |
|
"step": 208000 |
|
}, |
|
{ |
|
"epoch": 4.26, |
|
"eval_loss": 1.9401671886444092, |
|
"eval_runtime": 45.384, |
|
"eval_samples_per_second": 905.65, |
|
"eval_steps_per_second": 56.606, |
|
"step": 208000 |
|
}, |
|
{ |
|
"epoch": 4.43, |
|
"eval_loss": 1.9483397006988525, |
|
"eval_runtime": 44.7805, |
|
"eval_samples_per_second": 917.855, |
|
"eval_steps_per_second": 57.369, |
|
"step": 216000 |
|
}, |
|
{ |
|
"epoch": 4.59, |
|
"learning_rate": 3.7173333333333333e-07, |
|
"loss": 2.0761, |
|
"step": 224000 |
|
}, |
|
{ |
|
"epoch": 4.59, |
|
"eval_loss": 1.9532350301742554, |
|
"eval_runtime": 44.8654, |
|
"eval_samples_per_second": 916.117, |
|
"eval_steps_per_second": 57.26, |
|
"step": 224000 |
|
}, |
|
{ |
|
"epoch": 4.75, |
|
"eval_loss": 1.945618748664856, |
|
"eval_runtime": 46.2969, |
|
"eval_samples_per_second": 887.792, |
|
"eval_steps_per_second": 55.49, |
|
"step": 232000 |
|
}, |
|
{ |
|
"epoch": 4.92, |
|
"learning_rate": 3.69e-07, |
|
"loss": 2.0799, |
|
"step": 240000 |
|
}, |
|
{ |
|
"epoch": 4.92, |
|
"eval_loss": 1.9322354793548584, |
|
"eval_runtime": 44.8701, |
|
"eval_samples_per_second": 916.023, |
|
"eval_steps_per_second": 57.254, |
|
"step": 240000 |
|
}, |
|
{ |
|
"epoch": 5.08, |
|
"eval_loss": 1.9460111856460571, |
|
"eval_runtime": 46.0976, |
|
"eval_samples_per_second": 891.629, |
|
"eval_steps_per_second": 55.73, |
|
"step": 248000 |
|
}, |
|
{ |
|
"epoch": 5.24, |
|
"learning_rate": 3.6626666666666664e-07, |
|
"loss": 2.0704, |
|
"step": 256000 |
|
}, |
|
{ |
|
"epoch": 5.24, |
|
"eval_loss": 1.9477649927139282, |
|
"eval_runtime": 46.6633, |
|
"eval_samples_per_second": 880.821, |
|
"eval_steps_per_second": 55.054, |
|
"step": 256000 |
|
}, |
|
{ |
|
"epoch": 5.41, |
|
"eval_loss": 1.943530559539795, |
|
"eval_runtime": 44.6432, |
|
"eval_samples_per_second": 920.678, |
|
"eval_steps_per_second": 57.545, |
|
"step": 264000 |
|
}, |
|
{ |
|
"epoch": 5.57, |
|
"learning_rate": 3.6353333333333335e-07, |
|
"loss": 2.0727, |
|
"step": 272000 |
|
}, |
|
{ |
|
"epoch": 5.57, |
|
"eval_loss": 1.9356465339660645, |
|
"eval_runtime": 46.1985, |
|
"eval_samples_per_second": 889.682, |
|
"eval_steps_per_second": 55.608, |
|
"step": 272000 |
|
}, |
|
{ |
|
"epoch": 5.74, |
|
"eval_loss": 1.9543371200561523, |
|
"eval_runtime": 45.3342, |
|
"eval_samples_per_second": 906.645, |
|
"eval_steps_per_second": 56.668, |
|
"step": 280000 |
|
}, |
|
{ |
|
"epoch": 5.9, |
|
"learning_rate": 3.608e-07, |
|
"loss": 2.073, |
|
"step": 288000 |
|
}, |
|
{ |
|
"epoch": 5.9, |
|
"eval_loss": 1.9541795253753662, |
|
"eval_runtime": 45.0744, |
|
"eval_samples_per_second": 911.871, |
|
"eval_steps_per_second": 56.995, |
|
"step": 288000 |
|
}, |
|
{ |
|
"epoch": 6.06, |
|
"eval_loss": 1.9503259658813477, |
|
"eval_runtime": 45.4487, |
|
"eval_samples_per_second": 904.359, |
|
"eval_steps_per_second": 56.525, |
|
"step": 296000 |
|
}, |
|
{ |
|
"epoch": 6.23, |
|
"learning_rate": 3.5806666666666666e-07, |
|
"loss": 2.0647, |
|
"step": 304000 |
|
}, |
|
{ |
|
"epoch": 6.23, |
|
"eval_loss": 1.9437284469604492, |
|
"eval_runtime": 45.8852, |
|
"eval_samples_per_second": 895.757, |
|
"eval_steps_per_second": 55.988, |
|
"step": 304000 |
|
}, |
|
{ |
|
"epoch": 6.39, |
|
"eval_loss": 1.9450260400772095, |
|
"eval_runtime": 45.357, |
|
"eval_samples_per_second": 906.188, |
|
"eval_steps_per_second": 56.64, |
|
"step": 312000 |
|
}, |
|
{ |
|
"epoch": 6.56, |
|
"learning_rate": 3.553333333333333e-07, |
|
"loss": 2.0668, |
|
"step": 320000 |
|
}, |
|
{ |
|
"epoch": 6.56, |
|
"eval_loss": 1.9220808744430542, |
|
"eval_runtime": 45.2729, |
|
"eval_samples_per_second": 907.873, |
|
"eval_steps_per_second": 56.745, |
|
"step": 320000 |
|
}, |
|
{ |
|
"epoch": 6.72, |
|
"eval_loss": 1.927699089050293, |
|
"eval_runtime": 45.6224, |
|
"eval_samples_per_second": 900.917, |
|
"eval_steps_per_second": 56.31, |
|
"step": 328000 |
|
}, |
|
{ |
|
"epoch": 6.88, |
|
"learning_rate": 3.5259999999999997e-07, |
|
"loss": 2.0695, |
|
"step": 336000 |
|
}, |
|
{ |
|
"epoch": 6.88, |
|
"eval_loss": 1.935697317123413, |
|
"eval_runtime": 45.5283, |
|
"eval_samples_per_second": 902.779, |
|
"eval_steps_per_second": 56.426, |
|
"step": 336000 |
|
}, |
|
{ |
|
"epoch": 7.05, |
|
"eval_loss": 1.924439549446106, |
|
"eval_runtime": 44.8085, |
|
"eval_samples_per_second": 917.282, |
|
"eval_steps_per_second": 57.333, |
|
"step": 344000 |
|
}, |
|
{ |
|
"epoch": 7.21, |
|
"learning_rate": 3.498666666666667e-07, |
|
"loss": 2.0652, |
|
"step": 352000 |
|
}, |
|
{ |
|
"epoch": 7.21, |
|
"eval_loss": 1.938655138015747, |
|
"eval_runtime": 45.4835, |
|
"eval_samples_per_second": 903.669, |
|
"eval_steps_per_second": 56.482, |
|
"step": 352000 |
|
}, |
|
{ |
|
"epoch": 7.38, |
|
"eval_loss": 1.9354636669158936, |
|
"eval_runtime": 44.716, |
|
"eval_samples_per_second": 919.178, |
|
"eval_steps_per_second": 57.451, |
|
"step": 360000 |
|
}, |
|
{ |
|
"epoch": 7.54, |
|
"learning_rate": 3.4713333333333333e-07, |
|
"loss": 2.0607, |
|
"step": 368000 |
|
}, |
|
{ |
|
"epoch": 7.54, |
|
"eval_loss": 1.9389930963516235, |
|
"eval_runtime": 44.6813, |
|
"eval_samples_per_second": 919.893, |
|
"eval_steps_per_second": 57.496, |
|
"step": 368000 |
|
}, |
|
{ |
|
"epoch": 7.7, |
|
"eval_loss": 1.9358594417572021, |
|
"eval_runtime": 45.4077, |
|
"eval_samples_per_second": 905.177, |
|
"eval_steps_per_second": 56.576, |
|
"step": 376000 |
|
}, |
|
{ |
|
"epoch": 7.87, |
|
"learning_rate": 3.444e-07, |
|
"loss": 2.0707, |
|
"step": 384000 |
|
}, |
|
{ |
|
"epoch": 7.87, |
|
"eval_loss": 1.9393250942230225, |
|
"eval_runtime": 45.4398, |
|
"eval_samples_per_second": 904.537, |
|
"eval_steps_per_second": 56.536, |
|
"step": 384000 |
|
}, |
|
{ |
|
"epoch": 8.03, |
|
"eval_loss": 1.9217476844787598, |
|
"eval_runtime": 46.1561, |
|
"eval_samples_per_second": 890.5, |
|
"eval_steps_per_second": 55.659, |
|
"step": 392000 |
|
}, |
|
{ |
|
"epoch": 8.2, |
|
"learning_rate": 3.416666666666667e-07, |
|
"loss": 2.0621, |
|
"step": 400000 |
|
}, |
|
{ |
|
"epoch": 8.2, |
|
"eval_loss": 1.9284311532974243, |
|
"eval_runtime": 46.4848, |
|
"eval_samples_per_second": 884.203, |
|
"eval_steps_per_second": 55.265, |
|
"step": 400000 |
|
}, |
|
{ |
|
"epoch": 8.36, |
|
"eval_loss": 1.9383472204208374, |
|
"eval_runtime": 46.1371, |
|
"eval_samples_per_second": 890.867, |
|
"eval_steps_per_second": 55.682, |
|
"step": 408000 |
|
}, |
|
{ |
|
"epoch": 8.52, |
|
"learning_rate": 3.3893333333333335e-07, |
|
"loss": 2.0643, |
|
"step": 416000 |
|
}, |
|
{ |
|
"epoch": 8.52, |
|
"eval_loss": 1.922420859336853, |
|
"eval_runtime": 45.1714, |
|
"eval_samples_per_second": 909.911, |
|
"eval_steps_per_second": 56.872, |
|
"step": 416000 |
|
}, |
|
{ |
|
"epoch": 8.69, |
|
"eval_loss": 1.928429365158081, |
|
"eval_runtime": 45.6103, |
|
"eval_samples_per_second": 901.157, |
|
"eval_steps_per_second": 56.325, |
|
"step": 424000 |
|
}, |
|
{ |
|
"epoch": 8.85, |
|
"learning_rate": 3.3619999999999995e-07, |
|
"loss": 2.0498, |
|
"step": 432000 |
|
}, |
|
{ |
|
"epoch": 8.85, |
|
"eval_loss": 1.9399133920669556, |
|
"eval_runtime": 45.3159, |
|
"eval_samples_per_second": 907.01, |
|
"eval_steps_per_second": 56.691, |
|
"step": 432000 |
|
}, |
|
{ |
|
"epoch": 9.01, |
|
"eval_loss": 1.9427006244659424, |
|
"eval_runtime": 45.0449, |
|
"eval_samples_per_second": 912.468, |
|
"eval_steps_per_second": 57.032, |
|
"step": 440000 |
|
}, |
|
{ |
|
"epoch": 9.18, |
|
"learning_rate": 3.3346666666666666e-07, |
|
"loss": 2.0689, |
|
"step": 448000 |
|
}, |
|
{ |
|
"epoch": 9.18, |
|
"eval_loss": 1.9351186752319336, |
|
"eval_runtime": 46.3442, |
|
"eval_samples_per_second": 886.886, |
|
"eval_steps_per_second": 55.433, |
|
"step": 448000 |
|
}, |
|
{ |
|
"epoch": 9.34, |
|
"eval_loss": 1.9312766790390015, |
|
"eval_runtime": 45.0453, |
|
"eval_samples_per_second": 912.46, |
|
"eval_steps_per_second": 57.032, |
|
"step": 456000 |
|
}, |
|
{ |
|
"epoch": 9.51, |
|
"learning_rate": 3.307333333333333e-07, |
|
"loss": 2.0511, |
|
"step": 464000 |
|
}, |
|
{ |
|
"epoch": 9.51, |
|
"eval_loss": 1.9392344951629639, |
|
"eval_runtime": 45.0174, |
|
"eval_samples_per_second": 913.025, |
|
"eval_steps_per_second": 57.067, |
|
"step": 464000 |
|
}, |
|
{ |
|
"epoch": 9.67, |
|
"eval_loss": 1.9278026819229126, |
|
"eval_runtime": 44.8506, |
|
"eval_samples_per_second": 916.42, |
|
"eval_steps_per_second": 57.279, |
|
"step": 472000 |
|
}, |
|
{ |
|
"epoch": 9.83, |
|
"learning_rate": 3.28e-07, |
|
"loss": 2.0664, |
|
"step": 480000 |
|
}, |
|
{ |
|
"epoch": 9.83, |
|
"eval_loss": 1.9384809732437134, |
|
"eval_runtime": 45.34, |
|
"eval_samples_per_second": 906.529, |
|
"eval_steps_per_second": 56.661, |
|
"step": 480000 |
|
}, |
|
{ |
|
"epoch": 10.0, |
|
"eval_loss": 1.9471704959869385, |
|
"eval_runtime": 44.7273, |
|
"eval_samples_per_second": 918.947, |
|
"eval_steps_per_second": 57.437, |
|
"step": 488000 |
|
}, |
|
{ |
|
"epoch": 10.16, |
|
"learning_rate": 3.252666666666667e-07, |
|
"loss": 2.0565, |
|
"step": 496000 |
|
}, |
|
{ |
|
"epoch": 10.16, |
|
"eval_loss": 1.937743902206421, |
|
"eval_runtime": 45.3104, |
|
"eval_samples_per_second": 907.121, |
|
"eval_steps_per_second": 56.698, |
|
"step": 496000 |
|
}, |
|
{ |
|
"epoch": 10.33, |
|
"eval_loss": 1.9480650424957275, |
|
"eval_runtime": 45.4075, |
|
"eval_samples_per_second": 905.182, |
|
"eval_steps_per_second": 56.577, |
|
"step": 504000 |
|
}, |
|
{ |
|
"epoch": 10.49, |
|
"learning_rate": 3.2253333333333334e-07, |
|
"loss": 2.0566, |
|
"step": 512000 |
|
}, |
|
{ |
|
"epoch": 10.49, |
|
"eval_loss": 1.9453697204589844, |
|
"eval_runtime": 44.898, |
|
"eval_samples_per_second": 915.452, |
|
"eval_steps_per_second": 57.219, |
|
"step": 512000 |
|
}, |
|
{ |
|
"epoch": 10.65, |
|
"eval_loss": 1.9244284629821777, |
|
"eval_runtime": 45.3186, |
|
"eval_samples_per_second": 906.957, |
|
"eval_steps_per_second": 56.688, |
|
"step": 520000 |
|
}, |
|
{ |
|
"epoch": 10.82, |
|
"learning_rate": 3.198e-07, |
|
"loss": 2.0523, |
|
"step": 528000 |
|
}, |
|
{ |
|
"epoch": 10.82, |
|
"eval_loss": 1.935764193534851, |
|
"eval_runtime": 44.869, |
|
"eval_samples_per_second": 916.045, |
|
"eval_steps_per_second": 57.256, |
|
"step": 528000 |
|
}, |
|
{ |
|
"epoch": 10.98, |
|
"eval_loss": 1.9176356792449951, |
|
"eval_runtime": 45.328, |
|
"eval_samples_per_second": 906.768, |
|
"eval_steps_per_second": 56.676, |
|
"step": 536000 |
|
}, |
|
{ |
|
"epoch": 11.15, |
|
"learning_rate": 3.1706666666666665e-07, |
|
"loss": 2.0554, |
|
"step": 544000 |
|
}, |
|
{ |
|
"epoch": 11.15, |
|
"eval_loss": 1.9284305572509766, |
|
"eval_runtime": 45.5519, |
|
"eval_samples_per_second": 902.311, |
|
"eval_steps_per_second": 56.397, |
|
"step": 544000 |
|
}, |
|
{ |
|
"epoch": 11.31, |
|
"eval_loss": 1.9287078380584717, |
|
"eval_runtime": 44.7567, |
|
"eval_samples_per_second": 918.343, |
|
"eval_steps_per_second": 57.399, |
|
"step": 552000 |
|
}, |
|
{ |
|
"epoch": 11.47, |
|
"learning_rate": 3.1433333333333336e-07, |
|
"loss": 2.0485, |
|
"step": 560000 |
|
}, |
|
{ |
|
"epoch": 11.47, |
|
"eval_loss": 1.923736810684204, |
|
"eval_runtime": 44.866, |
|
"eval_samples_per_second": 916.107, |
|
"eval_steps_per_second": 57.259, |
|
"step": 560000 |
|
}, |
|
{ |
|
"epoch": 11.64, |
|
"eval_loss": 1.920929193496704, |
|
"eval_runtime": 44.798, |
|
"eval_samples_per_second": 917.497, |
|
"eval_steps_per_second": 57.346, |
|
"step": 568000 |
|
}, |
|
{ |
|
"epoch": 11.8, |
|
"learning_rate": 3.116e-07, |
|
"loss": 2.0485, |
|
"step": 576000 |
|
}, |
|
{ |
|
"epoch": 11.8, |
|
"eval_loss": 1.9261997938156128, |
|
"eval_runtime": 44.83, |
|
"eval_samples_per_second": 916.842, |
|
"eval_steps_per_second": 57.305, |
|
"step": 576000 |
|
}, |
|
{ |
|
"epoch": 11.97, |
|
"eval_loss": 1.9207805395126343, |
|
"eval_runtime": 44.7975, |
|
"eval_samples_per_second": 917.507, |
|
"eval_steps_per_second": 57.347, |
|
"step": 584000 |
|
}, |
|
{ |
|
"epoch": 12.13, |
|
"learning_rate": 3.0886666666666667e-07, |
|
"loss": 2.0542, |
|
"step": 592000 |
|
}, |
|
{ |
|
"epoch": 12.13, |
|
"eval_loss": 1.9320107698440552, |
|
"eval_runtime": 44.9881, |
|
"eval_samples_per_second": 913.619, |
|
"eval_steps_per_second": 57.104, |
|
"step": 592000 |
|
}, |
|
{ |
|
"epoch": 12.29, |
|
"eval_loss": 1.9076824188232422, |
|
"eval_runtime": 44.823, |
|
"eval_samples_per_second": 916.985, |
|
"eval_steps_per_second": 57.314, |
|
"step": 600000 |
|
}, |
|
{ |
|
"epoch": 12.46, |
|
"learning_rate": 3.061333333333333e-07, |
|
"loss": 2.0527, |
|
"step": 608000 |
|
}, |
|
{ |
|
"epoch": 12.46, |
|
"eval_loss": 1.9248530864715576, |
|
"eval_runtime": 45.2012, |
|
"eval_samples_per_second": 909.312, |
|
"eval_steps_per_second": 56.835, |
|
"step": 608000 |
|
}, |
|
{ |
|
"epoch": 12.62, |
|
"eval_loss": 1.919195532798767, |
|
"eval_runtime": 44.95, |
|
"eval_samples_per_second": 914.393, |
|
"eval_steps_per_second": 57.152, |
|
"step": 616000 |
|
}, |
|
{ |
|
"epoch": 12.78, |
|
"learning_rate": 3.034e-07, |
|
"loss": 2.0606, |
|
"step": 624000 |
|
}, |
|
{ |
|
"epoch": 12.78, |
|
"eval_loss": 1.9151537418365479, |
|
"eval_runtime": 44.9136, |
|
"eval_samples_per_second": 915.135, |
|
"eval_steps_per_second": 57.199, |
|
"step": 624000 |
|
}, |
|
{ |
|
"epoch": 12.95, |
|
"eval_loss": 1.9194060564041138, |
|
"eval_runtime": 45.7535, |
|
"eval_samples_per_second": 898.337, |
|
"eval_steps_per_second": 56.149, |
|
"step": 632000 |
|
}, |
|
{ |
|
"epoch": 13.11, |
|
"learning_rate": 3.0066666666666663e-07, |
|
"loss": 2.0542, |
|
"step": 640000 |
|
}, |
|
{ |
|
"epoch": 13.11, |
|
"eval_loss": 1.919755458831787, |
|
"eval_runtime": 45.4065, |
|
"eval_samples_per_second": 905.201, |
|
"eval_steps_per_second": 56.578, |
|
"step": 640000 |
|
}, |
|
{ |
|
"epoch": 13.28, |
|
"eval_loss": 1.9134645462036133, |
|
"eval_runtime": 45.523, |
|
"eval_samples_per_second": 902.884, |
|
"eval_steps_per_second": 56.433, |
|
"step": 648000 |
|
}, |
|
{ |
|
"epoch": 13.44, |
|
"learning_rate": 2.9793333333333334e-07, |
|
"loss": 2.0593, |
|
"step": 656000 |
|
}, |
|
{ |
|
"epoch": 13.44, |
|
"eval_loss": 1.919248342514038, |
|
"eval_runtime": 45.5047, |
|
"eval_samples_per_second": 903.248, |
|
"eval_steps_per_second": 56.456, |
|
"step": 656000 |
|
}, |
|
{ |
|
"epoch": 13.6, |
|
"eval_loss": 1.9256792068481445, |
|
"eval_runtime": 45.5171, |
|
"eval_samples_per_second": 903.002, |
|
"eval_steps_per_second": 56.44, |
|
"step": 664000 |
|
}, |
|
{ |
|
"epoch": 13.77, |
|
"learning_rate": 2.952e-07, |
|
"loss": 2.0467, |
|
"step": 672000 |
|
}, |
|
{ |
|
"epoch": 13.77, |
|
"eval_loss": 1.9134962558746338, |
|
"eval_runtime": 44.9903, |
|
"eval_samples_per_second": 913.574, |
|
"eval_steps_per_second": 57.101, |
|
"step": 672000 |
|
}, |
|
{ |
|
"epoch": 13.93, |
|
"eval_loss": 1.8994532823562622, |
|
"eval_runtime": 45.5632, |
|
"eval_samples_per_second": 902.087, |
|
"eval_steps_per_second": 56.383, |
|
"step": 680000 |
|
}, |
|
{ |
|
"epoch": 14.1, |
|
"learning_rate": 2.9246666666666665e-07, |
|
"loss": 2.0535, |
|
"step": 688000 |
|
}, |
|
{ |
|
"epoch": 14.1, |
|
"eval_loss": 1.9305521249771118, |
|
"eval_runtime": 45.4839, |
|
"eval_samples_per_second": 903.66, |
|
"eval_steps_per_second": 56.481, |
|
"step": 688000 |
|
}, |
|
{ |
|
"epoch": 14.26, |
|
"eval_loss": 1.927839994430542, |
|
"eval_runtime": 44.9073, |
|
"eval_samples_per_second": 915.264, |
|
"eval_steps_per_second": 57.207, |
|
"step": 696000 |
|
}, |
|
{ |
|
"epoch": 14.42, |
|
"learning_rate": 2.897333333333333e-07, |
|
"loss": 2.0559, |
|
"step": 704000 |
|
}, |
|
{ |
|
"epoch": 14.42, |
|
"eval_loss": 1.9136629104614258, |
|
"eval_runtime": 45.1604, |
|
"eval_samples_per_second": 910.133, |
|
"eval_steps_per_second": 56.886, |
|
"step": 704000 |
|
}, |
|
{ |
|
"epoch": 14.59, |
|
"eval_loss": 1.9164540767669678, |
|
"eval_runtime": 44.9974, |
|
"eval_samples_per_second": 913.431, |
|
"eval_steps_per_second": 57.092, |
|
"step": 712000 |
|
}, |
|
{ |
|
"epoch": 14.75, |
|
"learning_rate": 2.8699999999999996e-07, |
|
"loss": 2.0544, |
|
"step": 720000 |
|
}, |
|
{ |
|
"epoch": 14.75, |
|
"eval_loss": 1.921919345855713, |
|
"eval_runtime": 45.8685, |
|
"eval_samples_per_second": 896.084, |
|
"eval_steps_per_second": 56.008, |
|
"step": 720000 |
|
}, |
|
{ |
|
"epoch": 14.92, |
|
"eval_loss": 1.9200448989868164, |
|
"eval_runtime": 45.5436, |
|
"eval_samples_per_second": 902.476, |
|
"eval_steps_per_second": 56.407, |
|
"step": 728000 |
|
}, |
|
{ |
|
"epoch": 15.08, |
|
"learning_rate": 2.8426666666666667e-07, |
|
"loss": 2.0493, |
|
"step": 736000 |
|
}, |
|
{ |
|
"epoch": 15.08, |
|
"eval_loss": 1.924161434173584, |
|
"eval_runtime": 44.985, |
|
"eval_samples_per_second": 913.683, |
|
"eval_steps_per_second": 57.108, |
|
"step": 736000 |
|
}, |
|
{ |
|
"epoch": 15.24, |
|
"eval_loss": 1.9263921976089478, |
|
"eval_runtime": 44.8608, |
|
"eval_samples_per_second": 916.211, |
|
"eval_steps_per_second": 57.266, |
|
"step": 744000 |
|
}, |
|
{ |
|
"epoch": 15.41, |
|
"learning_rate": 2.815333333333333e-07, |
|
"loss": 2.0538, |
|
"step": 752000 |
|
}, |
|
{ |
|
"epoch": 15.41, |
|
"eval_loss": 1.9333122968673706, |
|
"eval_runtime": 44.8991, |
|
"eval_samples_per_second": 915.431, |
|
"eval_steps_per_second": 57.217, |
|
"step": 752000 |
|
}, |
|
{ |
|
"epoch": 15.57, |
|
"eval_loss": 1.912643551826477, |
|
"eval_runtime": 46.091, |
|
"eval_samples_per_second": 891.758, |
|
"eval_steps_per_second": 55.738, |
|
"step": 760000 |
|
}, |
|
{ |
|
"epoch": 15.73, |
|
"learning_rate": 2.7880000000000003e-07, |
|
"loss": 2.0457, |
|
"step": 768000 |
|
}, |
|
{ |
|
"epoch": 15.73, |
|
"eval_loss": 1.9022926092147827, |
|
"eval_runtime": 45.2957, |
|
"eval_samples_per_second": 907.416, |
|
"eval_steps_per_second": 56.716, |
|
"step": 768000 |
|
}, |
|
{ |
|
"epoch": 15.9, |
|
"eval_loss": 1.9153952598571777, |
|
"eval_runtime": 44.9942, |
|
"eval_samples_per_second": 913.495, |
|
"eval_steps_per_second": 57.096, |
|
"step": 776000 |
|
}, |
|
{ |
|
"epoch": 16.06, |
|
"learning_rate": 2.7606666666666664e-07, |
|
"loss": 2.0436, |
|
"step": 784000 |
|
}, |
|
{ |
|
"epoch": 16.06, |
|
"eval_loss": 1.894668698310852, |
|
"eval_runtime": 45.1573, |
|
"eval_samples_per_second": 910.196, |
|
"eval_steps_per_second": 56.89, |
|
"step": 784000 |
|
}, |
|
{ |
|
"epoch": 16.23, |
|
"eval_loss": 1.9197973012924194, |
|
"eval_runtime": 45.5499, |
|
"eval_samples_per_second": 902.351, |
|
"eval_steps_per_second": 56.4, |
|
"step": 792000 |
|
}, |
|
{ |
|
"epoch": 16.39, |
|
"learning_rate": 2.733333333333333e-07, |
|
"loss": 2.0527, |
|
"step": 800000 |
|
}, |
|
{ |
|
"epoch": 16.39, |
|
"eval_loss": 1.912539005279541, |
|
"eval_runtime": 45.0868, |
|
"eval_samples_per_second": 911.619, |
|
"eval_steps_per_second": 56.979, |
|
"step": 800000 |
|
}, |
|
{ |
|
"epoch": 16.55, |
|
"eval_loss": 1.9129201173782349, |
|
"eval_runtime": 45.0169, |
|
"eval_samples_per_second": 913.036, |
|
"eval_steps_per_second": 57.068, |
|
"step": 808000 |
|
}, |
|
{ |
|
"epoch": 16.72, |
|
"learning_rate": 2.706e-07, |
|
"loss": 2.0484, |
|
"step": 816000 |
|
}, |
|
{ |
|
"epoch": 16.72, |
|
"eval_loss": 1.930238127708435, |
|
"eval_runtime": 45.0627, |
|
"eval_samples_per_second": 912.107, |
|
"eval_steps_per_second": 57.009, |
|
"step": 816000 |
|
}, |
|
{ |
|
"epoch": 16.88, |
|
"eval_loss": 1.915164589881897, |
|
"eval_runtime": 44.8156, |
|
"eval_samples_per_second": 917.137, |
|
"eval_steps_per_second": 57.324, |
|
"step": 824000 |
|
}, |
|
{ |
|
"epoch": 17.05, |
|
"learning_rate": 2.6786666666666666e-07, |
|
"loss": 2.0535, |
|
"step": 832000 |
|
}, |
|
{ |
|
"epoch": 17.05, |
|
"eval_loss": 1.9222667217254639, |
|
"eval_runtime": 45.4436, |
|
"eval_samples_per_second": 904.463, |
|
"eval_steps_per_second": 56.532, |
|
"step": 832000 |
|
}, |
|
{ |
|
"epoch": 17.21, |
|
"eval_loss": 1.9195363521575928, |
|
"eval_runtime": 45.1269, |
|
"eval_samples_per_second": 910.809, |
|
"eval_steps_per_second": 56.928, |
|
"step": 840000 |
|
}, |
|
{ |
|
"epoch": 17.37, |
|
"learning_rate": 2.651333333333333e-07, |
|
"loss": 2.0516, |
|
"step": 848000 |
|
}, |
|
{ |
|
"epoch": 17.37, |
|
"eval_loss": 1.9071532487869263, |
|
"eval_runtime": 45.5538, |
|
"eval_samples_per_second": 902.274, |
|
"eval_steps_per_second": 56.395, |
|
"step": 848000 |
|
}, |
|
{ |
|
"epoch": 17.54, |
|
"eval_loss": 1.9210063219070435, |
|
"eval_runtime": 45.0127, |
|
"eval_samples_per_second": 913.12, |
|
"eval_steps_per_second": 57.073, |
|
"step": 856000 |
|
}, |
|
{ |
|
"epoch": 17.7, |
|
"learning_rate": 2.624e-07, |
|
"loss": 2.0546, |
|
"step": 864000 |
|
}, |
|
{ |
|
"epoch": 17.7, |
|
"eval_loss": 1.9014463424682617, |
|
"eval_runtime": 45.0217, |
|
"eval_samples_per_second": 912.937, |
|
"eval_steps_per_second": 57.061, |
|
"step": 864000 |
|
}, |
|
{ |
|
"epoch": 17.87, |
|
"eval_loss": 1.9129884243011475, |
|
"eval_runtime": 45.0333, |
|
"eval_samples_per_second": 912.702, |
|
"eval_steps_per_second": 57.047, |
|
"step": 872000 |
|
}, |
|
{ |
|
"epoch": 18.03, |
|
"learning_rate": 2.596666666666667e-07, |
|
"loss": 2.0439, |
|
"step": 880000 |
|
}, |
|
{ |
|
"epoch": 18.03, |
|
"eval_loss": 1.9064290523529053, |
|
"eval_runtime": 45.5491, |
|
"eval_samples_per_second": 902.367, |
|
"eval_steps_per_second": 56.401, |
|
"step": 880000 |
|
}, |
|
{ |
|
"epoch": 18.19, |
|
"eval_loss": 1.9166321754455566, |
|
"eval_runtime": 45.3148, |
|
"eval_samples_per_second": 907.032, |
|
"eval_steps_per_second": 56.692, |
|
"step": 888000 |
|
}, |
|
{ |
|
"epoch": 18.36, |
|
"learning_rate": 2.5693333333333333e-07, |
|
"loss": 2.0391, |
|
"step": 896000 |
|
}, |
|
{ |
|
"epoch": 18.36, |
|
"eval_loss": 1.930993676185608, |
|
"eval_runtime": 45.1221, |
|
"eval_samples_per_second": 910.905, |
|
"eval_steps_per_second": 56.934, |
|
"step": 896000 |
|
}, |
|
{ |
|
"epoch": 18.52, |
|
"eval_loss": 1.9064345359802246, |
|
"eval_runtime": 44.9731, |
|
"eval_samples_per_second": 913.923, |
|
"eval_steps_per_second": 57.123, |
|
"step": 904000 |
|
}, |
|
{ |
|
"epoch": 18.69, |
|
"learning_rate": 2.542e-07, |
|
"loss": 2.0568, |
|
"step": 912000 |
|
}, |
|
{ |
|
"epoch": 18.69, |
|
"eval_loss": 1.9106640815734863, |
|
"eval_runtime": 45.255, |
|
"eval_samples_per_second": 908.23, |
|
"eval_steps_per_second": 56.767, |
|
"step": 912000 |
|
}, |
|
{ |
|
"epoch": 18.85, |
|
"eval_loss": 1.931686520576477, |
|
"eval_runtime": 44.9728, |
|
"eval_samples_per_second": 913.931, |
|
"eval_steps_per_second": 57.123, |
|
"step": 920000 |
|
}, |
|
{ |
|
"epoch": 19.01, |
|
"learning_rate": 2.5146666666666664e-07, |
|
"loss": 2.047, |
|
"step": 928000 |
|
}, |
|
{ |
|
"epoch": 19.01, |
|
"eval_loss": 1.9234881401062012, |
|
"eval_runtime": 46.2033, |
|
"eval_samples_per_second": 889.591, |
|
"eval_steps_per_second": 55.602, |
|
"step": 928000 |
|
}, |
|
{ |
|
"epoch": 19.18, |
|
"eval_loss": 1.917264699935913, |
|
"eval_runtime": 45.8963, |
|
"eval_samples_per_second": 895.541, |
|
"eval_steps_per_second": 55.974, |
|
"step": 936000 |
|
}, |
|
{ |
|
"epoch": 19.34, |
|
"learning_rate": 2.4873333333333335e-07, |
|
"loss": 2.0431, |
|
"step": 944000 |
|
}, |
|
{ |
|
"epoch": 19.34, |
|
"eval_loss": 1.8946489095687866, |
|
"eval_runtime": 45.1842, |
|
"eval_samples_per_second": 909.654, |
|
"eval_steps_per_second": 56.856, |
|
"step": 944000 |
|
}, |
|
{ |
|
"epoch": 19.5, |
|
"eval_loss": 1.9247366189956665, |
|
"eval_runtime": 45.5385, |
|
"eval_samples_per_second": 902.578, |
|
"eval_steps_per_second": 56.414, |
|
"step": 952000 |
|
}, |
|
{ |
|
"epoch": 19.67, |
|
"learning_rate": 2.46e-07, |
|
"loss": 2.0444, |
|
"step": 960000 |
|
}, |
|
{ |
|
"epoch": 19.67, |
|
"eval_loss": 1.9105546474456787, |
|
"eval_runtime": 45.0193, |
|
"eval_samples_per_second": 912.987, |
|
"eval_steps_per_second": 57.064, |
|
"step": 960000 |
|
}, |
|
{ |
|
"epoch": 19.83, |
|
"eval_loss": 1.9022789001464844, |
|
"eval_runtime": 45.0679, |
|
"eval_samples_per_second": 912.002, |
|
"eval_steps_per_second": 57.003, |
|
"step": 968000 |
|
}, |
|
{ |
|
"epoch": 20.0, |
|
"learning_rate": 2.4326666666666666e-07, |
|
"loss": 2.0465, |
|
"step": 976000 |
|
}, |
|
{ |
|
"epoch": 20.0, |
|
"eval_loss": 1.9196343421936035, |
|
"eval_runtime": 44.9372, |
|
"eval_samples_per_second": 914.655, |
|
"eval_steps_per_second": 57.169, |
|
"step": 976000 |
|
}, |
|
{ |
|
"epoch": 20.16, |
|
"eval_loss": 1.9111627340316772, |
|
"eval_runtime": 45.8537, |
|
"eval_samples_per_second": 896.372, |
|
"eval_steps_per_second": 56.026, |
|
"step": 984000 |
|
}, |
|
{ |
|
"epoch": 20.32, |
|
"learning_rate": 2.405333333333333e-07, |
|
"loss": 2.0454, |
|
"step": 992000 |
|
}, |
|
{ |
|
"epoch": 20.32, |
|
"eval_loss": 1.9045841693878174, |
|
"eval_runtime": 45.0217, |
|
"eval_samples_per_second": 912.937, |
|
"eval_steps_per_second": 57.061, |
|
"step": 992000 |
|
}, |
|
{ |
|
"epoch": 20.49, |
|
"eval_loss": 1.9031916856765747, |
|
"eval_runtime": 44.8617, |
|
"eval_samples_per_second": 916.194, |
|
"eval_steps_per_second": 57.265, |
|
"step": 1000000 |
|
}, |
|
{ |
|
"epoch": 20.65, |
|
"learning_rate": 2.3779999999999997e-07, |
|
"loss": 2.04, |
|
"step": 1008000 |
|
}, |
|
{ |
|
"epoch": 20.65, |
|
"eval_loss": 1.9129735231399536, |
|
"eval_runtime": 45.7328, |
|
"eval_samples_per_second": 898.742, |
|
"eval_steps_per_second": 56.174, |
|
"step": 1008000 |
|
}, |
|
{ |
|
"epoch": 20.82, |
|
"eval_loss": 1.922319769859314, |
|
"eval_runtime": 45.7137, |
|
"eval_samples_per_second": 899.117, |
|
"eval_steps_per_second": 56.198, |
|
"step": 1016000 |
|
}, |
|
{ |
|
"epoch": 20.98, |
|
"learning_rate": 2.3506666666666668e-07, |
|
"loss": 2.0406, |
|
"step": 1024000 |
|
}, |
|
{ |
|
"epoch": 20.98, |
|
"eval_loss": 1.9261207580566406, |
|
"eval_runtime": 45.2159, |
|
"eval_samples_per_second": 909.017, |
|
"eval_steps_per_second": 56.816, |
|
"step": 1024000 |
|
}, |
|
{ |
|
"epoch": 21.14, |
|
"eval_loss": 1.9014304876327515, |
|
"eval_runtime": 45.6968, |
|
"eval_samples_per_second": 899.451, |
|
"eval_steps_per_second": 56.218, |
|
"step": 1032000 |
|
}, |
|
{ |
|
"epoch": 21.31, |
|
"learning_rate": 2.3233333333333334e-07, |
|
"loss": 2.0401, |
|
"step": 1040000 |
|
}, |
|
{ |
|
"epoch": 21.31, |
|
"eval_loss": 1.905246615409851, |
|
"eval_runtime": 45.6693, |
|
"eval_samples_per_second": 899.993, |
|
"eval_steps_per_second": 56.252, |
|
"step": 1040000 |
|
}, |
|
{ |
|
"epoch": 21.47, |
|
"eval_loss": 1.9004502296447754, |
|
"eval_runtime": 45.6183, |
|
"eval_samples_per_second": 900.998, |
|
"eval_steps_per_second": 56.315, |
|
"step": 1048000 |
|
}, |
|
{ |
|
"epoch": 21.64, |
|
"learning_rate": 2.2960000000000002e-07, |
|
"loss": 2.044, |
|
"step": 1056000 |
|
}, |
|
{ |
|
"epoch": 21.64, |
|
"eval_loss": 1.9044198989868164, |
|
"eval_runtime": 45.8662, |
|
"eval_samples_per_second": 896.128, |
|
"eval_steps_per_second": 56.011, |
|
"step": 1056000 |
|
}, |
|
{ |
|
"epoch": 21.8, |
|
"eval_loss": 1.9169893264770508, |
|
"eval_runtime": 45.213, |
|
"eval_samples_per_second": 909.076, |
|
"eval_steps_per_second": 56.82, |
|
"step": 1064000 |
|
}, |
|
{ |
|
"epoch": 21.96, |
|
"learning_rate": 2.2686666666666667e-07, |
|
"loss": 2.0401, |
|
"step": 1072000 |
|
}, |
|
{ |
|
"epoch": 21.96, |
|
"eval_loss": 1.9103343486785889, |
|
"eval_runtime": 45.7608, |
|
"eval_samples_per_second": 898.192, |
|
"eval_steps_per_second": 56.14, |
|
"step": 1072000 |
|
}, |
|
{ |
|
"epoch": 22.13, |
|
"eval_loss": 1.8970900774002075, |
|
"eval_runtime": 45.1522, |
|
"eval_samples_per_second": 910.298, |
|
"eval_steps_per_second": 56.896, |
|
"step": 1080000 |
|
}, |
|
{ |
|
"epoch": 22.29, |
|
"learning_rate": 2.2413333333333333e-07, |
|
"loss": 2.0458, |
|
"step": 1088000 |
|
}, |
|
{ |
|
"epoch": 22.29, |
|
"eval_loss": 1.9256750345230103, |
|
"eval_runtime": 45.4505, |
|
"eval_samples_per_second": 904.324, |
|
"eval_steps_per_second": 56.523, |
|
"step": 1088000 |
|
}, |
|
{ |
|
"epoch": 22.45, |
|
"eval_loss": 1.9029202461242676, |
|
"eval_runtime": 45.9975, |
|
"eval_samples_per_second": 893.57, |
|
"eval_steps_per_second": 55.851, |
|
"step": 1096000 |
|
}, |
|
{ |
|
"epoch": 22.62, |
|
"learning_rate": 2.214e-07, |
|
"loss": 2.0414, |
|
"step": 1104000 |
|
}, |
|
{ |
|
"epoch": 22.62, |
|
"eval_loss": 1.915024995803833, |
|
"eval_runtime": 45.2783, |
|
"eval_samples_per_second": 907.764, |
|
"eval_steps_per_second": 56.738, |
|
"step": 1104000 |
|
}, |
|
{ |
|
"epoch": 22.78, |
|
"eval_loss": 1.9123761653900146, |
|
"eval_runtime": 45.0133, |
|
"eval_samples_per_second": 913.107, |
|
"eval_steps_per_second": 57.072, |
|
"step": 1112000 |
|
}, |
|
{ |
|
"epoch": 22.95, |
|
"learning_rate": 2.1866666666666667e-07, |
|
"loss": 2.0419, |
|
"step": 1120000 |
|
}, |
|
{ |
|
"epoch": 22.95, |
|
"eval_loss": 1.9029767513275146, |
|
"eval_runtime": 45.0706, |
|
"eval_samples_per_second": 911.947, |
|
"eval_steps_per_second": 56.999, |
|
"step": 1120000 |
|
}, |
|
{ |
|
"epoch": 23.11, |
|
"eval_loss": 1.9144717454910278, |
|
"eval_runtime": 45.5951, |
|
"eval_samples_per_second": 901.457, |
|
"eval_steps_per_second": 56.344, |
|
"step": 1128000 |
|
}, |
|
{ |
|
"epoch": 23.27, |
|
"learning_rate": 2.1593333333333332e-07, |
|
"loss": 2.0415, |
|
"step": 1136000 |
|
}, |
|
{ |
|
"epoch": 23.27, |
|
"eval_loss": 1.9132155179977417, |
|
"eval_runtime": 44.6846, |
|
"eval_samples_per_second": 919.824, |
|
"eval_steps_per_second": 57.492, |
|
"step": 1136000 |
|
}, |
|
{ |
|
"epoch": 23.44, |
|
"eval_loss": 1.9053661823272705, |
|
"eval_runtime": 45.6412, |
|
"eval_samples_per_second": 900.547, |
|
"eval_steps_per_second": 56.287, |
|
"step": 1144000 |
|
}, |
|
{ |
|
"epoch": 23.6, |
|
"learning_rate": 2.132e-07, |
|
"loss": 2.0394, |
|
"step": 1152000 |
|
}, |
|
{ |
|
"epoch": 23.6, |
|
"eval_loss": 1.915486216545105, |
|
"eval_runtime": 46.1174, |
|
"eval_samples_per_second": 891.246, |
|
"eval_steps_per_second": 55.706, |
|
"step": 1152000 |
|
}, |
|
{ |
|
"epoch": 23.77, |
|
"eval_loss": 1.9147080183029175, |
|
"eval_runtime": 44.9401, |
|
"eval_samples_per_second": 914.595, |
|
"eval_steps_per_second": 57.165, |
|
"step": 1160000 |
|
}, |
|
{ |
|
"epoch": 23.93, |
|
"learning_rate": 2.1046666666666666e-07, |
|
"loss": 2.0414, |
|
"step": 1168000 |
|
}, |
|
{ |
|
"epoch": 23.93, |
|
"eval_loss": 1.9130446910858154, |
|
"eval_runtime": 44.5878, |
|
"eval_samples_per_second": 921.822, |
|
"eval_steps_per_second": 57.617, |
|
"step": 1168000 |
|
}, |
|
{ |
|
"epoch": 24.09, |
|
"eval_loss": 1.9002370834350586, |
|
"eval_runtime": 45.5775, |
|
"eval_samples_per_second": 901.805, |
|
"eval_steps_per_second": 56.366, |
|
"step": 1176000 |
|
}, |
|
{ |
|
"epoch": 24.26, |
|
"learning_rate": 2.0773333333333334e-07, |
|
"loss": 2.036, |
|
"step": 1184000 |
|
}, |
|
{ |
|
"epoch": 24.26, |
|
"eval_loss": 1.899147391319275, |
|
"eval_runtime": 45.1106, |
|
"eval_samples_per_second": 911.138, |
|
"eval_steps_per_second": 56.949, |
|
"step": 1184000 |
|
}, |
|
{ |
|
"epoch": 24.42, |
|
"eval_loss": 1.9202589988708496, |
|
"eval_runtime": 44.9471, |
|
"eval_samples_per_second": 914.452, |
|
"eval_steps_per_second": 57.156, |
|
"step": 1192000 |
|
}, |
|
{ |
|
"epoch": 24.59, |
|
"learning_rate": 2.05e-07, |
|
"loss": 2.0393, |
|
"step": 1200000 |
|
}, |
|
{ |
|
"epoch": 24.59, |
|
"eval_loss": 1.9327338933944702, |
|
"eval_runtime": 45.225, |
|
"eval_samples_per_second": 908.834, |
|
"eval_steps_per_second": 56.805, |
|
"step": 1200000 |
|
}, |
|
{ |
|
"epoch": 24.75, |
|
"eval_loss": 1.9098608493804932, |
|
"eval_runtime": 44.9604, |
|
"eval_samples_per_second": 914.182, |
|
"eval_steps_per_second": 57.139, |
|
"step": 1208000 |
|
}, |
|
{ |
|
"epoch": 24.91, |
|
"learning_rate": 2.0226666666666668e-07, |
|
"loss": 2.0375, |
|
"step": 1216000 |
|
}, |
|
{ |
|
"epoch": 24.91, |
|
"eval_loss": 1.9097236394882202, |
|
"eval_runtime": 45.2358, |
|
"eval_samples_per_second": 908.617, |
|
"eval_steps_per_second": 56.791, |
|
"step": 1216000 |
|
}, |
|
{ |
|
"epoch": 25.08, |
|
"eval_loss": 1.900553584098816, |
|
"eval_runtime": 45.0389, |
|
"eval_samples_per_second": 912.589, |
|
"eval_steps_per_second": 57.04, |
|
"step": 1224000 |
|
}, |
|
{ |
|
"epoch": 25.24, |
|
"learning_rate": 1.9953333333333333e-07, |
|
"loss": 2.0384, |
|
"step": 1232000 |
|
}, |
|
{ |
|
"epoch": 25.24, |
|
"eval_loss": 1.9062752723693848, |
|
"eval_runtime": 45.0968, |
|
"eval_samples_per_second": 911.418, |
|
"eval_steps_per_second": 56.966, |
|
"step": 1232000 |
|
}, |
|
{ |
|
"epoch": 25.41, |
|
"eval_loss": 1.9056702852249146, |
|
"eval_runtime": 44.7493, |
|
"eval_samples_per_second": 918.496, |
|
"eval_steps_per_second": 57.409, |
|
"step": 1240000 |
|
}, |
|
{ |
|
"epoch": 25.57, |
|
"learning_rate": 1.968e-07, |
|
"loss": 2.0392, |
|
"step": 1248000 |
|
}, |
|
{ |
|
"epoch": 25.57, |
|
"eval_loss": 1.9037301540374756, |
|
"eval_runtime": 45.476, |
|
"eval_samples_per_second": 903.817, |
|
"eval_steps_per_second": 56.491, |
|
"step": 1248000 |
|
}, |
|
{ |
|
"epoch": 25.73, |
|
"eval_loss": 1.9013148546218872, |
|
"eval_runtime": 45.2771, |
|
"eval_samples_per_second": 907.789, |
|
"eval_steps_per_second": 56.74, |
|
"step": 1256000 |
|
}, |
|
{ |
|
"epoch": 25.9, |
|
"learning_rate": 1.9406666666666667e-07, |
|
"loss": 2.0503, |
|
"step": 1264000 |
|
}, |
|
{ |
|
"epoch": 25.9, |
|
"eval_loss": 1.903662085533142, |
|
"eval_runtime": 45.1825, |
|
"eval_samples_per_second": 909.689, |
|
"eval_steps_per_second": 56.858, |
|
"step": 1264000 |
|
}, |
|
{ |
|
"epoch": 26.06, |
|
"eval_loss": 1.904166579246521, |
|
"eval_runtime": 44.9558, |
|
"eval_samples_per_second": 914.275, |
|
"eval_steps_per_second": 57.145, |
|
"step": 1272000 |
|
}, |
|
{ |
|
"epoch": 26.22, |
|
"learning_rate": 1.9133333333333333e-07, |
|
"loss": 2.0418, |
|
"step": 1280000 |
|
}, |
|
{ |
|
"epoch": 26.22, |
|
"eval_loss": 1.896562933921814, |
|
"eval_runtime": 44.9261, |
|
"eval_samples_per_second": 914.881, |
|
"eval_steps_per_second": 57.183, |
|
"step": 1280000 |
|
}, |
|
{ |
|
"epoch": 26.39, |
|
"eval_loss": 1.9186903238296509, |
|
"eval_runtime": 45.6851, |
|
"eval_samples_per_second": 899.681, |
|
"eval_steps_per_second": 56.233, |
|
"step": 1288000 |
|
}, |
|
{ |
|
"epoch": 26.55, |
|
"learning_rate": 1.886e-07, |
|
"loss": 2.0416, |
|
"step": 1296000 |
|
}, |
|
{ |
|
"epoch": 26.55, |
|
"eval_loss": 1.9097530841827393, |
|
"eval_runtime": 45.1674, |
|
"eval_samples_per_second": 909.992, |
|
"eval_steps_per_second": 56.877, |
|
"step": 1296000 |
|
}, |
|
{ |
|
"epoch": 26.72, |
|
"eval_loss": 1.9153447151184082, |
|
"eval_runtime": 45.0006, |
|
"eval_samples_per_second": 913.366, |
|
"eval_steps_per_second": 57.088, |
|
"step": 1304000 |
|
}, |
|
{ |
|
"epoch": 26.88, |
|
"learning_rate": 1.8586666666666666e-07, |
|
"loss": 2.0396, |
|
"step": 1312000 |
|
}, |
|
{ |
|
"epoch": 26.88, |
|
"eval_loss": 1.9163771867752075, |
|
"eval_runtime": 45.1212, |
|
"eval_samples_per_second": 910.924, |
|
"eval_steps_per_second": 56.936, |
|
"step": 1312000 |
|
}, |
|
{ |
|
"epoch": 27.04, |
|
"eval_loss": 1.886674404144287, |
|
"eval_runtime": 45.5153, |
|
"eval_samples_per_second": 903.038, |
|
"eval_steps_per_second": 56.443, |
|
"step": 1320000 |
|
}, |
|
{ |
|
"epoch": 27.21, |
|
"learning_rate": 1.8313333333333332e-07, |
|
"loss": 2.0397, |
|
"step": 1328000 |
|
}, |
|
{ |
|
"epoch": 27.21, |
|
"eval_loss": 1.8968819379806519, |
|
"eval_runtime": 45.2936, |
|
"eval_samples_per_second": 907.458, |
|
"eval_steps_per_second": 56.719, |
|
"step": 1328000 |
|
}, |
|
{ |
|
"epoch": 27.37, |
|
"eval_loss": 1.915518879890442, |
|
"eval_runtime": 45.0557, |
|
"eval_samples_per_second": 912.249, |
|
"eval_steps_per_second": 57.018, |
|
"step": 1336000 |
|
}, |
|
{ |
|
"epoch": 27.54, |
|
"learning_rate": 1.804e-07, |
|
"loss": 2.0442, |
|
"step": 1344000 |
|
}, |
|
{ |
|
"epoch": 27.54, |
|
"eval_loss": 1.9004348516464233, |
|
"eval_runtime": 45.9864, |
|
"eval_samples_per_second": 893.785, |
|
"eval_steps_per_second": 55.864, |
|
"step": 1344000 |
|
}, |
|
{ |
|
"epoch": 27.7, |
|
"eval_loss": 1.9026644229888916, |
|
"eval_runtime": 45.0271, |
|
"eval_samples_per_second": 912.829, |
|
"eval_steps_per_second": 57.055, |
|
"step": 1352000 |
|
}, |
|
{ |
|
"epoch": 27.86, |
|
"learning_rate": 1.7766666666666666e-07, |
|
"loss": 2.0332, |
|
"step": 1360000 |
|
}, |
|
{ |
|
"epoch": 27.86, |
|
"eval_loss": 1.9095487594604492, |
|
"eval_runtime": 45.6627, |
|
"eval_samples_per_second": 900.123, |
|
"eval_steps_per_second": 56.26, |
|
"step": 1360000 |
|
}, |
|
{ |
|
"epoch": 28.03, |
|
"eval_loss": 1.9133695363998413, |
|
"eval_runtime": 45.1776, |
|
"eval_samples_per_second": 909.786, |
|
"eval_steps_per_second": 56.864, |
|
"step": 1368000 |
|
}, |
|
{ |
|
"epoch": 28.19, |
|
"learning_rate": 1.7493333333333334e-07, |
|
"loss": 2.0398, |
|
"step": 1376000 |
|
}, |
|
{ |
|
"epoch": 28.19, |
|
"eval_loss": 1.9082748889923096, |
|
"eval_runtime": 45.8093, |
|
"eval_samples_per_second": 897.242, |
|
"eval_steps_per_second": 56.08, |
|
"step": 1376000 |
|
}, |
|
{ |
|
"epoch": 28.36, |
|
"eval_loss": 1.9041306972503662, |
|
"eval_runtime": 45.2392, |
|
"eval_samples_per_second": 908.549, |
|
"eval_steps_per_second": 56.787, |
|
"step": 1384000 |
|
}, |
|
{ |
|
"epoch": 28.52, |
|
"learning_rate": 1.722e-07, |
|
"loss": 2.0387, |
|
"step": 1392000 |
|
}, |
|
{ |
|
"epoch": 28.52, |
|
"eval_loss": 1.8979713916778564, |
|
"eval_runtime": 44.9884, |
|
"eval_samples_per_second": 913.613, |
|
"eval_steps_per_second": 57.104, |
|
"step": 1392000 |
|
}, |
|
{ |
|
"epoch": 28.68, |
|
"eval_loss": 1.9209250211715698, |
|
"eval_runtime": 44.9769, |
|
"eval_samples_per_second": 913.847, |
|
"eval_steps_per_second": 57.118, |
|
"step": 1400000 |
|
}, |
|
{ |
|
"epoch": 28.85, |
|
"learning_rate": 1.6946666666666668e-07, |
|
"loss": 2.0378, |
|
"step": 1408000 |
|
}, |
|
{ |
|
"epoch": 28.85, |
|
"eval_loss": 1.896246314048767, |
|
"eval_runtime": 45.6981, |
|
"eval_samples_per_second": 899.425, |
|
"eval_steps_per_second": 56.217, |
|
"step": 1408000 |
|
}, |
|
{ |
|
"epoch": 29.01, |
|
"eval_loss": 1.8981308937072754, |
|
"eval_runtime": 45.1348, |
|
"eval_samples_per_second": 910.65, |
|
"eval_steps_per_second": 56.918, |
|
"step": 1416000 |
|
}, |
|
{ |
|
"epoch": 29.17, |
|
"learning_rate": 1.6673333333333333e-07, |
|
"loss": 2.0359, |
|
"step": 1424000 |
|
}, |
|
{ |
|
"epoch": 29.17, |
|
"eval_loss": 1.9077860116958618, |
|
"eval_runtime": 45.2141, |
|
"eval_samples_per_second": 909.054, |
|
"eval_steps_per_second": 56.819, |
|
"step": 1424000 |
|
}, |
|
{ |
|
"epoch": 29.34, |
|
"eval_loss": 1.8961683511734009, |
|
"eval_runtime": 45.0582, |
|
"eval_samples_per_second": 912.197, |
|
"eval_steps_per_second": 57.015, |
|
"step": 1432000 |
|
}, |
|
{ |
|
"epoch": 29.5, |
|
"learning_rate": 1.64e-07, |
|
"loss": 2.0357, |
|
"step": 1440000 |
|
}, |
|
{ |
|
"epoch": 29.5, |
|
"eval_loss": 1.8842545747756958, |
|
"eval_runtime": 45.1031, |
|
"eval_samples_per_second": 911.291, |
|
"eval_steps_per_second": 56.958, |
|
"step": 1440000 |
|
}, |
|
{ |
|
"epoch": 29.67, |
|
"eval_loss": 1.9157154560089111, |
|
"eval_runtime": 46.1984, |
|
"eval_samples_per_second": 889.685, |
|
"eval_steps_per_second": 55.608, |
|
"step": 1448000 |
|
}, |
|
{ |
|
"epoch": 29.83, |
|
"learning_rate": 1.6126666666666667e-07, |
|
"loss": 2.0367, |
|
"step": 1456000 |
|
}, |
|
{ |
|
"epoch": 29.83, |
|
"eval_loss": 1.9277722835540771, |
|
"eval_runtime": 44.9891, |
|
"eval_samples_per_second": 913.6, |
|
"eval_steps_per_second": 57.103, |
|
"step": 1456000 |
|
}, |
|
{ |
|
"epoch": 29.99, |
|
"eval_loss": 1.900920033454895, |
|
"eval_runtime": 45.4927, |
|
"eval_samples_per_second": 903.485, |
|
"eval_steps_per_second": 56.471, |
|
"step": 1464000 |
|
}, |
|
{ |
|
"epoch": 30.16, |
|
"learning_rate": 1.5853333333333332e-07, |
|
"loss": 2.0442, |
|
"step": 1472000 |
|
}, |
|
{ |
|
"epoch": 30.16, |
|
"eval_loss": 1.8968621492385864, |
|
"eval_runtime": 45.254, |
|
"eval_samples_per_second": 908.251, |
|
"eval_steps_per_second": 56.768, |
|
"step": 1472000 |
|
}, |
|
{ |
|
"epoch": 30.32, |
|
"eval_loss": 1.908553123474121, |
|
"eval_runtime": 45.9278, |
|
"eval_samples_per_second": 894.927, |
|
"eval_steps_per_second": 55.936, |
|
"step": 1480000 |
|
}, |
|
{ |
|
"epoch": 30.49, |
|
"learning_rate": 1.558e-07, |
|
"loss": 2.0401, |
|
"step": 1488000 |
|
}, |
|
{ |
|
"epoch": 30.49, |
|
"eval_loss": 1.9059042930603027, |
|
"eval_runtime": 45.2972, |
|
"eval_samples_per_second": 907.384, |
|
"eval_steps_per_second": 56.714, |
|
"step": 1488000 |
|
}, |
|
{ |
|
"epoch": 30.65, |
|
"eval_loss": 1.8996624946594238, |
|
"eval_runtime": 45.1056, |
|
"eval_samples_per_second": 911.24, |
|
"eval_steps_per_second": 56.955, |
|
"step": 1496000 |
|
}, |
|
{ |
|
"epoch": 30.81, |
|
"learning_rate": 1.5306666666666666e-07, |
|
"loss": 2.0293, |
|
"step": 1504000 |
|
}, |
|
{ |
|
"epoch": 30.81, |
|
"eval_loss": 1.9013988971710205, |
|
"eval_runtime": 45.2081, |
|
"eval_samples_per_second": 909.174, |
|
"eval_steps_per_second": 56.826, |
|
"step": 1504000 |
|
}, |
|
{ |
|
"epoch": 30.98, |
|
"eval_loss": 1.8971112966537476, |
|
"eval_runtime": 45.1495, |
|
"eval_samples_per_second": 910.354, |
|
"eval_steps_per_second": 56.9, |
|
"step": 1512000 |
|
}, |
|
{ |
|
"epoch": 31.14, |
|
"learning_rate": 1.5033333333333332e-07, |
|
"loss": 2.035, |
|
"step": 1520000 |
|
}, |
|
{ |
|
"epoch": 31.14, |
|
"eval_loss": 1.911407470703125, |
|
"eval_runtime": 45.2705, |
|
"eval_samples_per_second": 907.92, |
|
"eval_steps_per_second": 56.748, |
|
"step": 1520000 |
|
}, |
|
{ |
|
"epoch": 31.31, |
|
"eval_loss": 1.9107669591903687, |
|
"eval_runtime": 45.8007, |
|
"eval_samples_per_second": 897.409, |
|
"eval_steps_per_second": 56.091, |
|
"step": 1528000 |
|
}, |
|
{ |
|
"epoch": 31.47, |
|
"learning_rate": 1.476e-07, |
|
"loss": 2.0389, |
|
"step": 1536000 |
|
}, |
|
{ |
|
"epoch": 31.47, |
|
"eval_loss": 1.897054672241211, |
|
"eval_runtime": 45.5487, |
|
"eval_samples_per_second": 902.375, |
|
"eval_steps_per_second": 56.401, |
|
"step": 1536000 |
|
}, |
|
{ |
|
"epoch": 31.63, |
|
"eval_loss": 1.9081885814666748, |
|
"eval_runtime": 45.7595, |
|
"eval_samples_per_second": 898.218, |
|
"eval_steps_per_second": 56.141, |
|
"step": 1544000 |
|
}, |
|
{ |
|
"epoch": 31.8, |
|
"learning_rate": 1.4486666666666665e-07, |
|
"loss": 2.0346, |
|
"step": 1552000 |
|
}, |
|
{ |
|
"epoch": 31.8, |
|
"eval_loss": 1.9208694696426392, |
|
"eval_runtime": 45.3969, |
|
"eval_samples_per_second": 905.391, |
|
"eval_steps_per_second": 56.59, |
|
"step": 1552000 |
|
}, |
|
{ |
|
"epoch": 31.96, |
|
"eval_loss": 1.9018235206604004, |
|
"eval_runtime": 45.7851, |
|
"eval_samples_per_second": 897.717, |
|
"eval_steps_per_second": 56.11, |
|
"step": 1560000 |
|
}, |
|
{ |
|
"epoch": 32.13, |
|
"learning_rate": 1.4213333333333334e-07, |
|
"loss": 2.0428, |
|
"step": 1568000 |
|
}, |
|
{ |
|
"epoch": 32.13, |
|
"eval_loss": 1.8987597227096558, |
|
"eval_runtime": 45.4714, |
|
"eval_samples_per_second": 903.908, |
|
"eval_steps_per_second": 56.497, |
|
"step": 1568000 |
|
}, |
|
{ |
|
"epoch": 32.29, |
|
"eval_loss": 1.9089306592941284, |
|
"eval_runtime": 46.3208, |
|
"eval_samples_per_second": 887.333, |
|
"eval_steps_per_second": 55.461, |
|
"step": 1576000 |
|
}, |
|
{ |
|
"epoch": 32.45, |
|
"learning_rate": 1.3940000000000002e-07, |
|
"loss": 2.0286, |
|
"step": 1584000 |
|
}, |
|
{ |
|
"epoch": 32.45, |
|
"eval_loss": 1.8982652425765991, |
|
"eval_runtime": 45.8114, |
|
"eval_samples_per_second": 897.2, |
|
"eval_steps_per_second": 56.078, |
|
"step": 1584000 |
|
}, |
|
{ |
|
"epoch": 32.62, |
|
"eval_loss": 1.9029755592346191, |
|
"eval_runtime": 46.5806, |
|
"eval_samples_per_second": 882.385, |
|
"eval_steps_per_second": 55.152, |
|
"step": 1592000 |
|
}, |
|
{ |
|
"epoch": 32.78, |
|
"learning_rate": 1.3666666666666665e-07, |
|
"loss": 2.037, |
|
"step": 1600000 |
|
}, |
|
{ |
|
"epoch": 32.78, |
|
"eval_loss": 1.9041838645935059, |
|
"eval_runtime": 46.254, |
|
"eval_samples_per_second": 888.615, |
|
"eval_steps_per_second": 55.541, |
|
"step": 1600000 |
|
}, |
|
{ |
|
"epoch": 32.94, |
|
"eval_loss": 1.9021574258804321, |
|
"eval_runtime": 46.3868, |
|
"eval_samples_per_second": 886.07, |
|
"eval_steps_per_second": 55.382, |
|
"step": 1608000 |
|
}, |
|
{ |
|
"epoch": 33.11, |
|
"learning_rate": 1.3393333333333333e-07, |
|
"loss": 2.0348, |
|
"step": 1616000 |
|
}, |
|
{ |
|
"epoch": 33.11, |
|
"eval_loss": 1.8987573385238647, |
|
"eval_runtime": 46.3647, |
|
"eval_samples_per_second": 886.494, |
|
"eval_steps_per_second": 55.409, |
|
"step": 1616000 |
|
}, |
|
{ |
|
"epoch": 33.27, |
|
"eval_loss": 1.9159427881240845, |
|
"eval_runtime": 47.3206, |
|
"eval_samples_per_second": 868.587, |
|
"eval_steps_per_second": 54.289, |
|
"step": 1624000 |
|
}, |
|
{ |
|
"epoch": 33.44, |
|
"learning_rate": 1.312e-07, |
|
"loss": 2.042, |
|
"step": 1632000 |
|
}, |
|
{ |
|
"epoch": 33.44, |
|
"eval_loss": 1.8933523893356323, |
|
"eval_runtime": 46.2105, |
|
"eval_samples_per_second": 889.452, |
|
"eval_steps_per_second": 55.593, |
|
"step": 1632000 |
|
}, |
|
{ |
|
"epoch": 33.6, |
|
"eval_loss": 1.890829086303711, |
|
"eval_runtime": 45.7149, |
|
"eval_samples_per_second": 899.094, |
|
"eval_steps_per_second": 56.196, |
|
"step": 1640000 |
|
}, |
|
{ |
|
"epoch": 33.76, |
|
"learning_rate": 1.2846666666666667e-07, |
|
"loss": 2.0426, |
|
"step": 1648000 |
|
}, |
|
{ |
|
"epoch": 33.76, |
|
"eval_loss": 1.88780677318573, |
|
"eval_runtime": 46.0333, |
|
"eval_samples_per_second": 892.876, |
|
"eval_steps_per_second": 55.807, |
|
"step": 1648000 |
|
}, |
|
{ |
|
"epoch": 33.93, |
|
"eval_loss": 1.8881806135177612, |
|
"eval_runtime": 46.0263, |
|
"eval_samples_per_second": 893.012, |
|
"eval_steps_per_second": 55.816, |
|
"step": 1656000 |
|
}, |
|
{ |
|
"epoch": 34.09, |
|
"learning_rate": 1.2573333333333332e-07, |
|
"loss": 2.0293, |
|
"step": 1664000 |
|
}, |
|
{ |
|
"epoch": 34.09, |
|
"eval_loss": 1.9031046628952026, |
|
"eval_runtime": 45.8901, |
|
"eval_samples_per_second": 895.662, |
|
"eval_steps_per_second": 55.982, |
|
"step": 1664000 |
|
}, |
|
{ |
|
"epoch": 34.26, |
|
"eval_loss": 1.9005811214447021, |
|
"eval_runtime": 45.915, |
|
"eval_samples_per_second": 895.177, |
|
"eval_steps_per_second": 55.951, |
|
"step": 1672000 |
|
}, |
|
{ |
|
"epoch": 34.42, |
|
"learning_rate": 1.23e-07, |
|
"loss": 2.0401, |
|
"step": 1680000 |
|
}, |
|
{ |
|
"epoch": 34.42, |
|
"eval_loss": 1.9066253900527954, |
|
"eval_runtime": 46.1755, |
|
"eval_samples_per_second": 890.126, |
|
"eval_steps_per_second": 55.636, |
|
"step": 1680000 |
|
}, |
|
{ |
|
"epoch": 34.58, |
|
"eval_loss": 1.8970048427581787, |
|
"eval_runtime": 45.9657, |
|
"eval_samples_per_second": 894.188, |
|
"eval_steps_per_second": 55.889, |
|
"step": 1688000 |
|
}, |
|
{ |
|
"epoch": 34.75, |
|
"learning_rate": 1.2026666666666666e-07, |
|
"loss": 2.0315, |
|
"step": 1696000 |
|
}, |
|
{ |
|
"epoch": 34.75, |
|
"eval_loss": 1.895332932472229, |
|
"eval_runtime": 45.6192, |
|
"eval_samples_per_second": 900.98, |
|
"eval_steps_per_second": 56.314, |
|
"step": 1696000 |
|
}, |
|
{ |
|
"epoch": 34.91, |
|
"eval_loss": 1.9029524326324463, |
|
"eval_runtime": 45.1831, |
|
"eval_samples_per_second": 909.676, |
|
"eval_steps_per_second": 56.857, |
|
"step": 1704000 |
|
}, |
|
{ |
|
"epoch": 35.08, |
|
"learning_rate": 1.1753333333333334e-07, |
|
"loss": 2.0393, |
|
"step": 1712000 |
|
}, |
|
{ |
|
"epoch": 35.08, |
|
"eval_loss": 1.908921480178833, |
|
"eval_runtime": 45.6994, |
|
"eval_samples_per_second": 899.399, |
|
"eval_steps_per_second": 56.215, |
|
"step": 1712000 |
|
}, |
|
{ |
|
"epoch": 35.24, |
|
"eval_loss": 1.9037113189697266, |
|
"eval_runtime": 45.2722, |
|
"eval_samples_per_second": 907.887, |
|
"eval_steps_per_second": 56.746, |
|
"step": 1720000 |
|
}, |
|
{ |
|
"epoch": 35.4, |
|
"learning_rate": 1.1480000000000001e-07, |
|
"loss": 2.0422, |
|
"step": 1728000 |
|
}, |
|
{ |
|
"epoch": 35.4, |
|
"eval_loss": 1.8991347551345825, |
|
"eval_runtime": 46.062, |
|
"eval_samples_per_second": 892.319, |
|
"eval_steps_per_second": 55.773, |
|
"step": 1728000 |
|
}, |
|
{ |
|
"epoch": 35.57, |
|
"eval_loss": 1.8825979232788086, |
|
"eval_runtime": 45.3033, |
|
"eval_samples_per_second": 907.262, |
|
"eval_steps_per_second": 56.707, |
|
"step": 1736000 |
|
}, |
|
{ |
|
"epoch": 35.73, |
|
"learning_rate": 1.1206666666666666e-07, |
|
"loss": 2.0307, |
|
"step": 1744000 |
|
}, |
|
{ |
|
"epoch": 35.73, |
|
"eval_loss": 1.9027307033538818, |
|
"eval_runtime": 45.5838, |
|
"eval_samples_per_second": 901.681, |
|
"eval_steps_per_second": 56.358, |
|
"step": 1744000 |
|
}, |
|
{ |
|
"epoch": 35.9, |
|
"eval_loss": 1.9172565937042236, |
|
"eval_runtime": 45.2197, |
|
"eval_samples_per_second": 908.94, |
|
"eval_steps_per_second": 56.811, |
|
"step": 1752000 |
|
}, |
|
{ |
|
"epoch": 36.06, |
|
"learning_rate": 1.0933333333333333e-07, |
|
"loss": 2.0414, |
|
"step": 1760000 |
|
}, |
|
{ |
|
"epoch": 36.06, |
|
"eval_loss": 1.9065029621124268, |
|
"eval_runtime": 45.5684, |
|
"eval_samples_per_second": 901.985, |
|
"eval_steps_per_second": 56.377, |
|
"step": 1760000 |
|
}, |
|
{ |
|
"epoch": 36.22, |
|
"eval_loss": 1.8984663486480713, |
|
"eval_runtime": 45.1302, |
|
"eval_samples_per_second": 910.743, |
|
"eval_steps_per_second": 56.924, |
|
"step": 1768000 |
|
}, |
|
{ |
|
"epoch": 36.39, |
|
"learning_rate": 1.066e-07, |
|
"loss": 2.0393, |
|
"step": 1776000 |
|
}, |
|
{ |
|
"epoch": 36.39, |
|
"eval_loss": 1.884873628616333, |
|
"eval_runtime": 45.3848, |
|
"eval_samples_per_second": 905.634, |
|
"eval_steps_per_second": 56.605, |
|
"step": 1776000 |
|
}, |
|
{ |
|
"epoch": 36.55, |
|
"eval_loss": 1.8944772481918335, |
|
"eval_runtime": 45.1483, |
|
"eval_samples_per_second": 910.378, |
|
"eval_steps_per_second": 56.901, |
|
"step": 1784000 |
|
}, |
|
{ |
|
"epoch": 36.71, |
|
"learning_rate": 1.0386666666666667e-07, |
|
"loss": 2.0302, |
|
"step": 1792000 |
|
}, |
|
{ |
|
"epoch": 36.71, |
|
"eval_loss": 1.8962441682815552, |
|
"eval_runtime": 46.8165, |
|
"eval_samples_per_second": 877.938, |
|
"eval_steps_per_second": 54.874, |
|
"step": 1792000 |
|
}, |
|
{ |
|
"epoch": 36.88, |
|
"eval_loss": 1.901509165763855, |
|
"eval_runtime": 46.5098, |
|
"eval_samples_per_second": 883.727, |
|
"eval_steps_per_second": 55.236, |
|
"step": 1800000 |
|
}, |
|
{ |
|
"epoch": 37.04, |
|
"learning_rate": 1.0113333333333334e-07, |
|
"loss": 2.0344, |
|
"step": 1808000 |
|
}, |
|
{ |
|
"epoch": 37.04, |
|
"eval_loss": 1.8923826217651367, |
|
"eval_runtime": 47.0536, |
|
"eval_samples_per_second": 873.514, |
|
"eval_steps_per_second": 54.597, |
|
"step": 1808000 |
|
}, |
|
{ |
|
"epoch": 37.21, |
|
"eval_loss": 1.8804473876953125, |
|
"eval_runtime": 45.649, |
|
"eval_samples_per_second": 900.393, |
|
"eval_steps_per_second": 56.277, |
|
"step": 1816000 |
|
}, |
|
{ |
|
"epoch": 37.37, |
|
"learning_rate": 9.84e-08, |
|
"loss": 2.0303, |
|
"step": 1824000 |
|
}, |
|
{ |
|
"epoch": 37.37, |
|
"eval_loss": 1.894727349281311, |
|
"eval_runtime": 45.669, |
|
"eval_samples_per_second": 899.998, |
|
"eval_steps_per_second": 56.253, |
|
"step": 1824000 |
|
}, |
|
{ |
|
"epoch": 37.53, |
|
"eval_loss": 1.891680121421814, |
|
"eval_runtime": 45.9688, |
|
"eval_samples_per_second": 894.128, |
|
"eval_steps_per_second": 55.886, |
|
"step": 1832000 |
|
}, |
|
{ |
|
"epoch": 37.7, |
|
"learning_rate": 9.566666666666666e-08, |
|
"loss": 2.0318, |
|
"step": 1840000 |
|
}, |
|
{ |
|
"epoch": 37.7, |
|
"eval_loss": 1.8992102146148682, |
|
"eval_runtime": 46.1777, |
|
"eval_samples_per_second": 890.083, |
|
"eval_steps_per_second": 55.633, |
|
"step": 1840000 |
|
}, |
|
{ |
|
"epoch": 37.86, |
|
"eval_loss": 1.901772379875183, |
|
"eval_runtime": 46.2844, |
|
"eval_samples_per_second": 888.031, |
|
"eval_steps_per_second": 55.505, |
|
"step": 1848000 |
|
}, |
|
{ |
|
"epoch": 38.03, |
|
"learning_rate": 9.293333333333333e-08, |
|
"loss": 2.039, |
|
"step": 1856000 |
|
}, |
|
{ |
|
"epoch": 38.03, |
|
"eval_loss": 1.8903098106384277, |
|
"eval_runtime": 47.0109, |
|
"eval_samples_per_second": 874.308, |
|
"eval_steps_per_second": 54.647, |
|
"step": 1856000 |
|
}, |
|
{ |
|
"epoch": 38.19, |
|
"eval_loss": 1.9088859558105469, |
|
"eval_runtime": 45.4014, |
|
"eval_samples_per_second": 905.303, |
|
"eval_steps_per_second": 56.584, |
|
"step": 1864000 |
|
}, |
|
{ |
|
"epoch": 38.35, |
|
"learning_rate": 9.02e-08, |
|
"loss": 2.0319, |
|
"step": 1872000 |
|
}, |
|
{ |
|
"epoch": 38.35, |
|
"eval_loss": 1.9092743396759033, |
|
"eval_runtime": 45.713, |
|
"eval_samples_per_second": 899.132, |
|
"eval_steps_per_second": 56.198, |
|
"step": 1872000 |
|
}, |
|
{ |
|
"epoch": 38.52, |
|
"eval_loss": 1.895071268081665, |
|
"eval_runtime": 46.0527, |
|
"eval_samples_per_second": 892.499, |
|
"eval_steps_per_second": 55.784, |
|
"step": 1880000 |
|
}, |
|
{ |
|
"epoch": 38.68, |
|
"learning_rate": 8.746666666666667e-08, |
|
"loss": 2.0359, |
|
"step": 1888000 |
|
}, |
|
{ |
|
"epoch": 38.68, |
|
"eval_loss": 1.8970472812652588, |
|
"eval_runtime": 45.2938, |
|
"eval_samples_per_second": 907.454, |
|
"eval_steps_per_second": 56.719, |
|
"step": 1888000 |
|
}, |
|
{ |
|
"epoch": 38.85, |
|
"eval_loss": 1.89948308467865, |
|
"eval_runtime": 46.012, |
|
"eval_samples_per_second": 893.288, |
|
"eval_steps_per_second": 55.833, |
|
"step": 1896000 |
|
}, |
|
{ |
|
"epoch": 39.01, |
|
"learning_rate": 8.473333333333334e-08, |
|
"loss": 2.0353, |
|
"step": 1904000 |
|
}, |
|
{ |
|
"epoch": 39.01, |
|
"eval_loss": 1.8887970447540283, |
|
"eval_runtime": 45.8877, |
|
"eval_samples_per_second": 895.709, |
|
"eval_steps_per_second": 55.985, |
|
"step": 1904000 |
|
}, |
|
{ |
|
"epoch": 39.17, |
|
"eval_loss": 1.914867877960205, |
|
"eval_runtime": 45.5355, |
|
"eval_samples_per_second": 902.636, |
|
"eval_steps_per_second": 56.418, |
|
"step": 1912000 |
|
}, |
|
{ |
|
"epoch": 39.34, |
|
"learning_rate": 8.2e-08, |
|
"loss": 2.0343, |
|
"step": 1920000 |
|
}, |
|
{ |
|
"epoch": 39.34, |
|
"eval_loss": 1.8881230354309082, |
|
"eval_runtime": 46.2793, |
|
"eval_samples_per_second": 888.13, |
|
"eval_steps_per_second": 55.511, |
|
"step": 1920000 |
|
}, |
|
{ |
|
"epoch": 39.5, |
|
"eval_loss": 1.8935189247131348, |
|
"eval_runtime": 46.1086, |
|
"eval_samples_per_second": 891.417, |
|
"eval_steps_per_second": 55.716, |
|
"step": 1928000 |
|
}, |
|
{ |
|
"epoch": 39.66, |
|
"learning_rate": 7.926666666666666e-08, |
|
"loss": 2.0395, |
|
"step": 1936000 |
|
}, |
|
{ |
|
"epoch": 39.66, |
|
"eval_loss": 1.8938074111938477, |
|
"eval_runtime": 45.8793, |
|
"eval_samples_per_second": 895.872, |
|
"eval_steps_per_second": 55.995, |
|
"step": 1936000 |
|
}, |
|
{ |
|
"epoch": 39.83, |
|
"eval_loss": 1.8929249048233032, |
|
"eval_runtime": 46.149, |
|
"eval_samples_per_second": 890.637, |
|
"eval_steps_per_second": 55.668, |
|
"step": 1944000 |
|
}, |
|
{ |
|
"epoch": 39.99, |
|
"learning_rate": 7.653333333333333e-08, |
|
"loss": 2.0316, |
|
"step": 1952000 |
|
}, |
|
{ |
|
"epoch": 39.99, |
|
"eval_loss": 1.9186030626296997, |
|
"eval_runtime": 45.3993, |
|
"eval_samples_per_second": 905.345, |
|
"eval_steps_per_second": 56.587, |
|
"step": 1952000 |
|
}, |
|
{ |
|
"epoch": 40.16, |
|
"eval_loss": 1.9189401865005493, |
|
"eval_runtime": 46.9208, |
|
"eval_samples_per_second": 875.987, |
|
"eval_steps_per_second": 54.752, |
|
"step": 1960000 |
|
}, |
|
{ |
|
"epoch": 40.32, |
|
"learning_rate": 7.38e-08, |
|
"loss": 2.0302, |
|
"step": 1968000 |
|
}, |
|
{ |
|
"epoch": 40.32, |
|
"eval_loss": 1.9124609231948853, |
|
"eval_runtime": 46.2172, |
|
"eval_samples_per_second": 889.322, |
|
"eval_steps_per_second": 55.585, |
|
"step": 1968000 |
|
}, |
|
{ |
|
"epoch": 40.48, |
|
"eval_loss": 1.9077569246292114, |
|
"eval_runtime": 46.4482, |
|
"eval_samples_per_second": 884.901, |
|
"eval_steps_per_second": 55.309, |
|
"step": 1976000 |
|
}, |
|
{ |
|
"epoch": 40.65, |
|
"learning_rate": 7.106666666666667e-08, |
|
"loss": 2.0355, |
|
"step": 1984000 |
|
}, |
|
{ |
|
"epoch": 40.65, |
|
"eval_loss": 1.8975083827972412, |
|
"eval_runtime": 45.4061, |
|
"eval_samples_per_second": 905.208, |
|
"eval_steps_per_second": 56.578, |
|
"step": 1984000 |
|
}, |
|
{ |
|
"epoch": 40.81, |
|
"eval_loss": 1.892892837524414, |
|
"eval_runtime": 45.6207, |
|
"eval_samples_per_second": 900.951, |
|
"eval_steps_per_second": 56.312, |
|
"step": 1992000 |
|
}, |
|
{ |
|
"epoch": 40.98, |
|
"learning_rate": 6.833333333333332e-08, |
|
"loss": 2.0332, |
|
"step": 2000000 |
|
}, |
|
{ |
|
"epoch": 40.98, |
|
"eval_loss": 1.8898950815200806, |
|
"eval_runtime": 45.4778, |
|
"eval_samples_per_second": 903.782, |
|
"eval_steps_per_second": 56.489, |
|
"step": 2000000 |
|
}, |
|
{ |
|
"epoch": 41.14, |
|
"eval_loss": 1.9043201208114624, |
|
"eval_runtime": 45.2865, |
|
"eval_samples_per_second": 907.599, |
|
"eval_steps_per_second": 56.728, |
|
"step": 2008000 |
|
}, |
|
{ |
|
"epoch": 41.3, |
|
"learning_rate": 6.56e-08, |
|
"loss": 2.0327, |
|
"step": 2016000 |
|
}, |
|
{ |
|
"epoch": 41.3, |
|
"eval_loss": 1.9086461067199707, |
|
"eval_runtime": 45.7752, |
|
"eval_samples_per_second": 897.91, |
|
"eval_steps_per_second": 56.122, |
|
"step": 2016000 |
|
}, |
|
{ |
|
"epoch": 41.47, |
|
"eval_loss": 1.894409418106079, |
|
"eval_runtime": 45.9308, |
|
"eval_samples_per_second": 894.867, |
|
"eval_steps_per_second": 55.932, |
|
"step": 2024000 |
|
}, |
|
{ |
|
"epoch": 41.63, |
|
"learning_rate": 6.286666666666666e-08, |
|
"loss": 2.0414, |
|
"step": 2032000 |
|
}, |
|
{ |
|
"epoch": 41.63, |
|
"eval_loss": 1.9029189348220825, |
|
"eval_runtime": 45.9761, |
|
"eval_samples_per_second": 893.986, |
|
"eval_steps_per_second": 55.877, |
|
"step": 2032000 |
|
}, |
|
{ |
|
"epoch": 41.8, |
|
"eval_loss": 1.8990041017532349, |
|
"eval_runtime": 46.0838, |
|
"eval_samples_per_second": 891.897, |
|
"eval_steps_per_second": 55.746, |
|
"step": 2040000 |
|
}, |
|
{ |
|
"epoch": 41.96, |
|
"learning_rate": 6.013333333333333e-08, |
|
"loss": 2.0327, |
|
"step": 2048000 |
|
}, |
|
{ |
|
"epoch": 41.96, |
|
"eval_loss": 1.9174869060516357, |
|
"eval_runtime": 46.0447, |
|
"eval_samples_per_second": 892.655, |
|
"eval_steps_per_second": 55.794, |
|
"step": 2048000 |
|
}, |
|
{ |
|
"epoch": 42.12, |
|
"eval_loss": 1.88877534866333, |
|
"eval_runtime": 45.3492, |
|
"eval_samples_per_second": 906.344, |
|
"eval_steps_per_second": 56.649, |
|
"step": 2056000 |
|
}, |
|
{ |
|
"epoch": 42.29, |
|
"learning_rate": 5.7400000000000004e-08, |
|
"loss": 2.0428, |
|
"step": 2064000 |
|
}, |
|
{ |
|
"epoch": 42.29, |
|
"eval_loss": 1.8971478939056396, |
|
"eval_runtime": 46.1829, |
|
"eval_samples_per_second": 889.983, |
|
"eval_steps_per_second": 55.627, |
|
"step": 2064000 |
|
}, |
|
{ |
|
"epoch": 42.45, |
|
"eval_loss": 1.90713369846344, |
|
"eval_runtime": 46.2493, |
|
"eval_samples_per_second": 888.706, |
|
"eval_steps_per_second": 55.547, |
|
"step": 2072000 |
|
}, |
|
{ |
|
"epoch": 42.62, |
|
"learning_rate": 5.4666666666666666e-08, |
|
"loss": 2.0337, |
|
"step": 2080000 |
|
}, |
|
{ |
|
"epoch": 42.62, |
|
"eval_loss": 1.8967806100845337, |
|
"eval_runtime": 45.4829, |
|
"eval_samples_per_second": 903.679, |
|
"eval_steps_per_second": 56.483, |
|
"step": 2080000 |
|
}, |
|
{ |
|
"epoch": 42.78, |
|
"eval_loss": 1.9059593677520752, |
|
"eval_runtime": 45.3661, |
|
"eval_samples_per_second": 906.008, |
|
"eval_steps_per_second": 56.628, |
|
"step": 2088000 |
|
}, |
|
{ |
|
"epoch": 42.94, |
|
"learning_rate": 5.1933333333333335e-08, |
|
"loss": 2.0394, |
|
"step": 2096000 |
|
}, |
|
{ |
|
"epoch": 42.94, |
|
"eval_loss": 1.902763843536377, |
|
"eval_runtime": 46.1023, |
|
"eval_samples_per_second": 891.539, |
|
"eval_steps_per_second": 55.724, |
|
"step": 2096000 |
|
}, |
|
{ |
|
"epoch": 43.11, |
|
"eval_loss": 1.8916599750518799, |
|
"eval_runtime": 46.167, |
|
"eval_samples_per_second": 890.289, |
|
"eval_steps_per_second": 55.646, |
|
"step": 2104000 |
|
}, |
|
{ |
|
"epoch": 43.27, |
|
"learning_rate": 4.92e-08, |
|
"loss": 2.0314, |
|
"step": 2112000 |
|
}, |
|
{ |
|
"epoch": 43.27, |
|
"eval_loss": 1.8868290185928345, |
|
"eval_runtime": 45.7209, |
|
"eval_samples_per_second": 898.977, |
|
"eval_steps_per_second": 56.189, |
|
"step": 2112000 |
|
}, |
|
{ |
|
"epoch": 43.43, |
|
"eval_loss": 1.9086658954620361, |
|
"eval_runtime": 46.6687, |
|
"eval_samples_per_second": 880.719, |
|
"eval_steps_per_second": 55.048, |
|
"step": 2120000 |
|
}, |
|
{ |
|
"epoch": 43.6, |
|
"learning_rate": 4.6466666666666666e-08, |
|
"loss": 2.0368, |
|
"step": 2128000 |
|
}, |
|
{ |
|
"epoch": 43.6, |
|
"eval_loss": 1.8951292037963867, |
|
"eval_runtime": 45.5475, |
|
"eval_samples_per_second": 902.399, |
|
"eval_steps_per_second": 56.403, |
|
"step": 2128000 |
|
}, |
|
{ |
|
"epoch": 43.76, |
|
"eval_loss": 1.893837809562683, |
|
"eval_runtime": 46.7129, |
|
"eval_samples_per_second": 879.885, |
|
"eval_steps_per_second": 54.996, |
|
"step": 2136000 |
|
}, |
|
{ |
|
"epoch": 43.93, |
|
"learning_rate": 4.3733333333333335e-08, |
|
"loss": 2.0298, |
|
"step": 2144000 |
|
}, |
|
{ |
|
"epoch": 43.93, |
|
"eval_loss": 1.8874099254608154, |
|
"eval_runtime": 46.1171, |
|
"eval_samples_per_second": 891.254, |
|
"eval_steps_per_second": 55.706, |
|
"step": 2144000 |
|
}, |
|
{ |
|
"epoch": 44.09, |
|
"eval_loss": 1.90646493434906, |
|
"eval_runtime": 45.5199, |
|
"eval_samples_per_second": 902.945, |
|
"eval_steps_per_second": 56.437, |
|
"step": 2152000 |
|
}, |
|
{ |
|
"epoch": 44.25, |
|
"learning_rate": 4.1e-08, |
|
"loss": 2.0353, |
|
"step": 2160000 |
|
}, |
|
{ |
|
"epoch": 44.25, |
|
"eval_loss": 1.9097111225128174, |
|
"eval_runtime": 46.2608, |
|
"eval_samples_per_second": 888.484, |
|
"eval_steps_per_second": 55.533, |
|
"step": 2160000 |
|
}, |
|
{ |
|
"epoch": 44.42, |
|
"eval_loss": 1.8985484838485718, |
|
"eval_runtime": 46.7775, |
|
"eval_samples_per_second": 878.67, |
|
"eval_steps_per_second": 54.92, |
|
"step": 2168000 |
|
}, |
|
{ |
|
"epoch": 44.58, |
|
"learning_rate": 3.8266666666666665e-08, |
|
"loss": 2.0324, |
|
"step": 2176000 |
|
}, |
|
{ |
|
"epoch": 44.58, |
|
"eval_loss": 1.9160007238388062, |
|
"eval_runtime": 45.5635, |
|
"eval_samples_per_second": 902.082, |
|
"eval_steps_per_second": 56.383, |
|
"step": 2176000 |
|
}, |
|
{ |
|
"epoch": 44.75, |
|
"eval_loss": 1.9059538841247559, |
|
"eval_runtime": 46.2267, |
|
"eval_samples_per_second": 889.139, |
|
"eval_steps_per_second": 55.574, |
|
"step": 2184000 |
|
}, |
|
{ |
|
"epoch": 44.91, |
|
"learning_rate": 3.5533333333333334e-08, |
|
"loss": 2.0316, |
|
"step": 2192000 |
|
}, |
|
{ |
|
"epoch": 44.91, |
|
"eval_loss": 1.8911948204040527, |
|
"eval_runtime": 46.1128, |
|
"eval_samples_per_second": 891.335, |
|
"eval_steps_per_second": 55.711, |
|
"step": 2192000 |
|
}, |
|
{ |
|
"epoch": 45.07, |
|
"eval_loss": 1.9013826847076416, |
|
"eval_runtime": 46.2728, |
|
"eval_samples_per_second": 888.253, |
|
"eval_steps_per_second": 55.519, |
|
"step": 2200000 |
|
}, |
|
{ |
|
"epoch": 45.24, |
|
"learning_rate": 3.28e-08, |
|
"loss": 2.0322, |
|
"step": 2208000 |
|
}, |
|
{ |
|
"epoch": 45.24, |
|
"eval_loss": 1.903083324432373, |
|
"eval_runtime": 45.9933, |
|
"eval_samples_per_second": 893.652, |
|
"eval_steps_per_second": 55.856, |
|
"step": 2208000 |
|
}, |
|
{ |
|
"epoch": 45.4, |
|
"eval_loss": 1.9086343050003052, |
|
"eval_runtime": 46.1275, |
|
"eval_samples_per_second": 891.053, |
|
"eval_steps_per_second": 55.694, |
|
"step": 2216000 |
|
}, |
|
{ |
|
"epoch": 45.57, |
|
"learning_rate": 3.0066666666666665e-08, |
|
"loss": 2.035, |
|
"step": 2224000 |
|
}, |
|
{ |
|
"epoch": 45.57, |
|
"eval_loss": 1.9147051572799683, |
|
"eval_runtime": 46.1882, |
|
"eval_samples_per_second": 889.88, |
|
"eval_steps_per_second": 55.62, |
|
"step": 2224000 |
|
}, |
|
{ |
|
"epoch": 45.73, |
|
"eval_loss": 1.900397777557373, |
|
"eval_runtime": 46.2313, |
|
"eval_samples_per_second": 889.052, |
|
"eval_steps_per_second": 55.568, |
|
"step": 2232000 |
|
}, |
|
{ |
|
"epoch": 45.89, |
|
"learning_rate": 2.7333333333333333e-08, |
|
"loss": 2.0431, |
|
"step": 2240000 |
|
}, |
|
{ |
|
"epoch": 45.89, |
|
"eval_loss": 1.9027053117752075, |
|
"eval_runtime": 45.9169, |
|
"eval_samples_per_second": 895.139, |
|
"eval_steps_per_second": 55.949, |
|
"step": 2240000 |
|
}, |
|
{ |
|
"epoch": 46.06, |
|
"eval_loss": 1.891597032546997, |
|
"eval_runtime": 45.8109, |
|
"eval_samples_per_second": 897.211, |
|
"eval_steps_per_second": 56.078, |
|
"step": 2248000 |
|
}, |
|
{ |
|
"epoch": 46.22, |
|
"learning_rate": 2.46e-08, |
|
"loss": 2.0347, |
|
"step": 2256000 |
|
}, |
|
{ |
|
"epoch": 46.22, |
|
"eval_loss": 1.9048635959625244, |
|
"eval_runtime": 46.62, |
|
"eval_samples_per_second": 881.64, |
|
"eval_steps_per_second": 55.105, |
|
"step": 2256000 |
|
}, |
|
{ |
|
"epoch": 46.38, |
|
"eval_loss": 1.8972582817077637, |
|
"eval_runtime": 45.4329, |
|
"eval_samples_per_second": 904.675, |
|
"eval_steps_per_second": 56.545, |
|
"step": 2264000 |
|
}, |
|
{ |
|
"epoch": 46.55, |
|
"learning_rate": 2.1866666666666667e-08, |
|
"loss": 2.0353, |
|
"step": 2272000 |
|
}, |
|
{ |
|
"epoch": 46.55, |
|
"eval_loss": 1.8931912183761597, |
|
"eval_runtime": 45.83, |
|
"eval_samples_per_second": 896.836, |
|
"eval_steps_per_second": 56.055, |
|
"step": 2272000 |
|
}, |
|
{ |
|
"epoch": 46.71, |
|
"eval_loss": 1.912061333656311, |
|
"eval_runtime": 45.6862, |
|
"eval_samples_per_second": 899.659, |
|
"eval_steps_per_second": 56.231, |
|
"step": 2280000 |
|
}, |
|
{ |
|
"epoch": 46.88, |
|
"learning_rate": 1.9133333333333333e-08, |
|
"loss": 2.0309, |
|
"step": 2288000 |
|
}, |
|
{ |
|
"epoch": 46.88, |
|
"eval_loss": 1.8972575664520264, |
|
"eval_runtime": 45.7208, |
|
"eval_samples_per_second": 898.978, |
|
"eval_steps_per_second": 56.189, |
|
"step": 2288000 |
|
}, |
|
{ |
|
"epoch": 47.04, |
|
"eval_loss": 1.8985596895217896, |
|
"eval_runtime": 45.62, |
|
"eval_samples_per_second": 900.965, |
|
"eval_steps_per_second": 56.313, |
|
"step": 2296000 |
|
}, |
|
{ |
|
"epoch": 47.2, |
|
"learning_rate": 1.64e-08, |
|
"loss": 2.0359, |
|
"step": 2304000 |
|
}, |
|
{ |
|
"epoch": 47.2, |
|
"eval_loss": 1.9202120304107666, |
|
"eval_runtime": 46.1708, |
|
"eval_samples_per_second": 890.216, |
|
"eval_steps_per_second": 55.641, |
|
"step": 2304000 |
|
}, |
|
{ |
|
"epoch": 47.37, |
|
"eval_loss": 1.8778069019317627, |
|
"eval_runtime": 45.0738, |
|
"eval_samples_per_second": 911.882, |
|
"eval_steps_per_second": 56.995, |
|
"step": 2312000 |
|
}, |
|
{ |
|
"epoch": 47.53, |
|
"learning_rate": 1.3666666666666667e-08, |
|
"loss": 2.037, |
|
"step": 2320000 |
|
}, |
|
{ |
|
"epoch": 47.53, |
|
"eval_loss": 1.9136707782745361, |
|
"eval_runtime": 45.7351, |
|
"eval_samples_per_second": 898.698, |
|
"eval_steps_per_second": 56.171, |
|
"step": 2320000 |
|
}, |
|
{ |
|
"epoch": 47.7, |
|
"eval_loss": 1.9095231294631958, |
|
"eval_runtime": 46.2067, |
|
"eval_samples_per_second": 889.525, |
|
"eval_steps_per_second": 55.598, |
|
"step": 2328000 |
|
}, |
|
{ |
|
"epoch": 47.86, |
|
"learning_rate": 1.0933333333333334e-08, |
|
"loss": 2.0306, |
|
"step": 2336000 |
|
}, |
|
{ |
|
"epoch": 47.86, |
|
"eval_loss": 1.903990626335144, |
|
"eval_runtime": 45.7769, |
|
"eval_samples_per_second": 897.877, |
|
"eval_steps_per_second": 56.12, |
|
"step": 2336000 |
|
}, |
|
{ |
|
"epoch": 48.02, |
|
"eval_loss": 1.8931529521942139, |
|
"eval_runtime": 46.2564, |
|
"eval_samples_per_second": 888.57, |
|
"eval_steps_per_second": 55.538, |
|
"step": 2344000 |
|
}, |
|
{ |
|
"epoch": 48.19, |
|
"learning_rate": 8.2e-09, |
|
"loss": 2.0342, |
|
"step": 2352000 |
|
}, |
|
{ |
|
"epoch": 48.19, |
|
"eval_loss": 1.9151569604873657, |
|
"eval_runtime": 45.6412, |
|
"eval_samples_per_second": 900.547, |
|
"eval_steps_per_second": 56.287, |
|
"step": 2352000 |
|
}, |
|
{ |
|
"epoch": 48.35, |
|
"eval_loss": 1.9058945178985596, |
|
"eval_runtime": 46.1965, |
|
"eval_samples_per_second": 889.72, |
|
"eval_steps_per_second": 55.61, |
|
"step": 2360000 |
|
}, |
|
{ |
|
"epoch": 48.52, |
|
"learning_rate": 5.466666666666667e-09, |
|
"loss": 2.0457, |
|
"step": 2368000 |
|
}, |
|
{ |
|
"epoch": 48.52, |
|
"eval_loss": 1.8844729661941528, |
|
"eval_runtime": 46.2206, |
|
"eval_samples_per_second": 889.257, |
|
"eval_steps_per_second": 55.581, |
|
"step": 2368000 |
|
}, |
|
{ |
|
"epoch": 48.68, |
|
"eval_loss": 1.903971791267395, |
|
"eval_runtime": 45.6349, |
|
"eval_samples_per_second": 900.67, |
|
"eval_steps_per_second": 56.295, |
|
"step": 2376000 |
|
}, |
|
{ |
|
"epoch": 48.84, |
|
"learning_rate": 2.7333333333333334e-09, |
|
"loss": 2.0349, |
|
"step": 2384000 |
|
}, |
|
{ |
|
"epoch": 48.84, |
|
"eval_loss": 1.9000164270401, |
|
"eval_runtime": 45.9273, |
|
"eval_samples_per_second": 894.936, |
|
"eval_steps_per_second": 55.936, |
|
"step": 2384000 |
|
}, |
|
{ |
|
"epoch": 49.01, |
|
"eval_loss": 1.905893087387085, |
|
"eval_runtime": 46.548, |
|
"eval_samples_per_second": 883.003, |
|
"eval_steps_per_second": 55.19, |
|
"step": 2392000 |
|
}, |
|
{ |
|
"epoch": 49.17, |
|
"learning_rate": 0.0, |
|
"loss": 2.0322, |
|
"step": 2400000 |
|
}, |
|
{ |
|
"epoch": 49.17, |
|
"eval_loss": 1.8925877809524536, |
|
"eval_runtime": 45.4613, |
|
"eval_samples_per_second": 904.11, |
|
"eval_steps_per_second": 56.51, |
|
"step": 2400000 |
|
}, |
|
{ |
|
"epoch": 49.17, |
|
"step": 2400000, |
|
"total_flos": 7.48474678802484e+17, |
|
"train_loss": 2.0531104736328123, |
|
"train_runtime": 163423.6248, |
|
"train_samples_per_second": 234.972, |
|
"train_steps_per_second": 14.686 |
|
} |
|
], |
|
"logging_steps": 16000, |
|
"max_steps": 2400000, |
|
"num_train_epochs": 50, |
|
"save_steps": 32000, |
|
"total_flos": 7.48474678802484e+17, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|