{
  "best_metric": null,
  "best_model_checkpoint": null,
  "epoch": 0.9999191919191919,
  "global_step": 6187,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
|
{ |
|
"epoch": 0.0, |
|
"learning_rate": 0.0, |
|
"loss": 2.7705, |
|
"step": 1 |
|
}, |
|
{ |
|
"epoch": 0.0, |
|
"learning_rate": 1e-06, |
|
"loss": 2.9903, |
|
"step": 10 |
|
}, |
|
{ |
|
"epoch": 0.0, |
|
"eval_loss": 2.6103124618530273, |
|
"eval_runtime": 21.555, |
|
"eval_samples_per_second": 46.393, |
|
"eval_steps_per_second": 1.485, |
|
"step": 10 |
|
}, |
|
{ |
|
"epoch": 0.0, |
|
"learning_rate": 1e-06, |
|
"loss": 2.7311, |
|
"step": 20 |
|
}, |
|
{ |
|
"epoch": 0.0, |
|
"eval_loss": 2.303687572479248, |
|
"eval_runtime": 21.5668, |
|
"eval_samples_per_second": 46.368, |
|
"eval_steps_per_second": 1.484, |
|
"step": 20 |
|
}, |
|
{ |
|
"epoch": 0.0, |
|
"learning_rate": 1e-06, |
|
"loss": 2.3175, |
|
"step": 30 |
|
}, |
|
{ |
|
"epoch": 0.0, |
|
"eval_loss": 2.1831250190734863, |
|
"eval_runtime": 21.6264, |
|
"eval_samples_per_second": 46.24, |
|
"eval_steps_per_second": 1.48, |
|
"step": 30 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 1e-06, |
|
"loss": 2.2645, |
|
"step": 40 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"eval_loss": 2.130187511444092, |
|
"eval_runtime": 21.5721, |
|
"eval_samples_per_second": 46.356, |
|
"eval_steps_per_second": 1.483, |
|
"step": 40 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 1e-06, |
|
"loss": 2.1012, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"eval_loss": 2.102562427520752, |
|
"eval_runtime": 21.5886, |
|
"eval_samples_per_second": 46.321, |
|
"eval_steps_per_second": 1.482, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 1e-06, |
|
"loss": 2.2054, |
|
"step": 60 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"eval_loss": 2.0776875019073486, |
|
"eval_runtime": 21.5931, |
|
"eval_samples_per_second": 46.311, |
|
"eval_steps_per_second": 1.482, |
|
"step": 60 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 1e-06, |
|
"loss": 2.1123, |
|
"step": 70 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"eval_loss": 2.058500051498413, |
|
"eval_runtime": 21.5702, |
|
"eval_samples_per_second": 46.36, |
|
"eval_steps_per_second": 1.484, |
|
"step": 70 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 1e-06, |
|
"loss": 2.0723, |
|
"step": 80 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"eval_loss": 2.0454375743865967, |
|
"eval_runtime": 21.601, |
|
"eval_samples_per_second": 46.294, |
|
"eval_steps_per_second": 1.481, |
|
"step": 80 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 1e-06, |
|
"loss": 2.0369, |
|
"step": 90 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"eval_loss": 2.036062479019165, |
|
"eval_runtime": 21.5801, |
|
"eval_samples_per_second": 46.339, |
|
"eval_steps_per_second": 1.483, |
|
"step": 90 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 1e-06, |
|
"loss": 2.1556, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"eval_loss": 2.0283124446868896, |
|
"eval_runtime": 21.578, |
|
"eval_samples_per_second": 46.343, |
|
"eval_steps_per_second": 1.483, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 1e-06, |
|
"loss": 2.0969, |
|
"step": 110 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"eval_loss": 2.0223751068115234, |
|
"eval_runtime": 21.5462, |
|
"eval_samples_per_second": 46.412, |
|
"eval_steps_per_second": 1.485, |
|
"step": 110 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 1e-06, |
|
"loss": 2.0542, |
|
"step": 120 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"eval_loss": 2.017312526702881, |
|
"eval_runtime": 21.7681, |
|
"eval_samples_per_second": 45.939, |
|
"eval_steps_per_second": 1.47, |
|
"step": 120 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 1e-06, |
|
"loss": 1.9574, |
|
"step": 130 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"eval_loss": 2.012624979019165, |
|
"eval_runtime": 21.6595, |
|
"eval_samples_per_second": 46.169, |
|
"eval_steps_per_second": 1.477, |
|
"step": 130 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 1e-06, |
|
"loss": 2.0246, |
|
"step": 140 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"eval_loss": 2.009000062942505, |
|
"eval_runtime": 21.635, |
|
"eval_samples_per_second": 46.221, |
|
"eval_steps_per_second": 1.479, |
|
"step": 140 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 1e-06, |
|
"loss": 2.0172, |
|
"step": 150 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"eval_loss": 2.0059375762939453, |
|
"eval_runtime": 21.5668, |
|
"eval_samples_per_second": 46.368, |
|
"eval_steps_per_second": 1.484, |
|
"step": 150 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"learning_rate": 1e-06, |
|
"loss": 1.9788, |
|
"step": 160 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"eval_loss": 2.002187490463257, |
|
"eval_runtime": 21.5772, |
|
"eval_samples_per_second": 46.345, |
|
"eval_steps_per_second": 1.483, |
|
"step": 160 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"learning_rate": 1e-06, |
|
"loss": 2.001, |
|
"step": 170 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"eval_loss": 1.9981249570846558, |
|
"eval_runtime": 21.6059, |
|
"eval_samples_per_second": 46.284, |
|
"eval_steps_per_second": 1.481, |
|
"step": 170 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"learning_rate": 1e-06, |
|
"loss": 1.9815, |
|
"step": 180 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"eval_loss": 1.995437502861023, |
|
"eval_runtime": 21.583, |
|
"eval_samples_per_second": 46.333, |
|
"eval_steps_per_second": 1.483, |
|
"step": 180 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"learning_rate": 1e-06, |
|
"loss": 2.0833, |
|
"step": 190 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"eval_loss": 1.9923125505447388, |
|
"eval_runtime": 21.5969, |
|
"eval_samples_per_second": 46.303, |
|
"eval_steps_per_second": 1.482, |
|
"step": 190 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"learning_rate": 1e-06, |
|
"loss": 2.0181, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"eval_loss": 1.9904375076293945, |
|
"eval_runtime": 21.5756, |
|
"eval_samples_per_second": 46.349, |
|
"eval_steps_per_second": 1.483, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"learning_rate": 1e-06, |
|
"loss": 2.0131, |
|
"step": 210 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"eval_loss": 1.9896249771118164, |
|
"eval_runtime": 21.5918, |
|
"eval_samples_per_second": 46.314, |
|
"eval_steps_per_second": 1.482, |
|
"step": 210 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"learning_rate": 1e-06, |
|
"loss": 1.9979, |
|
"step": 220 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"eval_loss": 1.9875625371932983, |
|
"eval_runtime": 21.565, |
|
"eval_samples_per_second": 46.372, |
|
"eval_steps_per_second": 1.484, |
|
"step": 220 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"learning_rate": 1e-06, |
|
"loss": 2.0325, |
|
"step": 230 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"eval_loss": 1.9860625267028809, |
|
"eval_runtime": 21.5888, |
|
"eval_samples_per_second": 46.32, |
|
"eval_steps_per_second": 1.482, |
|
"step": 230 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"learning_rate": 1e-06, |
|
"loss": 2.105, |
|
"step": 240 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"eval_loss": 1.984375, |
|
"eval_runtime": 21.5906, |
|
"eval_samples_per_second": 46.316, |
|
"eval_steps_per_second": 1.482, |
|
"step": 240 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"learning_rate": 1e-06, |
|
"loss": 2.0709, |
|
"step": 250 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"eval_loss": 1.9822499752044678, |
|
"eval_runtime": 21.6129, |
|
"eval_samples_per_second": 46.269, |
|
"eval_steps_per_second": 1.481, |
|
"step": 250 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"learning_rate": 1e-06, |
|
"loss": 2.0523, |
|
"step": 260 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"eval_loss": 1.9815000295639038, |
|
"eval_runtime": 21.5779, |
|
"eval_samples_per_second": 46.344, |
|
"eval_steps_per_second": 1.483, |
|
"step": 260 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"learning_rate": 1e-06, |
|
"loss": 1.9995, |
|
"step": 270 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"eval_loss": 1.9774999618530273, |
|
"eval_runtime": 21.5937, |
|
"eval_samples_per_second": 46.31, |
|
"eval_steps_per_second": 1.482, |
|
"step": 270 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"learning_rate": 1e-06, |
|
"loss": 1.8896, |
|
"step": 280 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"eval_loss": 1.9772499799728394, |
|
"eval_runtime": 21.6433, |
|
"eval_samples_per_second": 46.204, |
|
"eval_steps_per_second": 1.479, |
|
"step": 280 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"learning_rate": 1e-06, |
|
"loss": 2.088, |
|
"step": 290 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"eval_loss": 1.9761874675750732, |
|
"eval_runtime": 21.6127, |
|
"eval_samples_per_second": 46.269, |
|
"eval_steps_per_second": 1.481, |
|
"step": 290 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"learning_rate": 1e-06, |
|
"loss": 1.9491, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"eval_loss": 1.9765000343322754, |
|
"eval_runtime": 21.5915, |
|
"eval_samples_per_second": 46.315, |
|
"eval_steps_per_second": 1.482, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"learning_rate": 1e-06, |
|
"loss": 2.0771, |
|
"step": 310 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"eval_loss": 1.973562479019165, |
|
"eval_runtime": 21.6552, |
|
"eval_samples_per_second": 46.178, |
|
"eval_steps_per_second": 1.478, |
|
"step": 310 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"learning_rate": 1e-06, |
|
"loss": 2.0921, |
|
"step": 320 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"eval_loss": 1.9715625047683716, |
|
"eval_runtime": 21.6072, |
|
"eval_samples_per_second": 46.281, |
|
"eval_steps_per_second": 1.481, |
|
"step": 320 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"learning_rate": 1e-06, |
|
"loss": 1.967, |
|
"step": 330 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"eval_loss": 1.9698749780654907, |
|
"eval_runtime": 21.5859, |
|
"eval_samples_per_second": 46.326, |
|
"eval_steps_per_second": 1.482, |
|
"step": 330 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"learning_rate": 1e-06, |
|
"loss": 1.9998, |
|
"step": 340 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"eval_loss": 1.9673750400543213, |
|
"eval_runtime": 21.5761, |
|
"eval_samples_per_second": 46.348, |
|
"eval_steps_per_second": 1.483, |
|
"step": 340 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"learning_rate": 1e-06, |
|
"loss": 2.0577, |
|
"step": 350 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"eval_loss": 1.9666249752044678, |
|
"eval_runtime": 21.5981, |
|
"eval_samples_per_second": 46.3, |
|
"eval_steps_per_second": 1.482, |
|
"step": 350 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"learning_rate": 1e-06, |
|
"loss": 2.0249, |
|
"step": 360 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"eval_loss": 1.965000033378601, |
|
"eval_runtime": 21.5716, |
|
"eval_samples_per_second": 46.357, |
|
"eval_steps_per_second": 1.483, |
|
"step": 360 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"learning_rate": 1e-06, |
|
"loss": 1.9365, |
|
"step": 370 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"eval_loss": 1.9635624885559082, |
|
"eval_runtime": 21.5755, |
|
"eval_samples_per_second": 46.349, |
|
"eval_steps_per_second": 1.483, |
|
"step": 370 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"learning_rate": 1e-06, |
|
"loss": 2.0111, |
|
"step": 380 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"eval_loss": 1.9628750085830688, |
|
"eval_runtime": 21.5574, |
|
"eval_samples_per_second": 46.388, |
|
"eval_steps_per_second": 1.484, |
|
"step": 380 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"learning_rate": 1e-06, |
|
"loss": 2.0041, |
|
"step": 390 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"eval_loss": 1.9611250162124634, |
|
"eval_runtime": 21.6082, |
|
"eval_samples_per_second": 46.279, |
|
"eval_steps_per_second": 1.481, |
|
"step": 390 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"learning_rate": 1e-06, |
|
"loss": 2.0181, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"eval_loss": 1.9593125581741333, |
|
"eval_runtime": 21.7618, |
|
"eval_samples_per_second": 45.952, |
|
"eval_steps_per_second": 1.47, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"learning_rate": 1e-06, |
|
"loss": 2.0277, |
|
"step": 410 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"eval_loss": 1.9578125476837158, |
|
"eval_runtime": 21.5803, |
|
"eval_samples_per_second": 46.339, |
|
"eval_steps_per_second": 1.483, |
|
"step": 410 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"learning_rate": 1e-06, |
|
"loss": 1.973, |
|
"step": 420 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"eval_loss": 1.957124948501587, |
|
"eval_runtime": 21.6031, |
|
"eval_samples_per_second": 46.29, |
|
"eval_steps_per_second": 1.481, |
|
"step": 420 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"learning_rate": 1e-06, |
|
"loss": 1.9841, |
|
"step": 430 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"eval_loss": 1.9556875228881836, |
|
"eval_runtime": 21.74, |
|
"eval_samples_per_second": 45.998, |
|
"eval_steps_per_second": 1.472, |
|
"step": 430 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"learning_rate": 1e-06, |
|
"loss": 2.0041, |
|
"step": 440 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"eval_loss": 1.9543124437332153, |
|
"eval_runtime": 21.5907, |
|
"eval_samples_per_second": 46.316, |
|
"eval_steps_per_second": 1.482, |
|
"step": 440 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"learning_rate": 1e-06, |
|
"loss": 1.8736, |
|
"step": 450 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"eval_loss": 1.9544999599456787, |
|
"eval_runtime": 21.6106, |
|
"eval_samples_per_second": 46.274, |
|
"eval_steps_per_second": 1.481, |
|
"step": 450 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"learning_rate": 1e-06, |
|
"loss": 2.0252, |
|
"step": 460 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"eval_loss": 1.954687476158142, |
|
"eval_runtime": 21.6061, |
|
"eval_samples_per_second": 46.283, |
|
"eval_steps_per_second": 1.481, |
|
"step": 460 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"learning_rate": 1e-06, |
|
"loss": 2.0165, |
|
"step": 470 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"eval_loss": 1.9551249742507935, |
|
"eval_runtime": 21.6096, |
|
"eval_samples_per_second": 46.276, |
|
"eval_steps_per_second": 1.481, |
|
"step": 470 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"learning_rate": 1e-06, |
|
"loss": 2.0753, |
|
"step": 480 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"eval_loss": 1.9535000324249268, |
|
"eval_runtime": 21.5781, |
|
"eval_samples_per_second": 46.343, |
|
"eval_steps_per_second": 1.483, |
|
"step": 480 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"learning_rate": 1e-06, |
|
"loss": 2.0401, |
|
"step": 490 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"eval_loss": 1.9523124694824219, |
|
"eval_runtime": 21.5923, |
|
"eval_samples_per_second": 46.313, |
|
"eval_steps_per_second": 1.482, |
|
"step": 490 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"learning_rate": 1e-06, |
|
"loss": 1.9997, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"eval_loss": 1.9514374732971191, |
|
"eval_runtime": 21.5788, |
|
"eval_samples_per_second": 46.342, |
|
"eval_steps_per_second": 1.483, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"learning_rate": 1e-06, |
|
"loss": 1.9983, |
|
"step": 510 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"eval_loss": 1.9509999752044678, |
|
"eval_runtime": 21.5723, |
|
"eval_samples_per_second": 46.356, |
|
"eval_steps_per_second": 1.483, |
|
"step": 510 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"learning_rate": 1e-06, |
|
"loss": 2.0708, |
|
"step": 520 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"eval_loss": 1.9498125314712524, |
|
"eval_runtime": 21.6676, |
|
"eval_samples_per_second": 46.152, |
|
"eval_steps_per_second": 1.477, |
|
"step": 520 |
|
}, |
|
{ |
|
"epoch": 0.09, |
|
"learning_rate": 1e-06, |
|
"loss": 2.0041, |
|
"step": 530 |
|
}, |
|
{ |
|
"epoch": 0.09, |
|
"eval_loss": 1.949375033378601, |
|
"eval_runtime": 21.5738, |
|
"eval_samples_per_second": 46.353, |
|
"eval_steps_per_second": 1.483, |
|
"step": 530 |
|
}, |
|
{ |
|
"epoch": 0.09, |
|
"learning_rate": 1e-06, |
|
"loss": 1.9361, |
|
"step": 540 |
|
}, |
|
{ |
|
"epoch": 0.09, |
|
"eval_loss": 1.9479999542236328, |
|
"eval_runtime": 21.6244, |
|
"eval_samples_per_second": 46.244, |
|
"eval_steps_per_second": 1.48, |
|
"step": 540 |
|
}, |
|
{ |
|
"epoch": 0.09, |
|
"learning_rate": 1e-06, |
|
"loss": 1.996, |
|
"step": 550 |
|
}, |
|
{ |
|
"epoch": 0.09, |
|
"eval_loss": 1.9463125467300415, |
|
"eval_runtime": 21.5711, |
|
"eval_samples_per_second": 46.358, |
|
"eval_steps_per_second": 1.483, |
|
"step": 550 |
|
}, |
|
{ |
|
"epoch": 0.09, |
|
"learning_rate": 1e-06, |
|
"loss": 1.9434, |
|
"step": 560 |
|
}, |
|
{ |
|
"epoch": 0.09, |
|
"eval_loss": 1.946062445640564, |
|
"eval_runtime": 21.6589, |
|
"eval_samples_per_second": 46.17, |
|
"eval_steps_per_second": 1.477, |
|
"step": 560 |
|
}, |
|
{ |
|
"epoch": 0.09, |
|
"learning_rate": 1e-06, |
|
"loss": 1.9713, |
|
"step": 570 |
|
}, |
|
{ |
|
"epoch": 0.09, |
|
"eval_loss": 1.9454375505447388, |
|
"eval_runtime": 21.5799, |
|
"eval_samples_per_second": 46.339, |
|
"eval_steps_per_second": 1.483, |
|
"step": 570 |
|
}, |
|
{ |
|
"epoch": 0.09, |
|
"learning_rate": 1e-06, |
|
"loss": 2.0267, |
|
"step": 580 |
|
}, |
|
{ |
|
"epoch": 0.09, |
|
"eval_loss": 1.9446874856948853, |
|
"eval_runtime": 21.6125, |
|
"eval_samples_per_second": 46.269, |
|
"eval_steps_per_second": 1.481, |
|
"step": 580 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"learning_rate": 1e-06, |
|
"loss": 1.9422, |
|
"step": 590 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"eval_loss": 1.9424999952316284, |
|
"eval_runtime": 21.5951, |
|
"eval_samples_per_second": 46.307, |
|
"eval_steps_per_second": 1.482, |
|
"step": 590 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"learning_rate": 1e-06, |
|
"loss": 1.9464, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"eval_loss": 1.9424375295639038, |
|
"eval_runtime": 21.5707, |
|
"eval_samples_per_second": 46.359, |
|
"eval_steps_per_second": 1.483, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"learning_rate": 1e-06, |
|
"loss": 2.028, |
|
"step": 610 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"eval_loss": 1.9411250352859497, |
|
"eval_runtime": 21.5905, |
|
"eval_samples_per_second": 46.317, |
|
"eval_steps_per_second": 1.482, |
|
"step": 610 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"learning_rate": 1e-06, |
|
"loss": 2.0127, |
|
"step": 620 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"eval_loss": 1.941562533378601, |
|
"eval_runtime": 21.5988, |
|
"eval_samples_per_second": 46.299, |
|
"eval_steps_per_second": 1.482, |
|
"step": 620 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"learning_rate": 1e-06, |
|
"loss": 1.9577, |
|
"step": 630 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"eval_loss": 1.9395625591278076, |
|
"eval_runtime": 21.5636, |
|
"eval_samples_per_second": 46.375, |
|
"eval_steps_per_second": 1.484, |
|
"step": 630 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"learning_rate": 1e-06, |
|
"loss": 1.9023, |
|
"step": 640 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"eval_loss": 1.9394375085830688, |
|
"eval_runtime": 21.5844, |
|
"eval_samples_per_second": 46.33, |
|
"eval_steps_per_second": 1.483, |
|
"step": 640 |
|
}, |
|
{ |
|
"epoch": 0.11, |
|
"learning_rate": 1e-06, |
|
"loss": 2.0571, |
|
"step": 650 |
|
}, |
|
{ |
|
"epoch": 0.11, |
|
"eval_loss": 1.9391249418258667, |
|
"eval_runtime": 21.6088, |
|
"eval_samples_per_second": 46.277, |
|
"eval_steps_per_second": 1.481, |
|
"step": 650 |
|
}, |
|
{ |
|
"epoch": 0.11, |
|
"learning_rate": 1e-06, |
|
"loss": 2.023, |
|
"step": 660 |
|
}, |
|
{ |
|
"epoch": 0.11, |
|
"eval_loss": 1.9401874542236328, |
|
"eval_runtime": 21.5901, |
|
"eval_samples_per_second": 46.318, |
|
"eval_steps_per_second": 1.482, |
|
"step": 660 |
|
}, |
|
{ |
|
"epoch": 0.11, |
|
"learning_rate": 1e-06, |
|
"loss": 1.9559, |
|
"step": 670 |
|
}, |
|
{ |
|
"epoch": 0.11, |
|
"eval_loss": 1.939062476158142, |
|
"eval_runtime": 21.5898, |
|
"eval_samples_per_second": 46.318, |
|
"eval_steps_per_second": 1.482, |
|
"step": 670 |
|
}, |
|
{ |
|
"epoch": 0.11, |
|
"learning_rate": 1e-06, |
|
"loss": 1.9522, |
|
"step": 680 |
|
}, |
|
{ |
|
"epoch": 0.11, |
|
"eval_loss": 1.9381250143051147, |
|
"eval_runtime": 21.6135, |
|
"eval_samples_per_second": 46.267, |
|
"eval_steps_per_second": 1.481, |
|
"step": 680 |
|
}, |
|
{ |
|
"epoch": 0.11, |
|
"learning_rate": 1e-06, |
|
"loss": 1.9785, |
|
"step": 690 |
|
}, |
|
{ |
|
"epoch": 0.11, |
|
"eval_loss": 1.9374375343322754, |
|
"eval_runtime": 21.5705, |
|
"eval_samples_per_second": 46.36, |
|
"eval_steps_per_second": 1.484, |
|
"step": 690 |
|
}, |
|
{ |
|
"epoch": 0.11, |
|
"learning_rate": 1e-06, |
|
"loss": 2.0394, |
|
"step": 700 |
|
}, |
|
{ |
|
"epoch": 0.11, |
|
"eval_loss": 1.9369374513626099, |
|
"eval_runtime": 21.5881, |
|
"eval_samples_per_second": 46.322, |
|
"eval_steps_per_second": 1.482, |
|
"step": 700 |
|
}, |
|
{ |
|
"epoch": 0.11, |
|
"learning_rate": 1e-06, |
|
"loss": 2.0392, |
|
"step": 710 |
|
}, |
|
{ |
|
"epoch": 0.11, |
|
"eval_loss": 1.9351249933242798, |
|
"eval_runtime": 21.5738, |
|
"eval_samples_per_second": 46.353, |
|
"eval_steps_per_second": 1.483, |
|
"step": 710 |
|
}, |
|
{ |
|
"epoch": 0.12, |
|
"learning_rate": 1e-06, |
|
"loss": 1.9352, |
|
"step": 720 |
|
}, |
|
{ |
|
"epoch": 0.12, |
|
"eval_loss": 1.9348125457763672, |
|
"eval_runtime": 21.5633, |
|
"eval_samples_per_second": 46.375, |
|
"eval_steps_per_second": 1.484, |
|
"step": 720 |
|
}, |
|
{ |
|
"epoch": 0.12, |
|
"learning_rate": 1e-06, |
|
"loss": 1.9418, |
|
"step": 730 |
|
}, |
|
{ |
|
"epoch": 0.12, |
|
"eval_loss": 1.9336249828338623, |
|
"eval_runtime": 21.66, |
|
"eval_samples_per_second": 46.168, |
|
"eval_steps_per_second": 1.477, |
|
"step": 730 |
|
}, |
|
{ |
|
"epoch": 0.12, |
|
"learning_rate": 1e-06, |
|
"loss": 2.0288, |
|
"step": 740 |
|
}, |
|
{ |
|
"epoch": 0.12, |
|
"eval_loss": 1.9332499504089355, |
|
"eval_runtime": 21.5801, |
|
"eval_samples_per_second": 46.339, |
|
"eval_steps_per_second": 1.483, |
|
"step": 740 |
|
}, |
|
{ |
|
"epoch": 0.12, |
|
"learning_rate": 1e-06, |
|
"loss": 2.0209, |
|
"step": 750 |
|
}, |
|
{ |
|
"epoch": 0.12, |
|
"eval_loss": 1.9328750371932983, |
|
"eval_runtime": 21.714, |
|
"eval_samples_per_second": 46.053, |
|
"eval_steps_per_second": 1.474, |
|
"step": 750 |
|
}, |
|
{ |
|
"epoch": 0.12, |
|
"learning_rate": 1e-06, |
|
"loss": 2.0951, |
|
"step": 760 |
|
}, |
|
{ |
|
"epoch": 0.12, |
|
"eval_loss": 1.9322500228881836, |
|
"eval_runtime": 21.5724, |
|
"eval_samples_per_second": 46.356, |
|
"eval_steps_per_second": 1.483, |
|
"step": 760 |
|
}, |
|
{ |
|
"epoch": 0.12, |
|
"learning_rate": 1e-06, |
|
"loss": 1.9686, |
|
"step": 770 |
|
}, |
|
{ |
|
"epoch": 0.12, |
|
"eval_loss": 1.9313124418258667, |
|
"eval_runtime": 21.5878, |
|
"eval_samples_per_second": 46.323, |
|
"eval_steps_per_second": 1.482, |
|
"step": 770 |
|
}, |
|
{ |
|
"epoch": 0.13, |
|
"learning_rate": 1e-06, |
|
"loss": 2.008, |
|
"step": 780 |
|
}, |
|
{ |
|
"epoch": 0.13, |
|
"eval_loss": 1.9303125143051147, |
|
"eval_runtime": 21.5821, |
|
"eval_samples_per_second": 46.335, |
|
"eval_steps_per_second": 1.483, |
|
"step": 780 |
|
}, |
|
{ |
|
"epoch": 0.13, |
|
"learning_rate": 1e-06, |
|
"loss": 1.998, |
|
"step": 790 |
|
}, |
|
{ |
|
"epoch": 0.13, |
|
"eval_loss": 1.9296875, |
|
"eval_runtime": 21.643, |
|
"eval_samples_per_second": 46.204, |
|
"eval_steps_per_second": 1.479, |
|
"step": 790 |
|
}, |
|
{ |
|
"epoch": 0.13, |
|
"learning_rate": 1e-06, |
|
"loss": 1.9939, |
|
"step": 800 |
|
}, |
|
{ |
|
"epoch": 0.13, |
|
"eval_loss": 1.928125023841858, |
|
"eval_runtime": 21.6015, |
|
"eval_samples_per_second": 46.293, |
|
"eval_steps_per_second": 1.481, |
|
"step": 800 |
|
}, |
|
{ |
|
"epoch": 0.13, |
|
"learning_rate": 1e-06, |
|
"loss": 2.0758, |
|
"step": 810 |
|
}, |
|
{ |
|
"epoch": 0.13, |
|
"eval_loss": 1.9277499914169312, |
|
"eval_runtime": 21.6035, |
|
"eval_samples_per_second": 46.289, |
|
"eval_steps_per_second": 1.481, |
|
"step": 810 |
|
}, |
|
{ |
|
"epoch": 0.13, |
|
"learning_rate": 1e-06, |
|
"loss": 2.0049, |
|
"step": 820 |
|
}, |
|
{ |
|
"epoch": 0.13, |
|
"eval_loss": 1.928375005722046, |
|
"eval_runtime": 21.774, |
|
"eval_samples_per_second": 45.926, |
|
"eval_steps_per_second": 1.47, |
|
"step": 820 |
|
}, |
|
{ |
|
"epoch": 0.13, |
|
"learning_rate": 1e-06, |
|
"loss": 1.9863, |
|
"step": 830 |
|
}, |
|
{ |
|
"epoch": 0.13, |
|
"eval_loss": 1.9279375076293945, |
|
"eval_runtime": 21.6241, |
|
"eval_samples_per_second": 46.245, |
|
"eval_steps_per_second": 1.48, |
|
"step": 830 |
|
}, |
|
{ |
|
"epoch": 0.14, |
|
"learning_rate": 1e-06, |
|
"loss": 1.9947, |
|
"step": 840 |
|
}, |
|
{ |
|
"epoch": 0.14, |
|
"eval_loss": 1.9278124570846558, |
|
"eval_runtime": 21.5978, |
|
"eval_samples_per_second": 46.301, |
|
"eval_steps_per_second": 1.482, |
|
"step": 840 |
|
}, |
|
{ |
|
"epoch": 0.14, |
|
"learning_rate": 1e-06, |
|
"loss": 1.9341, |
|
"step": 850 |
|
}, |
|
{ |
|
"epoch": 0.14, |
|
"eval_loss": 1.9268125295639038, |
|
"eval_runtime": 21.5635, |
|
"eval_samples_per_second": 46.375, |
|
"eval_steps_per_second": 1.484, |
|
"step": 850 |
|
}, |
|
{ |
|
"epoch": 0.14, |
|
"learning_rate": 1e-06, |
|
"loss": 1.9241, |
|
"step": 860 |
|
}, |
|
{ |
|
"epoch": 0.14, |
|
"eval_loss": 1.9255625009536743, |
|
"eval_runtime": 21.5833, |
|
"eval_samples_per_second": 46.332, |
|
"eval_steps_per_second": 1.483, |
|
"step": 860 |
|
}, |
|
{ |
|
"epoch": 0.14, |
|
"learning_rate": 1e-06, |
|
"loss": 1.9837, |
|
"step": 870 |
|
}, |
|
{ |
|
"epoch": 0.14, |
|
"eval_loss": 1.925374984741211, |
|
"eval_runtime": 21.745, |
|
"eval_samples_per_second": 45.988, |
|
"eval_steps_per_second": 1.472, |
|
"step": 870 |
|
}, |
|
{ |
|
"epoch": 0.14, |
|
"learning_rate": 1e-06, |
|
"loss": 1.973, |
|
"step": 880 |
|
}, |
|
{ |
|
"epoch": 0.14, |
|
"eval_loss": 1.9242500066757202, |
|
"eval_runtime": 21.5975, |
|
"eval_samples_per_second": 46.302, |
|
"eval_steps_per_second": 1.482, |
|
"step": 880 |
|
}, |
|
{ |
|
"epoch": 0.14, |
|
"learning_rate": 1e-06, |
|
"loss": 1.9233, |
|
"step": 890 |
|
}, |
|
{ |
|
"epoch": 0.14, |
|
"eval_loss": 1.9240624904632568, |
|
"eval_runtime": 21.6099, |
|
"eval_samples_per_second": 46.275, |
|
"eval_steps_per_second": 1.481, |
|
"step": 890 |
|
}, |
|
{ |
|
"epoch": 0.15, |
|
"learning_rate": 1e-06, |
|
"loss": 1.9619, |
|
"step": 900 |
|
}, |
|
{ |
|
"epoch": 0.15, |
|
"eval_loss": 1.92368745803833, |
|
"eval_runtime": 21.5993, |
|
"eval_samples_per_second": 46.298, |
|
"eval_steps_per_second": 1.482, |
|
"step": 900 |
|
}, |
|
{ |
|
"epoch": 0.15, |
|
"learning_rate": 1e-06, |
|
"loss": 1.9975, |
|
"step": 910 |
|
}, |
|
{ |
|
"epoch": 0.15, |
|
"eval_loss": 1.9224375486373901, |
|
"eval_runtime": 21.5918, |
|
"eval_samples_per_second": 46.314, |
|
"eval_steps_per_second": 1.482, |
|
"step": 910 |
|
}, |
|
{ |
|
"epoch": 0.15, |
|
"learning_rate": 1e-06, |
|
"loss": 1.9807, |
|
"step": 920 |
|
}, |
|
{ |
|
"epoch": 0.15, |
|
"eval_loss": 1.9214999675750732, |
|
"eval_runtime": 21.5688, |
|
"eval_samples_per_second": 46.363, |
|
"eval_steps_per_second": 1.484, |
|
"step": 920 |
|
}, |
|
{ |
|
"epoch": 0.15, |
|
"learning_rate": 1e-06, |
|
"loss": 1.8974, |
|
"step": 930 |
|
}, |
|
{ |
|
"epoch": 0.15, |
|
"eval_loss": 1.924375057220459, |
|
"eval_runtime": 21.5825, |
|
"eval_samples_per_second": 46.334, |
|
"eval_steps_per_second": 1.483, |
|
"step": 930 |
|
}, |
|
{ |
|
"epoch": 0.15, |
|
"learning_rate": 1e-06, |
|
"loss": 2.0246, |
|
"step": 940 |
|
}, |
|
{ |
|
"epoch": 0.15, |
|
"eval_loss": 1.9245624542236328, |
|
"eval_runtime": 21.6472, |
|
"eval_samples_per_second": 46.195, |
|
"eval_steps_per_second": 1.478, |
|
"step": 940 |
|
}, |
|
{ |
|
"epoch": 0.15, |
|
"learning_rate": 1e-06, |
|
"loss": 1.9318, |
|
"step": 950 |
|
}, |
|
{ |
|
"epoch": 0.15, |
|
"eval_loss": 1.9228750467300415, |
|
"eval_runtime": 21.6026, |
|
"eval_samples_per_second": 46.291, |
|
"eval_steps_per_second": 1.481, |
|
"step": 950 |
|
}, |
|
{ |
|
"epoch": 0.16, |
|
"learning_rate": 1e-06, |
|
"loss": 1.9721, |
|
"step": 960 |
|
}, |
|
{ |
|
"epoch": 0.16, |
|
"eval_loss": 1.9230624437332153, |
|
"eval_runtime": 21.5765, |
|
"eval_samples_per_second": 46.347, |
|
"eval_steps_per_second": 1.483, |
|
"step": 960 |
|
}, |
|
{ |
|
"epoch": 0.16, |
|
"learning_rate": 1e-06, |
|
"loss": 1.9978, |
|
"step": 970 |
|
}, |
|
{ |
|
"epoch": 0.16, |
|
"eval_loss": 1.9227499961853027, |
|
"eval_runtime": 21.6602, |
|
"eval_samples_per_second": 46.168, |
|
"eval_steps_per_second": 1.477, |
|
"step": 970 |
|
}, |
|
{ |
|
"epoch": 0.16, |
|
"learning_rate": 1e-06, |
|
"loss": 1.8698, |
|
"step": 980 |
|
}, |
|
{ |
|
"epoch": 0.16, |
|
"eval_loss": 1.9231250286102295, |
|
"eval_runtime": 21.684, |
|
"eval_samples_per_second": 46.117, |
|
"eval_steps_per_second": 1.476, |
|
"step": 980 |
|
}, |
|
{ |
|
"epoch": 0.16, |
|
"learning_rate": 1e-06, |
|
"loss": 1.9715, |
|
"step": 990 |
|
}, |
|
{ |
|
"epoch": 0.16, |
|
"eval_loss": 1.9209375381469727, |
|
"eval_runtime": 21.5945, |
|
"eval_samples_per_second": 46.308, |
|
"eval_steps_per_second": 1.482, |
|
"step": 990 |
|
}, |
|
{ |
|
"epoch": 0.16, |
|
"learning_rate": 1e-06, |
|
"loss": 1.9874, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 0.16, |
|
"eval_loss": 1.9190624952316284, |
|
"eval_runtime": 21.5978, |
|
"eval_samples_per_second": 46.301, |
|
"eval_steps_per_second": 1.482, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 0.16, |
|
"learning_rate": 1e-06, |
|
"loss": 2.0096, |
|
"step": 1010 |
|
}, |
|
{ |
|
"epoch": 0.16, |
|
"eval_loss": 1.919374942779541, |
|
"eval_runtime": 21.5737, |
|
"eval_samples_per_second": 46.353, |
|
"eval_steps_per_second": 1.483, |
|
"step": 1010 |
|
}, |
|
{ |
|
"epoch": 0.16, |
|
"learning_rate": 1e-06, |
|
"loss": 1.9226, |
|
"step": 1020 |
|
}, |
|
{ |
|
"epoch": 0.16, |
|
"eval_loss": 1.919124960899353, |
|
"eval_runtime": 21.5786, |
|
"eval_samples_per_second": 46.342, |
|
"eval_steps_per_second": 1.483, |
|
"step": 1020 |
|
}, |
|
{ |
|
"epoch": 0.17, |
|
"learning_rate": 1e-06, |
|
"loss": 1.9934, |
|
"step": 1030 |
|
}, |
|
{ |
|
"epoch": 0.17, |
|
"eval_loss": 1.919124960899353, |
|
"eval_runtime": 21.5567, |
|
"eval_samples_per_second": 46.389, |
|
"eval_steps_per_second": 1.484, |
|
"step": 1030 |
|
}, |
|
{ |
|
"epoch": 0.17, |
|
"learning_rate": 1e-06, |
|
"loss": 1.9778, |
|
"step": 1040 |
|
}, |
|
{ |
|
"epoch": 0.17, |
|
"eval_loss": 1.9184374809265137, |
|
"eval_runtime": 21.5886, |
|
"eval_samples_per_second": 46.321, |
|
"eval_steps_per_second": 1.482, |
|
"step": 1040 |
|
}, |
|
{ |
|
"epoch": 0.17, |
|
"learning_rate": 1e-06, |
|
"loss": 1.9206, |
|
"step": 1050 |
|
}, |
|
{ |
|
"epoch": 0.17, |
|
"eval_loss": 1.9199374914169312, |
|
"eval_runtime": 21.684, |
|
"eval_samples_per_second": 46.117, |
|
"eval_steps_per_second": 1.476, |
|
"step": 1050 |
|
}, |
|
{ |
|
"epoch": 0.17, |
|
"learning_rate": 1e-06, |
|
"loss": 1.8988, |
|
"step": 1060 |
|
}, |
|
{ |
|
"epoch": 0.17, |
|
"eval_loss": 1.919374942779541, |
|
"eval_runtime": 21.5854, |
|
"eval_samples_per_second": 46.328, |
|
"eval_steps_per_second": 1.482, |
|
"step": 1060 |
|
}, |
|
{ |
|
"epoch": 0.17, |
|
"learning_rate": 1e-06, |
|
"loss": 2.0907, |
|
"step": 1070 |
|
}, |
|
{ |
|
"epoch": 0.17, |
|
"eval_loss": 1.9197499752044678, |
|
"eval_runtime": 21.5829, |
|
"eval_samples_per_second": 46.333, |
|
"eval_steps_per_second": 1.483, |
|
"step": 1070 |
|
}, |
|
{ |
|
"epoch": 0.17, |
|
"learning_rate": 1e-06, |
|
"loss": 2.0045, |
|
"step": 1080 |
|
}, |
|
{ |
|
"epoch": 0.17, |
|
"eval_loss": 1.9206875562667847, |
|
"eval_runtime": 21.5983, |
|
"eval_samples_per_second": 46.3, |
|
"eval_steps_per_second": 1.482, |
|
"step": 1080 |
|
}, |
|
{ |
|
"epoch": 0.18, |
|
"learning_rate": 1e-06, |
|
"loss": 1.938, |
|
"step": 1090 |
|
}, |
|
{ |
|
"epoch": 0.18, |
|
"eval_loss": 1.9186874628067017, |
|
"eval_runtime": 21.6418, |
|
"eval_samples_per_second": 46.207, |
|
"eval_steps_per_second": 1.479, |
|
"step": 1090 |
|
}, |
|
{ |
|
"epoch": 0.18, |
|
"learning_rate": 1e-06, |
|
"loss": 1.9877, |
|
"step": 1100 |
|
}, |
|
{ |
|
"epoch": 0.18, |
|
"eval_loss": 1.918624997138977, |
|
"eval_runtime": 21.5764, |
|
"eval_samples_per_second": 46.347, |
|
"eval_steps_per_second": 1.483, |
|
"step": 1100 |
|
}, |
|
{ |
|
"epoch": 0.18, |
|
"learning_rate": 1e-06, |
|
"loss": 1.884, |
|
"step": 1110 |
|
}, |
|
{ |
|
"epoch": 0.18, |
|
"eval_loss": 1.9181874990463257, |
|
"eval_runtime": 21.6142, |
|
"eval_samples_per_second": 46.266, |
|
"eval_steps_per_second": 1.481, |
|
"step": 1110 |
|
}, |
|
{ |
|
"epoch": 0.18, |
|
"learning_rate": 1e-06, |
|
"loss": 2.0029, |
|
"step": 1120 |
|
}, |
|
{ |
|
"epoch": 0.18, |
|
"eval_loss": 1.9176249504089355, |
|
"eval_runtime": 21.6204, |
|
"eval_samples_per_second": 46.253, |
|
"eval_steps_per_second": 1.48, |
|
"step": 1120 |
|
}, |
|
{ |
|
"epoch": 0.18, |
|
"learning_rate": 1e-06, |
|
"loss": 1.9871, |
|
"step": 1130 |
|
}, |
|
{ |
|
"epoch": 0.18, |
|
"eval_loss": 1.9174375534057617, |
|
"eval_runtime": 21.5968, |
|
"eval_samples_per_second": 46.303, |
|
"eval_steps_per_second": 1.482, |
|
"step": 1130 |
|
}, |
|
{ |
|
"epoch": 0.18, |
|
"learning_rate": 1e-06, |
|
"loss": 1.9415, |
|
"step": 1140 |
|
}, |
|
{ |
|
"epoch": 0.18, |
|
"eval_loss": 1.917562484741211, |
|
"eval_runtime": 21.5748, |
|
"eval_samples_per_second": 46.35, |
|
"eval_steps_per_second": 1.483, |
|
"step": 1140 |
|
}, |
|
{ |
|
"epoch": 0.19, |
|
"learning_rate": 1e-06, |
|
"loss": 2.0646, |
|
"step": 1150 |
|
}, |
|
{ |
|
"epoch": 0.19, |
|
"eval_loss": 1.9170000553131104, |
|
"eval_runtime": 21.6665, |
|
"eval_samples_per_second": 46.154, |
|
"eval_steps_per_second": 1.477, |
|
"step": 1150 |
|
}, |
|
{ |
|
"epoch": 0.19, |
|
"learning_rate": 1e-06, |
|
"loss": 1.9221, |
|
"step": 1160 |
|
}, |
|
{ |
|
"epoch": 0.19, |
|
"eval_loss": 1.9166874885559082, |
|
"eval_runtime": 21.6037, |
|
"eval_samples_per_second": 46.288, |
|
"eval_steps_per_second": 1.481, |
|
"step": 1160 |
|
}, |
|
{ |
|
"epoch": 0.19, |
|
"learning_rate": 1e-06, |
|
"loss": 1.8499, |
|
"step": 1170 |
|
}, |
|
{ |
|
"epoch": 0.19, |
|
"eval_loss": 1.9155625104904175, |
|
"eval_runtime": 21.5624, |
|
"eval_samples_per_second": 46.377, |
|
"eval_steps_per_second": 1.484, |
|
"step": 1170 |
|
}, |
|
{ |
|
"epoch": 0.19, |
|
"learning_rate": 1e-06, |
|
"loss": 2.0678, |
|
"step": 1180 |
|
}, |
|
{ |
|
"epoch": 0.19, |
|
"eval_loss": 1.9151250123977661, |
|
"eval_runtime": 21.5683, |
|
"eval_samples_per_second": 46.364, |
|
"eval_steps_per_second": 1.484, |
|
"step": 1180 |
|
}, |
|
{ |
|
"epoch": 0.19, |
|
"learning_rate": 1e-06, |
|
"loss": 1.9067, |
|
"step": 1190 |
|
}, |
|
{ |
|
"epoch": 0.19, |
|
"eval_loss": 1.9144999980926514, |
|
"eval_runtime": 21.5675, |
|
"eval_samples_per_second": 46.366, |
|
"eval_steps_per_second": 1.484, |
|
"step": 1190 |
|
}, |
|
{ |
|
"epoch": 0.19, |
|
"learning_rate": 1e-06, |
|
"loss": 2.0528, |
|
"step": 1200 |
|
}, |
|
{ |
|
"epoch": 0.19, |
|
"eval_loss": 1.914562463760376, |
|
"eval_runtime": 21.5545, |
|
"eval_samples_per_second": 46.394, |
|
"eval_steps_per_second": 1.485, |
|
"step": 1200 |
|
}, |
|
{ |
|
"epoch": 0.2, |
|
"learning_rate": 1e-06, |
|
"loss": 2.0069, |
|
"step": 1210 |
|
}, |
|
{ |
|
"epoch": 0.2, |
|
"eval_loss": 1.9144375324249268, |
|
"eval_runtime": 21.5614, |
|
"eval_samples_per_second": 46.379, |
|
"eval_steps_per_second": 1.484, |
|
"step": 1210 |
|
}, |
|
{ |
|
"epoch": 0.2, |
|
"learning_rate": 1e-06, |
|
"loss": 2.0498, |
|
"step": 1220 |
|
}, |
|
{ |
|
"epoch": 0.2, |
|
"eval_loss": 1.9149999618530273, |
|
"eval_runtime": 21.5646, |
|
"eval_samples_per_second": 46.372, |
|
"eval_steps_per_second": 1.484, |
|
"step": 1220 |
|
}, |
|
{ |
|
"epoch": 0.2, |
|
"learning_rate": 1e-06, |
|
"loss": 2.0296, |
|
"step": 1230 |
|
}, |
|
{ |
|
"epoch": 0.2, |
|
"eval_loss": 1.9141875505447388, |
|
"eval_runtime": 21.5633, |
|
"eval_samples_per_second": 46.375, |
|
"eval_steps_per_second": 1.484, |
|
"step": 1230 |
|
}, |
|
{ |
|
"epoch": 0.2, |
|
"learning_rate": 1e-06, |
|
"loss": 1.9634, |
|
"step": 1240 |
|
}, |
|
{ |
|
"epoch": 0.2, |
|
"eval_loss": 1.9131875038146973, |
|
"eval_runtime": 21.5505, |
|
"eval_samples_per_second": 46.403, |
|
"eval_steps_per_second": 1.485, |
|
"step": 1240 |
|
}, |
|
{ |
|
"epoch": 0.2, |
|
"learning_rate": 1e-06, |
|
"loss": 2.0171, |
|
"step": 1250 |
|
}, |
|
{ |
|
"epoch": 0.2, |
|
"eval_loss": 1.913562536239624, |
|
"eval_runtime": 21.5632, |
|
"eval_samples_per_second": 46.375, |
|
"eval_steps_per_second": 1.484, |
|
"step": 1250 |
|
}, |
|
{ |
|
"epoch": 0.2, |
|
"learning_rate": 1e-06, |
|
"loss": 1.9484, |
|
"step": 1260 |
|
}, |
|
{ |
|
"epoch": 0.2, |
|
"eval_loss": 1.9150625467300415, |
|
"eval_runtime": 21.5801, |
|
"eval_samples_per_second": 46.339, |
|
"eval_steps_per_second": 1.483, |
|
"step": 1260 |
|
}, |
|
{ |
|
"epoch": 0.21, |
|
"learning_rate": 1e-06, |
|
"loss": 2.0042, |
|
"step": 1270 |
|
}, |
|
{ |
|
"epoch": 0.21, |
|
"eval_loss": 1.9144375324249268, |
|
"eval_runtime": 21.5746, |
|
"eval_samples_per_second": 46.351, |
|
"eval_steps_per_second": 1.483, |
|
"step": 1270 |
|
}, |
|
{ |
|
"epoch": 0.21, |
|
"learning_rate": 1e-06, |
|
"loss": 1.9661, |
|
"step": 1280 |
|
}, |
|
{ |
|
"epoch": 0.21, |
|
"eval_loss": 1.9140000343322754, |
|
"eval_runtime": 21.6459, |
|
"eval_samples_per_second": 46.198, |
|
"eval_steps_per_second": 1.478, |
|
"step": 1280 |
|
}, |
|
{ |
|
"epoch": 0.21, |
|
"learning_rate": 1e-06, |
|
"loss": 1.8565, |
|
"step": 1290 |
|
}, |
|
{ |
|
"epoch": 0.21, |
|
"eval_loss": 1.9128750562667847, |
|
"eval_runtime": 21.57, |
|
"eval_samples_per_second": 46.361, |
|
"eval_steps_per_second": 1.484, |
|
"step": 1290 |
|
}, |
|
{ |
|
"epoch": 0.21, |
|
"learning_rate": 1e-06, |
|
"loss": 1.972, |
|
"step": 1300 |
|
}, |
|
{ |
|
"epoch": 0.21, |
|
"eval_loss": 1.9139374494552612, |
|
"eval_runtime": 21.5805, |
|
"eval_samples_per_second": 46.338, |
|
"eval_steps_per_second": 1.483, |
|
"step": 1300 |
|
}, |
|
{ |
|
"epoch": 0.21, |
|
"learning_rate": 1e-06, |
|
"loss": 1.8855, |
|
"step": 1310 |
|
}, |
|
{ |
|
"epoch": 0.21, |
|
"eval_loss": 1.9141249656677246, |
|
"eval_runtime": 21.6158, |
|
"eval_samples_per_second": 46.262, |
|
"eval_steps_per_second": 1.48, |
|
"step": 1310 |
|
}, |
|
{ |
|
"epoch": 0.21, |
|
"learning_rate": 1e-06, |
|
"loss": 1.913, |
|
"step": 1320 |
|
}, |
|
{ |
|
"epoch": 0.21, |
|
"eval_loss": 1.9146875143051147, |
|
"eval_runtime": 21.5474, |
|
"eval_samples_per_second": 46.409, |
|
"eval_steps_per_second": 1.485, |
|
"step": 1320 |
|
}, |
|
{ |
|
"epoch": 0.21, |
|
"learning_rate": 1e-06, |
|
"loss": 2.0252, |
|
"step": 1330 |
|
}, |
|
{ |
|
"epoch": 0.21, |
|
"eval_loss": 1.9136874675750732, |
|
"eval_runtime": 21.555, |
|
"eval_samples_per_second": 46.393, |
|
"eval_steps_per_second": 1.485, |
|
"step": 1330 |
|
}, |
|
{ |
|
"epoch": 0.22, |
|
"learning_rate": 1e-06, |
|
"loss": 2.0193, |
|
"step": 1340 |
|
}, |
|
{ |
|
"epoch": 0.22, |
|
"eval_loss": 1.9125624895095825, |
|
"eval_runtime": 21.5626, |
|
"eval_samples_per_second": 46.377, |
|
"eval_steps_per_second": 1.484, |
|
"step": 1340 |
|
}, |
|
{ |
|
"epoch": 0.22, |
|
"learning_rate": 1e-06, |
|
"loss": 1.8765, |
|
"step": 1350 |
|
}, |
|
{ |
|
"epoch": 0.22, |
|
"eval_loss": 1.9117499589920044, |
|
"eval_runtime": 21.5826, |
|
"eval_samples_per_second": 46.334, |
|
"eval_steps_per_second": 1.483, |
|
"step": 1350 |
|
}, |
|
{ |
|
"epoch": 0.22, |
|
"learning_rate": 1e-06, |
|
"loss": 1.9179, |
|
"step": 1360 |
|
}, |
|
{ |
|
"epoch": 0.22, |
|
"eval_loss": 1.9114999771118164, |
|
"eval_runtime": 21.5598, |
|
"eval_samples_per_second": 46.383, |
|
"eval_steps_per_second": 1.484, |
|
"step": 1360 |
|
}, |
|
{ |
|
"epoch": 0.22, |
|
"learning_rate": 1e-06, |
|
"loss": 1.9387, |
|
"step": 1370 |
|
}, |
|
{ |
|
"epoch": 0.22, |
|
"eval_loss": 1.9118125438690186, |
|
"eval_runtime": 21.6259, |
|
"eval_samples_per_second": 46.241, |
|
"eval_steps_per_second": 1.48, |
|
"step": 1370 |
|
}, |
|
{ |
|
"epoch": 0.22, |
|
"learning_rate": 1e-06, |
|
"loss": 1.9386, |
|
"step": 1380 |
|
}, |
|
{ |
|
"epoch": 0.22, |
|
"eval_loss": 1.9107500314712524, |
|
"eval_runtime": 21.5698, |
|
"eval_samples_per_second": 46.361, |
|
"eval_steps_per_second": 1.484, |
|
"step": 1380 |
|
}, |
|
{ |
|
"epoch": 0.22, |
|
"learning_rate": 1e-06, |
|
"loss": 1.9395, |
|
"step": 1390 |
|
}, |
|
{ |
|
"epoch": 0.22, |
|
"eval_loss": 1.9112499952316284, |
|
"eval_runtime": 21.6242, |
|
"eval_samples_per_second": 46.245, |
|
"eval_steps_per_second": 1.48, |
|
"step": 1390 |
|
}, |
|
{ |
|
"epoch": 0.23, |
|
"learning_rate": 1e-06, |
|
"loss": 1.883, |
|
"step": 1400 |
|
}, |
|
{ |
|
"epoch": 0.23, |
|
"eval_loss": 1.9111875295639038, |
|
"eval_runtime": 21.5742, |
|
"eval_samples_per_second": 46.352, |
|
"eval_steps_per_second": 1.483, |
|
"step": 1400 |
|
}, |
|
{ |
|
"epoch": 0.23, |
|
"learning_rate": 1e-06, |
|
"loss": 1.9956, |
|
"step": 1410 |
|
}, |
|
{ |
|
"epoch": 0.23, |
|
"eval_loss": 1.9107500314712524, |
|
"eval_runtime": 21.5887, |
|
"eval_samples_per_second": 46.321, |
|
"eval_steps_per_second": 1.482, |
|
"step": 1410 |
|
}, |
|
{ |
|
"epoch": 0.23, |
|
"learning_rate": 1e-06, |
|
"loss": 1.9536, |
|
"step": 1420 |
|
}, |
|
{ |
|
"epoch": 0.23, |
|
"eval_loss": 1.9124375581741333, |
|
"eval_runtime": 21.5963, |
|
"eval_samples_per_second": 46.304, |
|
"eval_steps_per_second": 1.482, |
|
"step": 1420 |
|
}, |
|
{ |
|
"epoch": 0.23, |
|
"learning_rate": 1e-06, |
|
"loss": 1.9391, |
|
"step": 1430 |
|
}, |
|
{ |
|
"epoch": 0.23, |
|
"eval_loss": 1.9121249914169312, |
|
"eval_runtime": 21.5911, |
|
"eval_samples_per_second": 46.315, |
|
"eval_steps_per_second": 1.482, |
|
"step": 1430 |
|
}, |
|
{ |
|
"epoch": 0.23, |
|
"learning_rate": 1e-06, |
|
"loss": 1.8853, |
|
"step": 1440 |
|
}, |
|
{ |
|
"epoch": 0.23, |
|
"eval_loss": 1.9098750352859497, |
|
"eval_runtime": 21.5981, |
|
"eval_samples_per_second": 46.3, |
|
"eval_steps_per_second": 1.482, |
|
"step": 1440 |
|
}, |
|
{ |
|
"epoch": 0.23, |
|
"learning_rate": 1e-06, |
|
"loss": 1.8877, |
|
"step": 1450 |
|
}, |
|
{ |
|
"epoch": 0.23, |
|
"eval_loss": 1.9091875553131104, |
|
"eval_runtime": 21.5725, |
|
"eval_samples_per_second": 46.355, |
|
"eval_steps_per_second": 1.483, |
|
"step": 1450 |
|
}, |
|
{ |
|
"epoch": 0.24, |
|
"learning_rate": 1e-06, |
|
"loss": 1.9746, |
|
"step": 1460 |
|
}, |
|
{ |
|
"epoch": 0.24, |
|
"eval_loss": 1.909000039100647, |
|
"eval_runtime": 21.6032, |
|
"eval_samples_per_second": 46.29, |
|
"eval_steps_per_second": 1.481, |
|
"step": 1460 |
|
}, |
|
{ |
|
"epoch": 0.24, |
|
"learning_rate": 1e-06, |
|
"loss": 2.0241, |
|
"step": 1470 |
|
}, |
|
{ |
|
"epoch": 0.24, |
|
"eval_loss": 1.9088125228881836, |
|
"eval_runtime": 21.6136, |
|
"eval_samples_per_second": 46.267, |
|
"eval_steps_per_second": 1.481, |
|
"step": 1470 |
|
}, |
|
{ |
|
"epoch": 0.24, |
|
"learning_rate": 1e-06, |
|
"loss": 1.9257, |
|
"step": 1480 |
|
}, |
|
{ |
|
"epoch": 0.24, |
|
"eval_loss": 1.9088749885559082, |
|
"eval_runtime": 21.572, |
|
"eval_samples_per_second": 46.356, |
|
"eval_steps_per_second": 1.483, |
|
"step": 1480 |
|
}, |
|
{ |
|
"epoch": 0.24, |
|
"learning_rate": 1e-06, |
|
"loss": 2.0036, |
|
"step": 1490 |
|
}, |
|
{ |
|
"epoch": 0.24, |
|
"eval_loss": 1.9089374542236328, |
|
"eval_runtime": 21.5831, |
|
"eval_samples_per_second": 46.333, |
|
"eval_steps_per_second": 1.483, |
|
"step": 1490 |
|
}, |
|
{ |
|
"epoch": 0.24, |
|
"learning_rate": 1e-06, |
|
"loss": 1.9253, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 0.24, |
|
"eval_loss": 1.909500002861023, |
|
"eval_runtime": 21.6823, |
|
"eval_samples_per_second": 46.121, |
|
"eval_steps_per_second": 1.476, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 0.24, |
|
"learning_rate": 1e-06, |
|
"loss": 2.0153, |
|
"step": 1510 |
|
}, |
|
{ |
|
"epoch": 0.24, |
|
"eval_loss": 1.9086250066757202, |
|
"eval_runtime": 21.5458, |
|
"eval_samples_per_second": 46.413, |
|
"eval_steps_per_second": 1.485, |
|
"step": 1510 |
|
}, |
|
{ |
|
"epoch": 0.25, |
|
"learning_rate": 1e-06, |
|
"loss": 2.0366, |
|
"step": 1520 |
|
}, |
|
{ |
|
"epoch": 0.25, |
|
"eval_loss": 1.9090625047683716, |
|
"eval_runtime": 21.6111, |
|
"eval_samples_per_second": 46.273, |
|
"eval_steps_per_second": 1.481, |
|
"step": 1520 |
|
}, |
|
{ |
|
"epoch": 0.25, |
|
"learning_rate": 1e-06, |
|
"loss": 1.9283, |
|
"step": 1530 |
|
}, |
|
{ |
|
"epoch": 0.25, |
|
"eval_loss": 1.9079999923706055, |
|
"eval_runtime": 21.6291, |
|
"eval_samples_per_second": 46.234, |
|
"eval_steps_per_second": 1.479, |
|
"step": 1530 |
|
}, |
|
{ |
|
"epoch": 0.25, |
|
"learning_rate": 1e-06, |
|
"loss": 2.023, |
|
"step": 1540 |
|
}, |
|
{ |
|
"epoch": 0.25, |
|
"eval_loss": 1.9077500104904175, |
|
"eval_runtime": 21.6052, |
|
"eval_samples_per_second": 46.285, |
|
"eval_steps_per_second": 1.481, |
|
"step": 1540 |
|
}, |
|
{ |
|
"epoch": 0.25, |
|
"learning_rate": 1e-06, |
|
"loss": 2.0456, |
|
"step": 1550 |
|
}, |
|
{ |
|
"epoch": 0.25, |
|
"eval_loss": 1.9064375162124634, |
|
"eval_runtime": 21.5712, |
|
"eval_samples_per_second": 46.358, |
|
"eval_steps_per_second": 1.483, |
|
"step": 1550 |
|
}, |
|
{ |
|
"epoch": 0.25, |
|
"learning_rate": 1e-06, |
|
"loss": 2.0027, |
|
"step": 1560 |
|
}, |
|
{ |
|
"epoch": 0.25, |
|
"eval_loss": 1.9082499742507935, |
|
"eval_runtime": 21.5748, |
|
"eval_samples_per_second": 46.35, |
|
"eval_steps_per_second": 1.483, |
|
"step": 1560 |
|
}, |
|
{ |
|
"epoch": 0.25, |
|
"learning_rate": 1e-06, |
|
"loss": 1.9597, |
|
"step": 1570 |
|
}, |
|
{ |
|
"epoch": 0.25, |
|
"eval_loss": 1.909000039100647, |
|
"eval_runtime": 21.6055, |
|
"eval_samples_per_second": 46.285, |
|
"eval_steps_per_second": 1.481, |
|
"step": 1570 |
|
}, |
|
{ |
|
"epoch": 0.26, |
|
"learning_rate": 1e-06, |
|
"loss": 1.9969, |
|
"step": 1580 |
|
}, |
|
{ |
|
"epoch": 0.26, |
|
"eval_loss": 1.9086250066757202, |
|
"eval_runtime": 21.5805, |
|
"eval_samples_per_second": 46.338, |
|
"eval_steps_per_second": 1.483, |
|
"step": 1580 |
|
}, |
|
{ |
|
"epoch": 0.26, |
|
"learning_rate": 1e-06, |
|
"loss": 1.9597, |
|
"step": 1590 |
|
}, |
|
{ |
|
"epoch": 0.26, |
|
"eval_loss": 1.907562494277954, |
|
"eval_runtime": 21.6041, |
|
"eval_samples_per_second": 46.288, |
|
"eval_steps_per_second": 1.481, |
|
"step": 1590 |
|
}, |
|
{ |
|
"epoch": 0.26, |
|
"learning_rate": 1e-06, |
|
"loss": 1.9571, |
|
"step": 1600 |
|
}, |
|
{ |
|
"epoch": 0.26, |
|
"eval_loss": 1.907812476158142, |
|
"eval_runtime": 21.593, |
|
"eval_samples_per_second": 46.311, |
|
"eval_steps_per_second": 1.482, |
|
"step": 1600 |
|
}, |
|
{ |
|
"epoch": 0.26, |
|
"learning_rate": 1e-06, |
|
"loss": 2.0231, |
|
"step": 1610 |
|
}, |
|
{ |
|
"epoch": 0.26, |
|
"eval_loss": 1.9071249961853027, |
|
"eval_runtime": 21.588, |
|
"eval_samples_per_second": 46.322, |
|
"eval_steps_per_second": 1.482, |
|
"step": 1610 |
|
}, |
|
{ |
|
"epoch": 0.26, |
|
"learning_rate": 1e-06, |
|
"loss": 1.9554, |
|
"step": 1620 |
|
}, |
|
{ |
|
"epoch": 0.26, |
|
"eval_loss": 1.9047499895095825, |
|
"eval_runtime": 21.5963, |
|
"eval_samples_per_second": 46.304, |
|
"eval_steps_per_second": 1.482, |
|
"step": 1620 |
|
}, |
|
{ |
|
"epoch": 0.26, |
|
"learning_rate": 1e-06, |
|
"loss": 1.9042, |
|
"step": 1630 |
|
}, |
|
{ |
|
"epoch": 0.26, |
|
"eval_loss": 1.9048750400543213, |
|
"eval_runtime": 21.601, |
|
"eval_samples_per_second": 46.294, |
|
"eval_steps_per_second": 1.481, |
|
"step": 1630 |
|
}, |
|
{ |
|
"epoch": 0.27, |
|
"learning_rate": 1e-06, |
|
"loss": 1.8837, |
|
"step": 1640 |
|
}, |
|
{ |
|
"epoch": 0.27, |
|
"eval_loss": 1.9047499895095825, |
|
"eval_runtime": 21.5744, |
|
"eval_samples_per_second": 46.351, |
|
"eval_steps_per_second": 1.483, |
|
"step": 1640 |
|
}, |
|
{ |
|
"epoch": 0.27, |
|
"learning_rate": 1e-06, |
|
"loss": 1.931, |
|
"step": 1650 |
|
}, |
|
{ |
|
"epoch": 0.27, |
|
"eval_loss": 1.9033750295639038, |
|
"eval_runtime": 21.5833, |
|
"eval_samples_per_second": 46.332, |
|
"eval_steps_per_second": 1.483, |
|
"step": 1650 |
|
}, |
|
{ |
|
"epoch": 0.27, |
|
"learning_rate": 1e-06, |
|
"loss": 1.891, |
|
"step": 1660 |
|
}, |
|
{ |
|
"epoch": 0.27, |
|
"eval_loss": 1.9033124446868896, |
|
"eval_runtime": 21.5683, |
|
"eval_samples_per_second": 46.364, |
|
"eval_steps_per_second": 1.484, |
|
"step": 1660 |
|
}, |
|
{ |
|
"epoch": 0.27, |
|
"learning_rate": 1e-06, |
|
"loss": 1.9157, |
|
"step": 1670 |
|
}, |
|
{ |
|
"epoch": 0.27, |
|
"eval_loss": 1.9033750295639038, |
|
"eval_runtime": 21.6387, |
|
"eval_samples_per_second": 46.213, |
|
"eval_steps_per_second": 1.479, |
|
"step": 1670 |
|
}, |
|
{ |
|
"epoch": 0.27, |
|
"learning_rate": 1e-06, |
|
"loss": 1.9626, |
|
"step": 1680 |
|
}, |
|
{ |
|
"epoch": 0.27, |
|
"eval_loss": 1.9030624628067017, |
|
"eval_runtime": 21.6073, |
|
"eval_samples_per_second": 46.281, |
|
"eval_steps_per_second": 1.481, |
|
"step": 1680 |
|
}, |
|
{ |
|
"epoch": 0.27, |
|
"learning_rate": 1e-06, |
|
"loss": 2.0237, |
|
"step": 1690 |
|
}, |
|
{ |
|
"epoch": 0.27, |
|
"eval_loss": 1.90443754196167, |
|
"eval_runtime": 21.5689, |
|
"eval_samples_per_second": 46.363, |
|
"eval_steps_per_second": 1.484, |
|
"step": 1690 |
|
}, |
|
{ |
|
"epoch": 0.27, |
|
"learning_rate": 1e-06, |
|
"loss": 1.9002, |
|
"step": 1700 |
|
}, |
|
{ |
|
"epoch": 0.27, |
|
"eval_loss": 1.9045624732971191, |
|
"eval_runtime": 21.5902, |
|
"eval_samples_per_second": 46.317, |
|
"eval_steps_per_second": 1.482, |
|
"step": 1700 |
|
}, |
|
{ |
|
"epoch": 0.28, |
|
"learning_rate": 1e-06, |
|
"loss": 1.9676, |
|
"step": 1710 |
|
}, |
|
{ |
|
"epoch": 0.28, |
|
"eval_loss": 1.9063124656677246, |
|
"eval_runtime": 21.6165, |
|
"eval_samples_per_second": 46.261, |
|
"eval_steps_per_second": 1.48, |
|
"step": 1710 |
|
}, |
|
{ |
|
"epoch": 0.28, |
|
"learning_rate": 1e-06, |
|
"loss": 2.0352, |
|
"step": 1720 |
|
}, |
|
{ |
|
"epoch": 0.28, |
|
"eval_loss": 1.9059375524520874, |
|
"eval_runtime": 21.575, |
|
"eval_samples_per_second": 46.35, |
|
"eval_steps_per_second": 1.483, |
|
"step": 1720 |
|
}, |
|
{ |
|
"epoch": 0.28, |
|
"learning_rate": 1e-06, |
|
"loss": 1.996, |
|
"step": 1730 |
|
}, |
|
{ |
|
"epoch": 0.28, |
|
"eval_loss": 1.9054374694824219, |
|
"eval_runtime": 21.5922, |
|
"eval_samples_per_second": 46.313, |
|
"eval_steps_per_second": 1.482, |
|
"step": 1730 |
|
}, |
|
{ |
|
"epoch": 0.28, |
|
"learning_rate": 1e-06, |
|
"loss": 1.9681, |
|
"step": 1740 |
|
}, |
|
{ |
|
"epoch": 0.28, |
|
"eval_loss": 1.9049999713897705, |
|
"eval_runtime": 21.5631, |
|
"eval_samples_per_second": 46.376, |
|
"eval_steps_per_second": 1.484, |
|
"step": 1740 |
|
}, |
|
{ |
|
"epoch": 0.28, |
|
"learning_rate": 1e-06, |
|
"loss": 1.9749, |
|
"step": 1750 |
|
}, |
|
{ |
|
"epoch": 0.28, |
|
"eval_loss": 1.90443754196167, |
|
"eval_runtime": 21.59, |
|
"eval_samples_per_second": 46.318, |
|
"eval_steps_per_second": 1.482, |
|
"step": 1750 |
|
}, |
|
{ |
|
"epoch": 0.28, |
|
"learning_rate": 1e-06, |
|
"loss": 1.9298, |
|
"step": 1760 |
|
}, |
|
{ |
|
"epoch": 0.28, |
|
"eval_loss": 1.9034374952316284, |
|
"eval_runtime": 21.6076, |
|
"eval_samples_per_second": 46.28, |
|
"eval_steps_per_second": 1.481, |
|
"step": 1760 |
|
}, |
|
{ |
|
"epoch": 0.29, |
|
"learning_rate": 1e-06, |
|
"loss": 1.9732, |
|
"step": 1770 |
|
}, |
|
{ |
|
"epoch": 0.29, |
|
"eval_loss": 1.9023749828338623, |
|
"eval_runtime": 21.5985, |
|
"eval_samples_per_second": 46.3, |
|
"eval_steps_per_second": 1.482, |
|
"step": 1770 |
|
}, |
|
{ |
|
"epoch": 0.29, |
|
"learning_rate": 1e-06, |
|
"loss": 1.9114, |
|
"step": 1780 |
|
}, |
|
{ |
|
"epoch": 0.29, |
|
"eval_loss": 1.9015624523162842, |
|
"eval_runtime": 21.5773, |
|
"eval_samples_per_second": 46.345, |
|
"eval_steps_per_second": 1.483, |
|
"step": 1780 |
|
}, |
|
{ |
|
"epoch": 0.29, |
|
"learning_rate": 1e-06, |
|
"loss": 1.9449, |
|
"step": 1790 |
|
}, |
|
{ |
|
"epoch": 0.29, |
|
"eval_loss": 1.9018125534057617, |
|
"eval_runtime": 21.5622, |
|
"eval_samples_per_second": 46.377, |
|
"eval_steps_per_second": 1.484, |
|
"step": 1790 |
|
}, |
|
{ |
|
"epoch": 0.29, |
|
"learning_rate": 1e-06, |
|
"loss": 1.9619, |
|
"step": 1800 |
|
}, |
|
{ |
|
"epoch": 0.29, |
|
"eval_loss": 1.9030624628067017, |
|
"eval_runtime": 21.6519, |
|
"eval_samples_per_second": 46.185, |
|
"eval_steps_per_second": 1.478, |
|
"step": 1800 |
|
}, |
|
{ |
|
"epoch": 0.29, |
|
"learning_rate": 1e-06, |
|
"loss": 1.9091, |
|
"step": 1810 |
|
}, |
|
{ |
|
"epoch": 0.29, |
|
"eval_loss": 1.9013124704360962, |
|
"eval_runtime": 21.5954, |
|
"eval_samples_per_second": 46.306, |
|
"eval_steps_per_second": 1.482, |
|
"step": 1810 |
|
}, |
|
{ |
|
"epoch": 0.29, |
|
"learning_rate": 1e-06, |
|
"loss": 1.9262, |
|
"step": 1820 |
|
}, |
|
{ |
|
"epoch": 0.29, |
|
"eval_loss": 1.9016250371932983, |
|
"eval_runtime": 21.5732, |
|
"eval_samples_per_second": 46.354, |
|
"eval_steps_per_second": 1.483, |
|
"step": 1820 |
|
}, |
|
{ |
|
"epoch": 0.3, |
|
"learning_rate": 1e-06, |
|
"loss": 2.0112, |
|
"step": 1830 |
|
}, |
|
{ |
|
"epoch": 0.3, |
|
"eval_loss": 1.902187466621399, |
|
"eval_runtime": 21.5764, |
|
"eval_samples_per_second": 46.347, |
|
"eval_steps_per_second": 1.483, |
|
"step": 1830 |
|
}, |
|
{ |
|
"epoch": 0.3, |
|
"learning_rate": 1e-06, |
|
"loss": 1.9816, |
|
"step": 1840 |
|
}, |
|
{ |
|
"epoch": 0.3, |
|
"eval_loss": 1.9011249542236328, |
|
"eval_runtime": 21.5714, |
|
"eval_samples_per_second": 46.358, |
|
"eval_steps_per_second": 1.483, |
|
"step": 1840 |
|
}, |
|
{ |
|
"epoch": 0.3, |
|
"learning_rate": 1e-06, |
|
"loss": 1.861, |
|
"step": 1850 |
|
}, |
|
{ |
|
"epoch": 0.3, |
|
"eval_loss": 1.8998124599456787, |
|
"eval_runtime": 21.567, |
|
"eval_samples_per_second": 46.367, |
|
"eval_steps_per_second": 1.484, |
|
"step": 1850 |
|
}, |
|
{ |
|
"epoch": 0.3, |
|
"learning_rate": 1e-06, |
|
"loss": 1.9451, |
|
"step": 1860 |
|
}, |
|
{ |
|
"epoch": 0.3, |
|
"eval_loss": 1.8995000123977661, |
|
"eval_runtime": 21.588, |
|
"eval_samples_per_second": 46.322, |
|
"eval_steps_per_second": 1.482, |
|
"step": 1860 |
|
}, |
|
{ |
|
"epoch": 0.3, |
|
"learning_rate": 1e-06, |
|
"loss": 2.0126, |
|
"step": 1870 |
|
}, |
|
{ |
|
"epoch": 0.3, |
|
"eval_loss": 1.9004374742507935, |
|
"eval_runtime": 21.5695, |
|
"eval_samples_per_second": 46.362, |
|
"eval_steps_per_second": 1.484, |
|
"step": 1870 |
|
}, |
|
{ |
|
"epoch": 0.3, |
|
"learning_rate": 1e-06, |
|
"loss": 1.8937, |
|
"step": 1880 |
|
}, |
|
{ |
|
"epoch": 0.3, |
|
"eval_loss": 1.9018750190734863, |
|
"eval_runtime": 21.5846, |
|
"eval_samples_per_second": 46.329, |
|
"eval_steps_per_second": 1.483, |
|
"step": 1880 |
|
}, |
|
{ |
|
"epoch": 0.31, |
|
"learning_rate": 1e-06, |
|
"loss": 1.9457, |
|
"step": 1890 |
|
}, |
|
{ |
|
"epoch": 0.31, |
|
"eval_loss": 1.8994375467300415, |
|
"eval_runtime": 21.5822, |
|
"eval_samples_per_second": 46.335, |
|
"eval_steps_per_second": 1.483, |
|
"step": 1890 |
|
}, |
|
{ |
|
"epoch": 0.31, |
|
"learning_rate": 1e-06, |
|
"loss": 2.062, |
|
"step": 1900 |
|
}, |
|
{ |
|
"epoch": 0.31, |
|
"eval_loss": 1.899749994277954, |
|
"eval_runtime": 21.5866, |
|
"eval_samples_per_second": 46.325, |
|
"eval_steps_per_second": 1.482, |
|
"step": 1900 |
|
}, |
|
{ |
|
"epoch": 0.31, |
|
"learning_rate": 1e-06, |
|
"loss": 1.9497, |
|
"step": 1910 |
|
}, |
|
{ |
|
"epoch": 0.31, |
|
"eval_loss": 1.9012500047683716, |
|
"eval_runtime": 21.6617, |
|
"eval_samples_per_second": 46.164, |
|
"eval_steps_per_second": 1.477, |
|
"step": 1910 |
|
}, |
|
{ |
|
"epoch": 0.31, |
|
"learning_rate": 1e-06, |
|
"loss": 2.0455, |
|
"step": 1920 |
|
}, |
|
{ |
|
"epoch": 0.31, |
|
"eval_loss": 1.9005000591278076, |
|
"eval_runtime": 21.6313, |
|
"eval_samples_per_second": 46.229, |
|
"eval_steps_per_second": 1.479, |
|
"step": 1920 |
|
}, |
|
{ |
|
"epoch": 0.31, |
|
"learning_rate": 1e-06, |
|
"loss": 1.9166, |
|
"step": 1930 |
|
}, |
|
{ |
|
"epoch": 0.31, |
|
"eval_loss": 1.8993124961853027, |
|
"eval_runtime": 21.6028, |
|
"eval_samples_per_second": 46.29, |
|
"eval_steps_per_second": 1.481, |
|
"step": 1930 |
|
}, |
|
{ |
|
"epoch": 0.31, |
|
"learning_rate": 1e-06, |
|
"loss": 1.9117, |
|
"step": 1940 |
|
}, |
|
{ |
|
"epoch": 0.31, |
|
"eval_loss": 1.899749994277954, |
|
"eval_runtime": 21.5853, |
|
"eval_samples_per_second": 46.328, |
|
"eval_steps_per_second": 1.482, |
|
"step": 1940 |
|
}, |
|
{ |
|
"epoch": 0.32, |
|
"learning_rate": 1e-06, |
|
"loss": 2.0035, |
|
"step": 1950 |
|
}, |
|
{ |
|
"epoch": 0.32, |
|
"eval_loss": 1.8984375, |
|
"eval_runtime": 21.5723, |
|
"eval_samples_per_second": 46.356, |
|
"eval_steps_per_second": 1.483, |
|
"step": 1950 |
|
}, |
|
{ |
|
"epoch": 0.32, |
|
"learning_rate": 1e-06, |
|
"loss": 1.9852, |
|
"step": 1960 |
|
}, |
|
{ |
|
"epoch": 0.32, |
|
"eval_loss": 1.8980000019073486, |
|
"eval_runtime": 21.6353, |
|
"eval_samples_per_second": 46.221, |
|
"eval_steps_per_second": 1.479, |
|
"step": 1960 |
|
}, |
|
{ |
|
"epoch": 0.32, |
|
"learning_rate": 1e-06, |
|
"loss": 2.034, |
|
"step": 1970 |
|
}, |
|
{ |
|
"epoch": 0.32, |
|
"eval_loss": 1.8988125324249268, |
|
"eval_runtime": 21.5587, |
|
"eval_samples_per_second": 46.385, |
|
"eval_steps_per_second": 1.484, |
|
"step": 1970 |
|
}, |
|
{ |
|
"epoch": 0.32, |
|
"learning_rate": 1e-06, |
|
"loss": 1.8832, |
|
"step": 1980 |
|
}, |
|
{ |
|
"epoch": 0.32, |
|
"eval_loss": 1.8982499837875366, |
|
"eval_runtime": 21.5628, |
|
"eval_samples_per_second": 46.376, |
|
"eval_steps_per_second": 1.484, |
|
"step": 1980 |
|
}, |
|
{ |
|
"epoch": 0.32, |
|
"learning_rate": 1e-06, |
|
"loss": 1.956, |
|
"step": 1990 |
|
}, |
|
{ |
|
"epoch": 0.32, |
|
"eval_loss": 1.8980624675750732, |
|
"eval_runtime": 21.5868, |
|
"eval_samples_per_second": 46.325, |
|
"eval_steps_per_second": 1.482, |
|
"step": 1990 |
|
}, |
|
{ |
|
"epoch": 0.32, |
|
"learning_rate": 1e-06, |
|
"loss": 1.8746, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 0.32, |
|
"eval_loss": 1.8969374895095825, |
|
"eval_runtime": 21.5821, |
|
"eval_samples_per_second": 46.335, |
|
"eval_steps_per_second": 1.483, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 0.32, |
|
"learning_rate": 1e-06, |
|
"loss": 1.8892, |
|
"step": 2010 |
|
}, |
|
{ |
|
"epoch": 0.32, |
|
"eval_loss": 1.8958749771118164, |
|
"eval_runtime": 21.6083, |
|
"eval_samples_per_second": 46.278, |
|
"eval_steps_per_second": 1.481, |
|
"step": 2010 |
|
}, |
|
{ |
|
"epoch": 0.33, |
|
"learning_rate": 1e-06, |
|
"loss": 2.0095, |
|
"step": 2020 |
|
}, |
|
{ |
|
"epoch": 0.33, |
|
"eval_loss": 1.8948750495910645, |
|
"eval_runtime": 21.6201, |
|
"eval_samples_per_second": 46.253, |
|
"eval_steps_per_second": 1.48, |
|
"step": 2020 |
|
}, |
|
{ |
|
"epoch": 0.33, |
|
"learning_rate": 1e-06, |
|
"loss": 1.9422, |
|
"step": 2030 |
|
}, |
|
{ |
|
"epoch": 0.33, |
|
"eval_loss": 1.897125005722046, |
|
"eval_runtime": 21.5595, |
|
"eval_samples_per_second": 46.383, |
|
"eval_steps_per_second": 1.484, |
|
"step": 2030 |
|
}, |
|
{ |
|
"epoch": 0.33, |
|
"learning_rate": 1e-06, |
|
"loss": 1.8938, |
|
"step": 2040 |
|
}, |
|
{ |
|
"epoch": 0.33, |
|
"eval_loss": 1.8969999551773071, |
|
"eval_runtime": 21.5569, |
|
"eval_samples_per_second": 46.389, |
|
"eval_steps_per_second": 1.484, |
|
"step": 2040 |
|
}, |
|
{ |
|
"epoch": 0.33, |
|
"learning_rate": 1e-06, |
|
"loss": 1.874, |
|
"step": 2050 |
|
}, |
|
{ |
|
"epoch": 0.33, |
|
"eval_loss": 1.8964999914169312, |
|
"eval_runtime": 21.5507, |
|
"eval_samples_per_second": 46.402, |
|
"eval_steps_per_second": 1.485, |
|
"step": 2050 |
|
}, |
|
{ |
|
"epoch": 0.33, |
|
"learning_rate": 1e-06, |
|
"loss": 1.8995, |
|
"step": 2060 |
|
}, |
|
{ |
|
"epoch": 0.33, |
|
"eval_loss": 1.8951250314712524, |
|
"eval_runtime": 21.6188, |
|
"eval_samples_per_second": 46.256, |
|
"eval_steps_per_second": 1.48, |
|
"step": 2060 |
|
}, |
|
{ |
|
"epoch": 0.33, |
|
"learning_rate": 1e-06, |
|
"loss": 1.8192, |
|
"step": 2070 |
|
}, |
|
{ |
|
"epoch": 0.33, |
|
"eval_loss": 1.895937442779541, |
|
"eval_runtime": 21.5982, |
|
"eval_samples_per_second": 46.3, |
|
"eval_steps_per_second": 1.482, |
|
"step": 2070 |
|
}, |
|
{ |
|
"epoch": 0.34, |
|
"learning_rate": 1e-06, |
|
"loss": 1.9252, |
|
"step": 2080 |
|
}, |
|
{ |
|
"epoch": 0.34, |
|
"eval_loss": 1.8956249952316284, |
|
"eval_runtime": 21.6523, |
|
"eval_samples_per_second": 46.185, |
|
"eval_steps_per_second": 1.478, |
|
"step": 2080 |
|
}, |
|
{ |
|
"epoch": 0.34, |
|
"learning_rate": 1e-06, |
|
"loss": 1.8958, |
|
"step": 2090 |
|
}, |
|
{ |
|
"epoch": 0.34, |
|
"eval_loss": 1.896875023841858, |
|
"eval_runtime": 21.6098, |
|
"eval_samples_per_second": 46.275, |
|
"eval_steps_per_second": 1.481, |
|
"step": 2090 |
|
}, |
|
{ |
|
"epoch": 0.34, |
|
"learning_rate": 1e-06, |
|
"loss": 1.9043, |
|
"step": 2100 |
|
}, |
|
{ |
|
"epoch": 0.34, |
|
"eval_loss": 1.8940000534057617, |
|
"eval_runtime": 21.5718, |
|
"eval_samples_per_second": 46.357, |
|
"eval_steps_per_second": 1.483, |
|
"step": 2100 |
|
}, |
|
{ |
|
"epoch": 0.34, |
|
"learning_rate": 1e-06, |
|
"loss": 1.9596, |
|
"step": 2110 |
|
}, |
|
{ |
|
"epoch": 0.34, |
|
"eval_loss": 1.8939374685287476, |
|
"eval_runtime": 21.6391, |
|
"eval_samples_per_second": 46.213, |
|
"eval_steps_per_second": 1.479, |
|
"step": 2110 |
|
}, |
|
{ |
|
"epoch": 0.34, |
|
"learning_rate": 1e-06, |
|
"loss": 1.9218, |
|
"step": 2120 |
|
}, |
|
{ |
|
"epoch": 0.34, |
|
"eval_loss": 1.8934375047683716, |
|
"eval_runtime": 21.5925, |
|
"eval_samples_per_second": 46.312, |
|
"eval_steps_per_second": 1.482, |
|
"step": 2120 |
|
}, |
|
{ |
|
"epoch": 0.34, |
|
"learning_rate": 1e-06, |
|
"loss": 1.9027, |
|
"step": 2130 |
|
}, |
|
{ |
|
"epoch": 0.34, |
|
"eval_loss": 1.8935625553131104, |
|
"eval_runtime": 21.5668, |
|
"eval_samples_per_second": 46.368, |
|
"eval_steps_per_second": 1.484, |
|
"step": 2130 |
|
}, |
|
{ |
|
"epoch": 0.35, |
|
"learning_rate": 1e-06, |
|
"loss": 2.0138, |
|
"step": 2140 |
|
}, |
|
{ |
|
"epoch": 0.35, |
|
"eval_loss": 1.8940000534057617, |
|
"eval_runtime": 21.5794, |
|
"eval_samples_per_second": 46.341, |
|
"eval_steps_per_second": 1.483, |
|
"step": 2140 |
|
}, |
|
{ |
|
"epoch": 0.35, |
|
"learning_rate": 1e-06, |
|
"loss": 1.9878, |
|
"step": 2150 |
|
}, |
|
{ |
|
"epoch": 0.35, |
|
"eval_loss": 1.894124984741211, |
|
"eval_runtime": 21.7429, |
|
"eval_samples_per_second": 45.992, |
|
"eval_steps_per_second": 1.472, |
|
"step": 2150 |
|
}, |
|
{ |
|
"epoch": 0.35, |
|
"learning_rate": 1e-06, |
|
"loss": 2.0328, |
|
"step": 2160 |
|
}, |
|
{ |
|
"epoch": 0.35, |
|
"eval_loss": 1.8918750286102295, |
|
"eval_runtime": 21.5689, |
|
"eval_samples_per_second": 46.363, |
|
"eval_steps_per_second": 1.484, |
|
"step": 2160 |
|
}, |
|
{ |
|
"epoch": 0.35, |
|
"learning_rate": 1e-06, |
|
"loss": 1.894, |
|
"step": 2170 |
|
}, |
|
{ |
|
"epoch": 0.35, |
|
"eval_loss": 1.8916250467300415, |
|
"eval_runtime": 21.6262, |
|
"eval_samples_per_second": 46.24, |
|
"eval_steps_per_second": 1.48, |
|
"step": 2170 |
|
}, |
|
{ |
|
"epoch": 0.35, |
|
"learning_rate": 1e-06, |
|
"loss": 1.952, |
|
"step": 2180 |
|
}, |
|
{ |
|
"epoch": 0.35, |
|
"eval_loss": 1.8934999704360962, |
|
"eval_runtime": 21.5943, |
|
"eval_samples_per_second": 46.309, |
|
"eval_steps_per_second": 1.482, |
|
"step": 2180 |
|
}, |
|
{ |
|
"epoch": 0.35, |
|
"learning_rate": 1e-06, |
|
"loss": 1.9383, |
|
"step": 2190 |
|
}, |
|
{ |
|
"epoch": 0.35, |
|
"eval_loss": 1.894124984741211, |
|
"eval_runtime": 21.5954, |
|
"eval_samples_per_second": 46.306, |
|
"eval_steps_per_second": 1.482, |
|
"step": 2190 |
|
}, |
|
{ |
|
"epoch": 0.36, |
|
"learning_rate": 1e-06, |
|
"loss": 1.9761, |
|
"step": 2200 |
|
}, |
|
{ |
|
"epoch": 0.36, |
|
"eval_loss": 1.8935625553131104, |
|
"eval_runtime": 21.5892, |
|
"eval_samples_per_second": 46.32, |
|
"eval_steps_per_second": 1.482, |
|
"step": 2200 |
|
}, |
|
{ |
|
"epoch": 0.36, |
|
"learning_rate": 1e-06, |
|
"loss": 1.8925, |
|
"step": 2210 |
|
}, |
|
{ |
|
"epoch": 0.36, |
|
"eval_loss": 1.8925625085830688, |
|
"eval_runtime": 21.6011, |
|
"eval_samples_per_second": 46.294, |
|
"eval_steps_per_second": 1.481, |
|
"step": 2210 |
|
}, |
|
{ |
|
"epoch": 0.36, |
|
"learning_rate": 1e-06, |
|
"loss": 1.8558, |
|
"step": 2220 |
|
}, |
|
{ |
|
"epoch": 0.36, |
|
"eval_loss": 1.8917499780654907, |
|
"eval_runtime": 21.652, |
|
"eval_samples_per_second": 46.185, |
|
"eval_steps_per_second": 1.478, |
|
"step": 2220 |
|
}, |
|
{ |
|
"epoch": 0.36, |
|
"learning_rate": 1e-06, |
|
"loss": 1.9386, |
|
"step": 2230 |
|
}, |
|
{ |
|
"epoch": 0.36, |
|
"eval_loss": 1.8930624723434448, |
|
"eval_runtime": 21.608, |
|
"eval_samples_per_second": 46.279, |
|
"eval_steps_per_second": 1.481, |
|
"step": 2230 |
|
}, |
|
{ |
|
"epoch": 0.36, |
|
"learning_rate": 1e-06, |
|
"loss": 1.9294, |
|
"step": 2240 |
|
}, |
|
{ |
|
"epoch": 0.36, |
|
"eval_loss": 1.8916875123977661, |
|
"eval_runtime": 21.5916, |
|
"eval_samples_per_second": 46.314, |
|
"eval_steps_per_second": 1.482, |
|
"step": 2240 |
|
}, |
|
{ |
|
"epoch": 0.36, |
|
"learning_rate": 1e-06, |
|
"loss": 1.92, |
|
"step": 2250 |
|
}, |
|
{ |
|
"epoch": 0.36, |
|
"eval_loss": 1.8911875486373901, |
|
"eval_runtime": 21.5961, |
|
"eval_samples_per_second": 46.305, |
|
"eval_steps_per_second": 1.482, |
|
"step": 2250 |
|
}, |
|
{ |
|
"epoch": 0.37, |
|
"learning_rate": 1e-06, |
|
"loss": 1.9115, |
|
"step": 2260 |
|
}, |
|
{ |
|
"epoch": 0.37, |
|
"eval_loss": 1.8904999494552612, |
|
"eval_runtime": 21.5851, |
|
"eval_samples_per_second": 46.328, |
|
"eval_steps_per_second": 1.483, |
|
"step": 2260 |
|
}, |
|
{ |
|
"epoch": 0.37, |
|
"learning_rate": 1e-06, |
|
"loss": 1.7885, |
|
"step": 2270 |
|
}, |
|
{ |
|
"epoch": 0.37, |
|
"eval_loss": 1.8905625343322754, |
|
"eval_runtime": 21.5921, |
|
"eval_samples_per_second": 46.313, |
|
"eval_steps_per_second": 1.482, |
|
"step": 2270 |
|
}, |
|
{ |
|
"epoch": 0.37, |
|
"learning_rate": 1e-06, |
|
"loss": 1.9907, |
|
"step": 2280 |
|
}, |
|
{ |
|
"epoch": 0.37, |
|
"eval_loss": 1.890125036239624, |
|
"eval_runtime": 21.5976, |
|
"eval_samples_per_second": 46.301, |
|
"eval_steps_per_second": 1.482, |
|
"step": 2280 |
|
}, |
|
{ |
|
"epoch": 0.37, |
|
"learning_rate": 1e-06, |
|
"loss": 1.8993, |
|
"step": 2290 |
|
}, |
|
{ |
|
"epoch": 0.37, |
|
"eval_loss": 1.8910000324249268, |
|
"eval_runtime": 21.6337, |
|
"eval_samples_per_second": 46.224, |
|
"eval_steps_per_second": 1.479, |
|
"step": 2290 |
|
}, |
|
{ |
|
"epoch": 0.37, |
|
"learning_rate": 1e-06, |
|
"loss": 1.9179, |
|
"step": 2300 |
|
}, |
|
{ |
|
"epoch": 0.37, |
|
"eval_loss": 1.8908125162124634, |
|
"eval_runtime": 21.5991, |
|
"eval_samples_per_second": 46.298, |
|
"eval_steps_per_second": 1.482, |
|
"step": 2300 |
|
}, |
|
{ |
|
"epoch": 0.37, |
|
"learning_rate": 1e-06, |
|
"loss": 1.8432, |
|
"step": 2310 |
|
}, |
|
{ |
|
"epoch": 0.37, |
|
"eval_loss": 1.891124963760376, |
|
"eval_runtime": 21.5977, |
|
"eval_samples_per_second": 46.301, |
|
"eval_steps_per_second": 1.482, |
|
"step": 2310 |
|
}, |
|
{ |
|
"epoch": 0.37, |
|
"learning_rate": 1e-06, |
|
"loss": 1.9284, |
|
"step": 2320 |
|
}, |
|
{ |
|
"epoch": 0.37, |
|
"eval_loss": 1.8915624618530273, |
|
"eval_runtime": 21.5848, |
|
"eval_samples_per_second": 46.329, |
|
"eval_steps_per_second": 1.483, |
|
"step": 2320 |
|
}, |
|
{ |
|
"epoch": 0.38, |
|
"learning_rate": 1e-06, |
|
"loss": 1.9587, |
|
"step": 2330 |
|
}, |
|
{ |
|
"epoch": 0.38, |
|
"eval_loss": 1.8913124799728394, |
|
"eval_runtime": 21.587, |
|
"eval_samples_per_second": 46.324, |
|
"eval_steps_per_second": 1.482, |
|
"step": 2330 |
|
}, |
|
{ |
|
"epoch": 0.38, |
|
"learning_rate": 1e-06, |
|
"loss": 1.9536, |
|
"step": 2340 |
|
}, |
|
{ |
|
"epoch": 0.38, |
|
"eval_loss": 1.8917499780654907, |
|
"eval_runtime": 21.5919, |
|
"eval_samples_per_second": 46.314, |
|
"eval_steps_per_second": 1.482, |
|
"step": 2340 |
|
}, |
|
{ |
|
"epoch": 0.38, |
|
"learning_rate": 1e-06, |
|
"loss": 1.8101, |
|
"step": 2350 |
|
}, |
|
{ |
|
"epoch": 0.38, |
|
"eval_loss": 1.8914999961853027, |
|
"eval_runtime": 21.5804, |
|
"eval_samples_per_second": 46.338, |
|
"eval_steps_per_second": 1.483, |
|
"step": 2350 |
|
}, |
|
{ |
|
"epoch": 0.38, |
|
"learning_rate": 1e-06, |
|
"loss": 1.9749, |
|
"step": 2360 |
|
}, |
|
{ |
|
"epoch": 0.38, |
|
"eval_loss": 1.8907500505447388, |
|
"eval_runtime": 21.59, |
|
"eval_samples_per_second": 46.318, |
|
"eval_steps_per_second": 1.482, |
|
"step": 2360 |
|
}, |
|
{ |
|
"epoch": 0.38, |
|
"learning_rate": 1e-06, |
|
"loss": 1.9275, |
|
"step": 2370 |
|
}, |
|
{ |
|
"epoch": 0.38, |
|
"eval_loss": 1.8913124799728394, |
|
"eval_runtime": 21.6004, |
|
"eval_samples_per_second": 46.295, |
|
"eval_steps_per_second": 1.481, |
|
"step": 2370 |
|
}, |
|
{ |
|
"epoch": 0.38, |
|
"learning_rate": 1e-06, |
|
"loss": 1.9226, |
|
"step": 2380 |
|
}, |
|
{ |
|
"epoch": 0.38, |
|
"eval_loss": 1.8911875486373901, |
|
"eval_runtime": 21.5995, |
|
"eval_samples_per_second": 46.297, |
|
"eval_steps_per_second": 1.482, |
|
"step": 2380 |
|
}, |
|
{ |
|
"epoch": 0.39, |
|
"learning_rate": 1e-06, |
|
"loss": 1.9028, |
|
"step": 2390 |
|
}, |
|
{ |
|
"epoch": 0.39, |
|
"eval_loss": 1.8923125267028809, |
|
"eval_runtime": 21.656, |
|
"eval_samples_per_second": 46.177, |
|
"eval_steps_per_second": 1.478, |
|
"step": 2390 |
|
}, |
|
{ |
|
"epoch": 0.39, |
|
"learning_rate": 1e-06, |
|
"loss": 1.9238, |
|
"step": 2400 |
|
}, |
|
{ |
|
"epoch": 0.39, |
|
"eval_loss": 1.8915624618530273, |
|
"eval_runtime": 21.574, |
|
"eval_samples_per_second": 46.352, |
|
"eval_steps_per_second": 1.483, |
|
"step": 2400 |
|
}, |
|
{ |
|
"epoch": 0.39, |
|
"learning_rate": 1e-06, |
|
"loss": 1.9168, |
|
"step": 2410 |
|
}, |
|
{ |
|
"epoch": 0.39, |
|
"eval_loss": 1.8916875123977661, |
|
"eval_runtime": 21.5773, |
|
"eval_samples_per_second": 46.345, |
|
"eval_steps_per_second": 1.483, |
|
"step": 2410 |
|
}, |
|
{ |
|
"epoch": 0.39, |
|
"learning_rate": 1e-06, |
|
"loss": 1.9032, |
|
"step": 2420 |
|
}, |
|
{ |
|
"epoch": 0.39, |
|
"eval_loss": 1.8912500143051147, |
|
"eval_runtime": 21.5933, |
|
"eval_samples_per_second": 46.311, |
|
"eval_steps_per_second": 1.482, |
|
"step": 2420 |
|
}, |
|
{ |
|
"epoch": 0.39, |
|
"learning_rate": 1e-06, |
|
"loss": 1.9834, |
|
"step": 2430 |
|
}, |
|
{ |
|
"epoch": 0.39, |
|
"eval_loss": 1.890625, |
|
"eval_runtime": 21.6052, |
|
"eval_samples_per_second": 46.285, |
|
"eval_steps_per_second": 1.481, |
|
"step": 2430 |
|
}, |
|
{ |
|
"epoch": 0.39, |
|
"learning_rate": 1e-06, |
|
"loss": 1.9134, |
|
"step": 2440 |
|
}, |
|
{ |
|
"epoch": 0.39, |
|
"eval_loss": 1.8895000219345093, |
|
"eval_runtime": 21.586, |
|
"eval_samples_per_second": 46.326, |
|
"eval_steps_per_second": 1.482, |
|
"step": 2440 |
|
}, |
|
{ |
|
"epoch": 0.4, |
|
"learning_rate": 1e-06, |
|
"loss": 1.9176, |
|
"step": 2450 |
|
}, |
|
{ |
|
"epoch": 0.4, |
|
"eval_loss": 1.8895000219345093, |
|
"eval_runtime": 21.7961, |
|
"eval_samples_per_second": 45.88, |
|
"eval_steps_per_second": 1.468, |
|
"step": 2450 |
|
}, |
|
{ |
|
"epoch": 0.4, |
|
"learning_rate": 1e-06, |
|
"loss": 1.972, |
|
"step": 2460 |
|
}, |
|
{ |
|
"epoch": 0.4, |
|
"eval_loss": 1.8908125162124634, |
|
"eval_runtime": 21.7651, |
|
"eval_samples_per_second": 45.945, |
|
"eval_steps_per_second": 1.47, |
|
"step": 2460 |
|
}, |
|
{ |
|
"epoch": 0.4, |
|
"learning_rate": 1e-06, |
|
"loss": 1.9347, |
|
"step": 2470 |
|
}, |
|
{ |
|
"epoch": 0.4, |
|
"eval_loss": 1.889312505722046, |
|
"eval_runtime": 21.6078, |
|
"eval_samples_per_second": 46.28, |
|
"eval_steps_per_second": 1.481, |
|
"step": 2470 |
|
}, |
|
{ |
|
"epoch": 0.4, |
|
"learning_rate": 1e-06, |
|
"loss": 1.8683, |
|
"step": 2480 |
|
}, |
|
{ |
|
"epoch": 0.4, |
|
"eval_loss": 1.8899375200271606, |
|
"eval_runtime": 21.5933, |
|
"eval_samples_per_second": 46.311, |
|
"eval_steps_per_second": 1.482, |
|
"step": 2480 |
|
}, |
|
{ |
|
"epoch": 0.4, |
|
"learning_rate": 1e-06, |
|
"loss": 1.8978, |
|
"step": 2490 |
|
}, |
|
{ |
|
"epoch": 0.4, |
|
"eval_loss": 1.8900624513626099, |
|
"eval_runtime": 21.7246, |
|
"eval_samples_per_second": 46.031, |
|
"eval_steps_per_second": 1.473, |
|
"step": 2490 |
|
}, |
|
{ |
|
"epoch": 0.4, |
|
"learning_rate": 1e-06, |
|
"loss": 1.9114, |
|
"step": 2500 |
|
}, |
|
{ |
|
"epoch": 0.4, |
|
"eval_loss": 1.8910624980926514, |
|
"eval_runtime": 21.5984, |
|
"eval_samples_per_second": 46.3, |
|
"eval_steps_per_second": 1.482, |
|
"step": 2500 |
|
}, |
|
{ |
|
"epoch": 0.41, |
|
"learning_rate": 1e-06, |
|
"loss": 1.8513, |
|
"step": 2510 |
|
}, |
|
{ |
|
"epoch": 0.41, |
|
"eval_loss": 1.8907500505447388, |
|
"eval_runtime": 21.5892, |
|
"eval_samples_per_second": 46.32, |
|
"eval_steps_per_second": 1.482, |
|
"step": 2510 |
|
}, |
|
{ |
|
"epoch": 0.41, |
|
"learning_rate": 1e-06, |
|
"loss": 1.9279, |
|
"step": 2520 |
|
}, |
|
{ |
|
"epoch": 0.41, |
|
"eval_loss": 1.8891874551773071, |
|
"eval_runtime": 21.5992, |
|
"eval_samples_per_second": 46.298, |
|
"eval_steps_per_second": 1.482, |
|
"step": 2520 |
|
}, |
|
{ |
|
"epoch": 0.41, |
|
"learning_rate": 1e-06, |
|
"loss": 1.963, |
|
"step": 2530 |
|
}, |
|
{ |
|
"epoch": 0.41, |
|
"eval_loss": 1.8878124952316284, |
|
"eval_runtime": 21.746, |
|
"eval_samples_per_second": 45.986, |
|
"eval_steps_per_second": 1.472, |
|
"step": 2530 |
|
}, |
|
{ |
|
"epoch": 0.41, |
|
"learning_rate": 1e-06, |
|
"loss": 1.9512, |
|
"step": 2540 |
|
}, |
|
{ |
|
"epoch": 0.41, |
|
"eval_loss": 1.8887499570846558, |
|
"eval_runtime": 21.6357, |
|
"eval_samples_per_second": 46.22, |
|
"eval_steps_per_second": 1.479, |
|
"step": 2540 |
|
}, |
|
{ |
|
"epoch": 0.41, |
|
"learning_rate": 1e-06, |
|
"loss": 1.8867, |
|
"step": 2550 |
|
}, |
|
{ |
|
"epoch": 0.41, |
|
"eval_loss": 1.8880624771118164, |
|
"eval_runtime": 21.5957, |
|
"eval_samples_per_second": 46.305, |
|
"eval_steps_per_second": 1.482, |
|
"step": 2550 |
|
}, |
|
{ |
|
"epoch": 0.41, |
|
"learning_rate": 1e-06, |
|
"loss": 1.9328, |
|
"step": 2560 |
|
}, |
|
{ |
|
"epoch": 0.41, |
|
"eval_loss": 1.886812448501587, |
|
"eval_runtime": 21.6158, |
|
"eval_samples_per_second": 46.263, |
|
"eval_steps_per_second": 1.48, |
|
"step": 2560 |
|
}, |
|
{ |
|
"epoch": 0.42, |
|
"learning_rate": 1e-06, |
|
"loss": 1.8483, |
|
"step": 2570 |
|
}, |
|
{ |
|
"epoch": 0.42, |
|
"eval_loss": 1.8883124589920044, |
|
"eval_runtime": 21.596, |
|
"eval_samples_per_second": 46.305, |
|
"eval_steps_per_second": 1.482, |
|
"step": 2570 |
|
}, |
|
{ |
|
"epoch": 0.42, |
|
"learning_rate": 1e-06, |
|
"loss": 1.9146, |
|
"step": 2580 |
|
}, |
|
{ |
|
"epoch": 0.42, |
|
"eval_loss": 1.8877500295639038, |
|
"eval_runtime": 21.5948, |
|
"eval_samples_per_second": 46.307, |
|
"eval_steps_per_second": 1.482, |
|
"step": 2580 |
|
}, |
|
{ |
|
"epoch": 0.42, |
|
"learning_rate": 1e-06, |
|
"loss": 2.0254, |
|
"step": 2590 |
|
}, |
|
{ |
|
"epoch": 0.42, |
|
"eval_loss": 1.8882499933242798, |
|
"eval_runtime": 21.5935, |
|
"eval_samples_per_second": 46.31, |
|
"eval_steps_per_second": 1.482, |
|
"step": 2590 |
|
}, |
|
{ |
|
"epoch": 0.42, |
|
"learning_rate": 1e-06, |
|
"loss": 1.8772, |
|
"step": 2600 |
|
}, |
|
{ |
|
"epoch": 0.42, |
|
"eval_loss": 1.8884375095367432, |
|
"eval_runtime": 21.581, |
|
"eval_samples_per_second": 46.337, |
|
"eval_steps_per_second": 1.483, |
|
"step": 2600 |
|
}, |
|
{ |
|
"epoch": 0.42, |
|
"learning_rate": 1e-06, |
|
"loss": 1.9283, |
|
"step": 2610 |
|
}, |
|
{ |
|
"epoch": 0.42, |
|
"eval_loss": 1.8867499828338623, |
|
"eval_runtime": 21.6018, |
|
"eval_samples_per_second": 46.292, |
|
"eval_steps_per_second": 1.481, |
|
"step": 2610 |
|
}, |
|
{ |
|
"epoch": 0.42, |
|
"learning_rate": 1e-06, |
|
"loss": 2.014, |
|
"step": 2620 |
|
}, |
|
{ |
|
"epoch": 0.42, |
|
"eval_loss": 1.8861249685287476, |
|
"eval_runtime": 21.5908, |
|
"eval_samples_per_second": 46.316, |
|
"eval_steps_per_second": 1.482, |
|
"step": 2620 |
|
}, |
|
{ |
|
"epoch": 0.43, |
|
"learning_rate": 1e-06, |
|
"loss": 1.8434, |
|
"step": 2630 |
|
}, |
|
{ |
|
"epoch": 0.43, |
|
"eval_loss": 1.8859374523162842, |
|
"eval_runtime": 21.6442, |
|
"eval_samples_per_second": 46.202, |
|
"eval_steps_per_second": 1.478, |
|
"step": 2630 |
|
}, |
|
{ |
|
"epoch": 0.43, |
|
"learning_rate": 1e-06, |
|
"loss": 1.9982, |
|
"step": 2640 |
|
}, |
|
{ |
|
"epoch": 0.43, |
|
"eval_loss": 1.8853750228881836, |
|
"eval_runtime": 21.6247, |
|
"eval_samples_per_second": 46.243, |
|
"eval_steps_per_second": 1.48, |
|
"step": 2640 |
|
}, |
|
{ |
|
"epoch": 0.43, |
|
"learning_rate": 1e-06, |
|
"loss": 1.9145, |
|
"step": 2650 |
|
}, |
|
{ |
|
"epoch": 0.43, |
|
"eval_loss": 1.885812520980835, |
|
"eval_runtime": 21.6265, |
|
"eval_samples_per_second": 46.24, |
|
"eval_steps_per_second": 1.48, |
|
"step": 2650 |
|
}, |
|
{ |
|
"epoch": 0.43, |
|
"learning_rate": 1e-06, |
|
"loss": 1.8797, |
|
"step": 2660 |
|
}, |
|
{ |
|
"epoch": 0.43, |
|
"eval_loss": 1.8846875429153442, |
|
"eval_runtime": 21.5985, |
|
"eval_samples_per_second": 46.299, |
|
"eval_steps_per_second": 1.482, |
|
"step": 2660 |
|
}, |
|
{ |
|
"epoch": 0.43, |
|
"learning_rate": 1e-06, |
|
"loss": 1.9019, |
|
"step": 2670 |
|
}, |
|
{ |
|
"epoch": 0.43, |
|
"eval_loss": 1.8836250305175781, |
|
"eval_runtime": 21.6199, |
|
"eval_samples_per_second": 46.254, |
|
"eval_steps_per_second": 1.48, |
|
"step": 2670 |
|
}, |
|
{ |
|
"epoch": 0.43, |
|
"learning_rate": 1e-06, |
|
"loss": 1.9676, |
|
"step": 2680 |
|
}, |
|
{ |
|
"epoch": 0.43, |
|
"eval_loss": 1.8840625286102295, |
|
"eval_runtime": 21.6856, |
|
"eval_samples_per_second": 46.113, |
|
"eval_steps_per_second": 1.476, |
|
"step": 2680 |
|
}, |
|
{ |
|
"epoch": 0.43, |
|
"learning_rate": 1e-06, |
|
"loss": 2.0023, |
|
"step": 2690 |
|
}, |
|
{ |
|
"epoch": 0.43, |
|
"eval_loss": 1.8842500448226929, |
|
"eval_runtime": 21.5877, |
|
"eval_samples_per_second": 46.323, |
|
"eval_steps_per_second": 1.482, |
|
"step": 2690 |
|
}, |
|
{ |
|
"epoch": 0.44, |
|
"learning_rate": 1e-06, |
|
"loss": 1.9213, |
|
"step": 2700 |
|
}, |
|
{ |
|
"epoch": 0.44, |
|
"eval_loss": 1.885312557220459, |
|
"eval_runtime": 21.6471, |
|
"eval_samples_per_second": 46.196, |
|
"eval_steps_per_second": 1.478, |
|
"step": 2700 |
|
}, |
|
{ |
|
"epoch": 0.44, |
|
"learning_rate": 1e-06, |
|
"loss": 1.9546, |
|
"step": 2710 |
|
}, |
|
{ |
|
"epoch": 0.44, |
|
"eval_loss": 1.8870625495910645, |
|
"eval_runtime": 21.6008, |
|
"eval_samples_per_second": 46.295, |
|
"eval_steps_per_second": 1.481, |
|
"step": 2710 |
|
}, |
|
{ |
|
"epoch": 0.44, |
|
"learning_rate": 1e-06, |
|
"loss": 1.9272, |
|
"step": 2720 |
|
}, |
|
{ |
|
"epoch": 0.44, |
|
"eval_loss": 1.8875625133514404, |
|
"eval_runtime": 21.5928, |
|
"eval_samples_per_second": 46.312, |
|
"eval_steps_per_second": 1.482, |
|
"step": 2720 |
|
}, |
|
{ |
|
"epoch": 0.44, |
|
"learning_rate": 1e-06, |
|
"loss": 1.9035, |
|
"step": 2730 |
|
}, |
|
{ |
|
"epoch": 0.44, |
|
"eval_loss": 1.8865000009536743, |
|
"eval_runtime": 21.6432, |
|
"eval_samples_per_second": 46.204, |
|
"eval_steps_per_second": 1.479, |
|
"step": 2730 |
|
}, |
|
{ |
|
"epoch": 0.44, |
|
"learning_rate": 1e-06, |
|
"loss": 1.8836, |
|
"step": 2740 |
|
}, |
|
{ |
|
"epoch": 0.44, |
|
"eval_loss": 1.88462495803833, |
|
"eval_runtime": 21.5992, |
|
"eval_samples_per_second": 46.298, |
|
"eval_steps_per_second": 1.482, |
|
"step": 2740 |
|
}, |
|
{ |
|
"epoch": 0.44, |
|
"learning_rate": 1e-06, |
|
"loss": 1.9743, |
|
"step": 2750 |
|
}, |
|
{ |
|
"epoch": 0.44, |
|
"eval_loss": 1.8844374418258667, |
|
"eval_runtime": 21.5787, |
|
"eval_samples_per_second": 46.342, |
|
"eval_steps_per_second": 1.483, |
|
"step": 2750 |
|
}, |
|
{ |
|
"epoch": 0.45, |
|
"learning_rate": 1e-06, |
|
"loss": 1.9226, |
|
"step": 2760 |
|
}, |
|
{ |
|
"epoch": 0.45, |
|
"eval_loss": 1.8844374418258667, |
|
"eval_runtime": 21.5815, |
|
"eval_samples_per_second": 46.336, |
|
"eval_steps_per_second": 1.483, |
|
"step": 2760 |
|
}, |
|
{ |
|
"epoch": 0.45, |
|
"learning_rate": 1e-06, |
|
"loss": 1.9141, |
|
"step": 2770 |
|
}, |
|
{ |
|
"epoch": 0.45, |
|
"eval_loss": 1.8848750591278076, |
|
"eval_runtime": 21.5897, |
|
"eval_samples_per_second": 46.318, |
|
"eval_steps_per_second": 1.482, |
|
"step": 2770 |
|
}, |
|
{ |
|
"epoch": 0.45, |
|
"learning_rate": 1e-06, |
|
"loss": 1.9228, |
|
"step": 2780 |
|
}, |
|
{ |
|
"epoch": 0.45, |
|
"eval_loss": 1.8845624923706055, |
|
"eval_runtime": 21.5792, |
|
"eval_samples_per_second": 46.341, |
|
"eval_steps_per_second": 1.483, |
|
"step": 2780 |
|
}, |
|
{ |
|
"epoch": 0.45, |
|
"learning_rate": 1e-06, |
|
"loss": 2.0065, |
|
"step": 2790 |
|
}, |
|
{ |
|
"epoch": 0.45, |
|
"eval_loss": 1.8842500448226929, |
|
"eval_runtime": 21.6043, |
|
"eval_samples_per_second": 46.287, |
|
"eval_steps_per_second": 1.481, |
|
"step": 2790 |
|
}, |
|
{ |
|
"epoch": 0.45, |
|
"learning_rate": 1e-06, |
|
"loss": 1.8786, |
|
"step": 2800 |
|
}, |
|
{ |
|
"epoch": 0.45, |
|
"eval_loss": 1.884374976158142, |
|
"eval_runtime": 21.6133, |
|
"eval_samples_per_second": 46.268, |
|
"eval_steps_per_second": 1.481, |
|
"step": 2800 |
|
}, |
|
{ |
|
"epoch": 0.45, |
|
"learning_rate": 1e-06, |
|
"loss": 1.893, |
|
"step": 2810 |
|
}, |
|
{ |
|
"epoch": 0.45, |
|
"eval_loss": 1.8838750123977661, |
|
"eval_runtime": 21.5693, |
|
"eval_samples_per_second": 46.362, |
|
"eval_steps_per_second": 1.484, |
|
"step": 2810 |
|
}, |
|
{ |
|
"epoch": 0.46, |
|
"learning_rate": 1e-06, |
|
"loss": 1.8662, |
|
"step": 2820 |
|
}, |
|
{ |
|
"epoch": 0.46, |
|
"eval_loss": 1.8825000524520874, |
|
"eval_runtime": 21.5912, |
|
"eval_samples_per_second": 46.315, |
|
"eval_steps_per_second": 1.482, |
|
"step": 2820 |
|
}, |
|
{ |
|
"epoch": 0.46, |
|
"learning_rate": 1e-06, |
|
"loss": 1.9182, |
|
"step": 2830 |
|
}, |
|
{ |
|
"epoch": 0.46, |
|
"eval_loss": 1.8834375143051147, |
|
"eval_runtime": 21.6098, |
|
"eval_samples_per_second": 46.275, |
|
"eval_steps_per_second": 1.481, |
|
"step": 2830 |
|
}, |
|
{ |
|
"epoch": 0.46, |
|
"learning_rate": 1e-06, |
|
"loss": 1.9631, |
|
"step": 2840 |
|
}, |
|
{ |
|
"epoch": 0.46, |
|
"eval_loss": 1.8834375143051147, |
|
"eval_runtime": 21.7457, |
|
"eval_samples_per_second": 45.986, |
|
"eval_steps_per_second": 1.472, |
|
"step": 2840 |
|
}, |
|
{ |
|
"epoch": 0.46, |
|
"learning_rate": 1e-06, |
|
"loss": 1.7758, |
|
"step": 2850 |
|
}, |
|
{ |
|
"epoch": 0.46, |
|
"eval_loss": 1.8830000162124634, |
|
"eval_runtime": 21.5915, |
|
"eval_samples_per_second": 46.315, |
|
"eval_steps_per_second": 1.482, |
|
"step": 2850 |
|
}, |
|
{ |
|
"epoch": 0.46, |
|
"learning_rate": 1e-06, |
|
"loss": 1.9728, |
|
"step": 2860 |
|
}, |
|
{ |
|
"epoch": 0.46, |
|
"eval_loss": 1.885312557220459, |
|
"eval_runtime": 21.6235, |
|
"eval_samples_per_second": 46.246, |
|
"eval_steps_per_second": 1.48, |
|
"step": 2860 |
|
}, |
|
{ |
|
"epoch": 0.46, |
|
"learning_rate": 1e-06, |
|
"loss": 1.8779, |
|
"step": 2870 |
|
}, |
|
{ |
|
"epoch": 0.46, |
|
"eval_loss": 1.885312557220459, |
|
"eval_runtime": 21.6072, |
|
"eval_samples_per_second": 46.281, |
|
"eval_steps_per_second": 1.481, |
|
"step": 2870 |
|
}, |
|
{ |
|
"epoch": 0.47, |
|
"learning_rate": 1e-06, |
|
"loss": 1.8991, |
|
"step": 2880 |
|
}, |
|
{ |
|
"epoch": 0.47, |
|
"eval_loss": 1.8837499618530273, |
|
"eval_runtime": 21.5731, |
|
"eval_samples_per_second": 46.354, |
|
"eval_steps_per_second": 1.483, |
|
"step": 2880 |
|
}, |
|
{ |
|
"epoch": 0.47, |
|
"learning_rate": 1e-06, |
|
"loss": 1.9968, |
|
"step": 2890 |
|
}, |
|
{ |
|
"epoch": 0.47, |
|
"eval_loss": 1.8833750486373901, |
|
"eval_runtime": 21.5715, |
|
"eval_samples_per_second": 46.357, |
|
"eval_steps_per_second": 1.483, |
|
"step": 2890 |
|
}, |
|
{ |
|
"epoch": 0.47, |
|
"learning_rate": 1e-06, |
|
"loss": 1.9012, |
|
"step": 2900 |
|
}, |
|
{ |
|
"epoch": 0.47, |
|
"eval_loss": 1.8824374675750732, |
|
"eval_runtime": 21.5769, |
|
"eval_samples_per_second": 46.346, |
|
"eval_steps_per_second": 1.483, |
|
"step": 2900 |
|
}, |
|
{ |
|
"epoch": 0.47, |
|
"learning_rate": 1e-06, |
|
"loss": 1.8975, |
|
"step": 2910 |
|
}, |
|
{ |
|
"epoch": 0.47, |
|
"eval_loss": 1.8840625286102295, |
|
"eval_runtime": 21.5778, |
|
"eval_samples_per_second": 46.344, |
|
"eval_steps_per_second": 1.483, |
|
"step": 2910 |
|
}, |
|
{ |
|
"epoch": 0.47, |
|
"learning_rate": 1e-06, |
|
"loss": 2.0213, |
|
"step": 2920 |
|
}, |
|
{ |
|
"epoch": 0.47, |
|
"eval_loss": 1.8850624561309814, |
|
"eval_runtime": 21.7712, |
|
"eval_samples_per_second": 45.932, |
|
"eval_steps_per_second": 1.47, |
|
"step": 2920 |
|
}, |
|
{ |
|
"epoch": 0.47, |
|
"learning_rate": 1e-06, |
|
"loss": 1.9718, |
|
"step": 2930 |
|
}, |
|
{ |
|
"epoch": 0.47, |
|
"eval_loss": 1.883562445640564, |
|
"eval_runtime": 21.5678, |
|
"eval_samples_per_second": 46.365, |
|
"eval_steps_per_second": 1.484, |
|
"step": 2930 |
|
}, |
|
{ |
|
"epoch": 0.48, |
|
"learning_rate": 1e-06, |
|
"loss": 1.9855, |
|
"step": 2940 |
|
}, |
|
{ |
|
"epoch": 0.48, |
|
"eval_loss": 1.8822499513626099, |
|
"eval_runtime": 21.5723, |
|
"eval_samples_per_second": 46.356, |
|
"eval_steps_per_second": 1.483, |
|
"step": 2940 |
|
}, |
|
{ |
|
"epoch": 0.48, |
|
"learning_rate": 1e-06, |
|
"loss": 1.952, |
|
"step": 2950 |
|
}, |
|
{ |
|
"epoch": 0.48, |
|
"eval_loss": 1.8823750019073486, |
|
"eval_runtime": 21.5631, |
|
"eval_samples_per_second": 46.376, |
|
"eval_steps_per_second": 1.484, |
|
"step": 2950 |
|
}, |
|
{ |
|
"epoch": 0.48, |
|
"learning_rate": 1e-06, |
|
"loss": 1.8926, |
|
"step": 2960 |
|
}, |
|
{ |
|
"epoch": 0.48, |
|
"eval_loss": 1.8806250095367432, |
|
"eval_runtime": 21.624, |
|
"eval_samples_per_second": 46.245, |
|
"eval_steps_per_second": 1.48, |
|
"step": 2960 |
|
}, |
|
{ |
|
"epoch": 0.48, |
|
"learning_rate": 1e-06, |
|
"loss": 1.9346, |
|
"step": 2970 |
|
}, |
|
{ |
|
"epoch": 0.48, |
|
"eval_loss": 1.8816250562667847, |
|
"eval_runtime": 21.5878, |
|
"eval_samples_per_second": 46.322, |
|
"eval_steps_per_second": 1.482, |
|
"step": 2970 |
|
}, |
|
{ |
|
"epoch": 0.48, |
|
"learning_rate": 1e-06, |
|
"loss": 1.8823, |
|
"step": 2980 |
|
}, |
|
{ |
|
"epoch": 0.48, |
|
"eval_loss": 1.881250023841858, |
|
"eval_runtime": 21.5569, |
|
"eval_samples_per_second": 46.389, |
|
"eval_steps_per_second": 1.484, |
|
"step": 2980 |
|
}, |
|
{ |
|
"epoch": 0.48, |
|
"learning_rate": 1e-06, |
|
"loss": 1.9383, |
|
"step": 2990 |
|
}, |
|
{ |
|
"epoch": 0.48, |
|
"eval_loss": 1.8806250095367432, |
|
"eval_runtime": 21.6839, |
|
"eval_samples_per_second": 46.117, |
|
"eval_steps_per_second": 1.476, |
|
"step": 2990 |
|
}, |
|
{ |
|
"epoch": 0.48, |
|
"learning_rate": 1e-06, |
|
"loss": 1.7947, |
|
"step": 3000 |
|
}, |
|
{ |
|
"epoch": 0.48, |
|
"eval_loss": 1.8808749914169312, |
|
"eval_runtime": 21.5728, |
|
"eval_samples_per_second": 46.355, |
|
"eval_steps_per_second": 1.483, |
|
"step": 3000 |
|
}, |
|
{ |
|
"epoch": 0.49, |
|
"learning_rate": 1e-06, |
|
"loss": 1.9227, |
|
"step": 3010 |
|
}, |
|
{ |
|
"epoch": 0.49, |
|
"eval_loss": 1.882312536239624, |
|
"eval_runtime": 21.5671, |
|
"eval_samples_per_second": 46.367, |
|
"eval_steps_per_second": 1.484, |
|
"step": 3010 |
|
}, |
|
{ |
|
"epoch": 0.49, |
|
"learning_rate": 1e-06, |
|
"loss": 1.8955, |
|
"step": 3020 |
|
}, |
|
{ |
|
"epoch": 0.49, |
|
"eval_loss": 1.8823750019073486, |
|
"eval_runtime": 21.5831, |
|
"eval_samples_per_second": 46.333, |
|
"eval_steps_per_second": 1.483, |
|
"step": 3020 |
|
}, |
|
{ |
|
"epoch": 0.49, |
|
"learning_rate": 1e-06, |
|
"loss": 1.9989, |
|
"step": 3030 |
|
}, |
|
{ |
|
"epoch": 0.49, |
|
"eval_loss": 1.881250023841858, |
|
"eval_runtime": 21.5628, |
|
"eval_samples_per_second": 46.376, |
|
"eval_steps_per_second": 1.484, |
|
"step": 3030 |
|
}, |
|
{ |
|
"epoch": 0.49, |
|
"learning_rate": 1e-06, |
|
"loss": 1.9677, |
|
"step": 3040 |
|
}, |
|
{ |
|
"epoch": 0.49, |
|
"eval_loss": 1.8819375038146973, |
|
"eval_runtime": 21.5581, |
|
"eval_samples_per_second": 46.386, |
|
"eval_steps_per_second": 1.484, |
|
"step": 3040 |
|
}, |
|
{ |
|
"epoch": 0.49, |
|
"learning_rate": 1e-06, |
|
"loss": 1.8583, |
|
"step": 3050 |
|
}, |
|
{ |
|
"epoch": 0.49, |
|
"eval_loss": 1.8811875581741333, |
|
"eval_runtime": 21.5968, |
|
"eval_samples_per_second": 46.303, |
|
"eval_steps_per_second": 1.482, |
|
"step": 3050 |
|
}, |
|
{ |
|
"epoch": 0.49, |
|
"learning_rate": 1e-06, |
|
"loss": 1.9972, |
|
"step": 3060 |
|
}, |
|
{ |
|
"epoch": 0.49, |
|
"eval_loss": 1.8818124532699585, |
|
"eval_runtime": 21.6261, |
|
"eval_samples_per_second": 46.24, |
|
"eval_steps_per_second": 1.48, |
|
"step": 3060 |
|
}, |
|
{ |
|
"epoch": 0.5, |
|
"learning_rate": 1e-06, |
|
"loss": 1.8436, |
|
"step": 3070 |
|
}, |
|
{ |
|
"epoch": 0.5, |
|
"eval_loss": 1.8809374570846558, |
|
"eval_runtime": 21.6385, |
|
"eval_samples_per_second": 46.214, |
|
"eval_steps_per_second": 1.479, |
|
"step": 3070 |
|
}, |
|
{ |
|
"epoch": 0.5, |
|
"learning_rate": 1e-06, |
|
"loss": 1.9464, |
|
"step": 3080 |
|
}, |
|
{ |
|
"epoch": 0.5, |
|
"eval_loss": 1.8806250095367432, |
|
"eval_runtime": 21.5985, |
|
"eval_samples_per_second": 46.299, |
|
"eval_steps_per_second": 1.482, |
|
"step": 3080 |
|
}, |
|
{ |
|
"epoch": 0.5, |
|
"learning_rate": 1e-06, |
|
"loss": 1.9133, |
|
"step": 3090 |
|
}, |
|
{ |
|
"epoch": 0.5, |
|
"eval_loss": 1.8808125257492065, |
|
"eval_runtime": 21.6055, |
|
"eval_samples_per_second": 46.284, |
|
"eval_steps_per_second": 1.481, |
|
"step": 3090 |
|
}, |
|
{ |
|
"epoch": 0.5, |
|
"learning_rate": 1e-06, |
|
"loss": 1.9932, |
|
"step": 3100 |
|
}, |
|
{ |
|
"epoch": 0.5, |
|
"eval_loss": 1.8813749551773071, |
|
"eval_runtime": 21.5789, |
|
"eval_samples_per_second": 46.342, |
|
"eval_steps_per_second": 1.483, |
|
"step": 3100 |
|
}, |
|
{ |
|
"epoch": 0.5, |
|
"learning_rate": 1e-06, |
|
"loss": 1.8855, |
|
"step": 3110 |
|
}, |
|
{ |
|
"epoch": 0.5, |
|
"eval_loss": 1.8813124895095825, |
|
"eval_runtime": 21.549, |
|
"eval_samples_per_second": 46.406, |
|
"eval_steps_per_second": 1.485, |
|
"step": 3110 |
|
}, |
|
{ |
|
"epoch": 0.5, |
|
"learning_rate": 1e-06, |
|
"loss": 1.9001, |
|
"step": 3120 |
|
}, |
|
{ |
|
"epoch": 0.5, |
|
"eval_loss": 1.8811875581741333, |
|
"eval_runtime": 21.5415, |
|
"eval_samples_per_second": 46.422, |
|
"eval_steps_per_second": 1.486, |
|
"step": 3120 |
|
}, |
|
{ |
|
"epoch": 0.51, |
|
"learning_rate": 1e-06, |
|
"loss": 1.8811, |
|
"step": 3130 |
|
}, |
|
{ |
|
"epoch": 0.51, |
|
"eval_loss": 1.8804999589920044, |
|
"eval_runtime": 21.5596, |
|
"eval_samples_per_second": 46.383, |
|
"eval_steps_per_second": 1.484, |
|
"step": 3130 |
|
}, |
|
{ |
|
"epoch": 0.51, |
|
"learning_rate": 1e-06, |
|
"loss": 2.0566, |
|
"step": 3140 |
|
}, |
|
{ |
|
"epoch": 0.51, |
|
"eval_loss": 1.8810625076293945, |
|
"eval_runtime": 21.5594, |
|
"eval_samples_per_second": 46.384, |
|
"eval_steps_per_second": 1.484, |
|
"step": 3140 |
|
}, |
|
{ |
|
"epoch": 0.51, |
|
"learning_rate": 1e-06, |
|
"loss": 1.9561, |
|
"step": 3150 |
|
}, |
|
{ |
|
"epoch": 0.51, |
|
"eval_loss": 1.881250023841858, |
|
"eval_runtime": 21.5398, |
|
"eval_samples_per_second": 46.426, |
|
"eval_steps_per_second": 1.486, |
|
"step": 3150 |
|
}, |
|
{ |
|
"epoch": 0.51, |
|
"learning_rate": 1e-06, |
|
"loss": 1.9045, |
|
"step": 3160 |
|
}, |
|
{ |
|
"epoch": 0.51, |
|
"eval_loss": 1.8804999589920044, |
|
"eval_runtime": 21.5702, |
|
"eval_samples_per_second": 46.36, |
|
"eval_steps_per_second": 1.484, |
|
"step": 3160 |
|
}, |
|
{ |
|
"epoch": 0.51, |
|
"learning_rate": 1e-06, |
|
"loss": 1.9714, |
|
"step": 3170 |
|
}, |
|
{ |
|
"epoch": 0.51, |
|
"eval_loss": 1.8798749446868896, |
|
"eval_runtime": 21.5811, |
|
"eval_samples_per_second": 46.337, |
|
"eval_steps_per_second": 1.483, |
|
"step": 3170 |
|
}, |
|
{ |
|
"epoch": 0.51, |
|
"learning_rate": 1e-06, |
|
"loss": 1.9288, |
|
"step": 3180 |
|
}, |
|
{ |
|
"epoch": 0.51, |
|
"eval_loss": 1.879812479019165, |
|
"eval_runtime": 21.55, |
|
"eval_samples_per_second": 46.404, |
|
"eval_steps_per_second": 1.485, |
|
"step": 3180 |
|
}, |
|
{ |
|
"epoch": 0.52, |
|
"learning_rate": 1e-06, |
|
"loss": 1.8237, |
|
"step": 3190 |
|
}, |
|
{ |
|
"epoch": 0.52, |
|
"eval_loss": 1.8797500133514404, |
|
"eval_runtime": 21.5524, |
|
"eval_samples_per_second": 46.398, |
|
"eval_steps_per_second": 1.485, |
|
"step": 3190 |
|
}, |
|
{ |
|
"epoch": 0.52, |
|
"learning_rate": 1e-06, |
|
"loss": 1.9617, |
|
"step": 3200 |
|
}, |
|
{ |
|
"epoch": 0.52, |
|
"eval_loss": 1.8813124895095825, |
|
"eval_runtime": 21.5313, |
|
"eval_samples_per_second": 46.444, |
|
"eval_steps_per_second": 1.486, |
|
"step": 3200 |
|
}, |
|
{ |
|
"epoch": 0.52, |
|
"learning_rate": 1e-06, |
|
"loss": 1.9844, |
|
"step": 3210 |
|
}, |
|
{ |
|
"epoch": 0.52, |
|
"eval_loss": 1.8817499876022339, |
|
"eval_runtime": 21.5808, |
|
"eval_samples_per_second": 46.338, |
|
"eval_steps_per_second": 1.483, |
|
"step": 3210 |
|
}, |
|
{ |
|
"epoch": 0.52, |
|
"learning_rate": 1e-06, |
|
"loss": 1.9525, |
|
"step": 3220 |
|
}, |
|
{ |
|
"epoch": 0.52, |
|
"eval_loss": 1.8786875009536743, |
|
"eval_runtime": 21.5448, |
|
"eval_samples_per_second": 46.415, |
|
"eval_steps_per_second": 1.485, |
|
"step": 3220 |
|
}, |
|
{ |
|
"epoch": 0.52, |
|
"learning_rate": 1e-06, |
|
"loss": 1.8189, |
|
"step": 3230 |
|
}, |
|
{ |
|
"epoch": 0.52, |
|
"eval_loss": 1.8784375190734863, |
|
"eval_runtime": 21.5565, |
|
"eval_samples_per_second": 46.39, |
|
"eval_steps_per_second": 1.484, |
|
"step": 3230 |
|
}, |
|
{ |
|
"epoch": 0.52, |
|
"learning_rate": 1e-06, |
|
"loss": 1.8924, |
|
"step": 3240 |
|
}, |
|
{ |
|
"epoch": 0.52, |
|
"eval_loss": 1.878812551498413, |
|
"eval_runtime": 21.6129, |
|
"eval_samples_per_second": 46.269, |
|
"eval_steps_per_second": 1.481, |
|
"step": 3240 |
|
}, |
|
{ |
|
"epoch": 0.53, |
|
"learning_rate": 1e-06, |
|
"loss": 1.8579, |
|
"step": 3250 |
|
}, |
|
{ |
|
"epoch": 0.53, |
|
"eval_loss": 1.8789374828338623, |
|
"eval_runtime": 21.6098, |
|
"eval_samples_per_second": 46.275, |
|
"eval_steps_per_second": 1.481, |
|
"step": 3250 |
|
}, |
|
{ |
|
"epoch": 0.53, |
|
"learning_rate": 1e-06, |
|
"loss": 1.8618, |
|
"step": 3260 |
|
}, |
|
{ |
|
"epoch": 0.53, |
|
"eval_loss": 1.8783124685287476, |
|
"eval_runtime": 21.5917, |
|
"eval_samples_per_second": 46.314, |
|
"eval_steps_per_second": 1.482, |
|
"step": 3260 |
|
}, |
|
{ |
|
"epoch": 0.53, |
|
"learning_rate": 1e-06, |
|
"loss": 1.9999, |
|
"step": 3270 |
|
}, |
|
{ |
|
"epoch": 0.53, |
|
"eval_loss": 1.8779375553131104, |
|
"eval_runtime": 21.6272, |
|
"eval_samples_per_second": 46.238, |
|
"eval_steps_per_second": 1.48, |
|
"step": 3270 |
|
}, |
|
{ |
|
"epoch": 0.53, |
|
"learning_rate": 1e-06, |
|
"loss": 1.8795, |
|
"step": 3280 |
|
}, |
|
{ |
|
"epoch": 0.53, |
|
"eval_loss": 1.8776874542236328, |
|
"eval_runtime": 21.5765, |
|
"eval_samples_per_second": 46.347, |
|
"eval_steps_per_second": 1.483, |
|
"step": 3280 |
|
}, |
|
{ |
|
"epoch": 0.53, |
|
"learning_rate": 1e-06, |
|
"loss": 1.8766, |
|
"step": 3290 |
|
}, |
|
{ |
|
"epoch": 0.53, |
|
"eval_loss": 1.878749966621399, |
|
"eval_runtime": 21.6054, |
|
"eval_samples_per_second": 46.285, |
|
"eval_steps_per_second": 1.481, |
|
"step": 3290 |
|
}, |
|
{ |
|
"epoch": 0.53, |
|
"learning_rate": 1e-06, |
|
"loss": 1.8596, |
|
"step": 3300 |
|
}, |
|
{ |
|
"epoch": 0.53, |
|
"eval_loss": 1.8778125047683716, |
|
"eval_runtime": 21.6178, |
|
"eval_samples_per_second": 46.258, |
|
"eval_steps_per_second": 1.48, |
|
"step": 3300 |
|
}, |
|
{ |
|
"epoch": 0.53, |
|
"learning_rate": 1e-06, |
|
"loss": 1.9722, |
|
"step": 3310 |
|
}, |
|
{ |
|
"epoch": 0.53, |
|
"eval_loss": 1.8778749704360962, |
|
"eval_runtime": 21.6267, |
|
"eval_samples_per_second": 46.239, |
|
"eval_steps_per_second": 1.48, |
|
"step": 3310 |
|
}, |
|
{ |
|
"epoch": 0.54, |
|
"learning_rate": 1e-06, |
|
"loss": 1.9459, |
|
"step": 3320 |
|
}, |
|
{ |
|
"epoch": 0.54, |
|
"eval_loss": 1.878749966621399, |
|
"eval_runtime": 21.5929, |
|
"eval_samples_per_second": 46.312, |
|
"eval_steps_per_second": 1.482, |
|
"step": 3320 |
|
}, |
|
{ |
|
"epoch": 0.54, |
|
"learning_rate": 1e-06, |
|
"loss": 1.9772, |
|
"step": 3330 |
|
}, |
|
{ |
|
"epoch": 0.54, |
|
"eval_loss": 1.8791249990463257, |
|
"eval_runtime": 21.6453, |
|
"eval_samples_per_second": 46.199, |
|
"eval_steps_per_second": 1.478, |
|
"step": 3330 |
|
}, |
|
{ |
|
"epoch": 0.54, |
|
"learning_rate": 1e-06, |
|
"loss": 1.9241, |
|
"step": 3340 |
|
}, |
|
{ |
|
"epoch": 0.54, |
|
"eval_loss": 1.877750039100647, |
|
"eval_runtime": 21.5578, |
|
"eval_samples_per_second": 46.387, |
|
"eval_steps_per_second": 1.484, |
|
"step": 3340 |
|
}, |
|
{ |
|
"epoch": 0.54, |
|
"learning_rate": 1e-06, |
|
"loss": 1.9391, |
|
"step": 3350 |
|
}, |
|
{ |
|
"epoch": 0.54, |
|
"eval_loss": 1.8766249418258667, |
|
"eval_runtime": 21.5657, |
|
"eval_samples_per_second": 46.37, |
|
"eval_steps_per_second": 1.484, |
|
"step": 3350 |
|
}, |
|
{ |
|
"epoch": 0.54, |
|
"learning_rate": 1e-06, |
|
"loss": 1.9602, |
|
"step": 3360 |
|
}, |
|
{ |
|
"epoch": 0.54, |
|
"eval_loss": 1.8764375448226929, |
|
"eval_runtime": 21.5619, |
|
"eval_samples_per_second": 46.378, |
|
"eval_steps_per_second": 1.484, |
|
"step": 3360 |
|
}, |
|
{ |
|
"epoch": 0.54, |
|
"learning_rate": 1e-06, |
|
"loss": 2.0581, |
|
"step": 3370 |
|
}, |
|
{ |
|
"epoch": 0.54, |
|
"eval_loss": 1.8766875267028809, |
|
"eval_runtime": 21.5636, |
|
"eval_samples_per_second": 46.375, |
|
"eval_steps_per_second": 1.484, |
|
"step": 3370 |
|
}, |
|
{ |
|
"epoch": 0.55, |
|
"learning_rate": 1e-06, |
|
"loss": 1.9303, |
|
"step": 3380 |
|
}, |
|
{ |
|
"epoch": 0.55, |
|
"eval_loss": 1.8773750066757202, |
|
"eval_runtime": 21.5664, |
|
"eval_samples_per_second": 46.368, |
|
"eval_steps_per_second": 1.484, |
|
"step": 3380 |
|
}, |
|
{ |
|
"epoch": 0.55, |
|
"learning_rate": 1e-06, |
|
"loss": 1.837, |
|
"step": 3390 |
|
}, |
|
{ |
|
"epoch": 0.55, |
|
"eval_loss": 1.8773125410079956, |
|
"eval_runtime": 21.561, |
|
"eval_samples_per_second": 46.38, |
|
"eval_steps_per_second": 1.484, |
|
"step": 3390 |
|
}, |
|
{ |
|
"epoch": 0.55, |
|
"learning_rate": 1e-06, |
|
"loss": 1.9485, |
|
"step": 3400 |
|
}, |
|
{ |
|
"epoch": 0.55, |
|
"eval_loss": 1.8799375295639038, |
|
"eval_runtime": 21.5621, |
|
"eval_samples_per_second": 46.378, |
|
"eval_steps_per_second": 1.484, |
|
"step": 3400 |
|
}, |
|
{ |
|
"epoch": 0.55, |
|
"learning_rate": 1e-06, |
|
"loss": 1.9599, |
|
"step": 3410 |
|
}, |
|
{ |
|
"epoch": 0.55, |
|
"eval_loss": 1.8831249475479126, |
|
"eval_runtime": 21.5818, |
|
"eval_samples_per_second": 46.335, |
|
"eval_steps_per_second": 1.483, |
|
"step": 3410 |
|
}, |
|
{ |
|
"epoch": 0.55, |
|
"learning_rate": 1e-06, |
|
"loss": 1.9171, |
|
"step": 3420 |
|
}, |
|
{ |
|
"epoch": 0.55, |
|
"eval_loss": 1.8783750534057617, |
|
"eval_runtime": 21.6332, |
|
"eval_samples_per_second": 46.225, |
|
"eval_steps_per_second": 1.479, |
|
"step": 3420 |
|
}, |
|
{ |
|
"epoch": 0.55, |
|
"learning_rate": 1e-06, |
|
"loss": 1.8861, |
|
"step": 3430 |
|
}, |
|
{ |
|
"epoch": 0.55, |
|
"eval_loss": 1.8766875267028809, |
|
"eval_runtime": 21.5533, |
|
"eval_samples_per_second": 46.397, |
|
"eval_steps_per_second": 1.485, |
|
"step": 3430 |
|
}, |
|
{ |
|
"epoch": 0.56, |
|
"learning_rate": 1e-06, |
|
"loss": 1.9208, |
|
"step": 3440 |
|
}, |
|
{ |
|
"epoch": 0.56, |
|
"eval_loss": 1.8760625123977661, |
|
"eval_runtime": 21.5483, |
|
"eval_samples_per_second": 46.407, |
|
"eval_steps_per_second": 1.485, |
|
"step": 3440 |
|
}, |
|
{ |
|
"epoch": 0.56, |
|
"learning_rate": 1e-06, |
|
"loss": 1.8775, |
|
"step": 3450 |
|
}, |
|
{ |
|
"epoch": 0.56, |
|
"eval_loss": 1.8756250143051147, |
|
"eval_runtime": 21.5365, |
|
"eval_samples_per_second": 46.433, |
|
"eval_steps_per_second": 1.486, |
|
"step": 3450 |
|
}, |
|
{ |
|
"epoch": 0.56, |
|
"learning_rate": 1e-06, |
|
"loss": 1.962, |
|
"step": 3460 |
|
}, |
|
{ |
|
"epoch": 0.56, |
|
"eval_loss": 1.875499963760376, |
|
"eval_runtime": 21.5519, |
|
"eval_samples_per_second": 46.4, |
|
"eval_steps_per_second": 1.485, |
|
"step": 3460 |
|
}, |
|
{ |
|
"epoch": 0.56, |
|
"learning_rate": 1e-06, |
|
"loss": 1.92, |
|
"step": 3470 |
|
}, |
|
{ |
|
"epoch": 0.56, |
|
"eval_loss": 1.8764375448226929, |
|
"eval_runtime": 21.5524, |
|
"eval_samples_per_second": 46.399, |
|
"eval_steps_per_second": 1.485, |
|
"step": 3470 |
|
}, |
|
{ |
|
"epoch": 0.56, |
|
"learning_rate": 1e-06, |
|
"loss": 1.8704, |
|
"step": 3480 |
|
}, |
|
{ |
|
"epoch": 0.56, |
|
"eval_loss": 1.875749945640564, |
|
"eval_runtime": 21.5397, |
|
"eval_samples_per_second": 46.426, |
|
"eval_steps_per_second": 1.486, |
|
"step": 3480 |
|
}, |
|
{ |
|
"epoch": 0.56, |
|
"learning_rate": 1e-06, |
|
"loss": 1.896, |
|
"step": 3490 |
|
}, |
|
{ |
|
"epoch": 0.56, |
|
"eval_loss": 1.874750018119812, |
|
"eval_runtime": 21.5328, |
|
"eval_samples_per_second": 46.441, |
|
"eval_steps_per_second": 1.486, |
|
"step": 3490 |
|
}, |
|
{ |
|
"epoch": 0.57, |
|
"learning_rate": 1e-06, |
|
"loss": 1.8639, |
|
"step": 3500 |
|
}, |
|
{ |
|
"epoch": 0.57, |
|
"eval_loss": 1.8745625019073486, |
|
"eval_runtime": 21.541, |
|
"eval_samples_per_second": 46.423, |
|
"eval_steps_per_second": 1.486, |
|
"step": 3500 |
|
}, |
|
{ |
|
"epoch": 0.57, |
|
"learning_rate": 1e-06, |
|
"loss": 1.8501, |
|
"step": 3510 |
|
}, |
|
{ |
|
"epoch": 0.57, |
|
"eval_loss": 1.8751250505447388, |
|
"eval_runtime": 21.5841, |
|
"eval_samples_per_second": 46.33, |
|
"eval_steps_per_second": 1.483, |
|
"step": 3510 |
|
}, |
|
{ |
|
"epoch": 0.57, |
|
"learning_rate": 1e-06, |
|
"loss": 1.958, |
|
"step": 3520 |
|
}, |
|
{ |
|
"epoch": 0.57, |
|
"eval_loss": 1.8756250143051147, |
|
"eval_runtime": 21.7332, |
|
"eval_samples_per_second": 46.012, |
|
"eval_steps_per_second": 1.472, |
|
"step": 3520 |
|
}, |
|
{ |
|
"epoch": 0.57, |
|
"learning_rate": 1e-06, |
|
"loss": 1.9339, |
|
"step": 3530 |
|
}, |
|
{ |
|
"epoch": 0.57, |
|
"eval_loss": 1.8751875162124634, |
|
"eval_runtime": 21.5783, |
|
"eval_samples_per_second": 46.343, |
|
"eval_steps_per_second": 1.483, |
|
"step": 3530 |
|
}, |
|
{ |
|
"epoch": 0.57, |
|
"learning_rate": 1e-06, |
|
"loss": 1.8906, |
|
"step": 3540 |
|
}, |
|
{ |
|
"epoch": 0.57, |
|
"eval_loss": 1.8748749494552612, |
|
"eval_runtime": 21.5897, |
|
"eval_samples_per_second": 46.318, |
|
"eval_steps_per_second": 1.482, |
|
"step": 3540 |
|
}, |
|
{ |
|
"epoch": 0.57, |
|
"learning_rate": 1e-06, |
|
"loss": 1.953, |
|
"step": 3550 |
|
}, |
|
{ |
|
"epoch": 0.57, |
|
"eval_loss": 1.8763749599456787, |
|
"eval_runtime": 21.6102, |
|
"eval_samples_per_second": 46.274, |
|
"eval_steps_per_second": 1.481, |
|
"step": 3550 |
|
}, |
|
{ |
|
"epoch": 0.58, |
|
"learning_rate": 1e-06, |
|
"loss": 1.9304, |
|
"step": 3560 |
|
}, |
|
{ |
|
"epoch": 0.58, |
|
"eval_loss": 1.8776249885559082, |
|
"eval_runtime": 21.5663, |
|
"eval_samples_per_second": 46.369, |
|
"eval_steps_per_second": 1.484, |
|
"step": 3560 |
|
}, |
|
{ |
|
"epoch": 0.58, |
|
"learning_rate": 1e-06, |
|
"loss": 1.9081, |
|
"step": 3570 |
|
}, |
|
{ |
|
"epoch": 0.58, |
|
"eval_loss": 1.876562476158142, |
|
"eval_runtime": 21.5451, |
|
"eval_samples_per_second": 46.414, |
|
"eval_steps_per_second": 1.485, |
|
"step": 3570 |
|
}, |
|
{ |
|
"epoch": 0.58, |
|
"learning_rate": 1e-06, |
|
"loss": 1.966, |
|
"step": 3580 |
|
}, |
|
{ |
|
"epoch": 0.58, |
|
"eval_loss": 1.8761249780654907, |
|
"eval_runtime": 21.5413, |
|
"eval_samples_per_second": 46.422, |
|
"eval_steps_per_second": 1.486, |
|
"step": 3580 |
|
}, |
|
{ |
|
"epoch": 0.58, |
|
"learning_rate": 1e-06, |
|
"loss": 1.9819, |
|
"step": 3590 |
|
}, |
|
{ |
|
"epoch": 0.58, |
|
"eval_loss": 1.8755625486373901, |
|
"eval_runtime": 21.5524, |
|
"eval_samples_per_second": 46.399, |
|
"eval_steps_per_second": 1.485, |
|
"step": 3590 |
|
}, |
|
{ |
|
"epoch": 0.58, |
|
"learning_rate": 1e-06, |
|
"loss": 1.9206, |
|
"step": 3600 |
|
}, |
|
{ |
|
"epoch": 0.58, |
|
"eval_loss": 1.8758125305175781, |
|
"eval_runtime": 21.7021, |
|
"eval_samples_per_second": 46.079, |
|
"eval_steps_per_second": 1.475, |
|
"step": 3600 |
|
}, |
|
{ |
|
"epoch": 0.58, |
|
"learning_rate": 1e-06, |
|
"loss": 1.9011, |
|
"step": 3610 |
|
}, |
|
{ |
|
"epoch": 0.58, |
|
"eval_loss": 1.8749375343322754, |
|
"eval_runtime": 21.5651, |
|
"eval_samples_per_second": 46.371, |
|
"eval_steps_per_second": 1.484, |
|
"step": 3610 |
|
}, |
|
{ |
|
"epoch": 0.59, |
|
"learning_rate": 1e-06, |
|
"loss": 2.0006, |
|
"step": 3620 |
|
}, |
|
{ |
|
"epoch": 0.59, |
|
"eval_loss": 1.8758749961853027, |
|
"eval_runtime": 21.5599, |
|
"eval_samples_per_second": 46.382, |
|
"eval_steps_per_second": 1.484, |
|
"step": 3620 |
|
}, |
|
{ |
|
"epoch": 0.59, |
|
"learning_rate": 1e-06, |
|
"loss": 1.8774, |
|
"step": 3630 |
|
}, |
|
{ |
|
"epoch": 0.59, |
|
"eval_loss": 1.8758125305175781, |
|
"eval_runtime": 21.5817, |
|
"eval_samples_per_second": 46.336, |
|
"eval_steps_per_second": 1.483, |
|
"step": 3630 |
|
}, |
|
{ |
|
"epoch": 0.59, |
|
"learning_rate": 1e-06, |
|
"loss": 1.9009, |
|
"step": 3640 |
|
}, |
|
{ |
|
"epoch": 0.59, |
|
"eval_loss": 1.875249981880188, |
|
"eval_runtime": 21.5655, |
|
"eval_samples_per_second": 46.37, |
|
"eval_steps_per_second": 1.484, |
|
"step": 3640 |
|
}, |
|
{ |
|
"epoch": 0.59, |
|
"learning_rate": 1e-06, |
|
"loss": 1.93, |
|
"step": 3650 |
|
}, |
|
{ |
|
"epoch": 0.59, |
|
"eval_loss": 1.8749375343322754, |
|
"eval_runtime": 21.5523, |
|
"eval_samples_per_second": 46.399, |
|
"eval_steps_per_second": 1.485, |
|
"step": 3650 |
|
}, |
|
{ |
|
"epoch": 0.59, |
|
"learning_rate": 1e-06, |
|
"loss": 1.9684, |
|
"step": 3660 |
|
}, |
|
{ |
|
"epoch": 0.59, |
|
"eval_loss": 1.8741874694824219, |
|
"eval_runtime": 21.5577, |
|
"eval_samples_per_second": 46.387, |
|
"eval_steps_per_second": 1.484, |
|
"step": 3660 |
|
}, |
|
{ |
|
"epoch": 0.59, |
|
"learning_rate": 1e-06, |
|
"loss": 1.9726, |
|
"step": 3670 |
|
}, |
|
{ |
|
"epoch": 0.59, |
|
"eval_loss": 1.8737499713897705, |
|
"eval_runtime": 21.7399, |
|
"eval_samples_per_second": 45.998, |
|
"eval_steps_per_second": 1.472, |
|
"step": 3670 |
|
}, |
|
{ |
|
"epoch": 0.59, |
|
"learning_rate": 1e-06, |
|
"loss": 1.8864, |
|
"step": 3680 |
|
}, |
|
{ |
|
"epoch": 0.59, |
|
"eval_loss": 1.8734999895095825, |
|
"eval_runtime": 21.7297, |
|
"eval_samples_per_second": 46.02, |
|
"eval_steps_per_second": 1.473, |
|
"step": 3680 |
|
}, |
|
{ |
|
"epoch": 0.6, |
|
"learning_rate": 1e-06, |
|
"loss": 1.8647, |
|
"step": 3690 |
|
}, |
|
{ |
|
"epoch": 0.6, |
|
"eval_loss": 1.8739374876022339, |
|
"eval_runtime": 21.5773, |
|
"eval_samples_per_second": 46.345, |
|
"eval_steps_per_second": 1.483, |
|
"step": 3690 |
|
}, |
|
{ |
|
"epoch": 0.6, |
|
"learning_rate": 1e-06, |
|
"loss": 1.9116, |
|
"step": 3700 |
|
}, |
|
{ |
|
"epoch": 0.6, |
|
"eval_loss": 1.8738750219345093, |
|
"eval_runtime": 21.5785, |
|
"eval_samples_per_second": 46.342, |
|
"eval_steps_per_second": 1.483, |
|
"step": 3700 |
|
}, |
|
{ |
|
"epoch": 0.6, |
|
"learning_rate": 1e-06, |
|
"loss": 1.8138, |
|
"step": 3710 |
|
}, |
|
{ |
|
"epoch": 0.6, |
|
"eval_loss": 1.8732500076293945, |
|
"eval_runtime": 21.5722, |
|
"eval_samples_per_second": 46.356, |
|
"eval_steps_per_second": 1.483, |
|
"step": 3710 |
|
}, |
|
{ |
|
"epoch": 0.6, |
|
"learning_rate": 1e-06, |
|
"loss": 1.8506, |
|
"step": 3720 |
|
}, |
|
{ |
|
"epoch": 0.6, |
|
"eval_loss": 1.8730000257492065, |
|
"eval_runtime": 21.5369, |
|
"eval_samples_per_second": 46.432, |
|
"eval_steps_per_second": 1.486, |
|
"step": 3720 |
|
}, |
|
{ |
|
"epoch": 0.6, |
|
"learning_rate": 1e-06, |
|
"loss": 1.9682, |
|
"step": 3730 |
|
}, |
|
{ |
|
"epoch": 0.6, |
|
"eval_loss": 1.8727500438690186, |
|
"eval_runtime": 21.5444, |
|
"eval_samples_per_second": 46.416, |
|
"eval_steps_per_second": 1.485, |
|
"step": 3730 |
|
}, |
|
{ |
|
"epoch": 0.6, |
|
"learning_rate": 1e-06, |
|
"loss": 1.9206, |
|
"step": 3740 |
|
}, |
|
{ |
|
"epoch": 0.6, |
|
"eval_loss": 1.871749997138977, |
|
"eval_runtime": 21.6398, |
|
"eval_samples_per_second": 46.211, |
|
"eval_steps_per_second": 1.479, |
|
"step": 3740 |
|
}, |
|
{ |
|
"epoch": 0.61, |
|
"learning_rate": 1e-06, |
|
"loss": 1.9281, |
|
"step": 3750 |
|
}, |
|
{ |
|
"epoch": 0.61, |
|
"eval_loss": 1.8723125457763672, |
|
"eval_runtime": 21.5661, |
|
"eval_samples_per_second": 46.369, |
|
"eval_steps_per_second": 1.484, |
|
"step": 3750 |
|
}, |
|
{ |
|
"epoch": 0.61, |
|
"learning_rate": 1e-06, |
|
"loss": 1.7545, |
|
"step": 3760 |
|
}, |
|
{ |
|
"epoch": 0.61, |
|
"eval_loss": 1.872499942779541, |
|
"eval_runtime": 21.587, |
|
"eval_samples_per_second": 46.324, |
|
"eval_steps_per_second": 1.482, |
|
"step": 3760 |
|
}, |
|
{ |
|
"epoch": 0.61, |
|
"learning_rate": 1e-06, |
|
"loss": 1.9969, |
|
"step": 3770 |
|
}, |
|
{ |
|
"epoch": 0.61, |
|
"eval_loss": 1.8730624914169312, |
|
"eval_runtime": 21.6092, |
|
"eval_samples_per_second": 46.276, |
|
"eval_steps_per_second": 1.481, |
|
"step": 3770 |
|
}, |
|
{ |
|
"epoch": 0.61, |
|
"learning_rate": 1e-06, |
|
"loss": 1.8196, |
|
"step": 3780 |
|
}, |
|
{ |
|
"epoch": 0.61, |
|
"eval_loss": 1.8728125095367432, |
|
"eval_runtime": 21.6084, |
|
"eval_samples_per_second": 46.278, |
|
"eval_steps_per_second": 1.481, |
|
"step": 3780 |
|
}, |
|
{ |
|
"epoch": 0.61, |
|
"learning_rate": 1e-06, |
|
"loss": 1.8404, |
|
"step": 3790 |
|
}, |
|
{ |
|
"epoch": 0.61, |
|
"eval_loss": 1.8725625276565552, |
|
"eval_runtime": 21.58, |
|
"eval_samples_per_second": 46.339, |
|
"eval_steps_per_second": 1.483, |
|
"step": 3790 |
|
}, |
|
{ |
|
"epoch": 0.61, |
|
"learning_rate": 1e-06, |
|
"loss": 1.8799, |
|
"step": 3800 |
|
}, |
|
{ |
|
"epoch": 0.61, |
|
"eval_loss": 1.8741250038146973, |
|
"eval_runtime": 21.5927, |
|
"eval_samples_per_second": 46.312, |
|
"eval_steps_per_second": 1.482, |
|
"step": 3800 |
|
}, |
|
{ |
|
"epoch": 0.62, |
|
"learning_rate": 1e-06, |
|
"loss": 1.993, |
|
"step": 3810 |
|
}, |
|
{ |
|
"epoch": 0.62, |
|
"eval_loss": 1.8760625123977661, |
|
"eval_runtime": 21.5913, |
|
"eval_samples_per_second": 46.315, |
|
"eval_steps_per_second": 1.482, |
|
"step": 3810 |
|
}, |
|
{ |
|
"epoch": 0.62, |
|
"learning_rate": 1e-06, |
|
"loss": 1.9346, |
|
"step": 3820 |
|
}, |
|
{ |
|
"epoch": 0.62, |
|
"eval_loss": 1.8756874799728394, |
|
"eval_runtime": 21.601, |
|
"eval_samples_per_second": 46.294, |
|
"eval_steps_per_second": 1.481, |
|
"step": 3820 |
|
}, |
|
{ |
|
"epoch": 0.62, |
|
"learning_rate": 1e-06, |
|
"loss": 1.9037, |
|
"step": 3830 |
|
}, |
|
{ |
|
"epoch": 0.62, |
|
"eval_loss": 1.8730624914169312, |
|
"eval_runtime": 21.6017, |
|
"eval_samples_per_second": 46.293, |
|
"eval_steps_per_second": 1.481, |
|
"step": 3830 |
|
}, |
|
{ |
|
"epoch": 0.62, |
|
"learning_rate": 1e-06, |
|
"loss": 1.8902, |
|
"step": 3840 |
|
}, |
|
{ |
|
"epoch": 0.62, |
|
"eval_loss": 1.8721250295639038, |
|
"eval_runtime": 21.579, |
|
"eval_samples_per_second": 46.341, |
|
"eval_steps_per_second": 1.483, |
|
"step": 3840 |
|
}, |
|
{ |
|
"epoch": 0.62, |
|
"learning_rate": 1e-06, |
|
"loss": 2.019, |
|
"step": 3850 |
|
}, |
|
{ |
|
"epoch": 0.62, |
|
"eval_loss": 1.8739999532699585, |
|
"eval_runtime": 21.5802, |
|
"eval_samples_per_second": 46.339, |
|
"eval_steps_per_second": 1.483, |
|
"step": 3850 |
|
}, |
|
{ |
|
"epoch": 0.62, |
|
"learning_rate": 1e-06, |
|
"loss": 1.8906, |
|
"step": 3860 |
|
}, |
|
{ |
|
"epoch": 0.62, |
|
"eval_loss": 1.8728749752044678, |
|
"eval_runtime": 21.5856, |
|
"eval_samples_per_second": 46.327, |
|
"eval_steps_per_second": 1.482, |
|
"step": 3860 |
|
}, |
|
{ |
|
"epoch": 0.63, |
|
"learning_rate": 1e-06, |
|
"loss": 1.9652, |
|
"step": 3870 |
|
}, |
|
{ |
|
"epoch": 0.63, |
|
"eval_loss": 1.871000051498413, |
|
"eval_runtime": 21.6176, |
|
"eval_samples_per_second": 46.258, |
|
"eval_steps_per_second": 1.48, |
|
"step": 3870 |
|
}, |
|
{ |
|
"epoch": 0.63, |
|
"learning_rate": 1e-06, |
|
"loss": 1.9899, |
|
"step": 3880 |
|
}, |
|
{ |
|
"epoch": 0.63, |
|
"eval_loss": 1.870937466621399, |
|
"eval_runtime": 21.5907, |
|
"eval_samples_per_second": 46.316, |
|
"eval_steps_per_second": 1.482, |
|
"step": 3880 |
|
}, |
|
{ |
|
"epoch": 0.63, |
|
"learning_rate": 1e-06, |
|
"loss": 1.9037, |
|
"step": 3890 |
|
}, |
|
{ |
|
"epoch": 0.63, |
|
"eval_loss": 1.8716249465942383, |
|
"eval_runtime": 21.5777, |
|
"eval_samples_per_second": 46.344, |
|
"eval_steps_per_second": 1.483, |
|
"step": 3890 |
|
}, |
|
{ |
|
"epoch": 0.63, |
|
"learning_rate": 1e-06, |
|
"loss": 1.8631, |
|
"step": 3900 |
|
}, |
|
{ |
|
"epoch": 0.63, |
|
"eval_loss": 1.8726874589920044, |
|
"eval_runtime": 21.6244, |
|
"eval_samples_per_second": 46.244, |
|
"eval_steps_per_second": 1.48, |
|
"step": 3900 |
|
}, |
|
{ |
|
"epoch": 0.63, |
|
"learning_rate": 1e-06, |
|
"loss": 1.7984, |
|
"step": 3910 |
|
}, |
|
{ |
|
"epoch": 0.63, |
|
"eval_loss": 1.8730624914169312, |
|
"eval_runtime": 21.6044, |
|
"eval_samples_per_second": 46.287, |
|
"eval_steps_per_second": 1.481, |
|
"step": 3910 |
|
}, |
|
{ |
|
"epoch": 0.63, |
|
"learning_rate": 1e-06, |
|
"loss": 1.9407, |
|
"step": 3920 |
|
}, |
|
{ |
|
"epoch": 0.63, |
|
"eval_loss": 1.8730624914169312, |
|
"eval_runtime": 21.6177, |
|
"eval_samples_per_second": 46.258, |
|
"eval_steps_per_second": 1.48, |
|
"step": 3920 |
|
}, |
|
{ |
|
"epoch": 0.64, |
|
"learning_rate": 1e-06, |
|
"loss": 1.8164, |
|
"step": 3930 |
|
}, |
|
{ |
|
"epoch": 0.64, |
|
"eval_loss": 1.873437523841858, |
|
"eval_runtime": 21.6277, |
|
"eval_samples_per_second": 46.237, |
|
"eval_steps_per_second": 1.48, |
|
"step": 3930 |
|
}, |
|
{ |
|
"epoch": 0.64, |
|
"learning_rate": 1e-06, |
|
"loss": 1.8932, |
|
"step": 3940 |
|
}, |
|
{ |
|
"epoch": 0.64, |
|
"eval_loss": 1.871999979019165, |
|
"eval_runtime": 21.6169, |
|
"eval_samples_per_second": 46.26, |
|
"eval_steps_per_second": 1.48, |
|
"step": 3940 |
|
}, |
|
{ |
|
"epoch": 0.64, |
|
"learning_rate": 1e-06, |
|
"loss": 1.9844, |
|
"step": 3950 |
|
}, |
|
{ |
|
"epoch": 0.64, |
|
"eval_loss": 1.8725625276565552, |
|
"eval_runtime": 21.5819, |
|
"eval_samples_per_second": 46.335, |
|
"eval_steps_per_second": 1.483, |
|
"step": 3950 |
|
}, |
|
{ |
|
"epoch": 0.64, |
|
"learning_rate": 1e-06, |
|
"loss": 1.9424, |
|
"step": 3960 |
|
}, |
|
{ |
|
"epoch": 0.64, |
|
"eval_loss": 1.8738750219345093, |
|
"eval_runtime": 21.5932, |
|
"eval_samples_per_second": 46.311, |
|
"eval_steps_per_second": 1.482, |
|
"step": 3960 |
|
}, |
|
{ |
|
"epoch": 0.64, |
|
"learning_rate": 1e-06, |
|
"loss": 1.9486, |
|
"step": 3970 |
|
}, |
|
{ |
|
"epoch": 0.64, |
|
"eval_loss": 1.8728125095367432, |
|
"eval_runtime": 21.5827, |
|
"eval_samples_per_second": 46.333, |
|
"eval_steps_per_second": 1.483, |
|
"step": 3970 |
|
}, |
|
{ |
|
"epoch": 0.64, |
|
"learning_rate": 1e-06, |
|
"loss": 1.9749, |
|
"step": 3980 |
|
}, |
|
{ |
|
"epoch": 0.64, |
|
"eval_loss": 1.8716249465942383, |
|
"eval_runtime": 21.6181, |
|
"eval_samples_per_second": 46.258, |
|
"eval_steps_per_second": 1.48, |
|
"step": 3980 |
|
}, |
|
{ |
|
"epoch": 0.64, |
|
"learning_rate": 1e-06, |
|
"loss": 1.8682, |
|
"step": 3990 |
|
}, |
|
{ |
|
"epoch": 0.64, |
|
"eval_loss": 1.8724374771118164, |
|
"eval_runtime": 21.6027, |
|
"eval_samples_per_second": 46.291, |
|
"eval_steps_per_second": 1.481, |
|
"step": 3990 |
|
}, |
|
{ |
|
"epoch": 0.65, |
|
"learning_rate": 1e-06, |
|
"loss": 1.9345, |
|
"step": 4000 |
|
}, |
|
{ |
|
"epoch": 0.65, |
|
"eval_loss": 1.8714375495910645, |
|
"eval_runtime": 21.6133, |
|
"eval_samples_per_second": 46.268, |
|
"eval_steps_per_second": 1.481, |
|
"step": 4000 |
|
}, |
|
{ |
|
"epoch": 0.65, |
|
"learning_rate": 1e-06, |
|
"loss": 1.8661, |
|
"step": 4010 |
|
}, |
|
{ |
|
"epoch": 0.65, |
|
"eval_loss": 1.8723125457763672, |
|
"eval_runtime": 21.6661, |
|
"eval_samples_per_second": 46.155, |
|
"eval_steps_per_second": 1.477, |
|
"step": 4010 |
|
}, |
|
{ |
|
"epoch": 0.65, |
|
"learning_rate": 1e-06, |
|
"loss": 1.9508, |
|
"step": 4020 |
|
}, |
|
{ |
|
"epoch": 0.65, |
|
"eval_loss": 1.871999979019165, |
|
"eval_runtime": 21.5876, |
|
"eval_samples_per_second": 46.323, |
|
"eval_steps_per_second": 1.482, |
|
"step": 4020 |
|
}, |
|
{ |
|
"epoch": 0.65, |
|
"learning_rate": 1e-06, |
|
"loss": 1.9146, |
|
"step": 4030 |
|
}, |
|
{ |
|
"epoch": 0.65, |
|
"eval_loss": 1.8727500438690186, |
|
"eval_runtime": 21.5919, |
|
"eval_samples_per_second": 46.314, |
|
"eval_steps_per_second": 1.482, |
|
"step": 4030 |
|
}, |
|
{ |
|
"epoch": 0.65, |
|
"learning_rate": 1e-06, |
|
"loss": 1.7965, |
|
"step": 4040 |
|
}, |
|
{ |
|
"epoch": 0.65, |
|
"eval_loss": 1.871500015258789, |
|
"eval_runtime": 21.6483, |
|
"eval_samples_per_second": 46.193, |
|
"eval_steps_per_second": 1.478, |
|
"step": 4040 |
|
}, |
|
{ |
|
"epoch": 0.65, |
|
"learning_rate": 1e-06, |
|
"loss": 1.9109, |
|
"step": 4050 |
|
}, |
|
{ |
|
"epoch": 0.65, |
|
"eval_loss": 1.8706250190734863, |
|
"eval_runtime": 21.6261, |
|
"eval_samples_per_second": 46.24, |
|
"eval_steps_per_second": 1.48, |
|
"step": 4050 |
|
}, |
|
{ |
|
"epoch": 0.66, |
|
"learning_rate": 1e-06, |
|
"loss": 1.9373, |
|
"step": 4060 |
|
}, |
|
{ |
|
"epoch": 0.66, |
|
"eval_loss": 1.8695000410079956, |
|
"eval_runtime": 21.6392, |
|
"eval_samples_per_second": 46.213, |
|
"eval_steps_per_second": 1.479, |
|
"step": 4060 |
|
}, |
|
{ |
|
"epoch": 0.66, |
|
"learning_rate": 1e-06, |
|
"loss": 1.8915, |
|
"step": 4070 |
|
}, |
|
{ |
|
"epoch": 0.66, |
|
"eval_loss": 1.8704999685287476, |
|
"eval_runtime": 21.6056, |
|
"eval_samples_per_second": 46.284, |
|
"eval_steps_per_second": 1.481, |
|
"step": 4070 |
|
}, |
|
{ |
|
"epoch": 0.66, |
|
"learning_rate": 1e-06, |
|
"loss": 1.9567, |
|
"step": 4080 |
|
}, |
|
{ |
|
"epoch": 0.66, |
|
"eval_loss": 1.8711249828338623, |
|
"eval_runtime": 21.6009, |
|
"eval_samples_per_second": 46.294, |
|
"eval_steps_per_second": 1.481, |
|
"step": 4080 |
|
}, |
|
{ |
|
"epoch": 0.66, |
|
"learning_rate": 1e-06, |
|
"loss": 1.9928, |
|
"step": 4090 |
|
}, |
|
{ |
|
"epoch": 0.66, |
|
"eval_loss": 1.8720624446868896, |
|
"eval_runtime": 21.6088, |
|
"eval_samples_per_second": 46.277, |
|
"eval_steps_per_second": 1.481, |
|
"step": 4090 |
|
}, |
|
{ |
|
"epoch": 0.66, |
|
"learning_rate": 1e-06, |
|
"loss": 1.9007, |
|
"step": 4100 |
|
}, |
|
{ |
|
"epoch": 0.66, |
|
"eval_loss": 1.8716875314712524, |
|
"eval_runtime": 21.5836, |
|
"eval_samples_per_second": 46.331, |
|
"eval_steps_per_second": 1.483, |
|
"step": 4100 |
|
}, |
|
{ |
|
"epoch": 0.66, |
|
"learning_rate": 1e-06, |
|
"loss": 2.0197, |
|
"step": 4110 |
|
}, |
|
{ |
|
"epoch": 0.66, |
|
"eval_loss": 1.8726249933242798, |
|
"eval_runtime": 21.6035, |
|
"eval_samples_per_second": 46.289, |
|
"eval_steps_per_second": 1.481, |
|
"step": 4110 |
|
}, |
|
{ |
|
"epoch": 0.67, |
|
"learning_rate": 1e-06, |
|
"loss": 1.8577, |
|
"step": 4120 |
|
}, |
|
{ |
|
"epoch": 0.67, |
|
"eval_loss": 1.8723125457763672, |
|
"eval_runtime": 21.5721, |
|
"eval_samples_per_second": 46.356, |
|
"eval_steps_per_second": 1.483, |
|
"step": 4120 |
|
}, |
|
{ |
|
"epoch": 0.67, |
|
"learning_rate": 1e-06, |
|
"loss": 1.9027, |
|
"step": 4130 |
|
}, |
|
{ |
|
"epoch": 0.67, |
|
"eval_loss": 1.871500015258789, |
|
"eval_runtime": 21.6009, |
|
"eval_samples_per_second": 46.294, |
|
"eval_steps_per_second": 1.481, |
|
"step": 4130 |
|
}, |
|
{ |
|
"epoch": 0.67, |
|
"learning_rate": 1e-06, |
|
"loss": 1.9688, |
|
"step": 4140 |
|
}, |
|
{ |
|
"epoch": 0.67, |
|
"eval_loss": 1.870687484741211, |
|
"eval_runtime": 21.6082, |
|
"eval_samples_per_second": 46.279, |
|
"eval_steps_per_second": 1.481, |
|
"step": 4140 |
|
}, |
|
{ |
|
"epoch": 0.67, |
|
"learning_rate": 1e-06, |
|
"loss": 1.8481, |
|
"step": 4150 |
|
}, |
|
{ |
|
"epoch": 0.67, |
|
"eval_loss": 1.871000051498413, |
|
"eval_runtime": 21.5824, |
|
"eval_samples_per_second": 46.334, |
|
"eval_steps_per_second": 1.483, |
|
"step": 4150 |
|
}, |
|
{ |
|
"epoch": 0.67, |
|
"learning_rate": 1e-06, |
|
"loss": 2.0055, |
|
"step": 4160 |
|
}, |
|
{ |
|
"epoch": 0.67, |
|
"eval_loss": 1.8702499866485596, |
|
"eval_runtime": 21.5896, |
|
"eval_samples_per_second": 46.319, |
|
"eval_steps_per_second": 1.482, |
|
"step": 4160 |
|
}, |
|
{ |
|
"epoch": 0.67, |
|
"learning_rate": 1e-06, |
|
"loss": 1.8399, |
|
"step": 4170 |
|
}, |
|
{ |
|
"epoch": 0.67, |
|
"eval_loss": 1.8694374561309814, |
|
"eval_runtime": 21.5874, |
|
"eval_samples_per_second": 46.323, |
|
"eval_steps_per_second": 1.482, |
|
"step": 4170 |
|
}, |
|
{ |
|
"epoch": 0.68, |
|
"learning_rate": 1e-06, |
|
"loss": 1.9194, |
|
"step": 4180 |
|
}, |
|
{ |
|
"epoch": 0.68, |
|
"eval_loss": 1.870687484741211, |
|
"eval_runtime": 21.5716, |
|
"eval_samples_per_second": 46.357, |
|
"eval_steps_per_second": 1.483, |
|
"step": 4180 |
|
}, |
|
{ |
|
"epoch": 0.68, |
|
"learning_rate": 1e-06, |
|
"loss": 1.88, |
|
"step": 4190 |
|
}, |
|
{ |
|
"epoch": 0.68, |
|
"eval_loss": 1.8708125352859497, |
|
"eval_runtime": 21.6211, |
|
"eval_samples_per_second": 46.251, |
|
"eval_steps_per_second": 1.48, |
|
"step": 4190 |
|
}, |
|
{ |
|
"epoch": 0.68, |
|
"learning_rate": 1e-06, |
|
"loss": 1.9445, |
|
"step": 4200 |
|
}, |
|
{ |
|
"epoch": 0.68, |
|
"eval_loss": 1.8707499504089355, |
|
"eval_runtime": 21.5768, |
|
"eval_samples_per_second": 46.346, |
|
"eval_steps_per_second": 1.483, |
|
"step": 4200 |
|
}, |
|
{ |
|
"epoch": 0.68, |
|
"learning_rate": 1e-06, |
|
"loss": 1.8887, |
|
"step": 4210 |
|
}, |
|
{ |
|
"epoch": 0.68, |
|
"eval_loss": 1.8695625066757202, |
|
"eval_runtime": 21.6318, |
|
"eval_samples_per_second": 46.228, |
|
"eval_steps_per_second": 1.479, |
|
"step": 4210 |
|
}, |
|
{ |
|
"epoch": 0.68, |
|
"learning_rate": 1e-06, |
|
"loss": 1.888, |
|
"step": 4220 |
|
}, |
|
{ |
|
"epoch": 0.68, |
|
"eval_loss": 1.86899995803833, |
|
"eval_runtime": 21.6021, |
|
"eval_samples_per_second": 46.292, |
|
"eval_steps_per_second": 1.481, |
|
"step": 4220 |
|
}, |
|
{ |
|
"epoch": 0.68, |
|
"learning_rate": 1e-06, |
|
"loss": 1.933, |
|
"step": 4230 |
|
}, |
|
{ |
|
"epoch": 0.68, |
|
"eval_loss": 1.869687557220459, |
|
"eval_runtime": 21.5965, |
|
"eval_samples_per_second": 46.304, |
|
"eval_steps_per_second": 1.482, |
|
"step": 4230 |
|
}, |
|
{ |
|
"epoch": 0.69, |
|
"learning_rate": 1e-06, |
|
"loss": 1.9369, |
|
"step": 4240 |
|
}, |
|
{ |
|
"epoch": 0.69, |
|
"eval_loss": 1.870437502861023, |
|
"eval_runtime": 21.6057, |
|
"eval_samples_per_second": 46.284, |
|
"eval_steps_per_second": 1.481, |
|
"step": 4240 |
|
}, |
|
{ |
|
"epoch": 0.69, |
|
"learning_rate": 1e-06, |
|
"loss": 1.8328, |
|
"step": 4250 |
|
}, |
|
{ |
|
"epoch": 0.69, |
|
"eval_loss": 1.8711249828338623, |
|
"eval_runtime": 21.5785, |
|
"eval_samples_per_second": 46.343, |
|
"eval_steps_per_second": 1.483, |
|
"step": 4250 |
|
}, |
|
{ |
|
"epoch": 0.69, |
|
"learning_rate": 1e-06, |
|
"loss": 1.9838, |
|
"step": 4260 |
|
}, |
|
{ |
|
"epoch": 0.69, |
|
"eval_loss": 1.871000051498413, |
|
"eval_runtime": 21.6054, |
|
"eval_samples_per_second": 46.285, |
|
"eval_steps_per_second": 1.481, |
|
"step": 4260 |
|
}, |
|
{ |
|
"epoch": 0.69, |
|
"learning_rate": 1e-06, |
|
"loss": 1.8654, |
|
"step": 4270 |
|
}, |
|
{ |
|
"epoch": 0.69, |
|
"eval_loss": 1.869687557220459, |
|
"eval_runtime": 21.5808, |
|
"eval_samples_per_second": 46.338, |
|
"eval_steps_per_second": 1.483, |
|
"step": 4270 |
|
}, |
|
{ |
|
"epoch": 0.69, |
|
"learning_rate": 1e-06, |
|
"loss": 1.8876, |
|
"step": 4280 |
|
}, |
|
{ |
|
"epoch": 0.69, |
|
"eval_loss": 1.8686250448226929, |
|
"eval_runtime": 21.5951, |
|
"eval_samples_per_second": 46.307, |
|
"eval_steps_per_second": 1.482, |
|
"step": 4280 |
|
}, |
|
{ |
|
"epoch": 0.69, |
|
"learning_rate": 1e-06, |
|
"loss": 1.8914, |
|
"step": 4290 |
|
}, |
|
{ |
|
"epoch": 0.69, |
|
"eval_loss": 1.8711249828338623, |
|
"eval_runtime": 21.5778, |
|
"eval_samples_per_second": 46.344, |
|
"eval_steps_per_second": 1.483, |
|
"step": 4290 |
|
}, |
|
{ |
|
"epoch": 0.69, |
|
"learning_rate": 1e-06, |
|
"loss": 1.967, |
|
"step": 4300 |
|
}, |
|
{ |
|
"epoch": 0.69, |
|
"eval_loss": 1.8733124732971191, |
|
"eval_runtime": 21.6111, |
|
"eval_samples_per_second": 46.272, |
|
"eval_steps_per_second": 1.481, |
|
"step": 4300 |
|
}, |
|
{ |
|
"epoch": 0.7, |
|
"learning_rate": 1e-06, |
|
"loss": 1.9661, |
|
"step": 4310 |
|
}, |
|
{ |
|
"epoch": 0.7, |
|
"eval_loss": 1.872249960899353, |
|
"eval_runtime": 21.5819, |
|
"eval_samples_per_second": 46.335, |
|
"eval_steps_per_second": 1.483, |
|
"step": 4310 |
|
}, |
|
{ |
|
"epoch": 0.7, |
|
"learning_rate": 1e-06, |
|
"loss": 1.8294, |
|
"step": 4320 |
|
}, |
|
{ |
|
"epoch": 0.7, |
|
"eval_loss": 1.8703750371932983, |
|
"eval_runtime": 21.6271, |
|
"eval_samples_per_second": 46.238, |
|
"eval_steps_per_second": 1.48, |
|
"step": 4320 |
|
}, |
|
{ |
|
"epoch": 0.7, |
|
"learning_rate": 1e-06, |
|
"loss": 1.856, |
|
"step": 4330 |
|
}, |
|
{ |
|
"epoch": 0.7, |
|
"eval_loss": 1.869937539100647, |
|
"eval_runtime": 21.5927, |
|
"eval_samples_per_second": 46.312, |
|
"eval_steps_per_second": 1.482, |
|
"step": 4330 |
|
}, |
|
{ |
|
"epoch": 0.7, |
|
"learning_rate": 1e-06, |
|
"loss": 1.9569, |
|
"step": 4340 |
|
}, |
|
{ |
|
"epoch": 0.7, |
|
"eval_loss": 1.8698749542236328, |
|
"eval_runtime": 21.618, |
|
"eval_samples_per_second": 46.258, |
|
"eval_steps_per_second": 1.48, |
|
"step": 4340 |
|
}, |
|
{ |
|
"epoch": 0.7, |
|
"learning_rate": 1e-06, |
|
"loss": 1.9006, |
|
"step": 4350 |
|
}, |
|
{ |
|
"epoch": 0.7, |
|
"eval_loss": 1.8704999685287476, |
|
"eval_runtime": 21.5544, |
|
"eval_samples_per_second": 46.394, |
|
"eval_steps_per_second": 1.485, |
|
"step": 4350 |
|
}, |
|
{ |
|
"epoch": 0.7, |
|
"learning_rate": 1e-06, |
|
"loss": 1.8635, |
|
"step": 4360 |
|
}, |
|
{ |
|
"epoch": 0.7, |
|
"eval_loss": 1.869687557220459, |
|
"eval_runtime": 21.5869, |
|
"eval_samples_per_second": 46.324, |
|
"eval_steps_per_second": 1.482, |
|
"step": 4360 |
|
}, |
|
{ |
|
"epoch": 0.71, |
|
"learning_rate": 1e-06, |
|
"loss": 1.9302, |
|
"step": 4370 |
|
}, |
|
{ |
|
"epoch": 0.71, |
|
"eval_loss": 1.8683124780654907, |
|
"eval_runtime": 21.6281, |
|
"eval_samples_per_second": 46.236, |
|
"eval_steps_per_second": 1.48, |
|
"step": 4370 |
|
}, |
|
{ |
|
"epoch": 0.71, |
|
"learning_rate": 1e-06, |
|
"loss": 1.939, |
|
"step": 4380 |
|
}, |
|
{ |
|
"epoch": 0.71, |
|
"eval_loss": 1.8693749904632568, |
|
"eval_runtime": 21.6034, |
|
"eval_samples_per_second": 46.289, |
|
"eval_steps_per_second": 1.481, |
|
"step": 4380 |
|
}, |
|
{ |
|
"epoch": 0.71, |
|
"learning_rate": 1e-06, |
|
"loss": 2.0063, |
|
"step": 4390 |
|
}, |
|
{ |
|
"epoch": 0.71, |
|
"eval_loss": 1.8693125247955322, |
|
"eval_runtime": 21.614, |
|
"eval_samples_per_second": 46.266, |
|
"eval_steps_per_second": 1.481, |
|
"step": 4390 |
|
}, |
|
{ |
|
"epoch": 0.71, |
|
"learning_rate": 1e-06, |
|
"loss": 1.9715, |
|
"step": 4400 |
|
}, |
|
{ |
|
"epoch": 0.71, |
|
"eval_loss": 1.86899995803833, |
|
"eval_runtime": 21.6058, |
|
"eval_samples_per_second": 46.284, |
|
"eval_steps_per_second": 1.481, |
|
"step": 4400 |
|
}, |
|
{ |
|
"epoch": 0.71, |
|
"learning_rate": 1e-06, |
|
"loss": 1.8157, |
|
"step": 4410 |
|
}, |
|
{ |
|
"epoch": 0.71, |
|
"eval_loss": 1.868749976158142, |
|
"eval_runtime": 21.6026, |
|
"eval_samples_per_second": 46.291, |
|
"eval_steps_per_second": 1.481, |
|
"step": 4410 |
|
}, |
|
{ |
|
"epoch": 0.71, |
|
"learning_rate": 1e-06, |
|
"loss": 1.8611, |
|
"step": 4420 |
|
}, |
|
{ |
|
"epoch": 0.71, |
|
"eval_loss": 1.8670624494552612, |
|
"eval_runtime": 21.5635, |
|
"eval_samples_per_second": 46.375, |
|
"eval_steps_per_second": 1.484, |
|
"step": 4420 |
|
}, |
|
{ |
|
"epoch": 0.72, |
|
"learning_rate": 1e-06, |
|
"loss": 1.8827, |
|
"step": 4430 |
|
}, |
|
{ |
|
"epoch": 0.72, |
|
"eval_loss": 1.8689374923706055, |
|
"eval_runtime": 21.59, |
|
"eval_samples_per_second": 46.318, |
|
"eval_steps_per_second": 1.482, |
|
"step": 4430 |
|
}, |
|
{ |
|
"epoch": 0.72, |
|
"learning_rate": 1e-06, |
|
"loss": 1.9646, |
|
"step": 4440 |
|
}, |
|
{ |
|
"epoch": 0.72, |
|
"eval_loss": 1.868749976158142, |
|
"eval_runtime": 21.5851, |
|
"eval_samples_per_second": 46.328, |
|
"eval_steps_per_second": 1.483, |
|
"step": 4440 |
|
}, |
|
{ |
|
"epoch": 0.72, |
|
"learning_rate": 1e-06, |
|
"loss": 1.9296, |
|
"step": 4450 |
|
}, |
|
{ |
|
"epoch": 0.72, |
|
"eval_loss": 1.8684375286102295, |
|
"eval_runtime": 21.591, |
|
"eval_samples_per_second": 46.315, |
|
"eval_steps_per_second": 1.482, |
|
"step": 4450 |
|
}, |
|
{ |
|
"epoch": 0.72, |
|
"learning_rate": 1e-06, |
|
"loss": 1.9716, |
|
"step": 4460 |
|
}, |
|
{ |
|
"epoch": 0.72, |
|
"eval_loss": 1.867687463760376, |
|
"eval_runtime": 21.5971, |
|
"eval_samples_per_second": 46.303, |
|
"eval_steps_per_second": 1.482, |
|
"step": 4460 |
|
}, |
|
{ |
|
"epoch": 0.72, |
|
"learning_rate": 1e-06, |
|
"loss": 1.9831, |
|
"step": 4470 |
|
}, |
|
{ |
|
"epoch": 0.72, |
|
"eval_loss": 1.8676249980926514, |
|
"eval_runtime": 21.6382, |
|
"eval_samples_per_second": 46.215, |
|
"eval_steps_per_second": 1.479, |
|
"step": 4470 |
|
}, |
|
{ |
|
"epoch": 0.72, |
|
"learning_rate": 1e-06, |
|
"loss": 1.9715, |
|
"step": 4480 |
|
}, |
|
{ |
|
"epoch": 0.72, |
|
"eval_loss": 1.8678125143051147, |
|
"eval_runtime": 21.6005, |
|
"eval_samples_per_second": 46.295, |
|
"eval_steps_per_second": 1.481, |
|
"step": 4480 |
|
}, |
|
{ |
|
"epoch": 0.73, |
|
"learning_rate": 1e-06, |
|
"loss": 1.9537, |
|
"step": 4490 |
|
}, |
|
{ |
|
"epoch": 0.73, |
|
"eval_loss": 1.8674999475479126, |
|
"eval_runtime": 21.6161, |
|
"eval_samples_per_second": 46.262, |
|
"eval_steps_per_second": 1.48, |
|
"step": 4490 |
|
}, |
|
{ |
|
"epoch": 0.73, |
|
"learning_rate": 1e-06, |
|
"loss": 1.8924, |
|
"step": 4500 |
|
}, |
|
{ |
|
"epoch": 0.73, |
|
"eval_loss": 1.8671875, |
|
"eval_runtime": 21.5707, |
|
"eval_samples_per_second": 46.359, |
|
"eval_steps_per_second": 1.483, |
|
"step": 4500 |
|
}, |
|
{ |
|
"epoch": 0.73, |
|
"learning_rate": 1e-06, |
|
"loss": 1.8183, |
|
"step": 4510 |
|
}, |
|
{ |
|
"epoch": 0.73, |
|
"eval_loss": 1.8671250343322754, |
|
"eval_runtime": 21.5773, |
|
"eval_samples_per_second": 46.345, |
|
"eval_steps_per_second": 1.483, |
|
"step": 4510 |
|
}, |
|
{ |
|
"epoch": 0.73, |
|
"learning_rate": 1e-06, |
|
"loss": 1.969, |
|
"step": 4520 |
|
}, |
|
{ |
|
"epoch": 0.73, |
|
"eval_loss": 1.8673750162124634, |
|
"eval_runtime": 21.5742, |
|
"eval_samples_per_second": 46.352, |
|
"eval_steps_per_second": 1.483, |
|
"step": 4520 |
|
}, |
|
{ |
|
"epoch": 0.73, |
|
"learning_rate": 1e-06, |
|
"loss": 1.8521, |
|
"step": 4530 |
|
}, |
|
{ |
|
"epoch": 0.73, |
|
"eval_loss": 1.8665624856948853, |
|
"eval_runtime": 21.6165, |
|
"eval_samples_per_second": 46.261, |
|
"eval_steps_per_second": 1.48, |
|
"step": 4530 |
|
}, |
|
{ |
|
"epoch": 0.73, |
|
"learning_rate": 1e-06, |
|
"loss": 1.8958, |
|
"step": 4540 |
|
}, |
|
{ |
|
"epoch": 0.73, |
|
"eval_loss": 1.8663749694824219, |
|
"eval_runtime": 21.5664, |
|
"eval_samples_per_second": 46.368, |
|
"eval_steps_per_second": 1.484, |
|
"step": 4540 |
|
}, |
|
{ |
|
"epoch": 0.74, |
|
"learning_rate": 1e-06, |
|
"loss": 1.9078, |
|
"step": 4550 |
|
}, |
|
{ |
|
"epoch": 0.74, |
|
"eval_loss": 1.8668124675750732, |
|
"eval_runtime": 21.5521, |
|
"eval_samples_per_second": 46.399, |
|
"eval_steps_per_second": 1.485, |
|
"step": 4550 |
|
}, |
|
{ |
|
"epoch": 0.74, |
|
"learning_rate": 1e-06, |
|
"loss": 2.0287, |
|
"step": 4560 |
|
}, |
|
{ |
|
"epoch": 0.74, |
|
"eval_loss": 1.8656874895095825, |
|
"eval_runtime": 21.6113, |
|
"eval_samples_per_second": 46.272, |
|
"eval_steps_per_second": 1.481, |
|
"step": 4560 |
|
}, |
|
{ |
|
"epoch": 0.74, |
|
"learning_rate": 1e-06, |
|
"loss": 1.8675, |
|
"step": 4570 |
|
}, |
|
{ |
|
"epoch": 0.74, |
|
"eval_loss": 1.866437554359436, |
|
"eval_runtime": 21.5543, |
|
"eval_samples_per_second": 46.394, |
|
"eval_steps_per_second": 1.485, |
|
"step": 4570 |
|
}, |
|
{ |
|
"epoch": 0.74, |
|
"learning_rate": 1e-06, |
|
"loss": 1.9092, |
|
"step": 4580 |
|
}, |
|
{ |
|
"epoch": 0.74, |
|
"eval_loss": 1.8676249980926514, |
|
"eval_runtime": 21.5876, |
|
"eval_samples_per_second": 46.323, |
|
"eval_steps_per_second": 1.482, |
|
"step": 4580 |
|
}, |
|
{ |
|
"epoch": 0.74, |
|
"learning_rate": 1e-06, |
|
"loss": 1.9228, |
|
"step": 4590 |
|
}, |
|
{ |
|
"epoch": 0.74, |
|
"eval_loss": 1.8668750524520874, |
|
"eval_runtime": 21.5838, |
|
"eval_samples_per_second": 46.331, |
|
"eval_steps_per_second": 1.483, |
|
"step": 4590 |
|
}, |
|
{ |
|
"epoch": 0.74, |
|
"learning_rate": 1e-06, |
|
"loss": 1.8323, |
|
"step": 4600 |
|
}, |
|
{ |
|
"epoch": 0.74, |
|
"eval_loss": 1.8671250343322754, |
|
"eval_runtime": 21.5909, |
|
"eval_samples_per_second": 46.316, |
|
"eval_steps_per_second": 1.482, |
|
"step": 4600 |
|
}, |
|
{ |
|
"epoch": 0.75, |
|
"learning_rate": 1e-06, |
|
"loss": 1.8681, |
|
"step": 4610 |
|
}, |
|
{ |
|
"epoch": 0.75, |
|
"eval_loss": 1.8672499656677246, |
|
"eval_runtime": 21.6347, |
|
"eval_samples_per_second": 46.222, |
|
"eval_steps_per_second": 1.479, |
|
"step": 4610 |
|
}, |
|
{ |
|
"epoch": 0.75, |
|
"learning_rate": 1e-06, |
|
"loss": 1.8857, |
|
"step": 4620 |
|
}, |
|
{ |
|
"epoch": 0.75, |
|
"eval_loss": 1.866937518119812, |
|
"eval_runtime": 21.5667, |
|
"eval_samples_per_second": 46.368, |
|
"eval_steps_per_second": 1.484, |
|
"step": 4620 |
|
}, |
|
{ |
|
"epoch": 0.75, |
|
"learning_rate": 1e-06, |
|
"loss": 1.8886, |
|
"step": 4630 |
|
}, |
|
{ |
|
"epoch": 0.75, |
|
"eval_loss": 1.8674999475479126, |
|
"eval_runtime": 21.5708, |
|
"eval_samples_per_second": 46.359, |
|
"eval_steps_per_second": 1.483, |
|
"step": 4630 |
|
}, |
|
{ |
|
"epoch": 0.75, |
|
"learning_rate": 1e-06, |
|
"loss": 1.8823, |
|
"step": 4640 |
|
}, |
|
{ |
|
"epoch": 0.75, |
|
"eval_loss": 1.8668750524520874, |
|
"eval_runtime": 21.5567, |
|
"eval_samples_per_second": 46.389, |
|
"eval_steps_per_second": 1.484, |
|
"step": 4640 |
|
}, |
|
{ |
|
"epoch": 0.75, |
|
"learning_rate": 1e-06, |
|
"loss": 1.8864, |
|
"step": 4650 |
|
}, |
|
{ |
|
"epoch": 0.75, |
|
"eval_loss": 1.8669999837875366, |
|
"eval_runtime": 21.5934, |
|
"eval_samples_per_second": 46.311, |
|
"eval_steps_per_second": 1.482, |
|
"step": 4650 |
|
}, |
|
{ |
|
"epoch": 0.75, |
|
"learning_rate": 1e-06, |
|
"loss": 1.8154, |
|
"step": 4660 |
|
}, |
|
{ |
|
"epoch": 0.75, |
|
"eval_loss": 1.8663125038146973, |
|
"eval_runtime": 21.5778, |
|
"eval_samples_per_second": 46.344, |
|
"eval_steps_per_second": 1.483, |
|
"step": 4660 |
|
}, |
|
{ |
|
"epoch": 0.75, |
|
"learning_rate": 1e-06, |
|
"loss": 2.0137, |
|
"step": 4670 |
|
}, |
|
{ |
|
"epoch": 0.75, |
|
"eval_loss": 1.8665624856948853, |
|
"eval_runtime": 21.607, |
|
"eval_samples_per_second": 46.281, |
|
"eval_steps_per_second": 1.481, |
|
"step": 4670 |
|
}, |
|
{ |
|
"epoch": 0.76, |
|
"learning_rate": 1e-06, |
|
"loss": 1.9625, |
|
"step": 4680 |
|
}, |
|
{ |
|
"epoch": 0.76, |
|
"eval_loss": 1.8675625324249268, |
|
"eval_runtime": 21.5865, |
|
"eval_samples_per_second": 46.325, |
|
"eval_steps_per_second": 1.482, |
|
"step": 4680 |
|
}, |
|
{ |
|
"epoch": 0.76, |
|
"learning_rate": 1e-06, |
|
"loss": 1.8932, |
|
"step": 4690 |
|
}, |
|
{ |
|
"epoch": 0.76, |
|
"eval_loss": 1.8663749694824219, |
|
"eval_runtime": 21.5997, |
|
"eval_samples_per_second": 46.297, |
|
"eval_steps_per_second": 1.482, |
|
"step": 4690 |
|
}, |
|
{ |
|
"epoch": 0.76, |
|
"learning_rate": 1e-06, |
|
"loss": 1.8908, |
|
"step": 4700 |
|
}, |
|
{ |
|
"epoch": 0.76, |
|
"eval_loss": 1.8663749694824219, |
|
"eval_runtime": 21.5689, |
|
"eval_samples_per_second": 46.363, |
|
"eval_steps_per_second": 1.484, |
|
"step": 4700 |
|
}, |
|
{ |
|
"epoch": 0.76, |
|
"learning_rate": 1e-06, |
|
"loss": 1.8462, |
|
"step": 4710 |
|
}, |
|
{ |
|
"epoch": 0.76, |
|
"eval_loss": 1.8659374713897705, |
|
"eval_runtime": 21.6266, |
|
"eval_samples_per_second": 46.239, |
|
"eval_steps_per_second": 1.48, |
|
"step": 4710 |
|
}, |
|
{ |
|
"epoch": 0.76, |
|
"learning_rate": 1e-06, |
|
"loss": 1.931, |
|
"step": 4720 |
|
}, |
|
{ |
|
"epoch": 0.76, |
|
"eval_loss": 1.8665000200271606, |
|
"eval_runtime": 21.5565, |
|
"eval_samples_per_second": 46.39, |
|
"eval_steps_per_second": 1.484, |
|
"step": 4720 |
|
}, |
|
{ |
|
"epoch": 0.76, |
|
"learning_rate": 1e-06, |
|
"loss": 1.8934, |
|
"step": 4730 |
|
}, |
|
{ |
|
"epoch": 0.76, |
|
"eval_loss": 1.8661249876022339, |
|
"eval_runtime": 21.5483, |
|
"eval_samples_per_second": 46.407, |
|
"eval_steps_per_second": 1.485, |
|
"step": 4730 |
|
}, |
|
{ |
|
"epoch": 0.77, |
|
"learning_rate": 1e-06, |
|
"loss": 1.9062, |
|
"step": 4740 |
|
}, |
|
{ |
|
"epoch": 0.77, |
|
"eval_loss": 1.864687442779541, |
|
"eval_runtime": 21.5553, |
|
"eval_samples_per_second": 46.392, |
|
"eval_steps_per_second": 1.485, |
|
"step": 4740 |
|
}, |
|
{ |
|
"epoch": 0.77, |
|
"learning_rate": 1e-06, |
|
"loss": 1.9488, |
|
"step": 4750 |
|
}, |
|
{ |
|
"epoch": 0.77, |
|
"eval_loss": 1.865625023841858, |
|
"eval_runtime": 21.6493, |
|
"eval_samples_per_second": 46.191, |
|
"eval_steps_per_second": 1.478, |
|
"step": 4750 |
|
}, |
|
{ |
|
"epoch": 0.77, |
|
"learning_rate": 1e-06, |
|
"loss": 1.8542, |
|
"step": 4760 |
|
}, |
|
{ |
|
"epoch": 0.77, |
|
"eval_loss": 1.8663749694824219, |
|
"eval_runtime": 21.5708, |
|
"eval_samples_per_second": 46.359, |
|
"eval_steps_per_second": 1.483, |
|
"step": 4760 |
|
}, |
|
{ |
|
"epoch": 0.77, |
|
"learning_rate": 1e-06, |
|
"loss": 1.8613, |
|
"step": 4770 |
|
}, |
|
{ |
|
"epoch": 0.77, |
|
"eval_loss": 1.865625023841858, |
|
"eval_runtime": 21.5531, |
|
"eval_samples_per_second": 46.397, |
|
"eval_steps_per_second": 1.485, |
|
"step": 4770 |
|
}, |
|
{ |
|
"epoch": 0.77, |
|
"learning_rate": 1e-06, |
|
"loss": 1.9469, |
|
"step": 4780 |
|
}, |
|
{ |
|
"epoch": 0.77, |
|
"eval_loss": 1.864437460899353, |
|
"eval_runtime": 21.6086, |
|
"eval_samples_per_second": 46.278, |
|
"eval_steps_per_second": 1.481, |
|
"step": 4780 |
|
}, |
|
{ |
|
"epoch": 0.77, |
|
"learning_rate": 1e-06, |
|
"loss": 1.9361, |
|
"step": 4790 |
|
}, |
|
{ |
|
"epoch": 0.77, |
|
"eval_loss": 1.8630625009536743, |
|
"eval_runtime": 21.5955, |
|
"eval_samples_per_second": 46.306, |
|
"eval_steps_per_second": 1.482, |
|
"step": 4790 |
|
}, |
|
{ |
|
"epoch": 0.78, |
|
"learning_rate": 1e-06, |
|
"loss": 1.8308, |
|
"step": 4800 |
|
}, |
|
{ |
|
"epoch": 0.78, |
|
"eval_loss": 1.8620624542236328, |
|
"eval_runtime": 21.6009, |
|
"eval_samples_per_second": 46.294, |
|
"eval_steps_per_second": 1.481, |
|
"step": 4800 |
|
}, |
|
{ |
|
"epoch": 0.78, |
|
"learning_rate": 1e-06, |
|
"loss": 2.009, |
|
"step": 4810 |
|
}, |
|
{ |
|
"epoch": 0.78, |
|
"eval_loss": 1.862874984741211, |
|
"eval_runtime": 21.5914, |
|
"eval_samples_per_second": 46.315, |
|
"eval_steps_per_second": 1.482, |
|
"step": 4810 |
|
}, |
|
{ |
|
"epoch": 0.78, |
|
"learning_rate": 1e-06, |
|
"loss": 1.904, |
|
"step": 4820 |
|
}, |
|
{ |
|
"epoch": 0.78, |
|
"eval_loss": 1.8635624647140503, |
|
"eval_runtime": 21.5927, |
|
"eval_samples_per_second": 46.312, |
|
"eval_steps_per_second": 1.482, |
|
"step": 4820 |
|
}, |
|
{ |
|
"epoch": 0.78, |
|
"learning_rate": 1e-06, |
|
"loss": 1.9847, |
|
"step": 4830 |
|
}, |
|
{ |
|
"epoch": 0.78, |
|
"eval_loss": 1.8627500534057617, |
|
"eval_runtime": 21.5734, |
|
"eval_samples_per_second": 46.353, |
|
"eval_steps_per_second": 1.483, |
|
"step": 4830 |
|
}, |
|
{ |
|
"epoch": 0.78, |
|
"learning_rate": 1e-06, |
|
"loss": 1.9402, |
|
"step": 4840 |
|
}, |
|
{ |
|
"epoch": 0.78, |
|
"eval_loss": 1.8653124570846558, |
|
"eval_runtime": 21.6183, |
|
"eval_samples_per_second": 46.257, |
|
"eval_steps_per_second": 1.48, |
|
"step": 4840 |
|
}, |
|
{ |
|
"epoch": 0.78, |
|
"learning_rate": 1e-06, |
|
"loss": 1.9575, |
|
"step": 4850 |
|
}, |
|
{ |
|
"epoch": 0.78, |
|
"eval_loss": 1.8649375438690186, |
|
"eval_runtime": 21.5847, |
|
"eval_samples_per_second": 46.329, |
|
"eval_steps_per_second": 1.483, |
|
"step": 4850 |
|
}, |
|
{ |
|
"epoch": 0.79, |
|
"learning_rate": 1e-06, |
|
"loss": 1.9414, |
|
"step": 4860 |
|
}, |
|
{ |
|
"epoch": 0.79, |
|
"eval_loss": 1.8635624647140503, |
|
"eval_runtime": 21.5693, |
|
"eval_samples_per_second": 46.362, |
|
"eval_steps_per_second": 1.484, |
|
"step": 4860 |
|
}, |
|
{ |
|
"epoch": 0.79, |
|
"learning_rate": 1e-06, |
|
"loss": 1.8488, |
|
"step": 4870 |
|
}, |
|
{ |
|
"epoch": 0.79, |
|
"eval_loss": 1.863124966621399, |
|
"eval_runtime": 21.7767, |
|
"eval_samples_per_second": 45.921, |
|
"eval_steps_per_second": 1.469, |
|
"step": 4870 |
|
}, |
|
{ |
|
"epoch": 0.79, |
|
"learning_rate": 1e-06, |
|
"loss": 1.8756, |
|
"step": 4880 |
|
}, |
|
{ |
|
"epoch": 0.79, |
|
"eval_loss": 1.8624999523162842, |
|
"eval_runtime": 21.5671, |
|
"eval_samples_per_second": 46.367, |
|
"eval_steps_per_second": 1.484, |
|
"step": 4880 |
|
}, |
|
{ |
|
"epoch": 0.79, |
|
"learning_rate": 1e-06, |
|
"loss": 1.9141, |
|
"step": 4890 |
|
}, |
|
{ |
|
"epoch": 0.79, |
|
"eval_loss": 1.8633124828338623, |
|
"eval_runtime": 21.5606, |
|
"eval_samples_per_second": 46.381, |
|
"eval_steps_per_second": 1.484, |
|
"step": 4890 |
|
}, |
|
{ |
|
"epoch": 0.79, |
|
"learning_rate": 1e-06, |
|
"loss": 1.8407, |
|
"step": 4900 |
|
}, |
|
{ |
|
"epoch": 0.79, |
|
"eval_loss": 1.8626874685287476, |
|
"eval_runtime": 21.5655, |
|
"eval_samples_per_second": 46.37, |
|
"eval_steps_per_second": 1.484, |
|
"step": 4900 |
|
}, |
|
{ |
|
"epoch": 0.79, |
|
"learning_rate": 1e-06, |
|
"loss": 1.8517, |
|
"step": 4910 |
|
}, |
|
{ |
|
"epoch": 0.79, |
|
"eval_loss": 1.8627500534057617, |
|
"eval_runtime": 21.5756, |
|
"eval_samples_per_second": 46.349, |
|
"eval_steps_per_second": 1.483, |
|
"step": 4910 |
|
}, |
|
{ |
|
"epoch": 0.8, |
|
"learning_rate": 1e-06, |
|
"loss": 1.856, |
|
"step": 4920 |
|
}, |
|
{ |
|
"epoch": 0.8, |
|
"eval_loss": 1.8637499809265137, |
|
"eval_runtime": 21.5785, |
|
"eval_samples_per_second": 46.342, |
|
"eval_steps_per_second": 1.483, |
|
"step": 4920 |
|
}, |
|
{ |
|
"epoch": 0.8, |
|
"learning_rate": 1e-06, |
|
"loss": 1.8582, |
|
"step": 4930 |
|
}, |
|
{ |
|
"epoch": 0.8, |
|
"eval_loss": 1.863437533378601, |
|
"eval_runtime": 21.5986, |
|
"eval_samples_per_second": 46.299, |
|
"eval_steps_per_second": 1.482, |
|
"step": 4930 |
|
}, |
|
{ |
|
"epoch": 0.8, |
|
"learning_rate": 1e-06, |
|
"loss": 1.8528, |
|
"step": 4940 |
|
}, |
|
{ |
|
"epoch": 0.8, |
|
"eval_loss": 1.8622499704360962, |
|
"eval_runtime": 21.6191, |
|
"eval_samples_per_second": 46.255, |
|
"eval_steps_per_second": 1.48, |
|
"step": 4940 |
|
}, |
|
{ |
|
"epoch": 0.8, |
|
"learning_rate": 1e-06, |
|
"loss": 1.896, |
|
"step": 4950 |
|
}, |
|
{ |
|
"epoch": 0.8, |
|
"eval_loss": 1.8615000247955322, |
|
"eval_runtime": 21.5689, |
|
"eval_samples_per_second": 46.363, |
|
"eval_steps_per_second": 1.484, |
|
"step": 4950 |
|
}, |
|
{ |
|
"epoch": 0.8, |
|
"learning_rate": 1e-06, |
|
"loss": 1.9112, |
|
"step": 4960 |
|
}, |
|
{ |
|
"epoch": 0.8, |
|
"eval_loss": 1.86118745803833, |
|
"eval_runtime": 21.577, |
|
"eval_samples_per_second": 46.346, |
|
"eval_steps_per_second": 1.483, |
|
"step": 4960 |
|
}, |
|
{ |
|
"epoch": 0.8, |
|
"learning_rate": 1e-06, |
|
"loss": 1.9361, |
|
"step": 4970 |
|
}, |
|
{ |
|
"epoch": 0.8, |
|
"eval_loss": 1.8620624542236328, |
|
"eval_runtime": 21.6002, |
|
"eval_samples_per_second": 46.296, |
|
"eval_steps_per_second": 1.481, |
|
"step": 4970 |
|
}, |
|
{ |
|
"epoch": 0.8, |
|
"learning_rate": 1e-06, |
|
"loss": 1.9024, |
|
"step": 4980 |
|
}, |
|
{ |
|
"epoch": 0.8, |
|
"eval_loss": 1.8619375228881836, |
|
"eval_runtime": 21.6227, |
|
"eval_samples_per_second": 46.248, |
|
"eval_steps_per_second": 1.48, |
|
"step": 4980 |
|
}, |
|
{ |
|
"epoch": 0.81, |
|
"learning_rate": 1e-06, |
|
"loss": 1.9028, |
|
"step": 4990 |
|
}, |
|
{ |
|
"epoch": 0.81, |
|
"eval_loss": 1.8627500534057617, |
|
"eval_runtime": 21.5823, |
|
"eval_samples_per_second": 46.334, |
|
"eval_steps_per_second": 1.483, |
|
"step": 4990 |
|
}, |
|
{ |
|
"epoch": 0.81, |
|
"learning_rate": 1e-06, |
|
"loss": 1.8198, |
|
"step": 5000 |
|
}, |
|
{ |
|
"epoch": 0.81, |
|
"eval_loss": 1.8626874685287476, |
|
"eval_runtime": 21.5914, |
|
"eval_samples_per_second": 46.315, |
|
"eval_steps_per_second": 1.482, |
|
"step": 5000 |
|
}, |
|
{ |
|
"epoch": 0.81, |
|
"learning_rate": 1e-06, |
|
"loss": 1.967, |
|
"step": 5010 |
|
}, |
|
{ |
|
"epoch": 0.81, |
|
"eval_loss": 1.8630625009536743, |
|
"eval_runtime": 21.5946, |
|
"eval_samples_per_second": 46.308, |
|
"eval_steps_per_second": 1.482, |
|
"step": 5010 |
|
}, |
|
{ |
|
"epoch": 0.81, |
|
"learning_rate": 1e-06, |
|
"loss": 1.8466, |
|
"step": 5020 |
|
}, |
|
{ |
|
"epoch": 0.81, |
|
"eval_loss": 1.8624374866485596, |
|
"eval_runtime": 21.5514, |
|
"eval_samples_per_second": 46.401, |
|
"eval_steps_per_second": 1.485, |
|
"step": 5020 |
|
}, |
|
{ |
|
"epoch": 0.81, |
|
"learning_rate": 1e-06, |
|
"loss": 1.8635, |
|
"step": 5030 |
|
}, |
|
{ |
|
"epoch": 0.81, |
|
"eval_loss": 1.8626874685287476, |
|
"eval_runtime": 21.5938, |
|
"eval_samples_per_second": 46.31, |
|
"eval_steps_per_second": 1.482, |
|
"step": 5030 |
|
}, |
|
{ |
|
"epoch": 0.81, |
|
"learning_rate": 1e-06, |
|
"loss": 1.9223, |
|
"step": 5040 |
|
}, |
|
{ |
|
"epoch": 0.81, |
|
"eval_loss": 1.8628125190734863, |
|
"eval_runtime": 21.5984, |
|
"eval_samples_per_second": 46.3, |
|
"eval_steps_per_second": 1.482, |
|
"step": 5040 |
|
}, |
|
{ |
|
"epoch": 0.82, |
|
"learning_rate": 1e-06, |
|
"loss": 1.9931, |
|
"step": 5050 |
|
}, |
|
{ |
|
"epoch": 0.82, |
|
"eval_loss": 1.8619999885559082, |
|
"eval_runtime": 21.8827, |
|
"eval_samples_per_second": 45.698, |
|
"eval_steps_per_second": 1.462, |
|
"step": 5050 |
|
}, |
|
{ |
|
"epoch": 0.82, |
|
"learning_rate": 1e-06, |
|
"loss": 1.9137, |
|
"step": 5060 |
|
}, |
|
{ |
|
"epoch": 0.82, |
|
"eval_loss": 1.8615000247955322, |
|
"eval_runtime": 21.5734, |
|
"eval_samples_per_second": 46.353, |
|
"eval_steps_per_second": 1.483, |
|
"step": 5060 |
|
}, |
|
{ |
|
"epoch": 0.82, |
|
"learning_rate": 1e-06, |
|
"loss": 1.9683, |
|
"step": 5070 |
|
}, |
|
{ |
|
"epoch": 0.82, |
|
"eval_loss": 1.863124966621399, |
|
"eval_runtime": 21.5788, |
|
"eval_samples_per_second": 46.342, |
|
"eval_steps_per_second": 1.483, |
|
"step": 5070 |
|
}, |
|
{ |
|
"epoch": 0.82, |
|
"learning_rate": 1e-06, |
|
"loss": 1.9427, |
|
"step": 5080 |
|
}, |
|
{ |
|
"epoch": 0.82, |
|
"eval_loss": 1.8628125190734863, |
|
"eval_runtime": 21.5727, |
|
"eval_samples_per_second": 46.355, |
|
"eval_steps_per_second": 1.483, |
|
"step": 5080 |
|
}, |
|
{ |
|
"epoch": 0.82, |
|
"learning_rate": 1e-06, |
|
"loss": 1.8678, |
|
"step": 5090 |
|
}, |
|
{ |
|
"epoch": 0.82, |
|
"eval_loss": 1.8617500066757202, |
|
"eval_runtime": 21.5841, |
|
"eval_samples_per_second": 46.33, |
|
"eval_steps_per_second": 1.483, |
|
"step": 5090 |
|
}, |
|
{ |
|
"epoch": 0.82, |
|
"learning_rate": 1e-06, |
|
"loss": 1.9757, |
|
"step": 5100 |
|
}, |
|
{ |
|
"epoch": 0.82, |
|
"eval_loss": 1.8612500429153442, |
|
"eval_runtime": 21.5639, |
|
"eval_samples_per_second": 46.374, |
|
"eval_steps_per_second": 1.484, |
|
"step": 5100 |
|
}, |
|
{ |
|
"epoch": 0.83, |
|
"learning_rate": 1e-06, |
|
"loss": 1.9448, |
|
"step": 5110 |
|
}, |
|
{ |
|
"epoch": 0.83, |
|
"eval_loss": 1.860687494277954, |
|
"eval_runtime": 21.5528, |
|
"eval_samples_per_second": 46.398, |
|
"eval_steps_per_second": 1.485, |
|
"step": 5110 |
|
}, |
|
{ |
|
"epoch": 0.83, |
|
"learning_rate": 1e-06, |
|
"loss": 1.9297, |
|
"step": 5120 |
|
}, |
|
{ |
|
"epoch": 0.83, |
|
"eval_loss": 1.8616249561309814, |
|
"eval_runtime": 21.5684, |
|
"eval_samples_per_second": 46.364, |
|
"eval_steps_per_second": 1.484, |
|
"step": 5120 |
|
}, |
|
{ |
|
"epoch": 0.83, |
|
"learning_rate": 1e-06, |
|
"loss": 1.9876, |
|
"step": 5130 |
|
}, |
|
{ |
|
"epoch": 0.83, |
|
"eval_loss": 1.8630000352859497, |
|
"eval_runtime": 21.5722, |
|
"eval_samples_per_second": 46.356, |
|
"eval_steps_per_second": 1.483, |
|
"step": 5130 |
|
}, |
|
{ |
|
"epoch": 0.83, |
|
"learning_rate": 1e-06, |
|
"loss": 1.9954, |
|
"step": 5140 |
|
}, |
|
{ |
|
"epoch": 0.83, |
|
"eval_loss": 1.8636250495910645, |
|
"eval_runtime": 21.5747, |
|
"eval_samples_per_second": 46.35, |
|
"eval_steps_per_second": 1.483, |
|
"step": 5140 |
|
}, |
|
{ |
|
"epoch": 0.83, |
|
"learning_rate": 1e-06, |
|
"loss": 1.8693, |
|
"step": 5150 |
|
}, |
|
{ |
|
"epoch": 0.83, |
|
"eval_loss": 1.8613749742507935, |
|
"eval_runtime": 21.6006, |
|
"eval_samples_per_second": 46.295, |
|
"eval_steps_per_second": 1.481, |
|
"step": 5150 |
|
}, |
|
{ |
|
"epoch": 0.83, |
|
"learning_rate": 1e-06, |
|
"loss": 1.9001, |
|
"step": 5160 |
|
}, |
|
{ |
|
"epoch": 0.83, |
|
"eval_loss": 1.859874963760376, |
|
"eval_runtime": 21.6053, |
|
"eval_samples_per_second": 46.285, |
|
"eval_steps_per_second": 1.481, |
|
"step": 5160 |
|
}, |
|
{ |
|
"epoch": 0.84, |
|
"learning_rate": 1e-06, |
|
"loss": 1.9062, |
|
"step": 5170 |
|
}, |
|
{ |
|
"epoch": 0.84, |
|
"eval_loss": 1.8604375123977661, |
|
"eval_runtime": 21.6077, |
|
"eval_samples_per_second": 46.28, |
|
"eval_steps_per_second": 1.481, |
|
"step": 5170 |
|
}, |
|
{ |
|
"epoch": 0.84, |
|
"learning_rate": 1e-06, |
|
"loss": 1.8388, |
|
"step": 5180 |
|
}, |
|
{ |
|
"epoch": 0.84, |
|
"eval_loss": 1.860937476158142, |
|
"eval_runtime": 21.5983, |
|
"eval_samples_per_second": 46.3, |
|
"eval_steps_per_second": 1.482, |
|
"step": 5180 |
|
}, |
|
{ |
|
"epoch": 0.84, |
|
"learning_rate": 1e-06, |
|
"loss": 1.8595, |
|
"step": 5190 |
|
}, |
|
{ |
|
"epoch": 0.84, |
|
"eval_loss": 1.8619375228881836, |
|
"eval_runtime": 21.5823, |
|
"eval_samples_per_second": 46.334, |
|
"eval_steps_per_second": 1.483, |
|
"step": 5190 |
|
}, |
|
{ |
|
"epoch": 0.84, |
|
"learning_rate": 1e-06, |
|
"loss": 1.839, |
|
"step": 5200 |
|
}, |
|
{ |
|
"epoch": 0.84, |
|
"eval_loss": 1.8622499704360962, |
|
"eval_runtime": 21.5981, |
|
"eval_samples_per_second": 46.3, |
|
"eval_steps_per_second": 1.482, |
|
"step": 5200 |
|
}, |
|
{ |
|
"epoch": 0.84, |
|
"learning_rate": 1e-06, |
|
"loss": 1.8295, |
|
"step": 5210 |
|
}, |
|
{ |
|
"epoch": 0.84, |
|
"eval_loss": 1.8627500534057617, |
|
"eval_runtime": 21.5809, |
|
"eval_samples_per_second": 46.337, |
|
"eval_steps_per_second": 1.483, |
|
"step": 5210 |
|
}, |
|
{ |
|
"epoch": 0.84, |
|
"learning_rate": 1e-06, |
|
"loss": 1.8441, |
|
"step": 5220 |
|
}, |
|
{ |
|
"epoch": 0.84, |
|
"eval_loss": 1.8630625009536743, |
|
"eval_runtime": 21.6036, |
|
"eval_samples_per_second": 46.289, |
|
"eval_steps_per_second": 1.481, |
|
"step": 5220 |
|
}, |
|
{ |
|
"epoch": 0.85, |
|
"learning_rate": 1e-06, |
|
"loss": 1.8115, |
|
"step": 5230 |
|
}, |
|
{ |
|
"epoch": 0.85, |
|
"eval_loss": 1.862625002861023, |
|
"eval_runtime": 21.5853, |
|
"eval_samples_per_second": 46.328, |
|
"eval_steps_per_second": 1.482, |
|
"step": 5230 |
|
}, |
|
{ |
|
"epoch": 0.85, |
|
"learning_rate": 1e-06, |
|
"loss": 1.9005, |
|
"step": 5240 |
|
}, |
|
{ |
|
"epoch": 0.85, |
|
"eval_loss": 1.862874984741211, |
|
"eval_runtime": 21.591, |
|
"eval_samples_per_second": 46.316, |
|
"eval_steps_per_second": 1.482, |
|
"step": 5240 |
|
}, |
|
{ |
|
"epoch": 0.85, |
|
"learning_rate": 1e-06, |
|
"loss": 2.0089, |
|
"step": 5250 |
|
}, |
|
{ |
|
"epoch": 0.85, |
|
"eval_loss": 1.863124966621399, |
|
"eval_runtime": 21.5772, |
|
"eval_samples_per_second": 46.345, |
|
"eval_steps_per_second": 1.483, |
|
"step": 5250 |
|
}, |
|
{ |
|
"epoch": 0.85, |
|
"learning_rate": 1e-06, |
|
"loss": 1.8844, |
|
"step": 5260 |
|
}, |
|
{ |
|
"epoch": 0.85, |
|
"eval_loss": 1.8622499704360962, |
|
"eval_runtime": 21.5763, |
|
"eval_samples_per_second": 46.347, |
|
"eval_steps_per_second": 1.483, |
|
"step": 5260 |
|
}, |
|
{ |
|
"epoch": 0.85, |
|
"learning_rate": 1e-06, |
|
"loss": 1.9881, |
|
"step": 5270 |
|
}, |
|
{ |
|
"epoch": 0.85, |
|
"eval_loss": 1.8629374504089355, |
|
"eval_runtime": 21.6029, |
|
"eval_samples_per_second": 46.29, |
|
"eval_steps_per_second": 1.481, |
|
"step": 5270 |
|
}, |
|
{ |
|
"epoch": 0.85, |
|
"learning_rate": 1e-06, |
|
"loss": 1.9513, |
|
"step": 5280 |
|
}, |
|
{ |
|
"epoch": 0.85, |
|
"eval_loss": 1.8619999885559082, |
|
"eval_runtime": 21.5844, |
|
"eval_samples_per_second": 46.33, |
|
"eval_steps_per_second": 1.483, |
|
"step": 5280 |
|
}, |
|
{ |
|
"epoch": 0.85, |
|
"learning_rate": 1e-06, |
|
"loss": 1.9402, |
|
"step": 5290 |
|
}, |
|
{ |
|
"epoch": 0.85, |
|
"eval_loss": 1.8602499961853027, |
|
"eval_runtime": 21.5782, |
|
"eval_samples_per_second": 46.343, |
|
"eval_steps_per_second": 1.483, |
|
"step": 5290 |
|
}, |
|
{ |
|
"epoch": 0.86, |
|
"learning_rate": 1e-06, |
|
"loss": 1.8831, |
|
"step": 5300 |
|
}, |
|
{ |
|
"epoch": 0.86, |
|
"eval_loss": 1.8602499961853027, |
|
"eval_runtime": 21.5914, |
|
"eval_samples_per_second": 46.315, |
|
"eval_steps_per_second": 1.482, |
|
"step": 5300 |
|
}, |
|
{ |
|
"epoch": 0.86, |
|
"learning_rate": 1e-06, |
|
"loss": 1.8559, |
|
"step": 5310 |
|
}, |
|
{ |
|
"epoch": 0.86, |
|
"eval_loss": 1.860124945640564, |
|
"eval_runtime": 21.621, |
|
"eval_samples_per_second": 46.251, |
|
"eval_steps_per_second": 1.48, |
|
"step": 5310 |
|
}, |
|
{ |
|
"epoch": 0.86, |
|
"learning_rate": 1e-06, |
|
"loss": 1.9143, |
|
"step": 5320 |
|
}, |
|
{ |
|
"epoch": 0.86, |
|
"eval_loss": 1.859125018119812, |
|
"eval_runtime": 21.635, |
|
"eval_samples_per_second": 46.221, |
|
"eval_steps_per_second": 1.479, |
|
"step": 5320 |
|
}, |
|
{ |
|
"epoch": 0.86, |
|
"learning_rate": 1e-06, |
|
"loss": 1.9208, |
|
"step": 5330 |
|
}, |
|
{ |
|
"epoch": 0.86, |
|
"eval_loss": 1.8601875305175781, |
|
"eval_runtime": 21.5808, |
|
"eval_samples_per_second": 46.338, |
|
"eval_steps_per_second": 1.483, |
|
"step": 5330 |
|
}, |
|
{ |
|
"epoch": 0.86, |
|
"learning_rate": 1e-06, |
|
"loss": 1.9305, |
|
"step": 5340 |
|
}, |
|
{ |
|
"epoch": 0.86, |
|
"eval_loss": 1.8603750467300415, |
|
"eval_runtime": 21.5694, |
|
"eval_samples_per_second": 46.362, |
|
"eval_steps_per_second": 1.484, |
|
"step": 5340 |
|
}, |
|
{ |
|
"epoch": 0.86, |
|
"learning_rate": 1e-06, |
|
"loss": 1.8194, |
|
"step": 5350 |
|
}, |
|
{ |
|
"epoch": 0.86, |
|
"eval_loss": 1.8591874837875366, |
|
"eval_runtime": 21.5705, |
|
"eval_samples_per_second": 46.36, |
|
"eval_steps_per_second": 1.484, |
|
"step": 5350 |
|
}, |
|
{ |
|
"epoch": 0.87, |
|
"learning_rate": 1e-06, |
|
"loss": 1.9233, |
|
"step": 5360 |
|
}, |
|
{ |
|
"epoch": 0.87, |
|
"eval_loss": 1.8585000038146973, |
|
"eval_runtime": 21.5597, |
|
"eval_samples_per_second": 46.383, |
|
"eval_steps_per_second": 1.484, |
|
"step": 5360 |
|
}, |
|
{ |
|
"epoch": 0.87, |
|
"learning_rate": 1e-06, |
|
"loss": 1.8612, |
|
"step": 5370 |
|
}, |
|
{ |
|
"epoch": 0.87, |
|
"eval_loss": 1.8569999933242798, |
|
"eval_runtime": 21.5726, |
|
"eval_samples_per_second": 46.355, |
|
"eval_steps_per_second": 1.483, |
|
"step": 5370 |
|
}, |
|
{ |
|
"epoch": 0.87, |
|
"learning_rate": 1e-06, |
|
"loss": 1.8693, |
|
"step": 5380 |
|
}, |
|
{ |
|
"epoch": 0.87, |
|
"eval_loss": 1.8571875095367432, |
|
"eval_runtime": 21.5497, |
|
"eval_samples_per_second": 46.404, |
|
"eval_steps_per_second": 1.485, |
|
"step": 5380 |
|
}, |
|
{ |
|
"epoch": 0.87, |
|
"learning_rate": 1e-06, |
|
"loss": 1.907, |
|
"step": 5390 |
|
}, |
|
{ |
|
"epoch": 0.87, |
|
"eval_loss": 1.8586875200271606, |
|
"eval_runtime": 21.5621, |
|
"eval_samples_per_second": 46.378, |
|
"eval_steps_per_second": 1.484, |
|
"step": 5390 |
|
}, |
|
{ |
|
"epoch": 0.87, |
|
"learning_rate": 1e-06, |
|
"loss": 1.8938, |
|
"step": 5400 |
|
}, |
|
{ |
|
"epoch": 0.87, |
|
"eval_loss": 1.8597500324249268, |
|
"eval_runtime": 21.573, |
|
"eval_samples_per_second": 46.354, |
|
"eval_steps_per_second": 1.483, |
|
"step": 5400 |
|
}, |
|
{ |
|
"epoch": 0.87, |
|
"learning_rate": 1e-06, |
|
"loss": 1.9379, |
|
"step": 5410 |
|
}, |
|
{ |
|
"epoch": 0.87, |
|
"eval_loss": 1.8589375019073486, |
|
"eval_runtime": 21.6039, |
|
"eval_samples_per_second": 46.288, |
|
"eval_steps_per_second": 1.481, |
|
"step": 5410 |
|
}, |
|
{ |
|
"epoch": 0.88, |
|
"learning_rate": 1e-06, |
|
"loss": 1.9152, |
|
"step": 5420 |
|
}, |
|
{ |
|
"epoch": 0.88, |
|
"eval_loss": 1.8595625162124634, |
|
"eval_runtime": 21.5632, |
|
"eval_samples_per_second": 46.375, |
|
"eval_steps_per_second": 1.484, |
|
"step": 5420 |
|
}, |
|
{ |
|
"epoch": 0.88, |
|
"learning_rate": 1e-06, |
|
"loss": 1.8737, |
|
"step": 5430 |
|
}, |
|
{ |
|
"epoch": 0.88, |
|
"eval_loss": 1.8599375486373901, |
|
"eval_runtime": 21.5502, |
|
"eval_samples_per_second": 46.403, |
|
"eval_steps_per_second": 1.485, |
|
"step": 5430 |
|
}, |
|
{ |
|
"epoch": 0.88, |
|
"learning_rate": 1e-06, |
|
"loss": 1.9452, |
|
"step": 5440 |
|
}, |
|
{ |
|
"epoch": 0.88, |
|
"eval_loss": 1.859624981880188, |
|
"eval_runtime": 21.555, |
|
"eval_samples_per_second": 46.393, |
|
"eval_steps_per_second": 1.485, |
|
"step": 5440 |
|
}, |
|
{ |
|
"epoch": 0.88, |
|
"learning_rate": 1e-06, |
|
"loss": 1.7954, |
|
"step": 5450 |
|
}, |
|
{ |
|
"epoch": 0.88, |
|
"eval_loss": 1.858875036239624, |
|
"eval_runtime": 21.5874, |
|
"eval_samples_per_second": 46.323, |
|
"eval_steps_per_second": 1.482, |
|
"step": 5450 |
|
}, |
|
{ |
|
"epoch": 0.88, |
|
"learning_rate": 1e-06, |
|
"loss": 1.867, |
|
"step": 5460 |
|
}, |
|
{ |
|
"epoch": 0.88, |
|
"eval_loss": 1.8594374656677246, |
|
"eval_runtime": 21.5688, |
|
"eval_samples_per_second": 46.363, |
|
"eval_steps_per_second": 1.484, |
|
"step": 5460 |
|
}, |
|
{ |
|
"epoch": 0.88, |
|
"learning_rate": 1e-06, |
|
"loss": 1.9465, |
|
"step": 5470 |
|
}, |
|
{ |
|
"epoch": 0.88, |
|
"eval_loss": 1.859375, |
|
"eval_runtime": 21.5683, |
|
"eval_samples_per_second": 46.364, |
|
"eval_steps_per_second": 1.484, |
|
"step": 5470 |
|
}, |
|
{ |
|
"epoch": 0.89, |
|
"learning_rate": 1e-06, |
|
"loss": 1.8563, |
|
"step": 5480 |
|
}, |
|
{ |
|
"epoch": 0.89, |
|
"eval_loss": 1.8601875305175781, |
|
"eval_runtime": 21.5556, |
|
"eval_samples_per_second": 46.392, |
|
"eval_steps_per_second": 1.485, |
|
"step": 5480 |
|
}, |
|
{ |
|
"epoch": 0.89, |
|
"learning_rate": 1e-06, |
|
"loss": 1.9413, |
|
"step": 5490 |
|
}, |
|
{ |
|
"epoch": 0.89, |
|
"eval_loss": 1.8608125448226929, |
|
"eval_runtime": 21.554, |
|
"eval_samples_per_second": 46.395, |
|
"eval_steps_per_second": 1.485, |
|
"step": 5490 |
|
}, |
|
{ |
|
"epoch": 0.89, |
|
"learning_rate": 1e-06, |
|
"loss": 1.8672, |
|
"step": 5500 |
|
}, |
|
{ |
|
"epoch": 0.89, |
|
"eval_loss": 1.8600624799728394, |
|
"eval_runtime": 21.561, |
|
"eval_samples_per_second": 46.38, |
|
"eval_steps_per_second": 1.484, |
|
"step": 5500 |
|
}, |
|
{ |
|
"epoch": 0.89, |
|
"learning_rate": 1e-06, |
|
"loss": 1.9427, |
|
"step": 5510 |
|
}, |
|
{ |
|
"epoch": 0.89, |
|
"eval_loss": 1.8600000143051147, |
|
"eval_runtime": 21.5522, |
|
"eval_samples_per_second": 46.399, |
|
"eval_steps_per_second": 1.485, |
|
"step": 5510 |
|
}, |
|
{ |
|
"epoch": 0.89, |
|
"learning_rate": 1e-06, |
|
"loss": 1.8576, |
|
"step": 5520 |
|
}, |
|
{ |
|
"epoch": 0.89, |
|
"eval_loss": 1.8570624589920044, |
|
"eval_runtime": 21.5628, |
|
"eval_samples_per_second": 46.376, |
|
"eval_steps_per_second": 1.484, |
|
"step": 5520 |
|
}, |
|
{ |
|
"epoch": 0.89, |
|
"learning_rate": 1e-06, |
|
"loss": 1.9259, |
|
"step": 5530 |
|
}, |
|
{ |
|
"epoch": 0.89, |
|
"eval_loss": 1.8559999465942383, |
|
"eval_runtime": 21.5485, |
|
"eval_samples_per_second": 46.407, |
|
"eval_steps_per_second": 1.485, |
|
"step": 5530 |
|
}, |
|
{ |
|
"epoch": 0.9, |
|
"learning_rate": 1e-06, |
|
"loss": 1.9291, |
|
"step": 5540 |
|
}, |
|
{ |
|
"epoch": 0.9, |
|
"eval_loss": 1.856124997138977, |
|
"eval_runtime": 21.5528, |
|
"eval_samples_per_second": 46.398, |
|
"eval_steps_per_second": 1.485, |
|
"step": 5540 |
|
}, |
|
{ |
|
"epoch": 0.9, |
|
"learning_rate": 1e-06, |
|
"loss": 1.8441, |
|
"step": 5550 |
|
}, |
|
{ |
|
"epoch": 0.9, |
|
"eval_loss": 1.8566875457763672, |
|
"eval_runtime": 21.5783, |
|
"eval_samples_per_second": 46.343, |
|
"eval_steps_per_second": 1.483, |
|
"step": 5550 |
|
}, |
|
{ |
|
"epoch": 0.9, |
|
"learning_rate": 1e-06, |
|
"loss": 1.905, |
|
"step": 5560 |
|
}, |
|
{ |
|
"epoch": 0.9, |
|
"eval_loss": 1.8570624589920044, |
|
"eval_runtime": 21.5547, |
|
"eval_samples_per_second": 46.394, |
|
"eval_steps_per_second": 1.485, |
|
"step": 5560 |
|
}, |
|
{ |
|
"epoch": 0.9, |
|
"learning_rate": 1e-06, |
|
"loss": 1.9797, |
|
"step": 5570 |
|
}, |
|
{ |
|
"epoch": 0.9, |
|
"eval_loss": 1.8564374446868896, |
|
"eval_runtime": 21.5456, |
|
"eval_samples_per_second": 46.413, |
|
"eval_steps_per_second": 1.485, |
|
"step": 5570 |
|
}, |
|
{ |
|
"epoch": 0.9, |
|
"learning_rate": 1e-06, |
|
"loss": 1.8988, |
|
"step": 5580 |
|
}, |
|
{ |
|
"epoch": 0.9, |
|
"eval_loss": 1.8571875095367432, |
|
"eval_runtime": 21.5553, |
|
"eval_samples_per_second": 46.392, |
|
"eval_steps_per_second": 1.485, |
|
"step": 5580 |
|
}, |
|
{ |
|
"epoch": 0.9, |
|
"learning_rate": 1e-06, |
|
"loss": 1.9099, |
|
"step": 5590 |
|
}, |
|
{ |
|
"epoch": 0.9, |
|
"eval_loss": 1.8568124771118164, |
|
"eval_runtime": 21.5449, |
|
"eval_samples_per_second": 46.415, |
|
"eval_steps_per_second": 1.485, |
|
"step": 5590 |
|
}, |
|
{ |
|
"epoch": 0.91, |
|
"learning_rate": 1e-06, |
|
"loss": 1.9458, |
|
"step": 5600 |
|
}, |
|
{ |
|
"epoch": 0.91, |
|
"eval_loss": 1.8576250076293945, |
|
"eval_runtime": 21.5523, |
|
"eval_samples_per_second": 46.399, |
|
"eval_steps_per_second": 1.485, |
|
"step": 5600 |
|
}, |
|
{ |
|
"epoch": 0.91, |
|
"learning_rate": 1e-06, |
|
"loss": 1.9416, |
|
"step": 5610 |
|
}, |
|
{ |
|
"epoch": 0.91, |
|
"eval_loss": 1.8564374446868896, |
|
"eval_runtime": 21.561, |
|
"eval_samples_per_second": 46.38, |
|
"eval_steps_per_second": 1.484, |
|
"step": 5610 |
|
}, |
|
{ |
|
"epoch": 0.91, |
|
"learning_rate": 1e-06, |
|
"loss": 1.8823, |
|
"step": 5620 |
|
}, |
|
{ |
|
"epoch": 0.91, |
|
"eval_loss": 1.8567500114440918, |
|
"eval_runtime": 21.5589, |
|
"eval_samples_per_second": 46.385, |
|
"eval_steps_per_second": 1.484, |
|
"step": 5620 |
|
}, |
|
{ |
|
"epoch": 0.91, |
|
"learning_rate": 1e-06, |
|
"loss": 1.8919, |
|
"step": 5630 |
|
}, |
|
{ |
|
"epoch": 0.91, |
|
"eval_loss": 1.8569375276565552, |
|
"eval_runtime": 21.5546, |
|
"eval_samples_per_second": 46.394, |
|
"eval_steps_per_second": 1.485, |
|
"step": 5630 |
|
}, |
|
{ |
|
"epoch": 0.91, |
|
"learning_rate": 1e-06, |
|
"loss": 1.9271, |
|
"step": 5640 |
|
}, |
|
{ |
|
"epoch": 0.91, |
|
"eval_loss": 1.8585624694824219, |
|
"eval_runtime": 21.5722, |
|
"eval_samples_per_second": 46.356, |
|
"eval_steps_per_second": 1.483, |
|
"step": 5640 |
|
}, |
|
{ |
|
"epoch": 0.91, |
|
"learning_rate": 1e-06, |
|
"loss": 1.9337, |
|
"step": 5650 |
|
}, |
|
{ |
|
"epoch": 0.91, |
|
"eval_loss": 1.8586875200271606, |
|
"eval_runtime": 21.5528, |
|
"eval_samples_per_second": 46.398, |
|
"eval_steps_per_second": 1.485, |
|
"step": 5650 |
|
}, |
|
{ |
|
"epoch": 0.91, |
|
"learning_rate": 1e-06, |
|
"loss": 1.9362, |
|
"step": 5660 |
|
}, |
|
{ |
|
"epoch": 0.91, |
|
"eval_loss": 1.8581249713897705, |
|
"eval_runtime": 21.5364, |
|
"eval_samples_per_second": 46.433, |
|
"eval_steps_per_second": 1.486, |
|
"step": 5660 |
|
}, |
|
{ |
|
"epoch": 0.92, |
|
"learning_rate": 1e-06, |
|
"loss": 1.8491, |
|
"step": 5670 |
|
}, |
|
{ |
|
"epoch": 0.92, |
|
"eval_loss": 1.8567500114440918, |
|
"eval_runtime": 21.6083, |
|
"eval_samples_per_second": 46.279, |
|
"eval_steps_per_second": 1.481, |
|
"step": 5670 |
|
}, |
|
{ |
|
"epoch": 0.92, |
|
"learning_rate": 1e-06, |
|
"loss": 1.8954, |
|
"step": 5680 |
|
}, |
|
{ |
|
"epoch": 0.92, |
|
"eval_loss": 1.8565624952316284, |
|
"eval_runtime": 21.576, |
|
"eval_samples_per_second": 46.348, |
|
"eval_steps_per_second": 1.483, |
|
"step": 5680 |
|
}, |
|
{ |
|
"epoch": 0.92, |
|
"learning_rate": 1e-06, |
|
"loss": 1.8662, |
|
"step": 5690 |
|
}, |
|
{ |
|
"epoch": 0.92, |
|
"eval_loss": 1.856624960899353, |
|
"eval_runtime": 21.567, |
|
"eval_samples_per_second": 46.367, |
|
"eval_steps_per_second": 1.484, |
|
"step": 5690 |
|
}, |
|
{ |
|
"epoch": 0.92, |
|
"learning_rate": 1e-06, |
|
"loss": 1.9247, |
|
"step": 5700 |
|
}, |
|
{ |
|
"epoch": 0.92, |
|
"eval_loss": 1.856124997138977, |
|
"eval_runtime": 21.5695, |
|
"eval_samples_per_second": 46.362, |
|
"eval_steps_per_second": 1.484, |
|
"step": 5700 |
|
}, |
|
{ |
|
"epoch": 0.92, |
|
"learning_rate": 1e-06, |
|
"loss": 1.9457, |
|
"step": 5710 |
|
}, |
|
{ |
|
"epoch": 0.92, |
|
"eval_loss": 1.8573124408721924, |
|
"eval_runtime": 21.5802, |
|
"eval_samples_per_second": 46.339, |
|
"eval_steps_per_second": 1.483, |
|
"step": 5710 |
|
}, |
|
{ |
|
"epoch": 0.92, |
|
"learning_rate": 1e-06, |
|
"loss": 1.8775, |
|
"step": 5720 |
|
}, |
|
{ |
|
"epoch": 0.92, |
|
"eval_loss": 1.855562448501587, |
|
"eval_runtime": 21.5795, |
|
"eval_samples_per_second": 46.34, |
|
"eval_steps_per_second": 1.483, |
|
"step": 5720 |
|
}, |
|
{ |
|
"epoch": 0.93, |
|
"learning_rate": 1e-06, |
|
"loss": 1.9068, |
|
"step": 5730 |
|
}, |
|
{ |
|
"epoch": 0.93, |
|
"eval_loss": 1.855875015258789, |
|
"eval_runtime": 21.5796, |
|
"eval_samples_per_second": 46.34, |
|
"eval_steps_per_second": 1.483, |
|
"step": 5730 |
|
}, |
|
{ |
|
"epoch": 0.93, |
|
"learning_rate": 1e-06, |
|
"loss": 1.946, |
|
"step": 5740 |
|
}, |
|
{ |
|
"epoch": 0.93, |
|
"eval_loss": 1.8560625314712524, |
|
"eval_runtime": 21.5655, |
|
"eval_samples_per_second": 46.37, |
|
"eval_steps_per_second": 1.484, |
|
"step": 5740 |
|
}, |
|
{ |
|
"epoch": 0.93, |
|
"learning_rate": 1e-06, |
|
"loss": 1.9571, |
|
"step": 5750 |
|
}, |
|
{ |
|
"epoch": 0.93, |
|
"eval_loss": 1.8551249504089355, |
|
"eval_runtime": 21.5888, |
|
"eval_samples_per_second": 46.32, |
|
"eval_steps_per_second": 1.482, |
|
"step": 5750 |
|
}, |
|
{ |
|
"epoch": 0.93, |
|
"learning_rate": 1e-06, |
|
"loss": 1.8821, |
|
"step": 5760 |
|
}, |
|
{ |
|
"epoch": 0.93, |
|
"eval_loss": 1.8554999828338623, |
|
"eval_runtime": 21.6112, |
|
"eval_samples_per_second": 46.272, |
|
"eval_steps_per_second": 1.481, |
|
"step": 5760 |
|
}, |
|
{ |
|
"epoch": 0.93, |
|
"learning_rate": 1e-06, |
|
"loss": 1.8955, |
|
"step": 5770 |
|
}, |
|
{ |
|
"epoch": 0.93, |
|
"eval_loss": 1.8547500371932983, |
|
"eval_runtime": 21.5724, |
|
"eval_samples_per_second": 46.355, |
|
"eval_steps_per_second": 1.483, |
|
"step": 5770 |
|
}, |
|
{ |
|
"epoch": 0.93, |
|
"learning_rate": 1e-06, |
|
"loss": 1.9742, |
|
"step": 5780 |
|
}, |
|
{ |
|
"epoch": 0.93, |
|
"eval_loss": 1.8536875247955322, |
|
"eval_runtime": 21.5718, |
|
"eval_samples_per_second": 46.357, |
|
"eval_steps_per_second": 1.483, |
|
"step": 5780 |
|
}, |
|
{ |
|
"epoch": 0.94, |
|
"learning_rate": 1e-06, |
|
"loss": 1.9093, |
|
"step": 5790 |
|
}, |
|
{ |
|
"epoch": 0.94, |
|
"eval_loss": 1.8535000085830688, |
|
"eval_runtime": 21.5691, |
|
"eval_samples_per_second": 46.363, |
|
"eval_steps_per_second": 1.484, |
|
"step": 5790 |
|
}, |
|
{ |
|
"epoch": 0.94, |
|
"learning_rate": 1e-06, |
|
"loss": 1.9554, |
|
"step": 5800 |
|
}, |
|
{ |
|
"epoch": 0.94, |
|
"eval_loss": 1.8543750047683716, |
|
"eval_runtime": 21.5774, |
|
"eval_samples_per_second": 46.345, |
|
"eval_steps_per_second": 1.483, |
|
"step": 5800 |
|
}, |
|
{ |
|
"epoch": 0.94, |
|
"learning_rate": 1e-06, |
|
"loss": 1.8263, |
|
"step": 5810 |
|
}, |
|
{ |
|
"epoch": 0.94, |
|
"eval_loss": 1.8541874885559082, |
|
"eval_runtime": 21.5724, |
|
"eval_samples_per_second": 46.356, |
|
"eval_steps_per_second": 1.483, |
|
"step": 5810 |
|
}, |
|
{ |
|
"epoch": 0.94, |
|
"learning_rate": 1e-06, |
|
"loss": 1.9127, |
|
"step": 5820 |
|
}, |
|
{ |
|
"epoch": 0.94, |
|
"eval_loss": 1.8545000553131104, |
|
"eval_runtime": 21.5547, |
|
"eval_samples_per_second": 46.394, |
|
"eval_steps_per_second": 1.485, |
|
"step": 5820 |
|
}, |
|
{ |
|
"epoch": 0.94, |
|
"learning_rate": 1e-06, |
|
"loss": 1.8252, |
|
"step": 5830 |
|
}, |
|
{ |
|
"epoch": 0.94, |
|
"eval_loss": 1.8549375534057617, |
|
"eval_runtime": 21.5655, |
|
"eval_samples_per_second": 46.37, |
|
"eval_steps_per_second": 1.484, |
|
"step": 5830 |
|
}, |
|
{ |
|
"epoch": 0.94, |
|
"learning_rate": 1e-06, |
|
"loss": 1.8472, |
|
"step": 5840 |
|
}, |
|
{ |
|
"epoch": 0.94, |
|
"eval_loss": 1.8551249504089355, |
|
"eval_runtime": 21.5877, |
|
"eval_samples_per_second": 46.323, |
|
"eval_steps_per_second": 1.482, |
|
"step": 5840 |
|
}, |
|
{ |
|
"epoch": 0.95, |
|
"learning_rate": 1e-06, |
|
"loss": 1.9126, |
|
"step": 5850 |
|
}, |
|
{ |
|
"epoch": 0.95, |
|
"eval_loss": 1.8554999828338623, |
|
"eval_runtime": 21.5809, |
|
"eval_samples_per_second": 46.337, |
|
"eval_steps_per_second": 1.483, |
|
"step": 5850 |
|
}, |
|
{ |
|
"epoch": 0.95, |
|
"learning_rate": 1e-06, |
|
"loss": 1.8399, |
|
"step": 5860 |
|
}, |
|
{ |
|
"epoch": 0.95, |
|
"eval_loss": 1.8544374704360962, |
|
"eval_runtime": 21.5745, |
|
"eval_samples_per_second": 46.351, |
|
"eval_steps_per_second": 1.483, |
|
"step": 5860 |
|
}, |
|
{ |
|
"epoch": 0.95, |
|
"learning_rate": 1e-06, |
|
"loss": 1.8661, |
|
"step": 5870 |
|
}, |
|
{ |
|
"epoch": 0.95, |
|
"eval_loss": 1.8534375429153442, |
|
"eval_runtime": 21.5844, |
|
"eval_samples_per_second": 46.33, |
|
"eval_steps_per_second": 1.483, |
|
"step": 5870 |
|
}, |
|
{ |
|
"epoch": 0.95, |
|
"learning_rate": 1e-06, |
|
"loss": 1.8978, |
|
"step": 5880 |
|
}, |
|
{ |
|
"epoch": 0.95, |
|
"eval_loss": 1.853124976158142, |
|
"eval_runtime": 21.5729, |
|
"eval_samples_per_second": 46.354, |
|
"eval_steps_per_second": 1.483, |
|
"step": 5880 |
|
}, |
|
{ |
|
"epoch": 0.95, |
|
"learning_rate": 1e-06, |
|
"loss": 1.8909, |
|
"step": 5890 |
|
}, |
|
{ |
|
"epoch": 0.95, |
|
"eval_loss": 1.85337495803833, |
|
"eval_runtime": 21.5575, |
|
"eval_samples_per_second": 46.388, |
|
"eval_steps_per_second": 1.484, |
|
"step": 5890 |
|
}, |
|
{ |
|
"epoch": 0.95, |
|
"learning_rate": 1e-06, |
|
"loss": 1.9605, |
|
"step": 5900 |
|
}, |
|
{ |
|
"epoch": 0.95, |
|
"eval_loss": 1.85337495803833, |
|
"eval_runtime": 21.6309, |
|
"eval_samples_per_second": 46.23, |
|
"eval_steps_per_second": 1.479, |
|
"step": 5900 |
|
}, |
|
{ |
|
"epoch": 0.96, |
|
"learning_rate": 1e-06, |
|
"loss": 1.9278, |
|
"step": 5910 |
|
}, |
|
{ |
|
"epoch": 0.96, |
|
"eval_loss": 1.8543750047683716, |
|
"eval_runtime": 21.5801, |
|
"eval_samples_per_second": 46.339, |
|
"eval_steps_per_second": 1.483, |
|
"step": 5910 |
|
}, |
|
{ |
|
"epoch": 0.96, |
|
"learning_rate": 1e-06, |
|
"loss": 1.8411, |
|
"step": 5920 |
|
}, |
|
{ |
|
"epoch": 0.96, |
|
"eval_loss": 1.8535000085830688, |
|
"eval_runtime": 21.5905, |
|
"eval_samples_per_second": 46.317, |
|
"eval_steps_per_second": 1.482, |
|
"step": 5920 |
|
}, |
|
{ |
|
"epoch": 0.96, |
|
"learning_rate": 1e-06, |
|
"loss": 1.9211, |
|
"step": 5930 |
|
}, |
|
{ |
|
"epoch": 0.96, |
|
"eval_loss": 1.8535000085830688, |
|
"eval_runtime": 21.5883, |
|
"eval_samples_per_second": 46.321, |
|
"eval_steps_per_second": 1.482, |
|
"step": 5930 |
|
}, |
|
{ |
|
"epoch": 0.96, |
|
"learning_rate": 1e-06, |
|
"loss": 1.8927, |
|
"step": 5940 |
|
}, |
|
{ |
|
"epoch": 0.96, |
|
"eval_loss": 1.8531874418258667, |
|
"eval_runtime": 21.561, |
|
"eval_samples_per_second": 46.38, |
|
"eval_steps_per_second": 1.484, |
|
"step": 5940 |
|
}, |
|
{ |
|
"epoch": 0.96, |
|
"learning_rate": 1e-06, |
|
"loss": 1.8337, |
|
"step": 5950 |
|
}, |
|
{ |
|
"epoch": 0.96, |
|
"eval_loss": 1.8536250591278076, |
|
"eval_runtime": 21.562, |
|
"eval_samples_per_second": 46.378, |
|
"eval_steps_per_second": 1.484, |
|
"step": 5950 |
|
}, |
|
{ |
|
"epoch": 0.96, |
|
"learning_rate": 1e-06, |
|
"loss": 1.8951, |
|
"step": 5960 |
|
}, |
|
{ |
|
"epoch": 0.96, |
|
"eval_loss": 1.8544374704360962, |
|
"eval_runtime": 21.5582, |
|
"eval_samples_per_second": 46.386, |
|
"eval_steps_per_second": 1.484, |
|
"step": 5960 |
|
}, |
|
{ |
|
"epoch": 0.96, |
|
"learning_rate": 1e-06, |
|
"loss": 1.9056, |
|
"step": 5970 |
|
}, |
|
{ |
|
"epoch": 0.96, |
|
"eval_loss": 1.855062484741211, |
|
"eval_runtime": 21.5666, |
|
"eval_samples_per_second": 46.368, |
|
"eval_steps_per_second": 1.484, |
|
"step": 5970 |
|
}, |
|
{ |
|
"epoch": 0.97, |
|
"learning_rate": 1e-06, |
|
"loss": 1.8787, |
|
"step": 5980 |
|
}, |
|
{ |
|
"epoch": 0.97, |
|
"eval_loss": 1.8537499904632568, |
|
"eval_runtime": 21.5601, |
|
"eval_samples_per_second": 46.382, |
|
"eval_steps_per_second": 1.484, |
|
"step": 5980 |
|
}, |
|
{ |
|
"epoch": 0.97, |
|
"learning_rate": 1e-06, |
|
"loss": 1.9587, |
|
"step": 5990 |
|
}, |
|
{ |
|
"epoch": 0.97, |
|
"eval_loss": 1.8549375534057617, |
|
"eval_runtime": 21.5652, |
|
"eval_samples_per_second": 46.371, |
|
"eval_steps_per_second": 1.484, |
|
"step": 5990 |
|
}, |
|
{ |
|
"epoch": 0.97, |
|
"learning_rate": 1e-06, |
|
"loss": 2.048, |
|
"step": 6000 |
|
}, |
|
{ |
|
"epoch": 0.97, |
|
"eval_loss": 1.8557499647140503, |
|
"eval_runtime": 21.5611, |
|
"eval_samples_per_second": 46.38, |
|
"eval_steps_per_second": 1.484, |
|
"step": 6000 |
|
}, |
|
{ |
|
"epoch": 0.97, |
|
"learning_rate": 1e-06, |
|
"loss": 1.8774, |
|
"step": 6010 |
|
}, |
|
{ |
|
"epoch": 0.97, |
|
"eval_loss": 1.8544374704360962, |
|
"eval_runtime": 21.5818, |
|
"eval_samples_per_second": 46.335, |
|
"eval_steps_per_second": 1.483, |
|
"step": 6010 |
|
}, |
|
{ |
|
"epoch": 0.97, |
|
"learning_rate": 1e-06, |
|
"loss": 1.8685, |
|
"step": 6020 |
|
}, |
|
{ |
|
"epoch": 0.97, |
|
"eval_loss": 1.8531874418258667, |
|
"eval_runtime": 21.541, |
|
"eval_samples_per_second": 46.423, |
|
"eval_steps_per_second": 1.486, |
|
"step": 6020 |
|
}, |
|
{ |
|
"epoch": 0.97, |
|
"learning_rate": 1e-06, |
|
"loss": 1.9003, |
|
"step": 6030 |
|
}, |
|
{ |
|
"epoch": 0.97, |
|
"eval_loss": 1.8539999723434448, |
|
"eval_runtime": 21.5656, |
|
"eval_samples_per_second": 46.37, |
|
"eval_steps_per_second": 1.484, |
|
"step": 6030 |
|
}, |
|
{ |
|
"epoch": 0.98, |
|
"learning_rate": 1e-06, |
|
"loss": 1.8741, |
|
"step": 6040 |
|
}, |
|
{ |
|
"epoch": 0.98, |
|
"eval_loss": 1.854812502861023, |
|
"eval_runtime": 21.5498, |
|
"eval_samples_per_second": 46.404, |
|
"eval_steps_per_second": 1.485, |
|
"step": 6040 |
|
}, |
|
{ |
|
"epoch": 0.98, |
|
"learning_rate": 1e-06, |
|
"loss": 1.9455, |
|
"step": 6050 |
|
}, |
|
{ |
|
"epoch": 0.98, |
|
"eval_loss": 1.854312539100647, |
|
"eval_runtime": 21.548, |
|
"eval_samples_per_second": 46.408, |
|
"eval_steps_per_second": 1.485, |
|
"step": 6050 |
|
}, |
|
{ |
|
"epoch": 0.98, |
|
"learning_rate": 1e-06, |
|
"loss": 1.8652, |
|
"step": 6060 |
|
}, |
|
{ |
|
"epoch": 0.98, |
|
"eval_loss": 1.8546874523162842, |
|
"eval_runtime": 21.6256, |
|
"eval_samples_per_second": 46.241, |
|
"eval_steps_per_second": 1.48, |
|
"step": 6060 |
|
}, |
|
{ |
|
"epoch": 0.98, |
|
"learning_rate": 1e-06, |
|
"loss": 1.8865, |
|
"step": 6070 |
|
}, |
|
{ |
|
"epoch": 0.98, |
|
"eval_loss": 1.8538124561309814, |
|
"eval_runtime": 21.5739, |
|
"eval_samples_per_second": 46.352, |
|
"eval_steps_per_second": 1.483, |
|
"step": 6070 |
|
}, |
|
{ |
|
"epoch": 0.98, |
|
"learning_rate": 1e-06, |
|
"loss": 1.8254, |
|
"step": 6080 |
|
}, |
|
{ |
|
"epoch": 0.98, |
|
"eval_loss": 1.8561874628067017, |
|
"eval_runtime": 21.5774, |
|
"eval_samples_per_second": 46.345, |
|
"eval_steps_per_second": 1.483, |
|
"step": 6080 |
|
}, |
|
{ |
|
"epoch": 0.98, |
|
"learning_rate": 1e-06, |
|
"loss": 1.931, |
|
"step": 6090 |
|
}, |
|
{ |
|
"epoch": 0.98, |
|
"eval_loss": 1.8570624589920044, |
|
"eval_runtime": 21.5741, |
|
"eval_samples_per_second": 46.352, |
|
"eval_steps_per_second": 1.483, |
|
"step": 6090 |
|
}, |
|
{ |
|
"epoch": 0.99, |
|
"learning_rate": 1e-06, |
|
"loss": 1.9049, |
|
"step": 6100 |
|
}, |
|
{ |
|
"epoch": 0.99, |
|
"eval_loss": 1.8558125495910645, |
|
"eval_runtime": 21.5808, |
|
"eval_samples_per_second": 46.337, |
|
"eval_steps_per_second": 1.483, |
|
"step": 6100 |
|
}, |
|
{ |
|
"epoch": 0.99, |
|
"learning_rate": 1e-06, |
|
"loss": 1.9455, |
|
"step": 6110 |
|
}, |
|
{ |
|
"epoch": 0.99, |
|
"eval_loss": 1.8558125495910645, |
|
"eval_runtime": 21.5517, |
|
"eval_samples_per_second": 46.4, |
|
"eval_steps_per_second": 1.485, |
|
"step": 6110 |
|
}, |
|
{ |
|
"epoch": 0.99, |
|
"learning_rate": 1e-06, |
|
"loss": 1.8257, |
|
"step": 6120 |
|
}, |
|
{ |
|
"epoch": 0.99, |
|
"eval_loss": 1.8536250591278076, |
|
"eval_runtime": 21.5808, |
|
"eval_samples_per_second": 46.337, |
|
"eval_steps_per_second": 1.483, |
|
"step": 6120 |
|
}, |
|
{ |
|
"epoch": 0.99, |
|
"learning_rate": 1e-06, |
|
"loss": 1.8485, |
|
"step": 6130 |
|
}, |
|
{ |
|
"epoch": 0.99, |
|
"eval_loss": 1.8534375429153442, |
|
"eval_runtime": 21.5622, |
|
"eval_samples_per_second": 46.377, |
|
"eval_steps_per_second": 1.484, |
|
"step": 6130 |
|
}, |
|
{ |
|
"epoch": 0.99, |
|
"learning_rate": 1e-06, |
|
"loss": 1.8769, |
|
"step": 6140 |
|
}, |
|
{ |
|
"epoch": 0.99, |
|
"eval_loss": 1.8535624742507935, |
|
"eval_runtime": 21.5883, |
|
"eval_samples_per_second": 46.321, |
|
"eval_steps_per_second": 1.482, |
|
"step": 6140 |
|
}, |
|
{ |
|
"epoch": 0.99, |
|
"learning_rate": 1e-06, |
|
"loss": 1.9807, |
|
"step": 6150 |
|
}, |
|
{ |
|
"epoch": 0.99, |
|
"eval_loss": 1.8546249866485596, |
|
"eval_runtime": 21.5658, |
|
"eval_samples_per_second": 46.37, |
|
"eval_steps_per_second": 1.484, |
|
"step": 6150 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"learning_rate": 1e-06, |
|
"loss": 1.9611, |
|
"step": 6160 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"eval_loss": 1.8552500009536743, |
|
"eval_runtime": 21.5706, |
|
"eval_samples_per_second": 46.359, |
|
"eval_steps_per_second": 1.484, |
|
"step": 6160 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"learning_rate": 1e-06, |
|
"loss": 1.8893, |
|
"step": 6170 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"eval_loss": 1.854312539100647, |
|
"eval_runtime": 21.5674, |
|
"eval_samples_per_second": 46.366, |
|
"eval_steps_per_second": 1.484, |
|
"step": 6170 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"learning_rate": 1e-06, |
|
"loss": 1.9574, |
|
"step": 6180 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"eval_loss": 1.854562520980835, |
|
"eval_runtime": 21.5527, |
|
"eval_samples_per_second": 46.398, |
|
"eval_steps_per_second": 1.485, |
|
"step": 6180 |
|
} |
|
], |
|
"max_steps": 6187, |
|
"num_train_epochs": 1, |
|
"total_flos": 1.777315501421101e+18, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|