{
  "best_metric": null,
  "best_model_checkpoint": null,
  "epoch": 37.51465416178195,
  "global_step": 32000,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {"epoch": 0.12, "learning_rate": 5e-06, "loss": 0.8323, "step": 100},
    {"epoch": 0.23, "learning_rate": 5e-06, "loss": 0.793, "step": 200},
    {"epoch": 0.35, "learning_rate": 5e-06, "loss": 0.7508, "step": 300},
    {"epoch": 0.47, "learning_rate": 5e-06, "loss": 0.7777, "step": 400},
    {"epoch": 0.59, "learning_rate": 5e-06, "loss": 0.7741, "step": 500},
    {"epoch": 0.7, "learning_rate": 5e-06, "loss": 0.7435, "step": 600},
    {"epoch": 0.82, "learning_rate": 5e-06, "loss": 0.7681, "step": 700},
    {"epoch": 0.94, "learning_rate": 5e-06, "loss": 0.7505, "step": 800},
    {"epoch": 1.06, "learning_rate": 5e-06, "loss": 0.7285, "step": 900},
    {"epoch": 1.17, "learning_rate": 5e-06, "loss": 0.7243, "step": 1000},
    {"epoch": 1.29, "learning_rate": 5e-06, "loss": 0.7562, "step": 1100},
    {"epoch": 1.41, "learning_rate": 5e-06, "loss": 0.743, "step": 1200},
    {"epoch": 1.52, "learning_rate": 5e-06, "loss": 0.7086, "step": 1300},
    {"epoch": 1.64, "learning_rate": 5e-06, "loss": 0.7177, "step": 1400},
    {"epoch": 1.76, "learning_rate": 5e-06, "loss": 0.727, "step": 1500},
    {"epoch": 1.76, "eval_loss": 0.6887246966362, "eval_runtime": 70.0491, "eval_samples_per_second": 3.041, "eval_steps_per_second": 3.041, "step": 1500},
    {"epoch": 1.88, "learning_rate": 5e-06, "loss": 0.7781, "step": 1600},
    {"epoch": 1.99, "learning_rate": 5e-06, "loss": 0.7509, "step": 1700},
    {"epoch": 2.11, "learning_rate": 5e-06, "loss": 0.7057, "step": 1800},
    {"epoch": 2.23, "learning_rate": 5e-06, "loss": 0.7292, "step": 1900},
    {"epoch": 2.34, "learning_rate": 5e-06, "loss": 0.7037, "step": 2000},
    {"epoch": 2.46, "learning_rate": 5e-06, "loss": 0.7005, "step": 2100},
    {"epoch": 2.58, "learning_rate": 5e-06, "loss": 0.7501, "step": 2200},
    {"epoch": 2.7, "learning_rate": 5e-06, "loss": 0.7162, "step": 2300},
    {"epoch": 2.81, "learning_rate": 5e-06, "loss": 0.7428, "step": 2400},
    {"epoch": 2.93, "learning_rate": 5e-06, "loss": 0.7403, "step": 2500},
    {"epoch": 3.05, "learning_rate": 5e-06, "loss": 0.7286, "step": 2600},
    {"epoch": 3.17, "learning_rate": 5e-06, "loss": 0.72, "step": 2700},
    {"epoch": 3.28, "learning_rate": 5e-06, "loss": 0.6998, "step": 2800},
    {"epoch": 3.4, "learning_rate": 5e-06, "loss": 0.7515, "step": 2900},
    {"epoch": 3.52, "learning_rate": 5e-06, "loss": 0.7263, "step": 3000},
    {"epoch": 3.52, "eval_loss": 0.6762834191322327, "eval_runtime": 70.0592, "eval_samples_per_second": 3.04, "eval_steps_per_second": 3.04, "step": 3000},
    {"epoch": 3.63, "learning_rate": 5e-06, "loss": 0.6931, "step": 3100},
    {"epoch": 3.75, "learning_rate": 5e-06, "loss": 0.6907, "step": 3200},
    {"epoch": 3.87, "learning_rate": 5e-06, "loss": 0.6818, "step": 3300},
    {"epoch": 3.99, "learning_rate": 5e-06, "loss": 0.7417, "step": 3400},
    {"epoch": 4.1, "learning_rate": 5e-06, "loss": 0.6987, "step": 3500},
    {"epoch": 4.22, "learning_rate": 5e-06, "loss": 0.6854, "step": 3600},
    {"epoch": 4.34, "learning_rate": 5e-06, "loss": 0.7236, "step": 3700},
    {"epoch": 4.45, "learning_rate": 5e-06, "loss": 0.7045, "step": 3800},
    {"epoch": 4.57, "learning_rate": 5e-06, "loss": 0.699, "step": 3900},
    {"epoch": 4.69, "learning_rate": 5e-06, "loss": 0.7179, "step": 4000},
    {"epoch": 4.81, "learning_rate": 5e-06, "loss": 0.7066, "step": 4100},
    {"epoch": 4.92, "learning_rate": 5e-06, "loss": 0.713, "step": 4200},
    {"epoch": 5.04, "learning_rate": 5e-06, "loss": 0.6749, "step": 4300},
    {"epoch": 5.16, "learning_rate": 5e-06, "loss": 0.6882, "step": 4400},
    {"epoch": 5.28, "learning_rate": 5e-06, "loss": 0.6809, "step": 4500},
    {"epoch": 5.28, "eval_loss": 0.6606600284576416, "eval_runtime": 69.4571, "eval_samples_per_second": 3.067, "eval_steps_per_second": 3.067, "step": 4500},
    {"epoch": 5.39, "learning_rate": 5e-06, "loss": 0.7433, "step": 4600},
    {"epoch": 5.51, "learning_rate": 5e-06, "loss": 0.6881, "step": 4700},
    {"epoch": 5.63, "learning_rate": 5e-06, "loss": 0.7343, "step": 4800},
    {"epoch": 5.74, "learning_rate": 5e-06, "loss": 0.6675, "step": 4900},
    {"epoch": 5.86, "learning_rate": 5e-06, "loss": 0.7187, "step": 5000},
    {"epoch": 5.98, "learning_rate": 5e-06, "loss": 0.6837, "step": 5100},
    {"epoch": 6.1, "learning_rate": 5e-06, "loss": 0.6825, "step": 5200},
    {"epoch": 6.21, "learning_rate": 5e-06, "loss": 0.6976, "step": 5300},
    {"epoch": 6.33, "learning_rate": 5e-06, "loss": 0.7055, "step": 5400},
    {"epoch": 6.45, "learning_rate": 5e-06, "loss": 0.6584, "step": 5500},
    {"epoch": 6.57, "learning_rate": 5e-06, "loss": 0.6819, "step": 5600},
    {"epoch": 6.68, "learning_rate": 5e-06, "loss": 0.6652, "step": 5700},
    {"epoch": 6.8, "learning_rate": 5e-06, "loss": 0.6728, "step": 5800},
    {"epoch": 6.92, "learning_rate": 5e-06, "loss": 0.6916, "step": 5900},
    {"epoch": 7.03, "learning_rate": 5e-06, "loss": 0.657, "step": 6000},
    {"epoch": 7.03, "eval_loss": 0.6467106938362122, "eval_runtime": 69.8327, "eval_samples_per_second": 3.05, "eval_steps_per_second": 3.05, "step": 6000},
    {"epoch": 7.15, "learning_rate": 5e-06, "loss": 0.7331, "step": 6100},
    {"epoch": 7.27, "learning_rate": 5e-06, "loss": 0.6598, "step": 6200},
    {"epoch": 7.39, "learning_rate": 5e-06, "loss": 0.6602, "step": 6300},
    {"epoch": 7.5, "learning_rate": 5e-06, "loss": 0.6819, "step": 6400},
    {"epoch": 7.62, "learning_rate": 5e-06, "loss": 0.6764, "step": 6500},
    {"epoch": 7.74, "learning_rate": 5e-06, "loss": 0.6674, "step": 6600},
    {"epoch": 7.85, "learning_rate": 5e-06, "loss": 0.6848, "step": 6700},
    {"epoch": 7.97, "learning_rate": 5e-06, "loss": 0.6446, "step": 6800},
    {"epoch": 8.09, "learning_rate": 5e-06, "loss": 0.6601, "step": 6900},
    {"epoch": 8.21, "learning_rate": 5e-06, "loss": 0.6649, "step": 7000},
    {"epoch": 8.32, "learning_rate": 5e-06, "loss": 0.657, "step": 7100},
    {"epoch": 8.44, "learning_rate": 5e-06, "loss": 0.6326, "step": 7200},
    {"epoch": 8.56, "learning_rate": 5e-06, "loss": 0.6541, "step": 7300},
    {"epoch": 8.68, "learning_rate": 5e-06, "loss": 0.6579, "step": 7400},
    {"epoch": 8.79, "learning_rate": 5e-06, "loss": 0.6784, "step": 7500},
    {"epoch": 8.79, "eval_loss": 0.6341073513031006, "eval_runtime": 70.1746, "eval_samples_per_second": 3.035, "eval_steps_per_second": 3.035, "step": 7500},
    {"epoch": 8.91, "learning_rate": 5e-06, "loss": 0.6889, "step": 7600},
    {"epoch": 9.03, "learning_rate": 5e-06, "loss": 0.6752, "step": 7700},
    {"epoch": 9.14, "learning_rate": 5e-06, "loss": 0.6654, "step": 7800},
    {"epoch": 9.26, "learning_rate": 5e-06, "loss": 0.6516, "step": 7900},
    {"epoch": 9.38, "learning_rate": 5e-06, "loss": 0.6847, "step": 8000},
    {"epoch": 9.5, "learning_rate": 5e-06, "loss": 0.6396, "step": 8100},
    {"epoch": 9.61, "learning_rate": 5e-06, "loss": 0.6484, "step": 8200},
    {"epoch": 9.73, "learning_rate": 5e-06, "loss": 0.6396, "step": 8300},
    {"epoch": 9.85, "learning_rate": 5e-06, "loss": 0.6951, "step": 8400},
    {"epoch": 9.96, "learning_rate": 5e-06, "loss": 0.641, "step": 8500},
    {"epoch": 10.08, "learning_rate": 5e-06, "loss": 0.6379, "step": 8600},
    {"epoch": 10.2, "learning_rate": 5e-06, "loss": 0.6264, "step": 8700},
    {"epoch": 10.32, "learning_rate": 5e-06, "loss": 0.6364, "step": 8800},
    {"epoch": 10.43, "learning_rate": 5e-06, "loss": 0.676, "step": 8900},
    {"epoch": 10.55, "learning_rate": 5e-06, "loss": 0.6756, "step": 9000},
    {"epoch": 10.55, "eval_loss": 0.6359875202178955, "eval_runtime": 70.4519, "eval_samples_per_second": 3.023, "eval_steps_per_second": 3.023, "step": 9000},
    {"epoch": 10.67, "learning_rate": 5e-06, "loss": 0.6641, "step": 9100},
    {"epoch": 10.79, "learning_rate": 5e-06, "loss": 0.6126, "step": 9200},
    {"epoch": 10.9, "learning_rate": 5e-06, "loss": 0.6538, "step": 9300},
    {"epoch": 11.02, "learning_rate": 5e-06, "loss": 0.641, "step": 9400},
    {"epoch": 11.14, "learning_rate": 5e-06, "loss": 0.6501, "step": 9500},
    {"epoch": 11.25, "learning_rate": 5e-06, "loss": 0.647, "step": 9600},
    {"epoch": 11.37, "learning_rate": 5e-06, "loss": 0.6463, "step": 9700},
    {"epoch": 11.49, "learning_rate": 5e-06, "loss": 0.6507, "step": 9800},
    {"epoch": 11.61, "learning_rate": 5e-06, "loss": 0.6525, "step": 9900},
    {"epoch": 11.72, "learning_rate": 5e-06, "loss": 0.6194, "step": 10000},
    {"epoch": 11.84, "learning_rate": 5e-06, "loss": 0.6338, "step": 10100},
    {"epoch": 11.96, "learning_rate": 5e-06, "loss": 0.6492, "step": 10200},
    {"epoch": 12.08, "learning_rate": 5e-06, "loss": 0.6531, "step": 10300},
    {"epoch": 12.19, "learning_rate": 5e-06, "loss": 0.6073, "step": 10400},
    {"epoch": 12.31, "learning_rate": 5e-06, "loss": 0.6307, "step": 10500},
    {"epoch": 12.31, "eval_loss": 0.6309817433357239, "eval_runtime": 70.2077, "eval_samples_per_second": 3.034, "eval_steps_per_second": 3.034, "step": 10500},
    {"epoch": 12.43, "learning_rate": 5e-06, "loss": 0.6608, "step": 10600},
    {"epoch": 12.54, "learning_rate": 5e-06, "loss": 0.6252, "step": 10700},
    {"epoch": 12.66, "learning_rate": 5e-06, "loss": 0.6258, "step": 10800},
    {"epoch": 12.78, "learning_rate": 5e-06, "loss": 0.6504, "step": 10900},
    {"epoch": 12.9, "learning_rate": 5e-06, "loss": 0.6281, "step": 11000},
    {"epoch": 13.01, "learning_rate": 5e-06, "loss": 0.6398, "step": 11100},
    {"epoch": 13.13, "learning_rate": 5e-06, "loss": 0.6318, "step": 11200},
    {"epoch": 13.25, "learning_rate": 5e-06, "loss": 0.6162, "step": 11300},
    {"epoch": 13.36, "learning_rate": 5e-06, "loss": 0.6101, "step": 11400},
    {"epoch": 13.48, "learning_rate": 5e-06, "loss": 0.6124, "step": 11500},
    {"epoch": 13.6, "learning_rate": 5e-06, "loss": 0.5994, "step": 11600},
    {"epoch": 13.72, "learning_rate": 5e-06, "loss": 0.6599, "step": 11700},
    {"epoch": 13.83, "learning_rate": 5e-06, "loss": 0.6192, "step": 11800},
    {"epoch": 13.95, "learning_rate": 5e-06, "loss": 0.6341, "step": 11900},
    {"epoch": 14.07, "learning_rate": 5e-06, "loss": 0.6155, "step": 12000},
    {"epoch": 14.07, "eval_loss": 0.63248211145401, "eval_runtime": 70.8191, "eval_samples_per_second": 3.008, "eval_steps_per_second": 3.008, "step": 12000},
    {"epoch": 14.19, "learning_rate": 5e-06, "loss": 0.6562, "step": 12100},
    {"epoch": 14.3, "learning_rate": 5e-06, "loss": 0.633, "step": 12200},
    {"epoch": 14.42, "learning_rate": 5e-06, "loss": 0.6169, "step": 12300},
    {"epoch": 14.54, "learning_rate": 5e-06, "loss": 0.6312, "step": 12400},
    {"epoch": 14.65, "learning_rate": 5e-06, "loss": 0.6401, "step": 12500},
    {"epoch": 14.77, "learning_rate": 5e-06, "loss": 0.6365, "step": 12600},
    {"epoch": 14.89, "learning_rate": 5e-06, "loss": 0.6286, "step": 12700},
    {"epoch": 15.01, "learning_rate": 5e-06, "loss": 0.5877, "step": 12800},
    {"epoch": 15.12, "learning_rate": 5e-06, "loss": 0.6334, "step": 12900},
    {"epoch": 15.24, "learning_rate": 5e-06, "loss": 0.5785, "step": 13000},
    {"epoch": 15.36, "learning_rate": 5e-06, "loss": 0.6155, "step": 13100},
    {"epoch": 15.47, "learning_rate": 5e-06, "loss": 0.6404, "step": 13200},
    {"epoch": 15.59, "learning_rate": 5e-06, "loss": 0.6302, "step": 13300},
    {"epoch": 15.71, "learning_rate": 5e-06, "loss": 0.6154, "step": 13400},
    {"epoch": 15.83, "learning_rate": 5e-06, "loss": 0.6119, "step": 13500},
    {"epoch": 15.83, "eval_loss": 0.6228322386741638, "eval_runtime": 70.1345, "eval_samples_per_second": 3.037, "eval_steps_per_second": 3.037, "step": 13500},
    {"epoch": 15.94, "learning_rate": 5e-06, "loss": 0.6193, "step": 13600},
    {"epoch": 16.06, "learning_rate": 5e-06, "loss": 0.6161, "step": 13700},
    {"epoch": 16.18, "learning_rate": 5e-06, "loss": 0.5785, "step": 13800},
    {"epoch": 16.3, "learning_rate": 5e-06, "loss": 0.6043, "step": 13900},
    {"epoch": 16.41, "learning_rate": 5e-06, "loss": 0.6205, "step": 14000},
    {"epoch": 16.53, "learning_rate": 5e-06, "loss": 0.6321, "step": 14100},
    {"epoch": 16.65, "learning_rate": 5e-06, "loss": 0.5996, "step": 14200},
    {"epoch": 16.76, "learning_rate": 5e-06, "loss": 0.6232, "step": 14300},
    {"epoch": 16.88, "learning_rate": 5e-06, "loss": 0.6148, "step": 14400},
    {"epoch": 17.0, "learning_rate": 5e-06, "loss": 0.602, "step": 14500},
    {"epoch": 17.12, "learning_rate": 5e-06, "loss": 0.5716, "step": 14600},
    {"epoch": 17.23, "learning_rate": 5e-06, "loss": 0.629, "step": 14700},
    {"epoch": 17.35, "learning_rate": 5e-06, "loss": 0.6134, "step": 14800},
    {"epoch": 17.47, "learning_rate": 5e-06, "loss": 0.6023, "step": 14900},
    {"epoch": 17.58, "learning_rate": 5e-06, "loss": 0.5943, "step": 15000},
    {"epoch": 17.58, "eval_loss": 0.623548686504364, "eval_runtime": 70.4978, "eval_samples_per_second": 3.021, "eval_steps_per_second": 3.021, "step": 15000},
    {"epoch": 17.7, "learning_rate": 5e-06, "loss": 0.6092, "step": 15100},
    {"epoch": 17.82, "learning_rate": 5e-06, "loss": 0.6172, "step": 15200},
    {"epoch": 17.94, "learning_rate": 5e-06, "loss": 0.6247, "step": 15300},
    {"epoch": 18.05, "learning_rate": 5e-06, "loss": 0.6043, "step": 15400},
    {"epoch": 18.17, "learning_rate": 5e-06, "loss": 0.6186, "step": 15500},
    {"epoch": 18.29, "learning_rate": 5e-06, "loss": 0.5877, "step": 15600},
    {"epoch": 18.41, "learning_rate": 5e-06, "loss": 0.5993, "step": 15700},
    {"epoch": 18.52, "learning_rate": 5e-06, "loss": 0.5949, "step": 15800},
    {"epoch": 18.64, "learning_rate": 5e-06, "loss": 0.5775, "step": 15900},
    {"epoch": 18.76, "learning_rate": 5e-06, "loss": 0.6147, "step": 16000},
    {"epoch": 18.87, "learning_rate": 5e-06, "loss": 0.5973, "step": 16100},
    {"epoch": 18.99, "learning_rate": 5e-06, "loss": 0.6103, "step": 16200},
    {"epoch": 19.11, "learning_rate": 5e-06, "loss": 0.6024, "step": 16300},
    {"epoch": 19.23, "learning_rate": 5e-06, "loss": 0.5729, "step": 16400},
    {"epoch": 19.34, "learning_rate": 5e-06, "loss": 0.6012, "step": 16500},
    {"epoch": 19.34, "eval_loss": 0.6155942678451538, "eval_runtime": 70.5596, "eval_samples_per_second": 3.019, "eval_steps_per_second": 3.019, "step": 16500},
    {"epoch": 19.46, "learning_rate": 5e-06, "loss": 0.6123, "step": 16600},
    {"epoch": 19.58, "learning_rate": 5e-06, "loss": 0.5937, "step": 16700},
    {"epoch": 19.7, "learning_rate": 5e-06, "loss": 0.5824, "step": 16800},
    {"epoch": 19.81, "learning_rate": 5e-06, "loss": 0.6433, "step": 16900},
    {"epoch": 19.93, "learning_rate": 5e-06, "loss": 0.5799, "step": 17000},
    {"epoch": 20.05, "learning_rate": 5e-06, "loss": 0.593, "step": 17100},
    {"epoch": 20.16, "learning_rate": 5e-06, "loss": 0.5909, "step": 17200},
    {"epoch": 20.28, "learning_rate": 5e-06, "loss": 0.5918, "step": 17300},
    {"epoch": 20.4, "learning_rate": 5e-06, "loss": 0.5908, "step": 17400},
    {"epoch": 20.52, "learning_rate": 5e-06, "loss": 0.5932, "step": 17500},
    {"epoch": 20.63, "learning_rate": 5e-06, "loss": 0.6085, "step": 17600},
    {"epoch": 20.75, "learning_rate": 5e-06, "loss": 0.5737, "step": 17700},
    {"epoch": 20.87, "learning_rate": 5e-06, "loss": 0.5926, "step": 17800},
    {"epoch": 20.98, "learning_rate": 5e-06, "loss": 0.606, "step": 17900},
    {"epoch": 21.1, "learning_rate": 5e-06, "loss": 0.5834, "step": 18000},
    {"epoch": 21.1, "eval_loss": 0.6064698100090027, "eval_runtime": 70.551, "eval_samples_per_second": 3.019, "eval_steps_per_second": 3.019, "step": 18000},
    {"epoch": 21.22, "learning_rate": 5e-06, "loss": 0.57, "step": 18100},
    {"epoch": 21.34, "learning_rate": 5e-06, "loss": 0.5878, "step": 18200},
    {"epoch": 21.45, "learning_rate": 5e-06, "loss": 0.5623, "step": 18300},
    {"epoch": 21.57, "learning_rate": 5e-06, "loss": 0.5978, "step": 18400},
    {"epoch": 21.69, "learning_rate": 5e-06, "loss": 0.594, "step": 18500},
    {"epoch": 21.81, "learning_rate": 5e-06, "loss": 0.6013, "step": 18600},
    {"epoch": 21.92, "learning_rate": 5e-06, "loss": 0.5576, "step": 18700},
    {"epoch": 22.04, "learning_rate": 5e-06, "loss": 0.5794, "step": 18800},
    {"epoch": 22.16, "learning_rate": 5e-06, "loss": 0.5863, "step": 18900},
    {"epoch": 22.27, "learning_rate": 5e-06, "loss": 0.5956, "step": 19000},
    {"epoch": 22.39, "learning_rate": 5e-06, "loss": 0.5849, "step": 19100},
    {"epoch": 22.51, "learning_rate": 5e-06, "loss": 0.5705, "step": 19200},
    {"epoch": 22.63, "learning_rate": 5e-06, "loss": 0.5945, "step": 19300},
    {"epoch": 22.74, "learning_rate": 5e-06, "loss": 0.5673, "step": 19400},
    {"epoch": 22.86, "learning_rate": 5e-06, "loss": 0.5942, "step": 19500},
    {"epoch": 22.86, "eval_loss": 0.6019883155822754, "eval_runtime": 70.6262, "eval_samples_per_second": 3.016, "eval_steps_per_second": 3.016, "step": 19500},
    {"epoch": 22.98, "learning_rate": 5e-06, "loss": 0.556, "step": 19600},
    {"epoch": 23.09, "learning_rate": 5e-06, "loss": 0.5919, "step": 19700},
    {"epoch": 23.21, "learning_rate": 5e-06, "loss": 0.554, "step": 19800},
    {"epoch": 23.33, "learning_rate": 5e-06, "loss": 0.5708, "step": 19900},
    {"epoch": 23.45, "learning_rate": 5e-06, "loss": 0.5555, "step": 20000},
    {"epoch": 23.56, "learning_rate": 5e-06, "loss": 0.6004, "step": 20100},
    {"epoch": 23.68, "learning_rate": 5e-06, "loss": 0.5894, "step": 20200},
    {"epoch": 23.8, "learning_rate": 5e-06, "loss": 0.5718, "step": 20300},
    {"epoch": 23.92, "learning_rate": 5e-06, "loss": 0.5744, "step": 20400},
    {"epoch": 24.03, "learning_rate": 5e-06, "loss": 0.5602, "step": 20500},
    {"epoch": 24.15, "learning_rate": 5e-06, "loss": 0.5656, "step": 20600},
    {"epoch": 24.27, "learning_rate": 5e-06, "loss": 0.5657, "step": 20700},
    {"epoch": 24.38, "learning_rate": 5e-06, "loss": 0.5553, "step": 20800},
    {"epoch": 24.5, "learning_rate": 5e-06, "loss": 0.5962, "step": 20900},
    {"epoch": 24.62, "learning_rate": 5e-06, "loss": 0.5982, "step": 21000},
    {"epoch": 24.62, "eval_loss": 0.5987153053283691, "eval_runtime": 70.2465, "eval_samples_per_second": 3.032, "eval_steps_per_second": 3.032, "step": 21000},
    {"epoch": 24.74, "learning_rate": 5e-06, "loss": 0.5946, "step": 21100},
    {"epoch": 24.85, "learning_rate": 5e-06, "loss": 0.5473, "step": 21200},
    {"epoch": 24.97, "learning_rate": 5e-06, "loss": 0.5605, "step": 21300},
    {"epoch": 25.09, "learning_rate": 5e-06, "loss": 0.5953, "step": 21400},
    {"epoch": 25.21, "learning_rate": 5e-06, "loss": 0.5697, "step": 21500},
    {"epoch": 25.32, "learning_rate": 5e-06, "loss": 0.5627, "step": 21600},
    {"epoch": 25.44, "learning_rate": 5e-06, "loss": 0.567, "step": 21700},
    {"epoch": 25.56, "learning_rate": 5e-06, "loss": 0.5394, "step": 21800},
    {"epoch": 25.67, "learning_rate": 5e-06, "loss": 0.5461, "step": 21900},
    {"epoch": 25.79, "learning_rate": 5e-06, "loss": 0.5615, "step": 22000},
    {"epoch": 25.91, "learning_rate": 5e-06, "loss": 0.5547, "step": 22100},
    {"epoch": 26.03, "learning_rate": 5e-06, "loss": 0.5534, "step": 22200},
    {"epoch": 26.14, "learning_rate": 5e-06, "loss": 0.5494, "step": 22300},
    {"epoch": 26.26, "learning_rate": 5e-06, "loss": 0.569, "step": 22400},
    {"epoch": 26.38, "learning_rate": 5e-06, "loss": 0.5352, "step": 22500},
    {"epoch": 26.38, "eval_loss": 0.6042129397392273, "eval_runtime": 69.7858, "eval_samples_per_second": 3.052, "eval_steps_per_second": 3.052, "step": 22500},
    {"epoch": 26.49, "learning_rate": 5e-06, "loss": 0.5754, "step": 22600},
    {"epoch": 26.61, "learning_rate": 5e-06, "loss": 0.5443, "step": 22700},
    {"epoch": 26.73, "learning_rate": 5e-06, "loss": 0.5765, "step": 22800},
    {"epoch": 26.85, "learning_rate": 5e-06, "loss": 0.5494, "step": 22900},
    {"epoch": 26.96, "learning_rate": 5e-06, "loss": 0.5598, "step": 23000},
    {"epoch": 27.08, "learning_rate": 5e-06, "loss": 0.5634, "step": 23100},
    {"epoch": 27.2, "learning_rate": 5e-06, "loss": 0.534, "step": 23200},
    {"epoch": 27.32, "learning_rate": 5e-06, "loss": 0.5626, "step": 23300},
    {"epoch": 27.43, "learning_rate": 5e-06, "loss": 0.5681, "step": 23400},
    {"epoch": 27.55, "learning_rate": 5e-06, "loss": 0.5735, "step": 23500},
    {"epoch": 27.67, "learning_rate": 5e-06, "loss": 0.5464, "step": 23600},
    {"epoch": 27.78, "learning_rate": 5e-06, "loss": 0.529, "step": 23700},
    {"epoch": 27.9, "learning_rate": 5e-06, "loss": 0.548, "step": 23800},
    {"epoch": 28.02, "learning_rate": 5e-06, "loss": 0.5699, "step": 23900},
    {"epoch": 28.14, "learning_rate": 5e-06, "loss": 0.5746, "step": 24000},
    {"epoch": 28.14, "eval_loss": 0.5994372367858887, "eval_runtime": 70.1931, "eval_samples_per_second": 3.034, "eval_steps_per_second": 3.034, "step": 24000},
    {"epoch": 28.25, "learning_rate": 5e-06, "loss": 0.5537, "step": 24100},
    {"epoch": 28.37, "learning_rate": 5e-06, "loss": 0.5479, "step": 24200},
    {"epoch": 28.49, "learning_rate": 5e-06, "loss": 0.5643, "step": 24300},
    {"epoch": 28.6, "learning_rate": 5e-06, "loss": 0.5273, "step": 24400},
    {"epoch": 28.72, "learning_rate": 5e-06, "loss": 0.544, "step": 24500},
    {"epoch": 28.84, "learning_rate": 5e-06, "loss": 0.5172, "step": 24600},
    {"epoch": 28.96, "learning_rate": 5e-06, "loss": 0.5658, "step": 24700},
    {"epoch": 29.07, "learning_rate": 5e-06, "loss": 0.5343, "step": 24800},
    {"epoch": 29.19, "learning_rate": 5e-06, "loss": 0.5307, "step": 24900},
    {"epoch": 29.31, "learning_rate": 5e-06, "loss": 0.5386, "step": 25000},
    {"epoch": 29.43, "learning_rate": 5e-06, "loss": 0.5553, "step": 25100},
    {"epoch": 29.54, "learning_rate": 5e-06, "loss": 0.5309, "step": 25200},
    {"epoch": 29.66, "learning_rate": 5e-06, "loss": 0.5323, "step": 25300},
    {"epoch": 29.78, "learning_rate": 5e-06, "loss": 0.5477, "step": 25400},
    {"epoch": 29.89, "learning_rate": 5e-06, "loss": 0.5618, "step": 25500},
    {"epoch": 29.89, "eval_loss": 0.5992656350135803, "eval_runtime": 70.8526, "eval_samples_per_second": 3.006, "eval_steps_per_second": 3.006, "step": 25500},
    {"epoch": 30.01, "learning_rate": 5e-06, "loss": 0.5368, "step": 25600},
    {"epoch": 30.13, "learning_rate": 5e-06, "loss": 0.55, "step": 25700},
    {"epoch": 30.25, "learning_rate": 5e-06, "loss": 0.5138, "step": 25800},
    {"epoch": 30.36, "learning_rate": 5e-06, "loss": 0.5266, "step": 25900},
    {"epoch": 30.48, "learning_rate": 5e-06, "loss": 0.5539, "step": 26000},
    {"epoch": 30.6, "learning_rate": 5e-06, "loss": 0.536, "step": 26100},
    {"epoch": 30.72, "learning_rate": 5e-06, "loss": 0.5427, "step": 26200},
    {"epoch": 30.83, "learning_rate": 5e-06, "loss": 0.5496, "step": 26300},
    {"epoch": 30.95, "learning_rate": 5e-06, "loss": 0.5127, "step": 26400},
    {"epoch": 31.07, "learning_rate": 5e-06, "loss": 0.5569, "step": 26500},
    {"epoch": 31.18, "learning_rate": 5e-06, "loss": 0.5196, "step": 26600},
    {"epoch": 31.3, "learning_rate": 5e-06, "loss": 0.5268, "step": 26700},
    {"epoch": 31.42, "learning_rate": 5e-06, "loss": 0.5419, "step": 26800},
    {"epoch": 31.54, "learning_rate": 5e-06, "loss": 0.5087, "step": 26900},
    {"epoch": 31.65, "learning_rate": 5e-06, "loss": 0.5254, "step": 27000},
    {"epoch": 31.65, "eval_loss": 0.5909192562103271, "eval_runtime": 70.5089, "eval_samples_per_second": 3.021, "eval_steps_per_second": 3.021, "step": 27000},
    {"epoch": 31.77, "learning_rate": 5e-06, "loss": 0.5346, "step": 27100},
    {"epoch": 31.89, "learning_rate": 5e-06, "loss": 0.5279, "step": 27200},
    {"epoch": 32.0, "learning_rate": 5e-06, "loss": 0.5711, "step": 27300},
    {"epoch": 32.12, "learning_rate": 5e-06, "loss": 0.5079, "step": 27400},
    {"epoch": 32.24, "learning_rate": 5e-06, "loss": 0.5303, "step": 27500},
    {"epoch": 32.36, "learning_rate": 5e-06, "loss": 0.5347, "step": 27600},
    {"epoch": 32.47, "learning_rate": 5e-06, "loss": 0.4936, "step": 27700},
    {"epoch": 32.59, "learning_rate": 5e-06, "loss": 0.5303, "step": 27800},
    {"epoch": 32.71, "learning_rate": 5e-06, "loss": 0.5543, "step": 27900},
    {"epoch": 32.83, "learning_rate": 5e-06, "loss": 0.5266, "step": 28000},
    {"epoch": 32.94, "learning_rate": 5e-06, "loss": 0.5258, "step": 28100},
    {"epoch": 33.06, "learning_rate": 5e-06, "loss": 0.5559, "step": 28200},
    {"epoch": 33.18, "learning_rate": 5e-06, "loss": 0.5096, "step": 28300},
    {"epoch": 33.29, "learning_rate": 5e-06, "loss": 0.5427, "step": 28400},
    {"epoch": 33.41, "learning_rate": 5e-06, "loss": 0.5336, "step": 28500},
    {"epoch": 33.41, "eval_loss": 0.587517261505127, "eval_runtime": 70.6475, "eval_samples_per_second": 3.015, "eval_steps_per_second": 3.015, "step": 28500},
    {"epoch": 33.53, "learning_rate": 5e-06, "loss": 0.5419, "step": 28600},
    {"epoch": 33.65, "learning_rate": 5e-06, "loss": 0.5235, "step": 28700},
    {"epoch": 33.76, "learning_rate": 5e-06, "loss": 0.5266, "step": 28800},
    {"epoch": 33.88, "learning_rate": 5e-06, "loss": 0.5308, "step": 28900},
    {"epoch": 34.0, "learning_rate": 5e-06, "loss": 0.5203, "step": 29000},
    {"epoch": 34.11, "learning_rate": 5e-06, "loss": 0.4988, "step": 29100},
    {"epoch": 34.23, "learning_rate": 5e-06, "loss": 0.502, "step": 29200},
    {"epoch": 34.35, "learning_rate": 5e-06, "loss": 0.5111, "step": 29300},
    {"epoch": 34.47, "learning_rate": 5e-06, "loss": 0.539, "step": 29400},
    {"epoch": 34.58, "learning_rate": 5e-06, "loss": 0.5086, "step": 29500},
    {"epoch": 34.7, "learning_rate": 5e-06, "loss": 0.5285, "step": 29600},
    {"epoch": 34.82, "learning_rate": 5e-06, "loss": 0.5153, "step": 29700},
    {"epoch": 34.94, "learning_rate": 5e-06, "loss": 0.5366, "step": 29800},
    {"epoch": 35.05, "learning_rate": 5e-06, "loss": 0.5307, "step": 29900},
    {"epoch": 35.17, "learning_rate": 5e-06, "loss": 0.5677, "step": 30000},
    {"epoch": 35.17, "eval_loss": 0.591385543346405, "eval_runtime": 70.1376, "eval_samples_per_second": 3.037, "eval_steps_per_second": 3.037, "step": 30000},
    {"epoch": 35.29, "learning_rate": 5e-06, "loss": 0.5283, "step": 30100},
    {"epoch": 35.4, "learning_rate": 5e-06, "loss": 0.5281, "step": 30200},
    {"epoch": 35.52, "learning_rate": 5e-06, "loss": 0.5087, "step": 30300},
    {"epoch": 35.64, "learning_rate": 5e-06, "loss": 0.4965, "step": 30400},
    {"epoch": 35.76, "learning_rate": 5e-06, "loss": 0.5085, "step": 30500},
    {"epoch": 35.87, "learning_rate": 5e-06, "loss": 0.5159, "step": 30600},
    {"epoch": 35.99, "learning_rate": 5e-06, "loss": 0.5149, "step": 30700},
    {"epoch": 36.11, "learning_rate": 5e-06, "loss": 0.5281, "step": 30800},
    {"epoch": 36.23, "learning_rate": 5e-06, "loss": 0.511, "step": 30900},
    {"epoch": 36.34, "learning_rate": 5e-06, "loss": 0.5327, "step": 31000},
    {"epoch": 36.46, "learning_rate": 5e-06, "loss": 0.5267, "step": 31100},
    {"epoch": 36.58, "learning_rate": 5e-06, "loss": 0.5124, "step": 31200},
    {"epoch": 36.69, "learning_rate": 5e-06, "loss": 0.5069, "step": 31300},
    {"epoch": 36.81, "learning_rate": 5e-06, "loss": 0.4839, "step": 31400},
    {"epoch": 36.93, "learning_rate": 5e-06, "loss": 0.5009, "step": 31500},
    {"epoch": 36.93, "eval_loss": 0.6007654070854187, "eval_runtime": 70.1373, "eval_samples_per_second": 3.037, "eval_steps_per_second": 3.037, "step": 31500},
    {"epoch": 37.05, "learning_rate": 5e-06, "loss": 0.5169, "step": 31600},
    {"epoch": 37.16, "learning_rate": 5e-06, "loss": 0.5003, "step": 31700},
    {"epoch": 37.28, "learning_rate": 5e-06, "loss": 0.502, "step": 31800},
    {"epoch": 37.4, "learning_rate": 5e-06, "loss": 0.5149, "step": 31900},
    {"epoch": 37.51, "learning_rate": 5e-06, "loss": 0.4903, "step": 32000}
  ],
  "max_steps": 255900,
  "num_train_epochs": 300,
  "total_flos": 3.193778270424269e+19,
  "trial_name": null,
  "trial_params": null
}