|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 11.145165784341042, |
|
"global_step": 200000, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 0.0002, |
|
"loss": 0.5805, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 0.0002, |
|
"loss": 0.5732, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 0.0002, |
|
"loss": 0.5959, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 0.0002, |
|
"loss": 0.6017, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"learning_rate": 0.0002, |
|
"loss": 0.6004, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"learning_rate": 0.0002, |
|
"loss": 0.5835, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"learning_rate": 0.0002, |
|
"loss": 0.5751, |
|
"step": 700 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"learning_rate": 0.0002, |
|
"loss": 0.611, |
|
"step": 800 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"learning_rate": 0.0002, |
|
"loss": 0.6164, |
|
"step": 900 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"learning_rate": 0.0002, |
|
"loss": 0.5925, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"learning_rate": 0.0002, |
|
"loss": 0.6045, |
|
"step": 1100 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"learning_rate": 0.0002, |
|
"loss": 0.6015, |
|
"step": 1200 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"learning_rate": 0.0002, |
|
"loss": 0.594, |
|
"step": 1300 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"learning_rate": 0.0002, |
|
"loss": 0.594, |
|
"step": 1400 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"learning_rate": 0.0002, |
|
"loss": 0.5996, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 0.09, |
|
"learning_rate": 0.0002, |
|
"loss": 0.6044, |
|
"step": 1600 |
|
}, |
|
{ |
|
"epoch": 0.09, |
|
"learning_rate": 0.0002, |
|
"loss": 0.6066, |
|
"step": 1700 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"learning_rate": 0.0002, |
|
"loss": 0.5974, |
|
"step": 1800 |
|
}, |
|
{ |
|
"epoch": 0.11, |
|
"learning_rate": 0.0002, |
|
"loss": 0.5914, |
|
"step": 1900 |
|
}, |
|
{ |
|
"epoch": 0.11, |
|
"learning_rate": 0.0002, |
|
"loss": 0.5953, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 0.12, |
|
"learning_rate": 0.0002, |
|
"loss": 0.5899, |
|
"step": 2100 |
|
}, |
|
{ |
|
"epoch": 0.12, |
|
"learning_rate": 0.0002, |
|
"loss": 0.5944, |
|
"step": 2200 |
|
}, |
|
{ |
|
"epoch": 0.13, |
|
"learning_rate": 0.0002, |
|
"loss": 0.5942, |
|
"step": 2300 |
|
}, |
|
{ |
|
"epoch": 0.13, |
|
"learning_rate": 0.0002, |
|
"loss": 0.5967, |
|
"step": 2400 |
|
}, |
|
{ |
|
"epoch": 0.14, |
|
"learning_rate": 0.0002, |
|
"loss": 0.583, |
|
"step": 2500 |
|
}, |
|
{ |
|
"epoch": 0.14, |
|
"learning_rate": 0.0002, |
|
"loss": 0.6051, |
|
"step": 2600 |
|
}, |
|
{ |
|
"epoch": 0.15, |
|
"learning_rate": 0.0002, |
|
"loss": 0.5985, |
|
"step": 2700 |
|
}, |
|
{ |
|
"epoch": 0.16, |
|
"learning_rate": 0.0002, |
|
"loss": 0.5988, |
|
"step": 2800 |
|
}, |
|
{ |
|
"epoch": 0.16, |
|
"learning_rate": 0.0002, |
|
"loss": 0.5909, |
|
"step": 2900 |
|
}, |
|
{ |
|
"epoch": 0.17, |
|
"learning_rate": 0.0002, |
|
"loss": 0.5862, |
|
"step": 3000 |
|
}, |
|
{ |
|
"epoch": 0.17, |
|
"learning_rate": 0.0002, |
|
"loss": 0.5955, |
|
"step": 3100 |
|
}, |
|
{ |
|
"epoch": 0.18, |
|
"learning_rate": 0.0002, |
|
"loss": 0.6082, |
|
"step": 3200 |
|
}, |
|
{ |
|
"epoch": 0.18, |
|
"learning_rate": 0.0002, |
|
"loss": 0.5945, |
|
"step": 3300 |
|
}, |
|
{ |
|
"epoch": 0.19, |
|
"learning_rate": 0.0002, |
|
"loss": 0.5902, |
|
"step": 3400 |
|
}, |
|
{ |
|
"epoch": 0.2, |
|
"learning_rate": 0.0002, |
|
"loss": 0.5972, |
|
"step": 3500 |
|
}, |
|
{ |
|
"epoch": 0.2, |
|
"learning_rate": 0.0002, |
|
"loss": 0.6033, |
|
"step": 3600 |
|
}, |
|
{ |
|
"epoch": 0.21, |
|
"learning_rate": 0.0002, |
|
"loss": 0.5833, |
|
"step": 3700 |
|
}, |
|
{ |
|
"epoch": 0.21, |
|
"learning_rate": 0.0002, |
|
"loss": 0.6024, |
|
"step": 3800 |
|
}, |
|
{ |
|
"epoch": 0.22, |
|
"learning_rate": 0.0002, |
|
"loss": 0.5846, |
|
"step": 3900 |
|
}, |
|
{ |
|
"epoch": 0.22, |
|
"learning_rate": 0.0002, |
|
"loss": 0.608, |
|
"step": 4000 |
|
}, |
|
{ |
|
"epoch": 0.23, |
|
"learning_rate": 0.0002, |
|
"loss": 0.6056, |
|
"step": 4100 |
|
}, |
|
{ |
|
"epoch": 0.23, |
|
"learning_rate": 0.0002, |
|
"loss": 0.5992, |
|
"step": 4200 |
|
}, |
|
{ |
|
"epoch": 0.24, |
|
"learning_rate": 0.0002, |
|
"loss": 0.5884, |
|
"step": 4300 |
|
}, |
|
{ |
|
"epoch": 0.25, |
|
"learning_rate": 0.0002, |
|
"loss": 0.611, |
|
"step": 4400 |
|
}, |
|
{ |
|
"epoch": 0.25, |
|
"learning_rate": 0.0002, |
|
"loss": 0.6021, |
|
"step": 4500 |
|
}, |
|
{ |
|
"epoch": 0.26, |
|
"learning_rate": 0.0002, |
|
"loss": 0.6078, |
|
"step": 4600 |
|
}, |
|
{ |
|
"epoch": 0.26, |
|
"learning_rate": 0.0002, |
|
"loss": 0.6079, |
|
"step": 4700 |
|
}, |
|
{ |
|
"epoch": 0.27, |
|
"learning_rate": 0.0002, |
|
"loss": 0.5802, |
|
"step": 4800 |
|
}, |
|
{ |
|
"epoch": 0.27, |
|
"learning_rate": 0.0002, |
|
"loss": 0.5957, |
|
"step": 4900 |
|
}, |
|
{ |
|
"epoch": 0.28, |
|
"learning_rate": 0.0002, |
|
"loss": 0.5927, |
|
"step": 5000 |
|
}, |
|
{ |
|
"epoch": 0.28, |
|
"learning_rate": 0.0002, |
|
"loss": 0.601, |
|
"step": 5100 |
|
}, |
|
{ |
|
"epoch": 0.29, |
|
"learning_rate": 0.0002, |
|
"loss": 0.6067, |
|
"step": 5200 |
|
}, |
|
{ |
|
"epoch": 0.3, |
|
"learning_rate": 0.0002, |
|
"loss": 0.6032, |
|
"step": 5300 |
|
}, |
|
{ |
|
"epoch": 0.3, |
|
"learning_rate": 0.0002, |
|
"loss": 0.5967, |
|
"step": 5400 |
|
}, |
|
{ |
|
"epoch": 0.31, |
|
"learning_rate": 0.0002, |
|
"loss": 0.6086, |
|
"step": 5500 |
|
}, |
|
{ |
|
"epoch": 0.31, |
|
"learning_rate": 0.0002, |
|
"loss": 0.6088, |
|
"step": 5600 |
|
}, |
|
{ |
|
"epoch": 0.32, |
|
"learning_rate": 0.0002, |
|
"loss": 0.6004, |
|
"step": 5700 |
|
}, |
|
{ |
|
"epoch": 0.32, |
|
"learning_rate": 0.0002, |
|
"loss": 0.6081, |
|
"step": 5800 |
|
}, |
|
{ |
|
"epoch": 0.33, |
|
"learning_rate": 0.0002, |
|
"loss": 0.5881, |
|
"step": 5900 |
|
}, |
|
{ |
|
"epoch": 0.33, |
|
"learning_rate": 0.0002, |
|
"loss": 0.6046, |
|
"step": 6000 |
|
}, |
|
{ |
|
"epoch": 0.34, |
|
"learning_rate": 0.0002, |
|
"loss": 0.5831, |
|
"step": 6100 |
|
}, |
|
{ |
|
"epoch": 0.35, |
|
"learning_rate": 0.0002, |
|
"loss": 0.5986, |
|
"step": 6200 |
|
}, |
|
{ |
|
"epoch": 0.35, |
|
"learning_rate": 0.0002, |
|
"loss": 0.5978, |
|
"step": 6300 |
|
}, |
|
{ |
|
"epoch": 0.36, |
|
"learning_rate": 0.0002, |
|
"loss": 0.6047, |
|
"step": 6400 |
|
}, |
|
{ |
|
"epoch": 0.36, |
|
"learning_rate": 0.0002, |
|
"loss": 0.6051, |
|
"step": 6500 |
|
}, |
|
{ |
|
"epoch": 0.37, |
|
"learning_rate": 0.0002, |
|
"loss": 0.5896, |
|
"step": 6600 |
|
}, |
|
{ |
|
"epoch": 0.37, |
|
"learning_rate": 0.0002, |
|
"loss": 0.5985, |
|
"step": 6700 |
|
}, |
|
{ |
|
"epoch": 0.38, |
|
"learning_rate": 0.0002, |
|
"loss": 0.6038, |
|
"step": 6800 |
|
}, |
|
{ |
|
"epoch": 0.38, |
|
"learning_rate": 0.0002, |
|
"loss": 0.6022, |
|
"step": 6900 |
|
}, |
|
{ |
|
"epoch": 0.39, |
|
"learning_rate": 0.0002, |
|
"loss": 0.6061, |
|
"step": 7000 |
|
}, |
|
{ |
|
"epoch": 0.4, |
|
"learning_rate": 0.0002, |
|
"loss": 0.5962, |
|
"step": 7100 |
|
}, |
|
{ |
|
"epoch": 0.4, |
|
"learning_rate": 0.0002, |
|
"loss": 0.6063, |
|
"step": 7200 |
|
}, |
|
{ |
|
"epoch": 0.41, |
|
"learning_rate": 0.0002, |
|
"loss": 0.5968, |
|
"step": 7300 |
|
}, |
|
{ |
|
"epoch": 0.41, |
|
"learning_rate": 0.0002, |
|
"loss": 0.6073, |
|
"step": 7400 |
|
}, |
|
{ |
|
"epoch": 0.42, |
|
"learning_rate": 0.0002, |
|
"loss": 0.5951, |
|
"step": 7500 |
|
}, |
|
{ |
|
"epoch": 0.42, |
|
"learning_rate": 0.0002, |
|
"loss": 0.596, |
|
"step": 7600 |
|
}, |
|
{ |
|
"epoch": 0.43, |
|
"learning_rate": 0.0002, |
|
"loss": 0.5992, |
|
"step": 7700 |
|
}, |
|
{ |
|
"epoch": 0.43, |
|
"learning_rate": 0.0002, |
|
"loss": 0.6136, |
|
"step": 7800 |
|
}, |
|
{ |
|
"epoch": 0.44, |
|
"learning_rate": 0.0002, |
|
"loss": 0.6, |
|
"step": 7900 |
|
}, |
|
{ |
|
"epoch": 0.45, |
|
"learning_rate": 0.0002, |
|
"loss": 0.6036, |
|
"step": 8000 |
|
}, |
|
{ |
|
"epoch": 0.45, |
|
"learning_rate": 0.0002, |
|
"loss": 0.6085, |
|
"step": 8100 |
|
}, |
|
{ |
|
"epoch": 0.46, |
|
"learning_rate": 0.0002, |
|
"loss": 0.5925, |
|
"step": 8200 |
|
}, |
|
{ |
|
"epoch": 0.46, |
|
"learning_rate": 0.0002, |
|
"loss": 0.6132, |
|
"step": 8300 |
|
}, |
|
{ |
|
"epoch": 0.47, |
|
"learning_rate": 0.0002, |
|
"loss": 0.5988, |
|
"step": 8400 |
|
}, |
|
{ |
|
"epoch": 0.47, |
|
"learning_rate": 0.0002, |
|
"loss": 0.6097, |
|
"step": 8500 |
|
}, |
|
{ |
|
"epoch": 0.48, |
|
"learning_rate": 0.0002, |
|
"loss": 0.5982, |
|
"step": 8600 |
|
}, |
|
{ |
|
"epoch": 0.48, |
|
"learning_rate": 0.0002, |
|
"loss": 0.6047, |
|
"step": 8700 |
|
}, |
|
{ |
|
"epoch": 0.49, |
|
"learning_rate": 0.0002, |
|
"loss": 0.614, |
|
"step": 8800 |
|
}, |
|
{ |
|
"epoch": 0.5, |
|
"learning_rate": 0.0002, |
|
"loss": 0.6009, |
|
"step": 8900 |
|
}, |
|
{ |
|
"epoch": 0.5, |
|
"learning_rate": 0.0002, |
|
"loss": 0.6006, |
|
"step": 9000 |
|
}, |
|
{ |
|
"epoch": 0.51, |
|
"learning_rate": 0.0002, |
|
"loss": 0.5976, |
|
"step": 9100 |
|
}, |
|
{ |
|
"epoch": 0.51, |
|
"learning_rate": 0.0002, |
|
"loss": 0.5978, |
|
"step": 9200 |
|
}, |
|
{ |
|
"epoch": 0.52, |
|
"learning_rate": 0.0002, |
|
"loss": 0.6068, |
|
"step": 9300 |
|
}, |
|
{ |
|
"epoch": 0.52, |
|
"learning_rate": 0.0002, |
|
"loss": 0.6083, |
|
"step": 9400 |
|
}, |
|
{ |
|
"epoch": 0.53, |
|
"learning_rate": 0.0002, |
|
"loss": 0.6061, |
|
"step": 9500 |
|
}, |
|
{ |
|
"epoch": 0.53, |
|
"learning_rate": 0.0002, |
|
"loss": 0.6045, |
|
"step": 9600 |
|
}, |
|
{ |
|
"epoch": 0.54, |
|
"learning_rate": 0.0002, |
|
"loss": 0.5928, |
|
"step": 9700 |
|
}, |
|
{ |
|
"epoch": 0.55, |
|
"learning_rate": 0.0002, |
|
"loss": 0.5981, |
|
"step": 9800 |
|
}, |
|
{ |
|
"epoch": 0.55, |
|
"learning_rate": 0.0002, |
|
"loss": 0.6171, |
|
"step": 9900 |
|
}, |
|
{ |
|
"epoch": 0.56, |
|
"learning_rate": 0.0002, |
|
"loss": 0.6079, |
|
"step": 10000 |
|
}, |
|
{ |
|
"epoch": 0.56, |
|
"eval_gen_len": 18.999476361460204, |
|
"eval_loss": 2.610891819000244, |
|
"eval_rouge1": 25.2905, |
|
"eval_rouge2": 12.3803, |
|
"eval_rougeL": 20.8964, |
|
"eval_rougeLsum": 23.9052, |
|
"eval_runtime": 491.8614, |
|
"eval_samples_per_second": 27.178, |
|
"eval_steps_per_second": 1.7, |
|
"step": 10000 |
|
}, |
|
{ |
|
"epoch": 0.56, |
|
"learning_rate": 0.0002, |
|
"loss": 0.6068, |
|
"step": 10100 |
|
}, |
|
{ |
|
"epoch": 0.57, |
|
"learning_rate": 0.0002, |
|
"loss": 0.5903, |
|
"step": 10200 |
|
}, |
|
{ |
|
"epoch": 0.57, |
|
"learning_rate": 0.0002, |
|
"loss": 0.6039, |
|
"step": 10300 |
|
}, |
|
{ |
|
"epoch": 0.58, |
|
"learning_rate": 0.0002, |
|
"loss": 0.6014, |
|
"step": 10400 |
|
}, |
|
{ |
|
"epoch": 0.59, |
|
"learning_rate": 0.0002, |
|
"loss": 0.6076, |
|
"step": 10500 |
|
}, |
|
{ |
|
"epoch": 0.59, |
|
"learning_rate": 0.0002, |
|
"loss": 0.6062, |
|
"step": 10600 |
|
}, |
|
{ |
|
"epoch": 0.6, |
|
"learning_rate": 0.0002, |
|
"loss": 0.6013, |
|
"step": 10700 |
|
}, |
|
{ |
|
"epoch": 0.6, |
|
"learning_rate": 0.0002, |
|
"loss": 0.615, |
|
"step": 10800 |
|
}, |
|
{ |
|
"epoch": 0.61, |
|
"learning_rate": 0.0002, |
|
"loss": 0.6003, |
|
"step": 10900 |
|
}, |
|
{ |
|
"epoch": 0.61, |
|
"learning_rate": 0.0002, |
|
"loss": 0.6059, |
|
"step": 11000 |
|
}, |
|
{ |
|
"epoch": 0.62, |
|
"learning_rate": 0.0002, |
|
"loss": 0.6144, |
|
"step": 11100 |
|
}, |
|
{ |
|
"epoch": 0.62, |
|
"learning_rate": 0.0002, |
|
"loss": 0.626, |
|
"step": 11200 |
|
}, |
|
{ |
|
"epoch": 0.63, |
|
"learning_rate": 0.0002, |
|
"loss": 0.5908, |
|
"step": 11300 |
|
}, |
|
{ |
|
"epoch": 0.64, |
|
"learning_rate": 0.0002, |
|
"loss": 0.6052, |
|
"step": 11400 |
|
}, |
|
{ |
|
"epoch": 0.64, |
|
"learning_rate": 0.0002, |
|
"loss": 0.6159, |
|
"step": 11500 |
|
}, |
|
{ |
|
"epoch": 0.65, |
|
"learning_rate": 0.0002, |
|
"loss": 0.6093, |
|
"step": 11600 |
|
}, |
|
{ |
|
"epoch": 0.65, |
|
"learning_rate": 0.0002, |
|
"loss": 0.6048, |
|
"step": 11700 |
|
}, |
|
{ |
|
"epoch": 0.66, |
|
"learning_rate": 0.0002, |
|
"loss": 0.6134, |
|
"step": 11800 |
|
}, |
|
{ |
|
"epoch": 0.66, |
|
"learning_rate": 0.0002, |
|
"loss": 0.603, |
|
"step": 11900 |
|
}, |
|
{ |
|
"epoch": 0.67, |
|
"learning_rate": 0.0002, |
|
"loss": 0.5971, |
|
"step": 12000 |
|
}, |
|
{ |
|
"epoch": 0.67, |
|
"learning_rate": 0.0002, |
|
"loss": 0.6122, |
|
"step": 12100 |
|
}, |
|
{ |
|
"epoch": 0.68, |
|
"learning_rate": 0.0002, |
|
"loss": 0.603, |
|
"step": 12200 |
|
}, |
|
{ |
|
"epoch": 0.69, |
|
"learning_rate": 0.0002, |
|
"loss": 0.6016, |
|
"step": 12300 |
|
}, |
|
{ |
|
"epoch": 0.69, |
|
"learning_rate": 0.0002, |
|
"loss": 0.5888, |
|
"step": 12400 |
|
}, |
|
{ |
|
"epoch": 0.7, |
|
"learning_rate": 0.0002, |
|
"loss": 0.6016, |
|
"step": 12500 |
|
}, |
|
{ |
|
"epoch": 0.7, |
|
"learning_rate": 0.0002, |
|
"loss": 0.6155, |
|
"step": 12600 |
|
}, |
|
{ |
|
"epoch": 0.71, |
|
"learning_rate": 0.0002, |
|
"loss": 0.6121, |
|
"step": 12700 |
|
}, |
|
{ |
|
"epoch": 0.71, |
|
"learning_rate": 0.0002, |
|
"loss": 0.6147, |
|
"step": 12800 |
|
}, |
|
{ |
|
"epoch": 0.72, |
|
"learning_rate": 0.0002, |
|
"loss": 0.5936, |
|
"step": 12900 |
|
}, |
|
{ |
|
"epoch": 0.72, |
|
"learning_rate": 0.0002, |
|
"loss": 0.5999, |
|
"step": 13000 |
|
}, |
|
{ |
|
"epoch": 0.73, |
|
"learning_rate": 0.0002, |
|
"loss": 0.6058, |
|
"step": 13100 |
|
}, |
|
{ |
|
"epoch": 0.74, |
|
"learning_rate": 0.0002, |
|
"loss": 0.6123, |
|
"step": 13200 |
|
}, |
|
{ |
|
"epoch": 0.74, |
|
"learning_rate": 0.0002, |
|
"loss": 0.5933, |
|
"step": 13300 |
|
}, |
|
{ |
|
"epoch": 0.75, |
|
"learning_rate": 0.0002, |
|
"loss": 0.5945, |
|
"step": 13400 |
|
}, |
|
{ |
|
"epoch": 0.75, |
|
"learning_rate": 0.0002, |
|
"loss": 0.6042, |
|
"step": 13500 |
|
}, |
|
{ |
|
"epoch": 0.76, |
|
"learning_rate": 0.0002, |
|
"loss": 0.6169, |
|
"step": 13600 |
|
}, |
|
{ |
|
"epoch": 0.76, |
|
"learning_rate": 0.0002, |
|
"loss": 0.6122, |
|
"step": 13700 |
|
}, |
|
{ |
|
"epoch": 0.77, |
|
"learning_rate": 0.0002, |
|
"loss": 0.6122, |
|
"step": 13800 |
|
}, |
|
{ |
|
"epoch": 0.77, |
|
"learning_rate": 0.0002, |
|
"loss": 0.6034, |
|
"step": 13900 |
|
}, |
|
{ |
|
"epoch": 0.78, |
|
"learning_rate": 0.0002, |
|
"loss": 0.5984, |
|
"step": 14000 |
|
}, |
|
{ |
|
"epoch": 0.79, |
|
"learning_rate": 0.0002, |
|
"loss": 0.6166, |
|
"step": 14100 |
|
}, |
|
{ |
|
"epoch": 0.79, |
|
"learning_rate": 0.0002, |
|
"loss": 0.623, |
|
"step": 14200 |
|
}, |
|
{ |
|
"epoch": 0.8, |
|
"learning_rate": 0.0002, |
|
"loss": 0.6052, |
|
"step": 14300 |
|
}, |
|
{ |
|
"epoch": 0.8, |
|
"learning_rate": 0.0002, |
|
"loss": 0.6056, |
|
"step": 14400 |
|
}, |
|
{ |
|
"epoch": 0.81, |
|
"learning_rate": 0.0002, |
|
"loss": 0.6185, |
|
"step": 14500 |
|
}, |
|
{ |
|
"epoch": 0.81, |
|
"learning_rate": 0.0002, |
|
"loss": 0.6204, |
|
"step": 14600 |
|
}, |
|
{ |
|
"epoch": 0.82, |
|
"learning_rate": 0.0002, |
|
"loss": 0.6194, |
|
"step": 14700 |
|
}, |
|
{ |
|
"epoch": 0.82, |
|
"learning_rate": 0.0002, |
|
"loss": 0.6042, |
|
"step": 14800 |
|
}, |
|
{ |
|
"epoch": 0.83, |
|
"learning_rate": 0.0002, |
|
"loss": 0.5974, |
|
"step": 14900 |
|
}, |
|
{ |
|
"epoch": 0.84, |
|
"learning_rate": 0.0002, |
|
"loss": 0.6052, |
|
"step": 15000 |
|
}, |
|
{ |
|
"epoch": 0.84, |
|
"learning_rate": 0.0002, |
|
"loss": 0.597, |
|
"step": 15100 |
|
}, |
|
{ |
|
"epoch": 0.85, |
|
"learning_rate": 0.0002, |
|
"loss": 0.6095, |
|
"step": 15200 |
|
}, |
|
{ |
|
"epoch": 0.85, |
|
"learning_rate": 0.0002, |
|
"loss": 0.6187, |
|
"step": 15300 |
|
}, |
|
{ |
|
"epoch": 0.86, |
|
"learning_rate": 0.0002, |
|
"loss": 0.6066, |
|
"step": 15400 |
|
}, |
|
{ |
|
"epoch": 0.86, |
|
"learning_rate": 0.0002, |
|
"loss": 0.6132, |
|
"step": 15500 |
|
}, |
|
{ |
|
"epoch": 0.87, |
|
"learning_rate": 0.0002, |
|
"loss": 0.6156, |
|
"step": 15600 |
|
}, |
|
{ |
|
"epoch": 0.87, |
|
"learning_rate": 0.0002, |
|
"loss": 0.6081, |
|
"step": 15700 |
|
}, |
|
{ |
|
"epoch": 0.88, |
|
"learning_rate": 0.0002, |
|
"loss": 0.6079, |
|
"step": 15800 |
|
}, |
|
{ |
|
"epoch": 0.89, |
|
"learning_rate": 0.0002, |
|
"loss": 0.6253, |
|
"step": 15900 |
|
}, |
|
{ |
|
"epoch": 0.89, |
|
"learning_rate": 0.0002, |
|
"loss": 0.6029, |
|
"step": 16000 |
|
}, |
|
{ |
|
"epoch": 0.9, |
|
"learning_rate": 0.0002, |
|
"loss": 0.6056, |
|
"step": 16100 |
|
}, |
|
{ |
|
"epoch": 0.9, |
|
"learning_rate": 0.0002, |
|
"loss": 0.6165, |
|
"step": 16200 |
|
}, |
|
{ |
|
"epoch": 0.91, |
|
"learning_rate": 0.0002, |
|
"loss": 0.6223, |
|
"step": 16300 |
|
}, |
|
{ |
|
"epoch": 0.91, |
|
"learning_rate": 0.0002, |
|
"loss": 0.5993, |
|
"step": 16400 |
|
}, |
|
{ |
|
"epoch": 0.92, |
|
"learning_rate": 0.0002, |
|
"loss": 0.6096, |
|
"step": 16500 |
|
}, |
|
{ |
|
"epoch": 0.93, |
|
"learning_rate": 0.0002, |
|
"loss": 0.613, |
|
"step": 16600 |
|
}, |
|
{ |
|
"epoch": 0.93, |
|
"learning_rate": 0.0002, |
|
"loss": 0.5931, |
|
"step": 16700 |
|
}, |
|
{ |
|
"epoch": 0.94, |
|
"learning_rate": 0.0002, |
|
"loss": 0.6158, |
|
"step": 16800 |
|
}, |
|
{ |
|
"epoch": 0.94, |
|
"learning_rate": 0.0002, |
|
"loss": 0.6129, |
|
"step": 16900 |
|
}, |
|
{ |
|
"epoch": 0.95, |
|
"learning_rate": 0.0002, |
|
"loss": 0.6097, |
|
"step": 17000 |
|
}, |
|
{ |
|
"epoch": 0.95, |
|
"learning_rate": 0.0002, |
|
"loss": 0.6102, |
|
"step": 17100 |
|
}, |
|
{ |
|
"epoch": 0.96, |
|
"learning_rate": 0.0002, |
|
"loss": 0.6232, |
|
"step": 17200 |
|
}, |
|
{ |
|
"epoch": 0.96, |
|
"learning_rate": 0.0002, |
|
"loss": 0.6099, |
|
"step": 17300 |
|
}, |
|
{ |
|
"epoch": 0.97, |
|
"learning_rate": 0.0002, |
|
"loss": 0.6182, |
|
"step": 17400 |
|
}, |
|
{ |
|
"epoch": 0.98, |
|
"learning_rate": 0.0002, |
|
"loss": 0.6115, |
|
"step": 17500 |
|
}, |
|
{ |
|
"epoch": 0.98, |
|
"learning_rate": 0.0002, |
|
"loss": 0.6156, |
|
"step": 17600 |
|
}, |
|
{ |
|
"epoch": 0.99, |
|
"learning_rate": 0.0002, |
|
"loss": 0.6217, |
|
"step": 17700 |
|
}, |
|
{ |
|
"epoch": 0.99, |
|
"learning_rate": 0.0002, |
|
"loss": 0.6155, |
|
"step": 17800 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"learning_rate": 0.0002, |
|
"loss": 0.5991, |
|
"step": 17900 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"learning_rate": 0.0002, |
|
"loss": 0.5913, |
|
"step": 18000 |
|
}, |
|
{ |
|
"epoch": 1.01, |
|
"learning_rate": 0.0002, |
|
"loss": 0.5782, |
|
"step": 18100 |
|
}, |
|
{ |
|
"epoch": 1.01, |
|
"learning_rate": 0.0002, |
|
"loss": 0.5648, |
|
"step": 18200 |
|
}, |
|
{ |
|
"epoch": 1.02, |
|
"learning_rate": 0.0002, |
|
"loss": 0.5837, |
|
"step": 18300 |
|
}, |
|
{ |
|
"epoch": 1.03, |
|
"learning_rate": 0.0002, |
|
"loss": 0.582, |
|
"step": 18400 |
|
}, |
|
{ |
|
"epoch": 1.03, |
|
"learning_rate": 0.0002, |
|
"loss": 0.5695, |
|
"step": 18500 |
|
}, |
|
{ |
|
"epoch": 1.04, |
|
"learning_rate": 0.0002, |
|
"loss": 0.59, |
|
"step": 18600 |
|
}, |
|
{ |
|
"epoch": 1.04, |
|
"learning_rate": 0.0002, |
|
"loss": 0.5753, |
|
"step": 18700 |
|
}, |
|
{ |
|
"epoch": 1.05, |
|
"learning_rate": 0.0002, |
|
"loss": 0.5902, |
|
"step": 18800 |
|
}, |
|
{ |
|
"epoch": 1.05, |
|
"learning_rate": 0.0002, |
|
"loss": 0.5841, |
|
"step": 18900 |
|
}, |
|
{ |
|
"epoch": 1.06, |
|
"learning_rate": 0.0002, |
|
"loss": 0.579, |
|
"step": 19000 |
|
}, |
|
{ |
|
"epoch": 1.06, |
|
"learning_rate": 0.0002, |
|
"loss": 0.5733, |
|
"step": 19100 |
|
}, |
|
{ |
|
"epoch": 1.07, |
|
"learning_rate": 0.0002, |
|
"loss": 0.5818, |
|
"step": 19200 |
|
}, |
|
{ |
|
"epoch": 1.08, |
|
"learning_rate": 0.0002, |
|
"loss": 0.5821, |
|
"step": 19300 |
|
}, |
|
{ |
|
"epoch": 1.08, |
|
"learning_rate": 0.0002, |
|
"loss": 0.5814, |
|
"step": 19400 |
|
}, |
|
{ |
|
"epoch": 1.09, |
|
"learning_rate": 0.0002, |
|
"loss": 0.5849, |
|
"step": 19500 |
|
}, |
|
{ |
|
"epoch": 1.09, |
|
"learning_rate": 0.0002, |
|
"loss": 0.5966, |
|
"step": 19600 |
|
}, |
|
{ |
|
"epoch": 1.1, |
|
"learning_rate": 0.0002, |
|
"loss": 0.5895, |
|
"step": 19700 |
|
}, |
|
{ |
|
"epoch": 1.1, |
|
"learning_rate": 0.0002, |
|
"loss": 0.5865, |
|
"step": 19800 |
|
}, |
|
{ |
|
"epoch": 1.11, |
|
"learning_rate": 0.0002, |
|
"loss": 0.5904, |
|
"step": 19900 |
|
}, |
|
{ |
|
"epoch": 1.11, |
|
"learning_rate": 0.0002, |
|
"loss": 0.578, |
|
"step": 20000 |
|
}, |
|
{ |
|
"epoch": 1.11, |
|
"eval_gen_len": 18.99970077797726, |
|
"eval_loss": 2.692544460296631, |
|
"eval_rouge1": 25.2837, |
|
"eval_rouge2": 12.3389, |
|
"eval_rougeL": 20.91, |
|
"eval_rougeLsum": 23.8923, |
|
"eval_runtime": 488.8383, |
|
"eval_samples_per_second": 27.346, |
|
"eval_steps_per_second": 1.71, |
|
"step": 20000 |
|
}, |
|
{ |
|
"epoch": 1.12, |
|
"learning_rate": 0.0002, |
|
"loss": 0.5816, |
|
"step": 20100 |
|
}, |
|
{ |
|
"epoch": 1.13, |
|
"learning_rate": 0.0002, |
|
"loss": 0.602, |
|
"step": 20200 |
|
}, |
|
{ |
|
"epoch": 1.13, |
|
"learning_rate": 0.0002, |
|
"loss": 0.594, |
|
"step": 20300 |
|
}, |
|
{ |
|
"epoch": 1.14, |
|
"learning_rate": 0.0002, |
|
"loss": 0.5886, |
|
"step": 20400 |
|
}, |
|
{ |
|
"epoch": 1.14, |
|
"learning_rate": 0.0002, |
|
"loss": 0.5985, |
|
"step": 20500 |
|
}, |
|
{ |
|
"epoch": 1.15, |
|
"learning_rate": 0.0002, |
|
"loss": 0.5962, |
|
"step": 20600 |
|
}, |
|
{ |
|
"epoch": 1.15, |
|
"learning_rate": 0.0002, |
|
"loss": 0.5733, |
|
"step": 20700 |
|
}, |
|
{ |
|
"epoch": 1.16, |
|
"learning_rate": 0.0002, |
|
"loss": 0.5889, |
|
"step": 20800 |
|
}, |
|
{ |
|
"epoch": 1.16, |
|
"learning_rate": 0.0002, |
|
"loss": 0.6017, |
|
"step": 20900 |
|
}, |
|
{ |
|
"epoch": 1.17, |
|
"learning_rate": 0.0002, |
|
"loss": 0.5867, |
|
"step": 21000 |
|
}, |
|
{ |
|
"epoch": 1.18, |
|
"learning_rate": 0.0002, |
|
"loss": 0.5847, |
|
"step": 21100 |
|
}, |
|
{ |
|
"epoch": 1.18, |
|
"learning_rate": 0.0002, |
|
"loss": 0.5918, |
|
"step": 21200 |
|
}, |
|
{ |
|
"epoch": 1.19, |
|
"learning_rate": 0.0002, |
|
"loss": 0.5794, |
|
"step": 21300 |
|
}, |
|
{ |
|
"epoch": 1.19, |
|
"learning_rate": 0.0002, |
|
"loss": 0.5806, |
|
"step": 21400 |
|
}, |
|
{ |
|
"epoch": 1.2, |
|
"learning_rate": 0.0002, |
|
"loss": 0.5855, |
|
"step": 21500 |
|
}, |
|
{ |
|
"epoch": 1.2, |
|
"learning_rate": 0.0002, |
|
"loss": 0.5821, |
|
"step": 21600 |
|
}, |
|
{ |
|
"epoch": 1.21, |
|
"learning_rate": 0.0002, |
|
"loss": 0.5735, |
|
"step": 21700 |
|
}, |
|
{ |
|
"epoch": 1.21, |
|
"learning_rate": 0.0002, |
|
"loss": 0.5953, |
|
"step": 21800 |
|
}, |
|
{ |
|
"epoch": 1.22, |
|
"learning_rate": 0.0002, |
|
"loss": 0.5904, |
|
"step": 21900 |
|
}, |
|
{ |
|
"epoch": 1.23, |
|
"learning_rate": 0.0002, |
|
"loss": 0.5962, |
|
"step": 22000 |
|
}, |
|
{ |
|
"epoch": 1.23, |
|
"learning_rate": 0.0002, |
|
"loss": 0.5942, |
|
"step": 22100 |
|
}, |
|
{ |
|
"epoch": 1.24, |
|
"learning_rate": 0.0002, |
|
"loss": 0.5871, |
|
"step": 22200 |
|
}, |
|
{ |
|
"epoch": 1.24, |
|
"learning_rate": 0.0002, |
|
"loss": 0.5851, |
|
"step": 22300 |
|
}, |
|
{ |
|
"epoch": 1.25, |
|
"learning_rate": 0.0002, |
|
"loss": 0.5921, |
|
"step": 22400 |
|
}, |
|
{ |
|
"epoch": 1.25, |
|
"learning_rate": 0.0002, |
|
"loss": 0.5971, |
|
"step": 22500 |
|
}, |
|
{ |
|
"epoch": 1.26, |
|
"learning_rate": 0.0002, |
|
"loss": 0.5882, |
|
"step": 22600 |
|
}, |
|
{ |
|
"epoch": 1.26, |
|
"learning_rate": 0.0002, |
|
"loss": 0.5813, |
|
"step": 22700 |
|
}, |
|
{ |
|
"epoch": 1.27, |
|
"learning_rate": 0.0002, |
|
"loss": 0.5916, |
|
"step": 22800 |
|
}, |
|
{ |
|
"epoch": 1.28, |
|
"learning_rate": 0.0002, |
|
"loss": 0.5959, |
|
"step": 22900 |
|
}, |
|
{ |
|
"epoch": 1.28, |
|
"learning_rate": 0.0002, |
|
"loss": 0.5926, |
|
"step": 23000 |
|
}, |
|
{ |
|
"epoch": 1.29, |
|
"learning_rate": 0.0002, |
|
"loss": 0.5883, |
|
"step": 23100 |
|
}, |
|
{ |
|
"epoch": 1.29, |
|
"learning_rate": 0.0002, |
|
"loss": 0.5929, |
|
"step": 23200 |
|
}, |
|
{ |
|
"epoch": 1.3, |
|
"learning_rate": 0.0002, |
|
"loss": 0.584, |
|
"step": 23300 |
|
}, |
|
{ |
|
"epoch": 1.3, |
|
"learning_rate": 0.0002, |
|
"loss": 0.5957, |
|
"step": 23400 |
|
}, |
|
{ |
|
"epoch": 1.31, |
|
"learning_rate": 0.0002, |
|
"loss": 0.5954, |
|
"step": 23500 |
|
}, |
|
{ |
|
"epoch": 1.32, |
|
"learning_rate": 0.0002, |
|
"loss": 0.5938, |
|
"step": 23600 |
|
}, |
|
{ |
|
"epoch": 1.32, |
|
"learning_rate": 0.0002, |
|
"loss": 0.5961, |
|
"step": 23700 |
|
}, |
|
{ |
|
"epoch": 1.33, |
|
"learning_rate": 0.0002, |
|
"loss": 0.6012, |
|
"step": 23800 |
|
}, |
|
{ |
|
"epoch": 1.33, |
|
"learning_rate": 0.0002, |
|
"loss": 0.5899, |
|
"step": 23900 |
|
}, |
|
{ |
|
"epoch": 1.34, |
|
"learning_rate": 0.0002, |
|
"loss": 0.5941, |
|
"step": 24000 |
|
}, |
|
{ |
|
"epoch": 1.34, |
|
"learning_rate": 0.0002, |
|
"loss": 0.5936, |
|
"step": 24100 |
|
}, |
|
{ |
|
"epoch": 1.35, |
|
"learning_rate": 0.0002, |
|
"loss": 0.5912, |
|
"step": 24200 |
|
}, |
|
{ |
|
"epoch": 1.35, |
|
"learning_rate": 0.0002, |
|
"loss": 0.5873, |
|
"step": 24300 |
|
}, |
|
{ |
|
"epoch": 1.36, |
|
"learning_rate": 0.0002, |
|
"loss": 0.578, |
|
"step": 24400 |
|
}, |
|
{ |
|
"epoch": 1.37, |
|
"learning_rate": 0.0002, |
|
"loss": 0.5942, |
|
"step": 24500 |
|
}, |
|
{ |
|
"epoch": 1.37, |
|
"learning_rate": 0.0002, |
|
"loss": 0.5918, |
|
"step": 24600 |
|
}, |
|
{ |
|
"epoch": 1.38, |
|
"learning_rate": 0.0002, |
|
"loss": 0.5923, |
|
"step": 24700 |
|
}, |
|
{ |
|
"epoch": 1.38, |
|
"learning_rate": 0.0002, |
|
"loss": 0.5818, |
|
"step": 24800 |
|
}, |
|
{ |
|
"epoch": 1.39, |
|
"learning_rate": 0.0002, |
|
"loss": 0.5904, |
|
"step": 24900 |
|
}, |
|
{ |
|
"epoch": 1.39, |
|
"learning_rate": 0.0002, |
|
"loss": 0.5991, |
|
"step": 25000 |
|
}, |
|
{ |
|
"epoch": 1.4, |
|
"learning_rate": 0.0002, |
|
"loss": 0.601, |
|
"step": 25100 |
|
}, |
|
{ |
|
"epoch": 1.4, |
|
"learning_rate": 0.0002, |
|
"loss": 0.5919, |
|
"step": 25200 |
|
}, |
|
{ |
|
"epoch": 1.41, |
|
"learning_rate": 0.0002, |
|
"loss": 0.5836, |
|
"step": 25300 |
|
}, |
|
{ |
|
"epoch": 1.42, |
|
"learning_rate": 0.0002, |
|
"loss": 0.592, |
|
"step": 25400 |
|
}, |
|
{ |
|
"epoch": 1.42, |
|
"learning_rate": 0.0002, |
|
"loss": 0.5917, |
|
"step": 25500 |
|
}, |
|
{ |
|
"epoch": 1.43, |
|
"learning_rate": 0.0002, |
|
"loss": 0.5796, |
|
"step": 25600 |
|
}, |
|
{ |
|
"epoch": 1.43, |
|
"learning_rate": 0.0002, |
|
"loss": 0.5914, |
|
"step": 25700 |
|
}, |
|
{ |
|
"epoch": 1.44, |
|
"learning_rate": 0.0002, |
|
"loss": 0.5897, |
|
"step": 25800 |
|
}, |
|
{ |
|
"epoch": 1.44, |
|
"learning_rate": 0.0002, |
|
"loss": 0.5851, |
|
"step": 25900 |
|
}, |
|
{ |
|
"epoch": 1.45, |
|
"learning_rate": 0.0002, |
|
"loss": 0.5803, |
|
"step": 26000 |
|
}, |
|
{ |
|
"epoch": 1.45, |
|
"learning_rate": 0.0002, |
|
"loss": 0.5905, |
|
"step": 26100 |
|
}, |
|
{ |
|
"epoch": 1.46, |
|
"learning_rate": 0.0002, |
|
"loss": 0.598, |
|
"step": 26200 |
|
}, |
|
{ |
|
"epoch": 1.47, |
|
"learning_rate": 0.0002, |
|
"loss": 0.5915, |
|
"step": 26300 |
|
}, |
|
{ |
|
"epoch": 1.47, |
|
"learning_rate": 0.0002, |
|
"loss": 0.6016, |
|
"step": 26400 |
|
}, |
|
{ |
|
"epoch": 1.48, |
|
"learning_rate": 0.0002, |
|
"loss": 0.593, |
|
"step": 26500 |
|
}, |
|
{ |
|
"epoch": 1.48, |
|
"learning_rate": 0.0002, |
|
"loss": 0.5911, |
|
"step": 26600 |
|
}, |
|
{ |
|
"epoch": 1.49, |
|
"learning_rate": 0.0002, |
|
"loss": 0.5933, |
|
"step": 26700 |
|
}, |
|
{ |
|
"epoch": 1.49, |
|
"learning_rate": 0.0002, |
|
"loss": 0.613, |
|
"step": 26800 |
|
}, |
|
{ |
|
"epoch": 1.5, |
|
"learning_rate": 0.0002, |
|
"loss": 0.5972, |
|
"step": 26900 |
|
}, |
|
{ |
|
"epoch": 1.5, |
|
"learning_rate": 0.0002, |
|
"loss": 0.5806, |
|
"step": 27000 |
|
}, |
|
{ |
|
"epoch": 1.51, |
|
"learning_rate": 0.0002, |
|
"loss": 0.5964, |
|
"step": 27100 |
|
}, |
|
{ |
|
"epoch": 1.52, |
|
"learning_rate": 0.0002, |
|
"loss": 0.5815, |
|
"step": 27200 |
|
}, |
|
{ |
|
"epoch": 1.52, |
|
"learning_rate": 0.0002, |
|
"loss": 0.6144, |
|
"step": 27300 |
|
}, |
|
{ |
|
"epoch": 1.53, |
|
"learning_rate": 0.0002, |
|
"loss": 0.6142, |
|
"step": 27400 |
|
}, |
|
{ |
|
"epoch": 1.53, |
|
"learning_rate": 0.0002, |
|
"loss": 0.583, |
|
"step": 27500 |
|
}, |
|
{ |
|
"epoch": 1.54, |
|
"learning_rate": 0.0002, |
|
"loss": 0.5924, |
|
"step": 27600 |
|
}, |
|
{ |
|
"epoch": 1.54, |
|
"learning_rate": 0.0002, |
|
"loss": 0.586, |
|
"step": 27700 |
|
}, |
|
{ |
|
"epoch": 1.55, |
|
"learning_rate": 0.0002, |
|
"loss": 0.5985, |
|
"step": 27800 |
|
}, |
|
{ |
|
"epoch": 1.55, |
|
"learning_rate": 0.0002, |
|
"loss": 0.5974, |
|
"step": 27900 |
|
}, |
|
{ |
|
"epoch": 1.56, |
|
"learning_rate": 0.0002, |
|
"loss": 0.5967, |
|
"step": 28000 |
|
}, |
|
{ |
|
"epoch": 1.57, |
|
"learning_rate": 0.0002, |
|
"loss": 0.5927, |
|
"step": 28100 |
|
}, |
|
{ |
|
"epoch": 1.57, |
|
"learning_rate": 0.0002, |
|
"loss": 0.6018, |
|
"step": 28200 |
|
}, |
|
{ |
|
"epoch": 1.58, |
|
"learning_rate": 0.0002, |
|
"loss": 0.5816, |
|
"step": 28300 |
|
}, |
|
{ |
|
"epoch": 1.58, |
|
"learning_rate": 0.0002, |
|
"loss": 0.601, |
|
"step": 28400 |
|
}, |
|
{ |
|
"epoch": 1.59, |
|
"learning_rate": 0.0002, |
|
"loss": 0.6021, |
|
"step": 28500 |
|
}, |
|
{ |
|
"epoch": 1.59, |
|
"learning_rate": 0.0002, |
|
"loss": 0.5941, |
|
"step": 28600 |
|
}, |
|
{ |
|
"epoch": 1.6, |
|
"learning_rate": 0.0002, |
|
"loss": 0.5921, |
|
"step": 28700 |
|
}, |
|
{ |
|
"epoch": 1.6, |
|
"learning_rate": 0.0002, |
|
"loss": 0.6102, |
|
"step": 28800 |
|
}, |
|
{ |
|
"epoch": 1.61, |
|
"learning_rate": 0.0002, |
|
"loss": 0.5957, |
|
"step": 28900 |
|
}, |
|
{ |
|
"epoch": 1.62, |
|
"learning_rate": 0.0002, |
|
"loss": 0.5884, |
|
"step": 29000 |
|
}, |
|
{ |
|
"epoch": 1.62, |
|
"learning_rate": 0.0002, |
|
"loss": 0.596, |
|
"step": 29100 |
|
}, |
|
{ |
|
"epoch": 1.63, |
|
"learning_rate": 0.0002, |
|
"loss": 0.5879, |
|
"step": 29200 |
|
}, |
|
{ |
|
"epoch": 1.63, |
|
"learning_rate": 0.0002, |
|
"loss": 0.5907, |
|
"step": 29300 |
|
}, |
|
{ |
|
"epoch": 1.64, |
|
"learning_rate": 0.0002, |
|
"loss": 0.586, |
|
"step": 29400 |
|
}, |
|
{ |
|
"epoch": 1.64, |
|
"learning_rate": 0.0002, |
|
"loss": 0.5921, |
|
"step": 29500 |
|
}, |
|
{ |
|
"epoch": 1.65, |
|
"learning_rate": 0.0002, |
|
"loss": 0.5971, |
|
"step": 29600 |
|
}, |
|
{ |
|
"epoch": 1.66, |
|
"learning_rate": 0.0002, |
|
"loss": 0.6035, |
|
"step": 29700 |
|
}, |
|
{ |
|
"epoch": 1.66, |
|
"learning_rate": 0.0002, |
|
"loss": 0.6047, |
|
"step": 29800 |
|
}, |
|
{ |
|
"epoch": 1.67, |
|
"learning_rate": 0.0002, |
|
"loss": 0.5819, |
|
"step": 29900 |
|
}, |
|
{ |
|
"epoch": 1.67, |
|
"learning_rate": 0.0002, |
|
"loss": 0.6072, |
|
"step": 30000 |
|
}, |
|
{ |
|
"epoch": 1.67, |
|
"eval_gen_len": 18.99970077797726, |
|
"eval_loss": 2.629517078399658, |
|
"eval_rouge1": 25.3476, |
|
"eval_rouge2": 12.4149, |
|
"eval_rougeL": 20.9269, |
|
"eval_rougeLsum": 23.9015, |
|
"eval_runtime": 487.904, |
|
"eval_samples_per_second": 27.399, |
|
"eval_steps_per_second": 1.713, |
|
"step": 30000 |
|
}, |
|
{ |
|
"epoch": 1.68, |
|
"learning_rate": 0.0002, |
|
"loss": 0.6078, |
|
"step": 30100 |
|
}, |
|
{ |
|
"epoch": 1.68, |
|
"learning_rate": 0.0002, |
|
"loss": 0.5932, |
|
"step": 30200 |
|
}, |
|
{ |
|
"epoch": 1.69, |
|
"learning_rate": 0.0002, |
|
"loss": 0.5958, |
|
"step": 30300 |
|
}, |
|
{ |
|
"epoch": 1.69, |
|
"learning_rate": 0.0002, |
|
"loss": 0.5965, |
|
"step": 30400 |
|
}, |
|
{ |
|
"epoch": 1.7, |
|
"learning_rate": 0.0002, |
|
"loss": 0.5863, |
|
"step": 30500 |
|
}, |
|
{ |
|
"epoch": 1.71, |
|
"learning_rate": 0.0002, |
|
"loss": 0.5901, |
|
"step": 30600 |
|
}, |
|
{ |
|
"epoch": 1.71, |
|
"learning_rate": 0.0002, |
|
"loss": 0.604, |
|
"step": 30700 |
|
}, |
|
{ |
|
"epoch": 1.72, |
|
"learning_rate": 0.0002, |
|
"loss": 0.5981, |
|
"step": 30800 |
|
}, |
|
{ |
|
"epoch": 1.72, |
|
"learning_rate": 0.0002, |
|
"loss": 0.5928, |
|
"step": 30900 |
|
}, |
|
{ |
|
"epoch": 1.73, |
|
"learning_rate": 0.0002, |
|
"loss": 0.5911, |
|
"step": 31000 |
|
}, |
|
{ |
|
"epoch": 1.73, |
|
"learning_rate": 0.0002, |
|
"loss": 0.5957, |
|
"step": 31100 |
|
}, |
|
{ |
|
"epoch": 1.74, |
|
"learning_rate": 0.0002, |
|
"loss": 0.5924, |
|
"step": 31200 |
|
}, |
|
{ |
|
"epoch": 1.74, |
|
"learning_rate": 0.0002, |
|
"loss": 0.5909, |
|
"step": 31300 |
|
}, |
|
{ |
|
"epoch": 1.75, |
|
"learning_rate": 0.0002, |
|
"loss": 0.6015, |
|
"step": 31400 |
|
}, |
|
{ |
|
"epoch": 1.76, |
|
"learning_rate": 0.0002, |
|
"loss": 0.5902, |
|
"step": 31500 |
|
}, |
|
{ |
|
"epoch": 1.76, |
|
"learning_rate": 0.0002, |
|
"loss": 0.5853, |
|
"step": 31600 |
|
}, |
|
{ |
|
"epoch": 1.77, |
|
"learning_rate": 0.0002, |
|
"loss": 0.5808, |
|
"step": 31700 |
|
}, |
|
{ |
|
"epoch": 1.77, |
|
"learning_rate": 0.0002, |
|
"loss": 0.6019, |
|
"step": 31800 |
|
}, |
|
{ |
|
"epoch": 1.78, |
|
"learning_rate": 0.0002, |
|
"loss": 0.6043, |
|
"step": 31900 |
|
}, |
|
{ |
|
"epoch": 1.78, |
|
"learning_rate": 0.0002, |
|
"loss": 0.5967, |
|
"step": 32000 |
|
}, |
|
{ |
|
"epoch": 1.79, |
|
"learning_rate": 0.0002, |
|
"loss": 0.6074, |
|
"step": 32100 |
|
}, |
|
{ |
|
"epoch": 1.79, |
|
"learning_rate": 0.0002, |
|
"loss": 0.5957, |
|
"step": 32200 |
|
}, |
|
{ |
|
"epoch": 1.8, |
|
"learning_rate": 0.0002, |
|
"loss": 0.5929, |
|
"step": 32300 |
|
}, |
|
{ |
|
"epoch": 1.81, |
|
"learning_rate": 0.0002, |
|
"loss": 0.6048, |
|
"step": 32400 |
|
}, |
|
{ |
|
"epoch": 1.81, |
|
"learning_rate": 0.0002, |
|
"loss": 0.5983, |
|
"step": 32500 |
|
}, |
|
{ |
|
"epoch": 1.82, |
|
"learning_rate": 0.0002, |
|
"loss": 0.6049, |
|
"step": 32600 |
|
}, |
|
{ |
|
"epoch": 1.82, |
|
"learning_rate": 0.0002, |
|
"loss": 0.5919, |
|
"step": 32700 |
|
}, |
|
{ |
|
"epoch": 1.83, |
|
"learning_rate": 0.0002, |
|
"loss": 0.5978, |
|
"step": 32800 |
|
}, |
|
{ |
|
"epoch": 1.83, |
|
"learning_rate": 0.0002, |
|
"loss": 0.6035, |
|
"step": 32900 |
|
}, |
|
{ |
|
"epoch": 1.84, |
|
"learning_rate": 0.0002, |
|
"loss": 0.5923, |
|
"step": 33000 |
|
}, |
|
{ |
|
"epoch": 1.84, |
|
"learning_rate": 0.0002, |
|
"loss": 0.599, |
|
"step": 33100 |
|
}, |
|
{ |
|
"epoch": 1.85, |
|
"learning_rate": 0.0002, |
|
"loss": 0.6021, |
|
"step": 33200 |
|
}, |
|
{ |
|
"epoch": 1.86, |
|
"learning_rate": 0.0002, |
|
"loss": 0.6071, |
|
"step": 33300 |
|
}, |
|
{ |
|
"epoch": 1.86, |
|
"learning_rate": 0.0002, |
|
"loss": 0.6054, |
|
"step": 33400 |
|
}, |
|
{ |
|
"epoch": 1.87, |
|
"learning_rate": 0.0002, |
|
"loss": 0.6068, |
|
"step": 33500 |
|
}, |
|
{ |
|
"epoch": 1.87, |
|
"learning_rate": 0.0002, |
|
"loss": 0.6015, |
|
"step": 33600 |
|
}, |
|
{ |
|
"epoch": 1.88, |
|
"learning_rate": 0.0002, |
|
"loss": 0.6029, |
|
"step": 33700 |
|
}, |
|
{ |
|
"epoch": 1.88, |
|
"learning_rate": 0.0002, |
|
"loss": 0.6054, |
|
"step": 33800 |
|
}, |
|
{ |
|
"epoch": 1.89, |
|
"learning_rate": 0.0002, |
|
"loss": 0.6003, |
|
"step": 33900 |
|
}, |
|
{ |
|
"epoch": 1.89, |
|
"learning_rate": 0.0002, |
|
"loss": 0.6049, |
|
"step": 34000 |
|
}, |
|
{ |
|
"epoch": 1.9, |
|
"learning_rate": 0.0002, |
|
"loss": 0.5992, |
|
"step": 34100 |
|
}, |
|
{ |
|
"epoch": 1.91, |
|
"learning_rate": 0.0002, |
|
"loss": 0.605, |
|
"step": 34200 |
|
}, |
|
{ |
|
"epoch": 1.91, |
|
"learning_rate": 0.0002, |
|
"loss": 0.604, |
|
"step": 34300 |
|
}, |
|
{ |
|
"epoch": 1.92, |
|
"learning_rate": 0.0002, |
|
"loss": 0.6024, |
|
"step": 34400 |
|
}, |
|
{ |
|
"epoch": 1.92, |
|
"learning_rate": 0.0002, |
|
"loss": 0.6052, |
|
"step": 34500 |
|
}, |
|
{ |
|
"epoch": 1.93, |
|
"learning_rate": 0.0002, |
|
"loss": 0.6082, |
|
"step": 34600 |
|
}, |
|
{ |
|
"epoch": 1.93, |
|
"learning_rate": 0.0002, |
|
"loss": 0.5998, |
|
"step": 34700 |
|
}, |
|
{ |
|
"epoch": 1.94, |
|
"learning_rate": 0.0002, |
|
"loss": 0.5957, |
|
"step": 34800 |
|
}, |
|
{ |
|
"epoch": 1.94, |
|
"learning_rate": 0.0002, |
|
"loss": 0.6004, |
|
"step": 34900 |
|
}, |
|
{ |
|
"epoch": 1.95, |
|
"learning_rate": 0.0002, |
|
"loss": 0.6019, |
|
"step": 35000 |
|
}, |
|
{ |
|
"epoch": 1.96, |
|
"learning_rate": 0.0002, |
|
"loss": 0.5966, |
|
"step": 35100 |
|
}, |
|
{ |
|
"epoch": 1.96, |
|
"learning_rate": 0.0002, |
|
"loss": 0.6017, |
|
"step": 35200 |
|
}, |
|
{ |
|
"epoch": 1.97, |
|
"learning_rate": 0.0002, |
|
"loss": 0.5934, |
|
"step": 35300 |
|
}, |
|
{ |
|
"epoch": 1.97, |
|
"learning_rate": 0.0002, |
|
"loss": 0.6139, |
|
"step": 35400 |
|
}, |
|
{ |
|
"epoch": 1.98, |
|
"learning_rate": 0.0002, |
|
"loss": 0.6122, |
|
"step": 35500 |
|
}, |
|
{ |
|
"epoch": 1.98, |
|
"learning_rate": 0.0002, |
|
"loss": 0.6056, |
|
"step": 35600 |
|
}, |
|
{ |
|
"epoch": 1.99, |
|
"learning_rate": 0.0002, |
|
"loss": 0.6016, |
|
"step": 35700 |
|
}, |
|
{ |
|
"epoch": 1.99, |
|
"learning_rate": 0.0002, |
|
"loss": 0.5992, |
|
"step": 35800 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"learning_rate": 0.0002, |
|
"loss": 0.6011, |
|
"step": 35900 |
|
}, |
|
{ |
|
"epoch": 2.01, |
|
"learning_rate": 0.0002, |
|
"loss": 0.5693, |
|
"step": 36000 |
|
}, |
|
{ |
|
"epoch": 2.01, |
|
"learning_rate": 0.0002, |
|
"loss": 0.58, |
|
"step": 36100 |
|
}, |
|
{ |
|
"epoch": 2.02, |
|
"learning_rate": 0.0002, |
|
"loss": 0.5761, |
|
"step": 36200 |
|
}, |
|
{ |
|
"epoch": 2.02, |
|
"learning_rate": 0.0002, |
|
"loss": 0.5743, |
|
"step": 36300 |
|
}, |
|
{ |
|
"epoch": 2.03, |
|
"learning_rate": 0.0002, |
|
"loss": 0.596, |
|
"step": 36400 |
|
}, |
|
{ |
|
"epoch": 2.03, |
|
"learning_rate": 0.0002, |
|
"loss": 0.5819, |
|
"step": 36500 |
|
}, |
|
{ |
|
"epoch": 2.04, |
|
"learning_rate": 0.0002, |
|
"loss": 0.5774, |
|
"step": 36600 |
|
}, |
|
{ |
|
"epoch": 2.05, |
|
"learning_rate": 0.0002, |
|
"loss": 0.5728, |
|
"step": 36700 |
|
}, |
|
{ |
|
"epoch": 2.05, |
|
"learning_rate": 0.0002, |
|
"loss": 0.5731, |
|
"step": 36800 |
|
}, |
|
{ |
|
"epoch": 2.06, |
|
"learning_rate": 0.0002, |
|
"loss": 0.5581, |
|
"step": 36900 |
|
}, |
|
{ |
|
"epoch": 2.06, |
|
"learning_rate": 0.0002, |
|
"loss": 0.5765, |
|
"step": 37000 |
|
}, |
|
{ |
|
"epoch": 2.07, |
|
"learning_rate": 0.0002, |
|
"loss": 0.5834, |
|
"step": 37100 |
|
}, |
|
{ |
|
"epoch": 2.07, |
|
"learning_rate": 0.0002, |
|
"loss": 0.5782, |
|
"step": 37200 |
|
}, |
|
{ |
|
"epoch": 2.08, |
|
"learning_rate": 0.0002, |
|
"loss": 0.5666, |
|
"step": 37300 |
|
}, |
|
{ |
|
"epoch": 2.08, |
|
"learning_rate": 0.0002, |
|
"loss": 0.5876, |
|
"step": 37400 |
|
}, |
|
{ |
|
"epoch": 2.09, |
|
"learning_rate": 0.0002, |
|
"loss": 0.5814, |
|
"step": 37500 |
|
}, |
|
{ |
|
"epoch": 2.1, |
|
"learning_rate": 0.0002, |
|
"loss": 0.5725, |
|
"step": 37600 |
|
}, |
|
{ |
|
"epoch": 2.1, |
|
"learning_rate": 0.0002, |
|
"loss": 0.577, |
|
"step": 37700 |
|
}, |
|
{ |
|
"epoch": 2.11, |
|
"learning_rate": 0.0002, |
|
"loss": 0.5717, |
|
"step": 37800 |
|
}, |
|
{ |
|
"epoch": 2.11, |
|
"learning_rate": 0.0002, |
|
"loss": 0.5775, |
|
"step": 37900 |
|
}, |
|
{ |
|
"epoch": 2.12, |
|
"learning_rate": 0.0002, |
|
"loss": 0.5777, |
|
"step": 38000 |
|
}, |
|
{ |
|
"epoch": 2.12, |
|
"learning_rate": 0.0002, |
|
"loss": 0.5907, |
|
"step": 38100 |
|
}, |
|
{ |
|
"epoch": 2.13, |
|
"learning_rate": 0.0002, |
|
"loss": 0.5656, |
|
"step": 38200 |
|
}, |
|
{ |
|
"epoch": 2.13, |
|
"learning_rate": 0.0002, |
|
"loss": 0.574, |
|
"step": 38300 |
|
}, |
|
{ |
|
"epoch": 2.14, |
|
"learning_rate": 0.0002, |
|
"loss": 0.5883, |
|
"step": 38400 |
|
}, |
|
{ |
|
"epoch": 2.15, |
|
"learning_rate": 0.0002, |
|
"loss": 0.5836, |
|
"step": 38500 |
|
}, |
|
{ |
|
"epoch": 2.15, |
|
"learning_rate": 0.0002, |
|
"loss": 0.5699, |
|
"step": 38600 |
|
}, |
|
{ |
|
"epoch": 2.16, |
|
"learning_rate": 0.0002, |
|
"loss": 0.5808, |
|
"step": 38700 |
|
}, |
|
{ |
|
"epoch": 2.16, |
|
"learning_rate": 0.0002, |
|
"loss": 0.5871, |
|
"step": 38800 |
|
}, |
|
{ |
|
"epoch": 2.17, |
|
"learning_rate": 0.0002, |
|
"loss": 0.5876, |
|
"step": 38900 |
|
}, |
|
{ |
|
"epoch": 2.17, |
|
"learning_rate": 0.0002, |
|
"loss": 0.5783, |
|
"step": 39000 |
|
}, |
|
{ |
|
"epoch": 2.18, |
|
"learning_rate": 0.0002, |
|
"loss": 0.5787, |
|
"step": 39100 |
|
}, |
|
{ |
|
"epoch": 2.18, |
|
"learning_rate": 0.0002, |
|
"loss": 0.5748, |
|
"step": 39200 |
|
}, |
|
{ |
|
"epoch": 2.19, |
|
"learning_rate": 0.0002, |
|
"loss": 0.5892, |
|
"step": 39300 |
|
}, |
|
{ |
|
"epoch": 2.2, |
|
"learning_rate": 0.0002, |
|
"loss": 0.574, |
|
"step": 39400 |
|
}, |
|
{ |
|
"epoch": 2.2, |
|
"learning_rate": 0.0002, |
|
"loss": 0.5739, |
|
"step": 39500 |
|
}, |
|
{ |
|
"epoch": 2.21, |
|
"learning_rate": 0.0002, |
|
"loss": 0.5854, |
|
"step": 39600 |
|
}, |
|
{ |
|
"epoch": 2.21, |
|
"learning_rate": 0.0002, |
|
"loss": 0.5805, |
|
"step": 39700 |
|
}, |
|
{ |
|
"epoch": 2.22, |
|
"learning_rate": 0.0002, |
|
"loss": 0.583, |
|
"step": 39800 |
|
}, |
|
{ |
|
"epoch": 2.22, |
|
"learning_rate": 0.0002, |
|
"loss": 0.5755, |
|
"step": 39900 |
|
}, |
|
{ |
|
"epoch": 2.23, |
|
"learning_rate": 0.0002, |
|
"loss": 0.5823, |
|
"step": 40000 |
|
}, |
|
{ |
|
"epoch": 2.23, |
|
"eval_gen_len": 18.999326750448834, |
|
"eval_loss": 2.674076557159424, |
|
"eval_rouge1": 25.2523, |
|
"eval_rouge2": 12.374, |
|
"eval_rougeL": 20.8567, |
|
"eval_rougeLsum": 23.8483, |
|
"eval_runtime": 489.0303, |
|
"eval_samples_per_second": 27.336, |
|
"eval_steps_per_second": 1.71, |
|
"step": 40000 |
|
}, |
|
{ |
|
"epoch": 2.23, |
|
"learning_rate": 0.0002, |
|
"loss": 0.5809, |
|
"step": 40100 |
|
}, |
|
{ |
|
"epoch": 2.24, |
|
"learning_rate": 0.0002, |
|
"loss": 0.584, |
|
"step": 40200 |
|
}, |
|
{ |
|
"epoch": 2.25, |
|
"learning_rate": 0.0002, |
|
"loss": 0.5734, |
|
"step": 40300 |
|
}, |
|
{ |
|
"epoch": 2.25, |
|
"learning_rate": 0.0002, |
|
"loss": 0.5707, |
|
"step": 40400 |
|
}, |
|
{ |
|
"epoch": 2.26, |
|
"learning_rate": 0.0002, |
|
"loss": 0.5797, |
|
"step": 40500 |
|
}, |
|
{ |
|
"epoch": 2.26, |
|
"learning_rate": 0.0002, |
|
"loss": 0.5752, |
|
"step": 40600 |
|
}, |
|
{ |
|
"epoch": 2.27, |
|
"learning_rate": 0.0002, |
|
"loss": 0.5887, |
|
"step": 40700 |
|
}, |
|
{ |
|
"epoch": 2.27, |
|
"learning_rate": 0.0002, |
|
"loss": 0.5814, |
|
"step": 40800 |
|
}, |
|
{ |
|
"epoch": 2.28, |
|
"learning_rate": 0.0002, |
|
"loss": 0.5852, |
|
"step": 40900 |
|
}, |
|
{ |
|
"epoch": 2.28, |
|
"learning_rate": 0.0002, |
|
"loss": 0.5796, |
|
"step": 41000 |
|
}, |
|
{ |
|
"epoch": 2.29, |
|
"learning_rate": 0.0002, |
|
"loss": 0.5758, |
|
"step": 41100 |
|
}, |
|
{ |
|
"epoch": 2.3, |
|
"learning_rate": 0.0002, |
|
"loss": 0.5844, |
|
"step": 41200 |
|
}, |
|
{ |
|
"epoch": 2.3, |
|
"learning_rate": 0.0002, |
|
"loss": 0.5859, |
|
"step": 41300 |
|
}, |
|
{ |
|
"epoch": 2.31, |
|
"learning_rate": 0.0002, |
|
"loss": 0.5693, |
|
"step": 41400 |
|
}, |
|
{ |
|
"epoch": 2.31, |
|
"learning_rate": 0.0002, |
|
"loss": 0.5805, |
|
"step": 41500 |
|
}, |
|
{ |
|
"epoch": 2.32, |
|
"learning_rate": 0.0002, |
|
"loss": 0.5677, |
|
"step": 41600 |
|
}, |
|
{ |
|
"epoch": 2.32, |
|
"learning_rate": 0.0002, |
|
"loss": 0.5764, |
|
"step": 41700 |
|
}, |
|
{ |
|
"epoch": 2.33, |
|
"learning_rate": 0.0002, |
|
"loss": 0.568, |
|
"step": 41800 |
|
}, |
|
{ |
|
"epoch": 2.33, |
|
"learning_rate": 0.0002, |
|
"loss": 0.573, |
|
"step": 41900 |
|
}, |
|
{ |
|
"epoch": 2.34, |
|
"learning_rate": 0.0002, |
|
"loss": 0.5856, |
|
"step": 42000 |
|
}, |
|
{ |
|
"epoch": 2.35, |
|
"learning_rate": 0.0002, |
|
"loss": 0.5831, |
|
"step": 42100 |
|
}, |
|
{ |
|
"epoch": 2.35, |
|
"learning_rate": 0.0002, |
|
"loss": 0.5815, |
|
"step": 42200 |
|
}, |
|
{ |
|
"epoch": 2.36, |
|
"learning_rate": 0.0002, |
|
"loss": 0.5834, |
|
"step": 42300 |
|
}, |
|
{ |
|
"epoch": 2.36, |
|
"learning_rate": 0.0002, |
|
"loss": 0.5786, |
|
"step": 42400 |
|
}, |
|
{ |
|
"epoch": 2.37, |
|
"learning_rate": 0.0002, |
|
"loss": 0.5914, |
|
"step": 42500 |
|
}, |
|
{ |
|
"epoch": 2.37, |
|
"learning_rate": 0.0002, |
|
"loss": 0.5717, |
|
"step": 42600 |
|
}, |
|
{ |
|
"epoch": 2.38, |
|
"learning_rate": 0.0002, |
|
"loss": 0.5857, |
|
"step": 42700 |
|
}, |
|
{ |
|
"epoch": 2.39, |
|
"learning_rate": 0.0002, |
|
"loss": 0.591, |
|
"step": 42800 |
|
}, |
|
{ |
|
"epoch": 2.39, |
|
"learning_rate": 0.0002, |
|
"loss": 0.59, |
|
"step": 42900 |
|
}, |
|
{ |
|
"epoch": 2.4, |
|
"learning_rate": 0.0002, |
|
"loss": 0.5756, |
|
"step": 43000 |
|
}, |
|
{ |
|
"epoch": 2.4, |
|
"learning_rate": 0.0002, |
|
"loss": 0.5975, |
|
"step": 43100 |
|
}, |
|
{ |
|
"epoch": 2.41, |
|
"learning_rate": 0.0002, |
|
"loss": 0.5751, |
|
"step": 43200 |
|
}, |
|
{ |
|
"epoch": 2.41, |
|
"learning_rate": 0.0002, |
|
"loss": 0.5824, |
|
"step": 43300 |
|
}, |
|
{ |
|
"epoch": 2.42, |
|
"learning_rate": 0.0002, |
|
"loss": 0.5916, |
|
"step": 43400 |
|
}, |
|
{ |
|
"epoch": 2.42, |
|
"learning_rate": 0.0002, |
|
"loss": 0.575, |
|
"step": 43500 |
|
}, |
|
{ |
|
"epoch": 2.43, |
|
"learning_rate": 0.0002, |
|
"loss": 0.5894, |
|
"step": 43600 |
|
}, |
|
{ |
|
"epoch": 2.44, |
|
"learning_rate": 0.0002, |
|
"loss": 0.5894, |
|
"step": 43700 |
|
}, |
|
{ |
|
"epoch": 2.44, |
|
"learning_rate": 0.0002, |
|
"loss": 0.5753, |
|
"step": 43800 |
|
}, |
|
{ |
|
"epoch": 2.45, |
|
"learning_rate": 0.0002, |
|
"loss": 0.5857, |
|
"step": 43900 |
|
}, |
|
{ |
|
"epoch": 2.45, |
|
"learning_rate": 0.0002, |
|
"loss": 0.5766, |
|
"step": 44000 |
|
}, |
|
{ |
|
"epoch": 2.46, |
|
"learning_rate": 0.0002, |
|
"loss": 0.5763, |
|
"step": 44100 |
|
}, |
|
{ |
|
"epoch": 2.46, |
|
"learning_rate": 0.0002, |
|
"loss": 0.576, |
|
"step": 44200 |
|
}, |
|
{ |
|
"epoch": 2.47, |
|
"learning_rate": 0.0002, |
|
"loss": 0.5874, |
|
"step": 44300 |
|
}, |
|
{ |
|
"epoch": 2.47, |
|
"learning_rate": 0.0002, |
|
"loss": 0.5772, |
|
"step": 44400 |
|
}, |
|
{ |
|
"epoch": 2.48, |
|
"learning_rate": 0.0002, |
|
"loss": 0.5825, |
|
"step": 44500 |
|
}, |
|
{ |
|
"epoch": 2.49, |
|
"learning_rate": 0.0002, |
|
"loss": 0.5925, |
|
"step": 44600 |
|
}, |
|
{ |
|
"epoch": 2.49, |
|
"learning_rate": 0.0002, |
|
"loss": 0.5797, |
|
"step": 44700 |
|
}, |
|
{ |
|
"epoch": 2.5, |
|
"learning_rate": 0.0002, |
|
"loss": 0.5805, |
|
"step": 44800 |
|
}, |
|
{ |
|
"epoch": 2.5, |
|
"learning_rate": 0.0002, |
|
"loss": 0.5747, |
|
"step": 44900 |
|
}, |
|
{ |
|
"epoch": 2.51, |
|
"learning_rate": 0.0002, |
|
"loss": 0.591, |
|
"step": 45000 |
|
}, |
|
{ |
|
"epoch": 2.51, |
|
"learning_rate": 0.0002, |
|
"loss": 0.5858, |
|
"step": 45100 |
|
}, |
|
{ |
|
"epoch": 2.52, |
|
"learning_rate": 0.0002, |
|
"loss": 0.5865, |
|
"step": 45200 |
|
}, |
|
{ |
|
"epoch": 2.52, |
|
"learning_rate": 0.0002, |
|
"loss": 0.5849, |
|
"step": 45300 |
|
}, |
|
{ |
|
"epoch": 2.53, |
|
"learning_rate": 0.0002, |
|
"loss": 0.5829, |
|
"step": 45400 |
|
}, |
|
{ |
|
"epoch": 2.54, |
|
"learning_rate": 0.0002, |
|
"loss": 0.5974, |
|
"step": 45500 |
|
}, |
|
{ |
|
"epoch": 2.54, |
|
"learning_rate": 0.0002, |
|
"loss": 0.5808, |
|
"step": 45600 |
|
}, |
|
{ |
|
"epoch": 2.55, |
|
"learning_rate": 0.0002, |
|
"loss": 0.582, |
|
"step": 45700 |
|
}, |
|
{ |
|
"epoch": 2.55, |
|
"learning_rate": 0.0002, |
|
"loss": 0.5809, |
|
"step": 45800 |
|
}, |
|
{ |
|
"epoch": 2.56, |
|
"learning_rate": 0.0002, |
|
"loss": 0.5842, |
|
"step": 45900 |
|
}, |
|
{ |
|
"epoch": 2.56, |
|
"learning_rate": 0.0002, |
|
"loss": 0.5924, |
|
"step": 46000 |
|
}, |
|
{ |
|
"epoch": 2.57, |
|
"learning_rate": 0.0002, |
|
"loss": 0.5927, |
|
"step": 46100 |
|
}, |
|
{ |
|
"epoch": 2.57, |
|
"learning_rate": 0.0002, |
|
"loss": 0.6064, |
|
"step": 46200 |
|
}, |
|
{ |
|
"epoch": 2.58, |
|
"learning_rate": 0.0002, |
|
"loss": 0.5891, |
|
"step": 46300 |
|
}, |
|
{ |
|
"epoch": 2.59, |
|
"learning_rate": 0.0002, |
|
"loss": 0.5968, |
|
"step": 46400 |
|
}, |
|
{ |
|
"epoch": 2.59, |
|
"learning_rate": 0.0002, |
|
"loss": 0.5984, |
|
"step": 46500 |
|
}, |
|
{ |
|
"epoch": 2.6, |
|
"learning_rate": 0.0002, |
|
"loss": 0.5988, |
|
"step": 46600 |
|
}, |
|
{ |
|
"epoch": 2.6, |
|
"learning_rate": 0.0002, |
|
"loss": 0.5838, |
|
"step": 46700 |
|
}, |
|
{ |
|
"epoch": 2.61, |
|
"learning_rate": 0.0002, |
|
"loss": 0.5851, |
|
"step": 46800 |
|
}, |
|
{ |
|
"epoch": 2.61, |
|
"learning_rate": 0.0002, |
|
"loss": 0.5889, |
|
"step": 46900 |
|
}, |
|
{ |
|
"epoch": 2.62, |
|
"learning_rate": 0.0002, |
|
"loss": 0.5814, |
|
"step": 47000 |
|
}, |
|
{ |
|
"epoch": 2.62, |
|
"learning_rate": 0.0002, |
|
"loss": 0.5874, |
|
"step": 47100 |
|
}, |
|
{ |
|
"epoch": 2.63, |
|
"learning_rate": 0.0002, |
|
"loss": 0.5747, |
|
"step": 47200 |
|
}, |
|
{ |
|
"epoch": 2.64, |
|
"learning_rate": 0.0002, |
|
"loss": 0.5752, |
|
"step": 47300 |
|
}, |
|
{ |
|
"epoch": 2.64, |
|
"learning_rate": 0.0002, |
|
"loss": 0.5863, |
|
"step": 47400 |
|
}, |
|
{ |
|
"epoch": 2.65, |
|
"learning_rate": 0.0002, |
|
"loss": 0.5961, |
|
"step": 47500 |
|
}, |
|
{ |
|
"epoch": 2.65, |
|
"learning_rate": 0.0002, |
|
"loss": 0.5942, |
|
"step": 47600 |
|
}, |
|
{ |
|
"epoch": 2.66, |
|
"learning_rate": 0.0002, |
|
"loss": 0.5969, |
|
"step": 47700 |
|
}, |
|
{ |
|
"epoch": 2.66, |
|
"learning_rate": 0.0002, |
|
"loss": 0.5686, |
|
"step": 47800 |
|
}, |
|
{ |
|
"epoch": 2.67, |
|
"learning_rate": 0.0002, |
|
"loss": 0.5889, |
|
"step": 47900 |
|
}, |
|
{ |
|
"epoch": 2.67, |
|
"learning_rate": 0.0002, |
|
"loss": 0.5823, |
|
"step": 48000 |
|
}, |
|
{ |
|
"epoch": 2.68, |
|
"learning_rate": 0.0002, |
|
"loss": 0.5915, |
|
"step": 48100 |
|
}, |
|
{ |
|
"epoch": 2.69, |
|
"learning_rate": 0.0002, |
|
"loss": 0.5911, |
|
"step": 48200 |
|
}, |
|
{ |
|
"epoch": 2.69, |
|
"learning_rate": 0.0002, |
|
"loss": 0.5729, |
|
"step": 48300 |
|
}, |
|
{ |
|
"epoch": 2.7, |
|
"learning_rate": 0.0002, |
|
"loss": 0.5869, |
|
"step": 48400 |
|
}, |
|
{ |
|
"epoch": 2.7, |
|
"learning_rate": 0.0002, |
|
"loss": 0.5831, |
|
"step": 48500 |
|
}, |
|
{ |
|
"epoch": 2.71, |
|
"learning_rate": 0.0002, |
|
"loss": 0.5893, |
|
"step": 48600 |
|
}, |
|
{ |
|
"epoch": 2.71, |
|
"learning_rate": 0.0002, |
|
"loss": 0.5912, |
|
"step": 48700 |
|
}, |
|
{ |
|
"epoch": 2.72, |
|
"learning_rate": 0.0002, |
|
"loss": 0.5927, |
|
"step": 48800 |
|
}, |
|
{ |
|
"epoch": 2.72, |
|
"learning_rate": 0.0002, |
|
"loss": 0.5816, |
|
"step": 48900 |
|
}, |
|
{ |
|
"epoch": 2.73, |
|
"learning_rate": 0.0002, |
|
"loss": 0.5849, |
|
"step": 49000 |
|
}, |
|
{ |
|
"epoch": 2.74, |
|
"learning_rate": 0.0002, |
|
"loss": 0.5918, |
|
"step": 49100 |
|
}, |
|
{ |
|
"epoch": 2.74, |
|
"learning_rate": 0.0002, |
|
"loss": 0.5974, |
|
"step": 49200 |
|
}, |
|
{ |
|
"epoch": 2.75, |
|
"learning_rate": 0.0002, |
|
"loss": 0.5887, |
|
"step": 49300 |
|
}, |
|
{ |
|
"epoch": 2.75, |
|
"learning_rate": 0.0002, |
|
"loss": 0.5906, |
|
"step": 49400 |
|
}, |
|
{ |
|
"epoch": 2.76, |
|
"learning_rate": 0.0002, |
|
"loss": 0.5889, |
|
"step": 49500 |
|
}, |
|
{ |
|
"epoch": 2.76, |
|
"learning_rate": 0.0002, |
|
"loss": 0.5868, |
|
"step": 49600 |
|
}, |
|
{ |
|
"epoch": 2.77, |
|
"learning_rate": 0.0002, |
|
"loss": 0.5709, |
|
"step": 49700 |
|
}, |
|
{ |
|
"epoch": 2.78, |
|
"learning_rate": 0.0002, |
|
"loss": 0.5891, |
|
"step": 49800 |
|
}, |
|
{ |
|
"epoch": 2.78, |
|
"learning_rate": 0.0002, |
|
"loss": 0.6005, |
|
"step": 49900 |
|
}, |
|
{ |
|
"epoch": 2.79, |
|
"learning_rate": 0.0002, |
|
"loss": 0.6005, |
|
"step": 50000 |
|
}, |
|
{ |
|
"epoch": 2.79, |
|
"eval_gen_len": 18.999476361460204, |
|
"eval_loss": 2.6170895099639893, |
|
"eval_rouge1": 25.4354, |
|
"eval_rouge2": 12.4785, |
|
"eval_rougeL": 20.9991, |
|
"eval_rougeLsum": 24.0222, |
|
"eval_runtime": 478.2398, |
|
"eval_samples_per_second": 27.953, |
|
"eval_steps_per_second": 1.748, |
|
"step": 50000 |
|
}, |
|
{ |
|
"epoch": 2.79, |
|
"learning_rate": 0.0002, |
|
"loss": 0.5748, |
|
"step": 50100 |
|
}, |
|
{ |
|
"epoch": 2.8, |
|
"learning_rate": 0.0002, |
|
"loss": 0.6024, |
|
"step": 50200 |
|
}, |
|
{ |
|
"epoch": 2.8, |
|
"learning_rate": 0.0002, |
|
"loss": 0.5859, |
|
"step": 50300 |
|
}, |
|
{ |
|
"epoch": 2.81, |
|
"learning_rate": 0.0002, |
|
"loss": 0.5833, |
|
"step": 50400 |
|
}, |
|
{ |
|
"epoch": 2.81, |
|
"learning_rate": 0.0002, |
|
"loss": 0.5993, |
|
"step": 50500 |
|
}, |
|
{ |
|
"epoch": 2.82, |
|
"learning_rate": 0.0002, |
|
"loss": 0.5915, |
|
"step": 50600 |
|
}, |
|
{ |
|
"epoch": 2.83, |
|
"learning_rate": 0.0002, |
|
"loss": 0.6065, |
|
"step": 50700 |
|
}, |
|
{ |
|
"epoch": 2.83, |
|
"learning_rate": 0.0002, |
|
"loss": 0.588, |
|
"step": 50800 |
|
}, |
|
{ |
|
"epoch": 2.84, |
|
"learning_rate": 0.0002, |
|
"loss": 0.5919, |
|
"step": 50900 |
|
}, |
|
{ |
|
"epoch": 2.84, |
|
"learning_rate": 0.0002, |
|
"loss": 0.602, |
|
"step": 51000 |
|
}, |
|
{ |
|
"epoch": 2.85, |
|
"learning_rate": 0.0002, |
|
"loss": 0.5956, |
|
"step": 51100 |
|
}, |
|
{ |
|
"epoch": 2.85, |
|
"learning_rate": 0.0002, |
|
"loss": 0.5902, |
|
"step": 51200 |
|
}, |
|
{ |
|
"epoch": 2.86, |
|
"learning_rate": 0.0002, |
|
"loss": 0.5957, |
|
"step": 51300 |
|
}, |
|
{ |
|
"epoch": 2.86, |
|
"learning_rate": 0.0002, |
|
"loss": 0.59, |
|
"step": 51400 |
|
}, |
|
{ |
|
"epoch": 2.87, |
|
"learning_rate": 0.0002, |
|
"loss": 0.5979, |
|
"step": 51500 |
|
}, |
|
{ |
|
"epoch": 2.88, |
|
"learning_rate": 0.0002, |
|
"loss": 0.5866, |
|
"step": 51600 |
|
}, |
|
{ |
|
"epoch": 2.88, |
|
"learning_rate": 0.0002, |
|
"loss": 0.6044, |
|
"step": 51700 |
|
}, |
|
{ |
|
"epoch": 2.89, |
|
"learning_rate": 0.0002, |
|
"loss": 0.6081, |
|
"step": 51800 |
|
}, |
|
{ |
|
"epoch": 2.89, |
|
"learning_rate": 0.0002, |
|
"loss": 0.5913, |
|
"step": 51900 |
|
}, |
|
{ |
|
"epoch": 2.9, |
|
"learning_rate": 0.0002, |
|
"loss": 0.5963, |
|
"step": 52000 |
|
}, |
|
{ |
|
"epoch": 2.9, |
|
"learning_rate": 0.0002, |
|
"loss": 0.5866, |
|
"step": 52100 |
|
}, |
|
{ |
|
"epoch": 2.91, |
|
"learning_rate": 0.0002, |
|
"loss": 0.5958, |
|
"step": 52200 |
|
}, |
|
{ |
|
"epoch": 2.91, |
|
"learning_rate": 0.0002, |
|
"loss": 0.5928, |
|
"step": 52300 |
|
}, |
|
{ |
|
"epoch": 2.92, |
|
"learning_rate": 0.0002, |
|
"loss": 0.598, |
|
"step": 52400 |
|
}, |
|
{ |
|
"epoch": 2.93, |
|
"learning_rate": 0.0002, |
|
"loss": 0.5956, |
|
"step": 52500 |
|
}, |
|
{ |
|
"epoch": 2.93, |
|
"learning_rate": 0.0002, |
|
"loss": 0.5936, |
|
"step": 52600 |
|
}, |
|
{ |
|
"epoch": 2.94, |
|
"learning_rate": 0.0002, |
|
"loss": 0.5993, |
|
"step": 52700 |
|
}, |
|
{ |
|
"epoch": 2.94, |
|
"learning_rate": 0.0002, |
|
"loss": 0.595, |
|
"step": 52800 |
|
}, |
|
{ |
|
"epoch": 2.95, |
|
"learning_rate": 0.0002, |
|
"loss": 0.5832, |
|
"step": 52900 |
|
}, |
|
{ |
|
"epoch": 2.95, |
|
"learning_rate": 0.0002, |
|
"loss": 0.5852, |
|
"step": 53000 |
|
}, |
|
{ |
|
"epoch": 2.96, |
|
"learning_rate": 0.0002, |
|
"loss": 0.6021, |
|
"step": 53100 |
|
}, |
|
{ |
|
"epoch": 2.96, |
|
"learning_rate": 0.0002, |
|
"loss": 0.5775, |
|
"step": 53200 |
|
}, |
|
{ |
|
"epoch": 2.97, |
|
"learning_rate": 0.0002, |
|
"loss": 0.5884, |
|
"step": 53300 |
|
}, |
|
{ |
|
"epoch": 2.98, |
|
"learning_rate": 0.0002, |
|
"loss": 0.5869, |
|
"step": 53400 |
|
}, |
|
{ |
|
"epoch": 2.98, |
|
"learning_rate": 0.0002, |
|
"loss": 0.591, |
|
"step": 53500 |
|
}, |
|
{ |
|
"epoch": 2.99, |
|
"learning_rate": 0.0002, |
|
"loss": 0.6016, |
|
"step": 53600 |
|
}, |
|
{ |
|
"epoch": 2.99, |
|
"learning_rate": 0.0002, |
|
"loss": 0.5903, |
|
"step": 53700 |
|
}, |
|
{ |
|
"epoch": 3.0, |
|
"learning_rate": 0.0002, |
|
"loss": 0.6011, |
|
"step": 53800 |
|
}, |
|
{ |
|
"epoch": 3.0, |
|
"learning_rate": 0.0002, |
|
"loss": 0.5817, |
|
"step": 53900 |
|
}, |
|
{ |
|
"epoch": 3.01, |
|
"learning_rate": 0.0002, |
|
"loss": 0.5661, |
|
"step": 54000 |
|
}, |
|
{ |
|
"epoch": 3.01, |
|
"learning_rate": 0.0002, |
|
"loss": 0.5718, |
|
"step": 54100 |
|
}, |
|
{ |
|
"epoch": 3.02, |
|
"learning_rate": 0.0002, |
|
"loss": 0.5666, |
|
"step": 54200 |
|
}, |
|
{ |
|
"epoch": 3.03, |
|
"learning_rate": 0.0002, |
|
"loss": 0.5652, |
|
"step": 54300 |
|
}, |
|
{ |
|
"epoch": 3.03, |
|
"learning_rate": 0.0002, |
|
"loss": 0.5642, |
|
"step": 54400 |
|
}, |
|
{ |
|
"epoch": 3.04, |
|
"learning_rate": 0.0002, |
|
"loss": 0.5686, |
|
"step": 54500 |
|
}, |
|
{ |
|
"epoch": 3.04, |
|
"learning_rate": 0.0002, |
|
"loss": 0.5641, |
|
"step": 54600 |
|
}, |
|
{ |
|
"epoch": 3.05, |
|
"learning_rate": 0.0002, |
|
"loss": 0.5638, |
|
"step": 54700 |
|
}, |
|
{ |
|
"epoch": 3.05, |
|
"learning_rate": 0.0002, |
|
"loss": 0.5696, |
|
"step": 54800 |
|
}, |
|
{ |
|
"epoch": 3.06, |
|
"learning_rate": 0.0002, |
|
"loss": 0.5741, |
|
"step": 54900 |
|
}, |
|
{ |
|
"epoch": 3.06, |
|
"learning_rate": 0.0002, |
|
"loss": 0.5615, |
|
"step": 55000 |
|
}, |
|
{ |
|
"epoch": 3.07, |
|
"learning_rate": 0.0002, |
|
"loss": 0.5784, |
|
"step": 55100 |
|
}, |
|
{ |
|
"epoch": 3.08, |
|
"learning_rate": 0.0002, |
|
"loss": 0.5653, |
|
"step": 55200 |
|
}, |
|
{ |
|
"epoch": 3.08, |
|
"learning_rate": 0.0002, |
|
"loss": 0.5747, |
|
"step": 55300 |
|
}, |
|
{ |
|
"epoch": 3.09, |
|
"learning_rate": 0.0002, |
|
"loss": 0.5537, |
|
"step": 55400 |
|
}, |
|
{ |
|
"epoch": 3.09, |
|
"learning_rate": 0.0002, |
|
"loss": 0.5715, |
|
"step": 55500 |
|
}, |
|
{ |
|
"epoch": 3.1, |
|
"learning_rate": 0.0002, |
|
"loss": 0.5653, |
|
"step": 55600 |
|
}, |
|
{ |
|
"epoch": 3.1, |
|
"learning_rate": 0.0002, |
|
"loss": 0.5816, |
|
"step": 55700 |
|
}, |
|
{ |
|
"epoch": 3.11, |
|
"learning_rate": 0.0002, |
|
"loss": 0.5714, |
|
"step": 55800 |
|
}, |
|
{ |
|
"epoch": 3.12, |
|
"learning_rate": 0.0002, |
|
"loss": 0.5657, |
|
"step": 55900 |
|
}, |
|
{ |
|
"epoch": 3.12, |
|
"learning_rate": 0.0002, |
|
"loss": 0.5628, |
|
"step": 56000 |
|
}, |
|
{ |
|
"epoch": 3.13, |
|
"learning_rate": 0.0002, |
|
"loss": 0.5718, |
|
"step": 56100 |
|
}, |
|
{ |
|
"epoch": 3.13, |
|
"learning_rate": 0.0002, |
|
"loss": 0.5609, |
|
"step": 56200 |
|
}, |
|
{ |
|
"epoch": 3.14, |
|
"learning_rate": 0.0002, |
|
"loss": 0.567, |
|
"step": 56300 |
|
}, |
|
{ |
|
"epoch": 3.14, |
|
"learning_rate": 0.0002, |
|
"loss": 0.5624, |
|
"step": 56400 |
|
}, |
|
{ |
|
"epoch": 3.15, |
|
"learning_rate": 0.0002, |
|
"loss": 0.5781, |
|
"step": 56500 |
|
}, |
|
{ |
|
"epoch": 3.15, |
|
"learning_rate": 0.0002, |
|
"loss": 0.5673, |
|
"step": 56600 |
|
}, |
|
{ |
|
"epoch": 3.16, |
|
"learning_rate": 0.0002, |
|
"loss": 0.5561, |
|
"step": 56700 |
|
}, |
|
{ |
|
"epoch": 3.17, |
|
"learning_rate": 0.0002, |
|
"loss": 0.5635, |
|
"step": 56800 |
|
}, |
|
{ |
|
"epoch": 3.17, |
|
"learning_rate": 0.0002, |
|
"loss": 0.5719, |
|
"step": 56900 |
|
}, |
|
{ |
|
"epoch": 3.18, |
|
"learning_rate": 0.0002, |
|
"loss": 0.5809, |
|
"step": 57000 |
|
}, |
|
{ |
|
"epoch": 3.18, |
|
"learning_rate": 0.0002, |
|
"loss": 0.577, |
|
"step": 57100 |
|
}, |
|
{ |
|
"epoch": 3.19, |
|
"learning_rate": 0.0002, |
|
"loss": 0.5801, |
|
"step": 57200 |
|
}, |
|
{ |
|
"epoch": 3.19, |
|
"learning_rate": 0.0002, |
|
"loss": 0.5742, |
|
"step": 57300 |
|
}, |
|
{ |
|
"epoch": 3.2, |
|
"learning_rate": 0.0002, |
|
"loss": 0.5615, |
|
"step": 57400 |
|
}, |
|
{ |
|
"epoch": 3.2, |
|
"learning_rate": 0.0002, |
|
"loss": 0.5636, |
|
"step": 57500 |
|
}, |
|
{ |
|
"epoch": 3.21, |
|
"learning_rate": 0.0002, |
|
"loss": 0.5679, |
|
"step": 57600 |
|
}, |
|
{ |
|
"epoch": 3.22, |
|
"learning_rate": 0.0002, |
|
"loss": 0.5935, |
|
"step": 57700 |
|
}, |
|
{ |
|
"epoch": 3.22, |
|
"learning_rate": 0.0002, |
|
"loss": 0.5807, |
|
"step": 57800 |
|
}, |
|
{ |
|
"epoch": 3.23, |
|
"learning_rate": 0.0002, |
|
"loss": 0.564, |
|
"step": 57900 |
|
}, |
|
{ |
|
"epoch": 3.23, |
|
"learning_rate": 0.0002, |
|
"loss": 0.5651, |
|
"step": 58000 |
|
}, |
|
{ |
|
"epoch": 3.24, |
|
"learning_rate": 0.0002, |
|
"loss": 0.5757, |
|
"step": 58100 |
|
}, |
|
{ |
|
"epoch": 3.24, |
|
"learning_rate": 0.0002, |
|
"loss": 0.5674, |
|
"step": 58200 |
|
}, |
|
{ |
|
"epoch": 3.25, |
|
"learning_rate": 0.0002, |
|
"loss": 0.576, |
|
"step": 58300 |
|
}, |
|
{ |
|
"epoch": 3.25, |
|
"learning_rate": 0.0002, |
|
"loss": 0.5661, |
|
"step": 58400 |
|
}, |
|
{ |
|
"epoch": 3.26, |
|
"learning_rate": 0.0002, |
|
"loss": 0.5748, |
|
"step": 58500 |
|
}, |
|
{ |
|
"epoch": 3.27, |
|
"learning_rate": 0.0002, |
|
"loss": 0.5732, |
|
"step": 58600 |
|
}, |
|
{ |
|
"epoch": 3.27, |
|
"learning_rate": 0.0002, |
|
"loss": 0.5791, |
|
"step": 58700 |
|
}, |
|
{ |
|
"epoch": 3.28, |
|
"learning_rate": 0.0002, |
|
"loss": 0.5658, |
|
"step": 58800 |
|
}, |
|
{ |
|
"epoch": 3.28, |
|
"learning_rate": 0.0002, |
|
"loss": 0.577, |
|
"step": 58900 |
|
}, |
|
{ |
|
"epoch": 3.29, |
|
"learning_rate": 0.0002, |
|
"loss": 0.5809, |
|
"step": 59000 |
|
}, |
|
{ |
|
"epoch": 3.29, |
|
"learning_rate": 0.0002, |
|
"loss": 0.5807, |
|
"step": 59100 |
|
}, |
|
{ |
|
"epoch": 3.3, |
|
"learning_rate": 0.0002, |
|
"loss": 0.5686, |
|
"step": 59200 |
|
}, |
|
{ |
|
"epoch": 3.3, |
|
"learning_rate": 0.0002, |
|
"loss": 0.5679, |
|
"step": 59300 |
|
}, |
|
{ |
|
"epoch": 3.31, |
|
"learning_rate": 0.0002, |
|
"loss": 0.5861, |
|
"step": 59400 |
|
}, |
|
{ |
|
"epoch": 3.32, |
|
"learning_rate": 0.0002, |
|
"loss": 0.5753, |
|
"step": 59500 |
|
}, |
|
{ |
|
"epoch": 3.32, |
|
"learning_rate": 0.0002, |
|
"loss": 0.5636, |
|
"step": 59600 |
|
}, |
|
{ |
|
"epoch": 3.33, |
|
"learning_rate": 0.0002, |
|
"loss": 0.5711, |
|
"step": 59700 |
|
}, |
|
{ |
|
"epoch": 3.33, |
|
"learning_rate": 0.0002, |
|
"loss": 0.5762, |
|
"step": 59800 |
|
}, |
|
{ |
|
"epoch": 3.34, |
|
"learning_rate": 0.0002, |
|
"loss": 0.5712, |
|
"step": 59900 |
|
}, |
|
{ |
|
"epoch": 3.34, |
|
"learning_rate": 0.0002, |
|
"loss": 0.5757, |
|
"step": 60000 |
|
}, |
|
{ |
|
"epoch": 3.34, |
|
"eval_gen_len": 18.999925194494313, |
|
"eval_loss": 2.659139394760132, |
|
"eval_rouge1": 25.3502, |
|
"eval_rouge2": 12.4437, |
|
"eval_rougeL": 20.9208, |
|
"eval_rougeLsum": 23.9305, |
|
"eval_runtime": 475.715, |
|
"eval_samples_per_second": 28.101, |
|
"eval_steps_per_second": 1.757, |
|
"step": 60000 |
|
}, |
|
{ |
|
"epoch": 3.35, |
|
"learning_rate": 0.0002, |
|
"loss": 0.5747, |
|
"step": 60100 |
|
}, |
|
{ |
|
"epoch": 3.35, |
|
"learning_rate": 0.0002, |
|
"loss": 0.5769, |
|
"step": 60200 |
|
}, |
|
{ |
|
"epoch": 3.36, |
|
"learning_rate": 0.0002, |
|
"loss": 0.5794, |
|
"step": 60300 |
|
}, |
|
{ |
|
"epoch": 3.37, |
|
"learning_rate": 0.0002, |
|
"loss": 0.581, |
|
"step": 60400 |
|
}, |
|
{ |
|
"epoch": 3.37, |
|
"learning_rate": 0.0002, |
|
"loss": 0.5785, |
|
"step": 60500 |
|
}, |
|
{ |
|
"epoch": 3.38, |
|
"learning_rate": 0.0002, |
|
"loss": 0.5871, |
|
"step": 60600 |
|
}, |
|
{ |
|
"epoch": 3.38, |
|
"learning_rate": 0.0002, |
|
"loss": 0.5682, |
|
"step": 60700 |
|
}, |
|
{ |
|
"epoch": 3.39, |
|
"learning_rate": 0.0002, |
|
"loss": 0.5752, |
|
"step": 60800 |
|
}, |
|
{ |
|
"epoch": 3.39, |
|
"learning_rate": 0.0002, |
|
"loss": 0.5789, |
|
"step": 60900 |
|
}, |
|
{ |
|
"epoch": 3.4, |
|
"learning_rate": 0.0002, |
|
"loss": 0.5722, |
|
"step": 61000 |
|
}, |
|
{ |
|
"epoch": 3.4, |
|
"learning_rate": 0.0002, |
|
"loss": 0.5862, |
|
"step": 61100 |
|
}, |
|
{ |
|
"epoch": 3.41, |
|
"learning_rate": 0.0002, |
|
"loss": 0.5649, |
|
"step": 61200 |
|
}, |
|
{ |
|
"epoch": 3.42, |
|
"learning_rate": 0.0002, |
|
"loss": 0.5761, |
|
"step": 61300 |
|
}, |
|
{ |
|
"epoch": 3.42, |
|
"learning_rate": 0.0002, |
|
"loss": 0.5655, |
|
"step": 61400 |
|
}, |
|
{ |
|
"epoch": 3.43, |
|
"learning_rate": 0.0002, |
|
"loss": 0.5642, |
|
"step": 61500 |
|
}, |
|
{ |
|
"epoch": 3.43, |
|
"learning_rate": 0.0002, |
|
"loss": 0.573, |
|
"step": 61600 |
|
}, |
|
{ |
|
"epoch": 3.44, |
|
"learning_rate": 0.0002, |
|
"loss": 0.5891, |
|
"step": 61700 |
|
}, |
|
{ |
|
"epoch": 3.44, |
|
"learning_rate": 0.0002, |
|
"loss": 0.579, |
|
"step": 61800 |
|
}, |
|
{ |
|
"epoch": 3.45, |
|
"learning_rate": 0.0002, |
|
"loss": 0.5825, |
|
"step": 61900 |
|
}, |
|
{ |
|
"epoch": 3.46, |
|
"learning_rate": 0.0002, |
|
"loss": 0.5834, |
|
"step": 62000 |
|
}, |
|
{ |
|
"epoch": 3.46, |
|
"learning_rate": 0.0002, |
|
"loss": 0.5807, |
|
"step": 62100 |
|
}, |
|
{ |
|
"epoch": 3.47, |
|
"learning_rate": 0.0002, |
|
"loss": 0.5752, |
|
"step": 62200 |
|
}, |
|
{ |
|
"epoch": 3.47, |
|
"learning_rate": 0.0002, |
|
"loss": 0.5666, |
|
"step": 62300 |
|
}, |
|
{ |
|
"epoch": 3.48, |
|
"learning_rate": 0.0002, |
|
"loss": 0.5886, |
|
"step": 62400 |
|
}, |
|
{ |
|
"epoch": 3.48, |
|
"learning_rate": 0.0002, |
|
"loss": 0.5913, |
|
"step": 62500 |
|
}, |
|
{ |
|
"epoch": 3.49, |
|
"learning_rate": 0.0002, |
|
"loss": 0.5877, |
|
"step": 62600 |
|
}, |
|
{ |
|
"epoch": 3.49, |
|
"learning_rate": 0.0002, |
|
"loss": 0.5842, |
|
"step": 62700 |
|
}, |
|
{ |
|
"epoch": 3.5, |
|
"learning_rate": 0.0002, |
|
"loss": 0.5786, |
|
"step": 62800 |
|
}, |
|
{ |
|
"epoch": 3.51, |
|
"learning_rate": 0.0002, |
|
"loss": 0.5848, |
|
"step": 62900 |
|
}, |
|
{ |
|
"epoch": 3.51, |
|
"learning_rate": 0.0002, |
|
"loss": 0.5735, |
|
"step": 63000 |
|
}, |
|
{ |
|
"epoch": 3.52, |
|
"learning_rate": 0.0002, |
|
"loss": 0.5793, |
|
"step": 63100 |
|
}, |
|
{ |
|
"epoch": 3.52, |
|
"learning_rate": 0.0002, |
|
"loss": 0.5799, |
|
"step": 63200 |
|
}, |
|
{ |
|
"epoch": 3.53, |
|
"learning_rate": 0.0002, |
|
"loss": 0.5792, |
|
"step": 63300 |
|
}, |
|
{ |
|
"epoch": 3.53, |
|
"learning_rate": 0.0002, |
|
"loss": 0.5679, |
|
"step": 63400 |
|
}, |
|
{ |
|
"epoch": 3.54, |
|
"learning_rate": 0.0002, |
|
"loss": 0.579, |
|
"step": 63500 |
|
}, |
|
{ |
|
"epoch": 3.54, |
|
"learning_rate": 0.0002, |
|
"loss": 0.5686, |
|
"step": 63600 |
|
}, |
|
{ |
|
"epoch": 3.55, |
|
"learning_rate": 0.0002, |
|
"loss": 0.5718, |
|
"step": 63700 |
|
}, |
|
{ |
|
"epoch": 3.56, |
|
"learning_rate": 0.0002, |
|
"loss": 0.5702, |
|
"step": 63800 |
|
}, |
|
{ |
|
"epoch": 3.56, |
|
"learning_rate": 0.0002, |
|
"loss": 0.5801, |
|
"step": 63900 |
|
}, |
|
{ |
|
"epoch": 3.57, |
|
"learning_rate": 0.0002, |
|
"loss": 0.5804, |
|
"step": 64000 |
|
}, |
|
{ |
|
"epoch": 3.57, |
|
"learning_rate": 0.0002, |
|
"loss": 0.568, |
|
"step": 64100 |
|
}, |
|
{ |
|
"epoch": 3.58, |
|
"learning_rate": 0.0002, |
|
"loss": 0.5687, |
|
"step": 64200 |
|
}, |
|
{ |
|
"epoch": 3.58, |
|
"learning_rate": 0.0002, |
|
"loss": 0.5794, |
|
"step": 64300 |
|
}, |
|
{ |
|
"epoch": 3.59, |
|
"learning_rate": 0.0002, |
|
"loss": 0.5774, |
|
"step": 64400 |
|
}, |
|
{ |
|
"epoch": 3.59, |
|
"learning_rate": 0.0002, |
|
"loss": 0.5788, |
|
"step": 64500 |
|
}, |
|
{ |
|
"epoch": 3.6, |
|
"learning_rate": 0.0002, |
|
"loss": 0.596, |
|
"step": 64600 |
|
}, |
|
{ |
|
"epoch": 3.61, |
|
"learning_rate": 0.0002, |
|
"loss": 0.5814, |
|
"step": 64700 |
|
}, |
|
{ |
|
"epoch": 3.61, |
|
"learning_rate": 0.0002, |
|
"loss": 0.5777, |
|
"step": 64800 |
|
}, |
|
{ |
|
"epoch": 3.62, |
|
"learning_rate": 0.0002, |
|
"loss": 0.5811, |
|
"step": 64900 |
|
}, |
|
{ |
|
"epoch": 3.62, |
|
"learning_rate": 0.0002, |
|
"loss": 0.584, |
|
"step": 65000 |
|
}, |
|
{ |
|
"epoch": 3.63, |
|
"learning_rate": 0.0002, |
|
"loss": 0.5806, |
|
"step": 65100 |
|
}, |
|
{ |
|
"epoch": 3.63, |
|
"learning_rate": 0.0002, |
|
"loss": 0.5709, |
|
"step": 65200 |
|
}, |
|
{ |
|
"epoch": 3.64, |
|
"learning_rate": 0.0002, |
|
"loss": 0.5723, |
|
"step": 65300 |
|
}, |
|
{ |
|
"epoch": 3.64, |
|
"learning_rate": 0.0002, |
|
"loss": 0.5644, |
|
"step": 65400 |
|
}, |
|
{ |
|
"epoch": 3.65, |
|
"learning_rate": 0.0002, |
|
"loss": 0.579, |
|
"step": 65500 |
|
}, |
|
{ |
|
"epoch": 3.66, |
|
"learning_rate": 0.0002, |
|
"loss": 0.574, |
|
"step": 65600 |
|
}, |
|
{ |
|
"epoch": 3.66, |
|
"learning_rate": 0.0002, |
|
"loss": 0.5685, |
|
"step": 65700 |
|
}, |
|
{ |
|
"epoch": 3.67, |
|
"learning_rate": 0.0002, |
|
"loss": 0.5842, |
|
"step": 65800 |
|
}, |
|
{ |
|
"epoch": 3.67, |
|
"learning_rate": 0.0002, |
|
"loss": 0.585, |
|
"step": 65900 |
|
}, |
|
{ |
|
"epoch": 3.68, |
|
"learning_rate": 0.0002, |
|
"loss": 0.5808, |
|
"step": 66000 |
|
}, |
|
{ |
|
"epoch": 3.68, |
|
"learning_rate": 0.0002, |
|
"loss": 0.5851, |
|
"step": 66100 |
|
}, |
|
{ |
|
"epoch": 3.69, |
|
"learning_rate": 0.0002, |
|
"loss": 0.5732, |
|
"step": 66200 |
|
}, |
|
{ |
|
"epoch": 3.69, |
|
"learning_rate": 0.0002, |
|
"loss": 0.5807, |
|
"step": 66300 |
|
}, |
|
{ |
|
"epoch": 3.7, |
|
"learning_rate": 0.0002, |
|
"loss": 0.5777, |
|
"step": 66400 |
|
}, |
|
{ |
|
"epoch": 3.71, |
|
"learning_rate": 0.0002, |
|
"loss": 0.5786, |
|
"step": 66500 |
|
}, |
|
{ |
|
"epoch": 3.71, |
|
"learning_rate": 0.0002, |
|
"loss": 0.5753, |
|
"step": 66600 |
|
}, |
|
{ |
|
"epoch": 3.72, |
|
"learning_rate": 0.0002, |
|
"loss": 0.5784, |
|
"step": 66700 |
|
}, |
|
{ |
|
"epoch": 3.72, |
|
"learning_rate": 0.0002, |
|
"loss": 0.5821, |
|
"step": 66800 |
|
}, |
|
{ |
|
"epoch": 3.73, |
|
"learning_rate": 0.0002, |
|
"loss": 0.5891, |
|
"step": 66900 |
|
}, |
|
{ |
|
"epoch": 3.73, |
|
"learning_rate": 0.0002, |
|
"loss": 0.5765, |
|
"step": 67000 |
|
}, |
|
{ |
|
"epoch": 3.74, |
|
"learning_rate": 0.0002, |
|
"loss": 0.58, |
|
"step": 67100 |
|
}, |
|
{ |
|
"epoch": 3.74, |
|
"learning_rate": 0.0002, |
|
"loss": 0.5853, |
|
"step": 67200 |
|
}, |
|
{ |
|
"epoch": 3.75, |
|
"learning_rate": 0.0002, |
|
"loss": 0.5872, |
|
"step": 67300 |
|
}, |
|
{ |
|
"epoch": 3.76, |
|
"learning_rate": 0.0002, |
|
"loss": 0.5754, |
|
"step": 67400 |
|
}, |
|
{ |
|
"epoch": 3.76, |
|
"learning_rate": 0.0002, |
|
"loss": 0.5762, |
|
"step": 67500 |
|
}, |
|
{ |
|
"epoch": 3.77, |
|
"learning_rate": 0.0002, |
|
"loss": 0.5862, |
|
"step": 67600 |
|
}, |
|
{ |
|
"epoch": 3.77, |
|
"learning_rate": 0.0002, |
|
"loss": 0.5955, |
|
"step": 67700 |
|
}, |
|
{ |
|
"epoch": 3.78, |
|
"learning_rate": 0.0002, |
|
"loss": 0.5846, |
|
"step": 67800 |
|
}, |
|
{ |
|
"epoch": 3.78, |
|
"learning_rate": 0.0002, |
|
"loss": 0.5797, |
|
"step": 67900 |
|
}, |
|
{ |
|
"epoch": 3.79, |
|
"learning_rate": 0.0002, |
|
"loss": 0.5913, |
|
"step": 68000 |
|
}, |
|
{ |
|
"epoch": 3.79, |
|
"learning_rate": 0.0002, |
|
"loss": 0.5758, |
|
"step": 68100 |
|
}, |
|
{ |
|
"epoch": 3.8, |
|
"learning_rate": 0.0002, |
|
"loss": 0.586, |
|
"step": 68200 |
|
}, |
|
{ |
|
"epoch": 3.81, |
|
"learning_rate": 0.0002, |
|
"loss": 0.5766, |
|
"step": 68300 |
|
}, |
|
{ |
|
"epoch": 3.81, |
|
"learning_rate": 0.0002, |
|
"loss": 0.5887, |
|
"step": 68400 |
|
}, |
|
{ |
|
"epoch": 3.82, |
|
"learning_rate": 0.0002, |
|
"loss": 0.5903, |
|
"step": 68500 |
|
}, |
|
{ |
|
"epoch": 3.82, |
|
"learning_rate": 0.0002, |
|
"loss": 0.5825, |
|
"step": 68600 |
|
}, |
|
{ |
|
"epoch": 3.83, |
|
"learning_rate": 0.0002, |
|
"loss": 0.5884, |
|
"step": 68700 |
|
}, |
|
{ |
|
"epoch": 3.83, |
|
"learning_rate": 0.0002, |
|
"loss": 0.5784, |
|
"step": 68800 |
|
}, |
|
{ |
|
"epoch": 3.84, |
|
"learning_rate": 0.0002, |
|
"loss": 0.5985, |
|
"step": 68900 |
|
}, |
|
{ |
|
"epoch": 3.85, |
|
"learning_rate": 0.0002, |
|
"loss": 0.6003, |
|
"step": 69000 |
|
}, |
|
{ |
|
"epoch": 3.85, |
|
"learning_rate": 0.0002, |
|
"loss": 0.5723, |
|
"step": 69100 |
|
}, |
|
{ |
|
"epoch": 3.86, |
|
"learning_rate": 0.0002, |
|
"loss": 0.5924, |
|
"step": 69200 |
|
}, |
|
{ |
|
"epoch": 3.86, |
|
"learning_rate": 0.0002, |
|
"loss": 0.5921, |
|
"step": 69300 |
|
}, |
|
{ |
|
"epoch": 3.87, |
|
"learning_rate": 0.0002, |
|
"loss": 0.5882, |
|
"step": 69400 |
|
}, |
|
{ |
|
"epoch": 3.87, |
|
"learning_rate": 0.0002, |
|
"loss": 0.5845, |
|
"step": 69500 |
|
}, |
|
{ |
|
"epoch": 3.88, |
|
"learning_rate": 0.0002, |
|
"loss": 0.5883, |
|
"step": 69600 |
|
}, |
|
{ |
|
"epoch": 3.88, |
|
"learning_rate": 0.0002, |
|
"loss": 0.5887, |
|
"step": 69700 |
|
}, |
|
{ |
|
"epoch": 3.89, |
|
"learning_rate": 0.0002, |
|
"loss": 0.5837, |
|
"step": 69800 |
|
}, |
|
{ |
|
"epoch": 3.9, |
|
"learning_rate": 0.0002, |
|
"loss": 0.5772, |
|
"step": 69900 |
|
}, |
|
{ |
|
"epoch": 3.9, |
|
"learning_rate": 0.0002, |
|
"loss": 0.5861, |
|
"step": 70000 |
|
}, |
|
{ |
|
"epoch": 3.9, |
|
"eval_gen_len": 18.99985038898863, |
|
"eval_loss": 2.6559603214263916, |
|
"eval_rouge1": 25.3295, |
|
"eval_rouge2": 12.3962, |
|
"eval_rougeL": 20.8838, |
|
"eval_rougeLsum": 23.8838, |
|
"eval_runtime": 478.6986, |
|
"eval_samples_per_second": 27.926, |
|
"eval_steps_per_second": 1.746, |
|
"step": 70000 |
|
}, |
|
{ |
|
"epoch": 3.91, |
|
"learning_rate": 0.0002, |
|
"loss": 0.5741, |
|
"step": 70100 |
|
}, |
|
{ |
|
"epoch": 3.91, |
|
"learning_rate": 0.0002, |
|
"loss": 0.5985, |
|
"step": 70200 |
|
}, |
|
{ |
|
"epoch": 3.92, |
|
"learning_rate": 0.0002, |
|
"loss": 0.5858, |
|
"step": 70300 |
|
}, |
|
{ |
|
"epoch": 3.92, |
|
"learning_rate": 0.0002, |
|
"loss": 0.5875, |
|
"step": 70400 |
|
}, |
|
{ |
|
"epoch": 3.93, |
|
"learning_rate": 0.0002, |
|
"loss": 0.5933, |
|
"step": 70500 |
|
}, |
|
{ |
|
"epoch": 3.93, |
|
"learning_rate": 0.0002, |
|
"loss": 0.5907, |
|
"step": 70600 |
|
}, |
|
{ |
|
"epoch": 3.94, |
|
"learning_rate": 0.0002, |
|
"loss": 0.5862, |
|
"step": 70700 |
|
}, |
|
{ |
|
"epoch": 3.95, |
|
"learning_rate": 0.0002, |
|
"loss": 0.581, |
|
"step": 70800 |
|
}, |
|
{ |
|
"epoch": 3.95, |
|
"learning_rate": 0.0002, |
|
"loss": 0.5837, |
|
"step": 70900 |
|
}, |
|
{ |
|
"epoch": 3.96, |
|
"learning_rate": 0.0002, |
|
"loss": 0.5949, |
|
"step": 71000 |
|
}, |
|
{ |
|
"epoch": 3.96, |
|
"learning_rate": 0.0002, |
|
"loss": 0.5737, |
|
"step": 71100 |
|
}, |
|
{ |
|
"epoch": 3.97, |
|
"learning_rate": 0.0002, |
|
"loss": 0.5864, |
|
"step": 71200 |
|
}, |
|
{ |
|
"epoch": 3.97, |
|
"learning_rate": 0.0002, |
|
"loss": 0.5771, |
|
"step": 71300 |
|
}, |
|
{ |
|
"epoch": 3.98, |
|
"learning_rate": 0.0002, |
|
"loss": 0.5926, |
|
"step": 71400 |
|
}, |
|
{ |
|
"epoch": 3.98, |
|
"learning_rate": 0.0002, |
|
"loss": 0.572, |
|
"step": 71500 |
|
}, |
|
{ |
|
"epoch": 3.99, |
|
"learning_rate": 0.0002, |
|
"loss": 0.5779, |
|
"step": 71600 |
|
}, |
|
{ |
|
"epoch": 4.0, |
|
"learning_rate": 0.0002, |
|
"loss": 0.5775, |
|
"step": 71700 |
|
}, |
|
{ |
|
"epoch": 4.0, |
|
"learning_rate": 0.0002, |
|
"loss": 0.5602, |
|
"step": 71800 |
|
}, |
|
{ |
|
"epoch": 4.01, |
|
"learning_rate": 0.0002, |
|
"loss": 0.5671, |
|
"step": 71900 |
|
}, |
|
{ |
|
"epoch": 4.01, |
|
"learning_rate": 0.0002, |
|
"loss": 0.5618, |
|
"step": 72000 |
|
}, |
|
{ |
|
"epoch": 4.02, |
|
"learning_rate": 0.0002, |
|
"loss": 0.5457, |
|
"step": 72100 |
|
}, |
|
{ |
|
"epoch": 4.02, |
|
"learning_rate": 0.0002, |
|
"loss": 0.5569, |
|
"step": 72200 |
|
}, |
|
{ |
|
"epoch": 4.03, |
|
"learning_rate": 0.0002, |
|
"loss": 0.5486, |
|
"step": 72300 |
|
}, |
|
{ |
|
"epoch": 4.03, |
|
"learning_rate": 0.0002, |
|
"loss": 0.563, |
|
"step": 72400 |
|
}, |
|
{ |
|
"epoch": 4.04, |
|
"learning_rate": 0.0002, |
|
"loss": 0.562, |
|
"step": 72500 |
|
}, |
|
{ |
|
"epoch": 4.05, |
|
"learning_rate": 0.0002, |
|
"loss": 0.5526, |
|
"step": 72600 |
|
}, |
|
{ |
|
"epoch": 4.05, |
|
"learning_rate": 0.0002, |
|
"loss": 0.5637, |
|
"step": 72700 |
|
}, |
|
{ |
|
"epoch": 4.06, |
|
"learning_rate": 0.0002, |
|
"loss": 0.5603, |
|
"step": 72800 |
|
}, |
|
{ |
|
"epoch": 4.06, |
|
"learning_rate": 0.0002, |
|
"loss": 0.5511, |
|
"step": 72900 |
|
}, |
|
{ |
|
"epoch": 4.07, |
|
"learning_rate": 0.0002, |
|
"loss": 0.5513, |
|
"step": 73000 |
|
}, |
|
{ |
|
"epoch": 4.07, |
|
"learning_rate": 0.0002, |
|
"loss": 0.563, |
|
"step": 73100 |
|
}, |
|
{ |
|
"epoch": 4.08, |
|
"learning_rate": 0.0002, |
|
"loss": 0.5532, |
|
"step": 73200 |
|
}, |
|
{ |
|
"epoch": 4.08, |
|
"learning_rate": 0.0002, |
|
"loss": 0.5475, |
|
"step": 73300 |
|
}, |
|
{ |
|
"epoch": 4.09, |
|
"learning_rate": 0.0002, |
|
"loss": 0.5603, |
|
"step": 73400 |
|
}, |
|
{ |
|
"epoch": 4.1, |
|
"learning_rate": 0.0002, |
|
"loss": 0.5531, |
|
"step": 73500 |
|
}, |
|
{ |
|
"epoch": 4.1, |
|
"learning_rate": 0.0002, |
|
"loss": 0.5607, |
|
"step": 73600 |
|
}, |
|
{ |
|
"epoch": 4.11, |
|
"learning_rate": 0.0002, |
|
"loss": 0.5731, |
|
"step": 73700 |
|
}, |
|
{ |
|
"epoch": 4.11, |
|
"learning_rate": 0.0002, |
|
"loss": 0.575, |
|
"step": 73800 |
|
}, |
|
{ |
|
"epoch": 4.12, |
|
"learning_rate": 0.0002, |
|
"loss": 0.5583, |
|
"step": 73900 |
|
}, |
|
{ |
|
"epoch": 4.12, |
|
"learning_rate": 0.0002, |
|
"loss": 0.5499, |
|
"step": 74000 |
|
}, |
|
{ |
|
"epoch": 4.13, |
|
"learning_rate": 0.0002, |
|
"loss": 0.5669, |
|
"step": 74100 |
|
}, |
|
{ |
|
"epoch": 4.13, |
|
"learning_rate": 0.0002, |
|
"loss": 0.5584, |
|
"step": 74200 |
|
}, |
|
{ |
|
"epoch": 4.14, |
|
"learning_rate": 0.0002, |
|
"loss": 0.5679, |
|
"step": 74300 |
|
}, |
|
{ |
|
"epoch": 4.15, |
|
"learning_rate": 0.0002, |
|
"loss": 0.5709, |
|
"step": 74400 |
|
}, |
|
{ |
|
"epoch": 4.15, |
|
"learning_rate": 0.0002, |
|
"loss": 0.5598, |
|
"step": 74500 |
|
}, |
|
{ |
|
"epoch": 4.16, |
|
"learning_rate": 0.0002, |
|
"loss": 0.5663, |
|
"step": 74600 |
|
}, |
|
{ |
|
"epoch": 4.16, |
|
"learning_rate": 0.0002, |
|
"loss": 0.5663, |
|
"step": 74700 |
|
}, |
|
{ |
|
"epoch": 4.17, |
|
"learning_rate": 0.0002, |
|
"loss": 0.5692, |
|
"step": 74800 |
|
}, |
|
{ |
|
"epoch": 4.17, |
|
"learning_rate": 0.0002, |
|
"loss": 0.5686, |
|
"step": 74900 |
|
}, |
|
{ |
|
"epoch": 4.18, |
|
"learning_rate": 0.0002, |
|
"loss": 0.5649, |
|
"step": 75000 |
|
}, |
|
{ |
|
"epoch": 4.19, |
|
"learning_rate": 0.0002, |
|
"loss": 0.5628, |
|
"step": 75100 |
|
}, |
|
{ |
|
"epoch": 4.19, |
|
"learning_rate": 0.0002, |
|
"loss": 0.5688, |
|
"step": 75200 |
|
}, |
|
{ |
|
"epoch": 4.2, |
|
"learning_rate": 0.0002, |
|
"loss": 0.5705, |
|
"step": 75300 |
|
}, |
|
{ |
|
"epoch": 4.2, |
|
"learning_rate": 0.0002, |
|
"loss": 0.549, |
|
"step": 75400 |
|
}, |
|
{ |
|
"epoch": 4.21, |
|
"learning_rate": 0.0002, |
|
"loss": 0.5568, |
|
"step": 75500 |
|
}, |
|
{ |
|
"epoch": 4.21, |
|
"learning_rate": 0.0002, |
|
"loss": 0.5649, |
|
"step": 75600 |
|
}, |
|
{ |
|
"epoch": 4.22, |
|
"learning_rate": 0.0002, |
|
"loss": 0.5766, |
|
"step": 75700 |
|
}, |
|
{ |
|
"epoch": 4.22, |
|
"learning_rate": 0.0002, |
|
"loss": 0.5709, |
|
"step": 75800 |
|
}, |
|
{ |
|
"epoch": 4.23, |
|
"learning_rate": 0.0002, |
|
"loss": 0.5595, |
|
"step": 75900 |
|
}, |
|
{ |
|
"epoch": 4.24, |
|
"learning_rate": 0.0002, |
|
"loss": 0.5636, |
|
"step": 76000 |
|
}, |
|
{ |
|
"epoch": 4.24, |
|
"learning_rate": 0.0002, |
|
"loss": 0.5823, |
|
"step": 76100 |
|
}, |
|
{ |
|
"epoch": 4.25, |
|
"learning_rate": 0.0002, |
|
"loss": 0.5677, |
|
"step": 76200 |
|
}, |
|
{ |
|
"epoch": 4.25, |
|
"learning_rate": 0.0002, |
|
"loss": 0.5667, |
|
"step": 76300 |
|
}, |
|
{ |
|
"epoch": 4.26, |
|
"learning_rate": 0.0002, |
|
"loss": 0.5578, |
|
"step": 76400 |
|
}, |
|
{ |
|
"epoch": 4.26, |
|
"learning_rate": 0.0002, |
|
"loss": 0.5755, |
|
"step": 76500 |
|
}, |
|
{ |
|
"epoch": 4.27, |
|
"learning_rate": 0.0002, |
|
"loss": 0.564, |
|
"step": 76600 |
|
}, |
|
{ |
|
"epoch": 4.27, |
|
"learning_rate": 0.0002, |
|
"loss": 0.5604, |
|
"step": 76700 |
|
}, |
|
{ |
|
"epoch": 4.28, |
|
"learning_rate": 0.0002, |
|
"loss": 0.5689, |
|
"step": 76800 |
|
}, |
|
{ |
|
"epoch": 4.29, |
|
"learning_rate": 0.0002, |
|
"loss": 0.5689, |
|
"step": 76900 |
|
}, |
|
{ |
|
"epoch": 4.29, |
|
"learning_rate": 0.0002, |
|
"loss": 0.5676, |
|
"step": 77000 |
|
}, |
|
{ |
|
"epoch": 4.3, |
|
"learning_rate": 0.0002, |
|
"loss": 0.5589, |
|
"step": 77100 |
|
}, |
|
{ |
|
"epoch": 4.3, |
|
"learning_rate": 0.0002, |
|
"loss": 0.5697, |
|
"step": 77200 |
|
}, |
|
{ |
|
"epoch": 4.31, |
|
"learning_rate": 0.0002, |
|
"loss": 0.5785, |
|
"step": 77300 |
|
}, |
|
{ |
|
"epoch": 4.31, |
|
"learning_rate": 0.0002, |
|
"loss": 0.5593, |
|
"step": 77400 |
|
}, |
|
{ |
|
"epoch": 4.32, |
|
"learning_rate": 0.0002, |
|
"loss": 0.5769, |
|
"step": 77500 |
|
}, |
|
{ |
|
"epoch": 4.32, |
|
"learning_rate": 0.0002, |
|
"loss": 0.5705, |
|
"step": 77600 |
|
}, |
|
{ |
|
"epoch": 4.33, |
|
"learning_rate": 0.0002, |
|
"loss": 0.5497, |
|
"step": 77700 |
|
}, |
|
{ |
|
"epoch": 4.34, |
|
"learning_rate": 0.0002, |
|
"loss": 0.5697, |
|
"step": 77800 |
|
}, |
|
{ |
|
"epoch": 4.34, |
|
"learning_rate": 0.0002, |
|
"loss": 0.5683, |
|
"step": 77900 |
|
}, |
|
{ |
|
"epoch": 4.35, |
|
"learning_rate": 0.0002, |
|
"loss": 0.5571, |
|
"step": 78000 |
|
}, |
|
{ |
|
"epoch": 4.35, |
|
"learning_rate": 0.0002, |
|
"loss": 0.5651, |
|
"step": 78100 |
|
}, |
|
{ |
|
"epoch": 4.36, |
|
"learning_rate": 0.0002, |
|
"loss": 0.5605, |
|
"step": 78200 |
|
}, |
|
{ |
|
"epoch": 4.36, |
|
"learning_rate": 0.0002, |
|
"loss": 0.5736, |
|
"step": 78300 |
|
}, |
|
{ |
|
"epoch": 4.37, |
|
"learning_rate": 0.0002, |
|
"loss": 0.5736, |
|
"step": 78400 |
|
}, |
|
{ |
|
"epoch": 4.37, |
|
"learning_rate": 0.0002, |
|
"loss": 0.5658, |
|
"step": 78500 |
|
}, |
|
{ |
|
"epoch": 4.38, |
|
"learning_rate": 0.0002, |
|
"loss": 0.5583, |
|
"step": 78600 |
|
}, |
|
{ |
|
"epoch": 4.39, |
|
"learning_rate": 0.0002, |
|
"loss": 0.5612, |
|
"step": 78700 |
|
}, |
|
{ |
|
"epoch": 4.39, |
|
"learning_rate": 0.0002, |
|
"loss": 0.5549, |
|
"step": 78800 |
|
}, |
|
{ |
|
"epoch": 4.4, |
|
"learning_rate": 0.0002, |
|
"loss": 0.5716, |
|
"step": 78900 |
|
}, |
|
{ |
|
"epoch": 4.4, |
|
"learning_rate": 0.0002, |
|
"loss": 0.5598, |
|
"step": 79000 |
|
}, |
|
{ |
|
"epoch": 4.41, |
|
"learning_rate": 0.0002, |
|
"loss": 0.5757, |
|
"step": 79100 |
|
}, |
|
{ |
|
"epoch": 4.41, |
|
"learning_rate": 0.0002, |
|
"loss": 0.5601, |
|
"step": 79200 |
|
}, |
|
{ |
|
"epoch": 4.42, |
|
"learning_rate": 0.0002, |
|
"loss": 0.5804, |
|
"step": 79300 |
|
}, |
|
{ |
|
"epoch": 4.42, |
|
"learning_rate": 0.0002, |
|
"loss": 0.572, |
|
"step": 79400 |
|
}, |
|
{ |
|
"epoch": 4.43, |
|
"learning_rate": 0.0002, |
|
"loss": 0.588, |
|
"step": 79500 |
|
}, |
|
{ |
|
"epoch": 4.44, |
|
"learning_rate": 0.0002, |
|
"loss": 0.5641, |
|
"step": 79600 |
|
}, |
|
{ |
|
"epoch": 4.44, |
|
"learning_rate": 0.0002, |
|
"loss": 0.5719, |
|
"step": 79700 |
|
}, |
|
{ |
|
"epoch": 4.45, |
|
"learning_rate": 0.0002, |
|
"loss": 0.5689, |
|
"step": 79800 |
|
}, |
|
{ |
|
"epoch": 4.45, |
|
"learning_rate": 0.0002, |
|
"loss": 0.5786, |
|
"step": 79900 |
|
}, |
|
{ |
|
"epoch": 4.46, |
|
"learning_rate": 0.0002, |
|
"loss": 0.5615, |
|
"step": 80000 |
|
}, |
|
{ |
|
"epoch": 4.46, |
|
"eval_gen_len": 18.99985038898863, |
|
"eval_loss": 2.6867384910583496, |
|
"eval_rouge1": 25.3508, |
|
"eval_rouge2": 12.4212, |
|
"eval_rougeL": 20.9341, |
|
"eval_rougeLsum": 23.949, |
|
"eval_runtime": 474.7006, |
|
"eval_samples_per_second": 28.161, |
|
"eval_steps_per_second": 1.761, |
|
"step": 80000 |
|
}, |
|
{ |
|
"epoch": 4.46, |
|
"learning_rate": 0.0002, |
|
"loss": 0.5773, |
|
"step": 80100 |
|
}, |
|
{ |
|
"epoch": 4.47, |
|
"learning_rate": 0.0002, |
|
"loss": 0.5652, |
|
"step": 80200 |
|
}, |
|
{ |
|
"epoch": 4.47, |
|
"learning_rate": 0.0002, |
|
"loss": 0.575, |
|
"step": 80300 |
|
}, |
|
{ |
|
"epoch": 4.48, |
|
"learning_rate": 0.0002, |
|
"loss": 0.5599, |
|
"step": 80400 |
|
}, |
|
{ |
|
"epoch": 4.49, |
|
"learning_rate": 0.0002, |
|
"loss": 0.5621, |
|
"step": 80500 |
|
}, |
|
{ |
|
"epoch": 4.49, |
|
"learning_rate": 0.0002, |
|
"loss": 0.5787, |
|
"step": 80600 |
|
}, |
|
{ |
|
"epoch": 4.5, |
|
"learning_rate": 0.0002, |
|
"loss": 0.573, |
|
"step": 80700 |
|
}, |
|
{ |
|
"epoch": 4.5, |
|
"learning_rate": 0.0002, |
|
"loss": 0.5698, |
|
"step": 80800 |
|
}, |
|
{ |
|
"epoch": 4.51, |
|
"learning_rate": 0.0002, |
|
"loss": 0.5748, |
|
"step": 80900 |
|
}, |
|
{ |
|
"epoch": 4.51, |
|
"learning_rate": 0.0002, |
|
"loss": 0.577, |
|
"step": 81000 |
|
}, |
|
{ |
|
"epoch": 4.52, |
|
"learning_rate": 0.0002, |
|
"loss": 0.5789, |
|
"step": 81100 |
|
}, |
|
{ |
|
"epoch": 4.52, |
|
"learning_rate": 0.0002, |
|
"loss": 0.5693, |
|
"step": 81200 |
|
}, |
|
{ |
|
"epoch": 4.53, |
|
"learning_rate": 0.0002, |
|
"loss": 0.567, |
|
"step": 81300 |
|
}, |
|
{ |
|
"epoch": 4.54, |
|
"learning_rate": 0.0002, |
|
"loss": 0.5708, |
|
"step": 81400 |
|
}, |
|
{ |
|
"epoch": 4.54, |
|
"learning_rate": 0.0002, |
|
"loss": 0.566, |
|
"step": 81500 |
|
}, |
|
{ |
|
"epoch": 4.55, |
|
"learning_rate": 0.0002, |
|
"loss": 0.5643, |
|
"step": 81600 |
|
}, |
|
{ |
|
"epoch": 4.55, |
|
"learning_rate": 0.0002, |
|
"loss": 0.5774, |
|
"step": 81700 |
|
}, |
|
{ |
|
"epoch": 4.56, |
|
"learning_rate": 0.0002, |
|
"loss": 0.5571, |
|
"step": 81800 |
|
}, |
|
{ |
|
"epoch": 4.56, |
|
"learning_rate": 0.0002, |
|
"loss": 0.5739, |
|
"step": 81900 |
|
}, |
|
{ |
|
"epoch": 4.57, |
|
"learning_rate": 0.0002, |
|
"loss": 0.5811, |
|
"step": 82000 |
|
}, |
|
{ |
|
"epoch": 4.58, |
|
"learning_rate": 0.0002, |
|
"loss": 0.5809, |
|
"step": 82100 |
|
}, |
|
{ |
|
"epoch": 4.58, |
|
"learning_rate": 0.0002, |
|
"loss": 0.5672, |
|
"step": 82200 |
|
}, |
|
{ |
|
"epoch": 4.59, |
|
"learning_rate": 0.0002, |
|
"loss": 0.5883, |
|
"step": 82300 |
|
}, |
|
{ |
|
"epoch": 4.59, |
|
"learning_rate": 0.0002, |
|
"loss": 0.5576, |
|
"step": 82400 |
|
}, |
|
{ |
|
"epoch": 4.6, |
|
"learning_rate": 0.0002, |
|
"loss": 0.5705, |
|
"step": 82500 |
|
}, |
|
{ |
|
"epoch": 4.6, |
|
"learning_rate": 0.0002, |
|
"loss": 0.5699, |
|
"step": 82600 |
|
}, |
|
{ |
|
"epoch": 4.61, |
|
"learning_rate": 0.0002, |
|
"loss": 0.5786, |
|
"step": 82700 |
|
}, |
|
{ |
|
"epoch": 4.61, |
|
"learning_rate": 0.0002, |
|
"loss": 0.5674, |
|
"step": 82800 |
|
}, |
|
{ |
|
"epoch": 4.62, |
|
"learning_rate": 0.0002, |
|
"loss": 0.5753, |
|
"step": 82900 |
|
}, |
|
{ |
|
"epoch": 4.63, |
|
"learning_rate": 0.0002, |
|
"loss": 0.5733, |
|
"step": 83000 |
|
}, |
|
{ |
|
"epoch": 4.63, |
|
"learning_rate": 0.0002, |
|
"loss": 0.5706, |
|
"step": 83100 |
|
}, |
|
{ |
|
"epoch": 4.64, |
|
"learning_rate": 0.0002, |
|
"loss": 0.5635, |
|
"step": 83200 |
|
}, |
|
{ |
|
"epoch": 4.64, |
|
"learning_rate": 0.0002, |
|
"loss": 0.5721, |
|
"step": 83300 |
|
}, |
|
{ |
|
"epoch": 4.65, |
|
"learning_rate": 0.0002, |
|
"loss": 0.5665, |
|
"step": 83400 |
|
}, |
|
{ |
|
"epoch": 4.65, |
|
"learning_rate": 0.0002, |
|
"loss": 0.5839, |
|
"step": 83500 |
|
}, |
|
{ |
|
"epoch": 4.66, |
|
"learning_rate": 0.0002, |
|
"loss": 0.5831, |
|
"step": 83600 |
|
}, |
|
{ |
|
"epoch": 4.66, |
|
"learning_rate": 0.0002, |
|
"loss": 0.5699, |
|
"step": 83700 |
|
}, |
|
{ |
|
"epoch": 4.67, |
|
"learning_rate": 0.0002, |
|
"loss": 0.5777, |
|
"step": 83800 |
|
}, |
|
{ |
|
"epoch": 4.68, |
|
"learning_rate": 0.0002, |
|
"loss": 0.5809, |
|
"step": 83900 |
|
}, |
|
{ |
|
"epoch": 4.68, |
|
"learning_rate": 0.0002, |
|
"loss": 0.5846, |
|
"step": 84000 |
|
}, |
|
{ |
|
"epoch": 4.69, |
|
"learning_rate": 0.0002, |
|
"loss": 0.5711, |
|
"step": 84100 |
|
}, |
|
{ |
|
"epoch": 4.69, |
|
"learning_rate": 0.0002, |
|
"loss": 0.5668, |
|
"step": 84200 |
|
}, |
|
{ |
|
"epoch": 4.7, |
|
"learning_rate": 0.0002, |
|
"loss": 0.5747, |
|
"step": 84300 |
|
}, |
|
{ |
|
"epoch": 4.7, |
|
"learning_rate": 0.0002, |
|
"loss": 0.5752, |
|
"step": 84400 |
|
}, |
|
{ |
|
"epoch": 4.71, |
|
"learning_rate": 0.0002, |
|
"loss": 0.5707, |
|
"step": 84500 |
|
}, |
|
{ |
|
"epoch": 4.71, |
|
"learning_rate": 0.0002, |
|
"loss": 0.5777, |
|
"step": 84600 |
|
}, |
|
{ |
|
"epoch": 4.72, |
|
"learning_rate": 0.0002, |
|
"loss": 0.5725, |
|
"step": 84700 |
|
}, |
|
{ |
|
"epoch": 4.73, |
|
"learning_rate": 0.0002, |
|
"loss": 0.5749, |
|
"step": 84800 |
|
}, |
|
{ |
|
"epoch": 4.73, |
|
"learning_rate": 0.0002, |
|
"loss": 0.5892, |
|
"step": 84900 |
|
}, |
|
{ |
|
"epoch": 4.74, |
|
"learning_rate": 0.0002, |
|
"loss": 0.5735, |
|
"step": 85000 |
|
}, |
|
{ |
|
"epoch": 4.74, |
|
"learning_rate": 0.0002, |
|
"loss": 0.5753, |
|
"step": 85100 |
|
}, |
|
{ |
|
"epoch": 4.75, |
|
"learning_rate": 0.0002, |
|
"loss": 0.5699, |
|
"step": 85200 |
|
}, |
|
{ |
|
"epoch": 4.75, |
|
"learning_rate": 0.0002, |
|
"loss": 0.5702, |
|
"step": 85300 |
|
}, |
|
{ |
|
"epoch": 4.76, |
|
"learning_rate": 0.0002, |
|
"loss": 0.576, |
|
"step": 85400 |
|
}, |
|
{ |
|
"epoch": 4.76, |
|
"learning_rate": 0.0002, |
|
"loss": 0.5835, |
|
"step": 85500 |
|
}, |
|
{ |
|
"epoch": 4.77, |
|
"learning_rate": 0.0002, |
|
"loss": 0.5764, |
|
"step": 85600 |
|
}, |
|
{ |
|
"epoch": 4.78, |
|
"learning_rate": 0.0002, |
|
"loss": 0.5746, |
|
"step": 85700 |
|
}, |
|
{ |
|
"epoch": 4.78, |
|
"learning_rate": 0.0002, |
|
"loss": 0.5814, |
|
"step": 85800 |
|
}, |
|
{ |
|
"epoch": 4.79, |
|
"learning_rate": 0.0002, |
|
"loss": 0.5689, |
|
"step": 85900 |
|
}, |
|
{ |
|
"epoch": 4.79, |
|
"learning_rate": 0.0002, |
|
"loss": 0.5669, |
|
"step": 86000 |
|
}, |
|
{ |
|
"epoch": 4.8, |
|
"learning_rate": 0.0002, |
|
"loss": 0.5775, |
|
"step": 86100 |
|
}, |
|
{ |
|
"epoch": 4.8, |
|
"learning_rate": 0.0002, |
|
"loss": 0.5753, |
|
"step": 86200 |
|
}, |
|
{ |
|
"epoch": 4.81, |
|
"learning_rate": 0.0002, |
|
"loss": 0.5978, |
|
"step": 86300 |
|
}, |
|
{ |
|
"epoch": 4.81, |
|
"learning_rate": 0.0002, |
|
"loss": 0.5812, |
|
"step": 86400 |
|
}, |
|
{ |
|
"epoch": 4.82, |
|
"learning_rate": 0.0002, |
|
"loss": 0.5726, |
|
"step": 86500 |
|
}, |
|
{ |
|
"epoch": 4.83, |
|
"learning_rate": 0.0002, |
|
"loss": 0.5865, |
|
"step": 86600 |
|
}, |
|
{ |
|
"epoch": 4.83, |
|
"learning_rate": 0.0002, |
|
"loss": 0.5809, |
|
"step": 86700 |
|
}, |
|
{ |
|
"epoch": 4.84, |
|
"learning_rate": 0.0002, |
|
"loss": 0.5846, |
|
"step": 86800 |
|
}, |
|
{ |
|
"epoch": 4.84, |
|
"learning_rate": 0.0002, |
|
"loss": 0.5694, |
|
"step": 86900 |
|
}, |
|
{ |
|
"epoch": 4.85, |
|
"learning_rate": 0.0002, |
|
"loss": 0.5694, |
|
"step": 87000 |
|
}, |
|
{ |
|
"epoch": 4.85, |
|
"learning_rate": 0.0002, |
|
"loss": 0.581, |
|
"step": 87100 |
|
}, |
|
{ |
|
"epoch": 4.86, |
|
"learning_rate": 0.0002, |
|
"loss": 0.5801, |
|
"step": 87200 |
|
}, |
|
{ |
|
"epoch": 4.86, |
|
"learning_rate": 0.0002, |
|
"loss": 0.596, |
|
"step": 87300 |
|
}, |
|
{ |
|
"epoch": 4.87, |
|
"learning_rate": 0.0002, |
|
"loss": 0.5683, |
|
"step": 87400 |
|
}, |
|
{ |
|
"epoch": 4.88, |
|
"learning_rate": 0.0002, |
|
"loss": 0.5771, |
|
"step": 87500 |
|
}, |
|
{ |
|
"epoch": 4.88, |
|
"learning_rate": 0.0002, |
|
"loss": 0.5772, |
|
"step": 87600 |
|
}, |
|
{ |
|
"epoch": 4.89, |
|
"learning_rate": 0.0002, |
|
"loss": 0.5719, |
|
"step": 87700 |
|
}, |
|
{ |
|
"epoch": 4.89, |
|
"learning_rate": 0.0002, |
|
"loss": 0.5911, |
|
"step": 87800 |
|
}, |
|
{ |
|
"epoch": 4.9, |
|
"learning_rate": 0.0002, |
|
"loss": 0.5747, |
|
"step": 87900 |
|
}, |
|
{ |
|
"epoch": 4.9, |
|
"learning_rate": 0.0002, |
|
"loss": 0.5804, |
|
"step": 88000 |
|
}, |
|
{ |
|
"epoch": 4.91, |
|
"learning_rate": 0.0002, |
|
"loss": 0.5736, |
|
"step": 88100 |
|
}, |
|
{ |
|
"epoch": 4.92, |
|
"learning_rate": 0.0002, |
|
"loss": 0.5621, |
|
"step": 88200 |
|
}, |
|
{ |
|
"epoch": 4.92, |
|
"learning_rate": 0.0002, |
|
"loss": 0.5716, |
|
"step": 88300 |
|
}, |
|
{ |
|
"epoch": 4.93, |
|
"learning_rate": 0.0002, |
|
"loss": 0.582, |
|
"step": 88400 |
|
}, |
|
{ |
|
"epoch": 4.93, |
|
"learning_rate": 0.0002, |
|
"loss": 0.5743, |
|
"step": 88500 |
|
}, |
|
{ |
|
"epoch": 4.94, |
|
"learning_rate": 0.0002, |
|
"loss": 0.5756, |
|
"step": 88600 |
|
}, |
|
{ |
|
"epoch": 4.94, |
|
"learning_rate": 0.0002, |
|
"loss": 0.5729, |
|
"step": 88700 |
|
}, |
|
{ |
|
"epoch": 4.95, |
|
"learning_rate": 0.0002, |
|
"loss": 0.5546, |
|
"step": 88800 |
|
}, |
|
{ |
|
"epoch": 4.95, |
|
"learning_rate": 0.0002, |
|
"loss": 0.5688, |
|
"step": 88900 |
|
}, |
|
{ |
|
"epoch": 4.96, |
|
"learning_rate": 0.0002, |
|
"loss": 0.5702, |
|
"step": 89000 |
|
}, |
|
{ |
|
"epoch": 4.97, |
|
"learning_rate": 0.0002, |
|
"loss": 0.5811, |
|
"step": 89100 |
|
}, |
|
{ |
|
"epoch": 4.97, |
|
"learning_rate": 0.0002, |
|
"loss": 0.565, |
|
"step": 89200 |
|
}, |
|
{ |
|
"epoch": 4.98, |
|
"learning_rate": 0.0002, |
|
"loss": 0.5643, |
|
"step": 89300 |
|
}, |
|
{ |
|
"epoch": 4.98, |
|
"learning_rate": 0.0002, |
|
"loss": 0.5915, |
|
"step": 89400 |
|
}, |
|
{ |
|
"epoch": 4.99, |
|
"learning_rate": 0.0002, |
|
"loss": 0.5978, |
|
"step": 89500 |
|
}, |
|
{ |
|
"epoch": 4.99, |
|
"learning_rate": 0.0002, |
|
"loss": 0.5888, |
|
"step": 89600 |
|
}, |
|
{ |
|
"epoch": 5.0, |
|
"learning_rate": 0.0002, |
|
"loss": 0.5706, |
|
"step": 89700 |
|
}, |
|
{ |
|
"epoch": 5.0, |
|
"learning_rate": 0.0002, |
|
"loss": 0.5537, |
|
"step": 89800 |
|
}, |
|
{ |
|
"epoch": 5.01, |
|
"learning_rate": 0.0002, |
|
"loss": 0.5571, |
|
"step": 89900 |
|
}, |
|
{ |
|
"epoch": 5.02, |
|
"learning_rate": 0.0002, |
|
"loss": 0.5387, |
|
"step": 90000 |
|
}, |
|
{ |
|
"epoch": 5.02, |
|
"eval_gen_len": 18.99955116696589, |
|
"eval_loss": 2.7078752517700195, |
|
"eval_rouge1": 25.2864, |
|
"eval_rouge2": 12.3885, |
|
"eval_rougeL": 20.9095, |
|
"eval_rougeLsum": 23.8569, |
|
"eval_runtime": 479.4466, |
|
"eval_samples_per_second": 27.882, |
|
"eval_steps_per_second": 1.744, |
|
"step": 90000 |
|
}, |
|
{ |
|
"epoch": 5.02, |
|
"learning_rate": 0.0002, |
|
"loss": 0.5436, |
|
"step": 90100 |
|
}, |
|
{ |
|
"epoch": 5.03, |
|
"learning_rate": 0.0002, |
|
"loss": 0.55, |
|
"step": 90200 |
|
}, |
|
{ |
|
"epoch": 5.03, |
|
"learning_rate": 0.0002, |
|
"loss": 0.5454, |
|
"step": 90300 |
|
}, |
|
{ |
|
"epoch": 5.04, |
|
"learning_rate": 0.0002, |
|
"loss": 0.5504, |
|
"step": 90400 |
|
}, |
|
{ |
|
"epoch": 5.04, |
|
"learning_rate": 0.0002, |
|
"loss": 0.5469, |
|
"step": 90500 |
|
}, |
|
{ |
|
"epoch": 5.05, |
|
"learning_rate": 0.0002, |
|
"loss": 0.5441, |
|
"step": 90600 |
|
}, |
|
{ |
|
"epoch": 5.05, |
|
"learning_rate": 0.0002, |
|
"loss": 0.5517, |
|
"step": 90700 |
|
}, |
|
{ |
|
"epoch": 5.06, |
|
"learning_rate": 0.0002, |
|
"loss": 0.5517, |
|
"step": 90800 |
|
}, |
|
{ |
|
"epoch": 5.07, |
|
"learning_rate": 0.0002, |
|
"loss": 0.5419, |
|
"step": 90900 |
|
}, |
|
{ |
|
"epoch": 5.07, |
|
"learning_rate": 0.0002, |
|
"loss": 0.5447, |
|
"step": 91000 |
|
}, |
|
{ |
|
"epoch": 5.08, |
|
"learning_rate": 0.0002, |
|
"loss": 0.5425, |
|
"step": 91100 |
|
}, |
|
{ |
|
"epoch": 5.08, |
|
"learning_rate": 0.0002, |
|
"loss": 0.5549, |
|
"step": 91200 |
|
}, |
|
{ |
|
"epoch": 5.09, |
|
"learning_rate": 0.0002, |
|
"loss": 0.5595, |
|
"step": 91300 |
|
}, |
|
{ |
|
"epoch": 5.09, |
|
"learning_rate": 0.0002, |
|
"loss": 0.5442, |
|
"step": 91400 |
|
}, |
|
{ |
|
"epoch": 5.1, |
|
"learning_rate": 0.0002, |
|
"loss": 0.552, |
|
"step": 91500 |
|
}, |
|
{ |
|
"epoch": 5.1, |
|
"learning_rate": 0.0002, |
|
"loss": 0.5665, |
|
"step": 91600 |
|
}, |
|
{ |
|
"epoch": 5.11, |
|
"learning_rate": 0.0002, |
|
"loss": 0.5503, |
|
"step": 91700 |
|
}, |
|
{ |
|
"epoch": 5.12, |
|
"learning_rate": 0.0002, |
|
"loss": 0.5589, |
|
"step": 91800 |
|
}, |
|
{ |
|
"epoch": 5.12, |
|
"learning_rate": 0.0002, |
|
"loss": 0.5792, |
|
"step": 91900 |
|
}, |
|
{ |
|
"epoch": 5.13, |
|
"learning_rate": 0.0002, |
|
"loss": 0.5517, |
|
"step": 92000 |
|
}, |
|
{ |
|
"epoch": 5.13, |
|
"learning_rate": 0.0002, |
|
"loss": 0.5693, |
|
"step": 92100 |
|
}, |
|
{ |
|
"epoch": 5.14, |
|
"learning_rate": 0.0002, |
|
"loss": 0.5496, |
|
"step": 92200 |
|
}, |
|
{ |
|
"epoch": 5.14, |
|
"learning_rate": 0.0002, |
|
"loss": 0.5557, |
|
"step": 92300 |
|
}, |
|
{ |
|
"epoch": 5.15, |
|
"learning_rate": 0.0002, |
|
"loss": 0.5545, |
|
"step": 92400 |
|
}, |
|
{ |
|
"epoch": 5.15, |
|
"learning_rate": 0.0002, |
|
"loss": 0.5533, |
|
"step": 92500 |
|
}, |
|
{ |
|
"epoch": 5.16, |
|
"learning_rate": 0.0002, |
|
"loss": 0.544, |
|
"step": 92600 |
|
}, |
|
{ |
|
"epoch": 5.17, |
|
"learning_rate": 0.0002, |
|
"loss": 0.5582, |
|
"step": 92700 |
|
}, |
|
{ |
|
"epoch": 5.17, |
|
"learning_rate": 0.0002, |
|
"loss": 0.5517, |
|
"step": 92800 |
|
}, |
|
{ |
|
"epoch": 5.18, |
|
"learning_rate": 0.0002, |
|
"loss": 0.5585, |
|
"step": 92900 |
|
}, |
|
{ |
|
"epoch": 5.18, |
|
"learning_rate": 0.0002, |
|
"loss": 0.5561, |
|
"step": 93000 |
|
}, |
|
{ |
|
"epoch": 5.19, |
|
"learning_rate": 0.0002, |
|
"loss": 0.5446, |
|
"step": 93100 |
|
}, |
|
{ |
|
"epoch": 5.19, |
|
"learning_rate": 0.0002, |
|
"loss": 0.5587, |
|
"step": 93200 |
|
}, |
|
{ |
|
"epoch": 5.2, |
|
"learning_rate": 0.0002, |
|
"loss": 0.5644, |
|
"step": 93300 |
|
}, |
|
{ |
|
"epoch": 5.2, |
|
"learning_rate": 0.0002, |
|
"loss": 0.5549, |
|
"step": 93400 |
|
}, |
|
{ |
|
"epoch": 5.21, |
|
"learning_rate": 0.0002, |
|
"loss": 0.5679, |
|
"step": 93500 |
|
}, |
|
{ |
|
"epoch": 5.22, |
|
"learning_rate": 0.0002, |
|
"loss": 0.5663, |
|
"step": 93600 |
|
}, |
|
{ |
|
"epoch": 5.22, |
|
"learning_rate": 0.0002, |
|
"loss": 0.5499, |
|
"step": 93700 |
|
}, |
|
{ |
|
"epoch": 5.23, |
|
"learning_rate": 0.0002, |
|
"loss": 0.561, |
|
"step": 93800 |
|
}, |
|
{ |
|
"epoch": 5.23, |
|
"learning_rate": 0.0002, |
|
"loss": 0.5538, |
|
"step": 93900 |
|
}, |
|
{ |
|
"epoch": 5.24, |
|
"learning_rate": 0.0002, |
|
"loss": 0.5563, |
|
"step": 94000 |
|
}, |
|
{ |
|
"epoch": 5.24, |
|
"learning_rate": 0.0002, |
|
"loss": 0.5515, |
|
"step": 94100 |
|
}, |
|
{ |
|
"epoch": 5.25, |
|
"learning_rate": 0.0002, |
|
"loss": 0.5626, |
|
"step": 94200 |
|
}, |
|
{ |
|
"epoch": 5.25, |
|
"learning_rate": 0.0002, |
|
"loss": 0.5562, |
|
"step": 94300 |
|
}, |
|
{ |
|
"epoch": 5.26, |
|
"learning_rate": 0.0002, |
|
"loss": 0.5474, |
|
"step": 94400 |
|
}, |
|
{ |
|
"epoch": 5.27, |
|
"learning_rate": 0.0002, |
|
"loss": 0.5698, |
|
"step": 94500 |
|
}, |
|
{ |
|
"epoch": 5.27, |
|
"learning_rate": 0.0002, |
|
"loss": 0.5642, |
|
"step": 94600 |
|
}, |
|
{ |
|
"epoch": 5.28, |
|
"learning_rate": 0.0002, |
|
"loss": 0.5564, |
|
"step": 94700 |
|
}, |
|
{ |
|
"epoch": 5.28, |
|
"learning_rate": 0.0002, |
|
"loss": 0.5562, |
|
"step": 94800 |
|
}, |
|
{ |
|
"epoch": 5.29, |
|
"learning_rate": 0.0002, |
|
"loss": 0.5623, |
|
"step": 94900 |
|
}, |
|
{ |
|
"epoch": 5.29, |
|
"learning_rate": 0.0002, |
|
"loss": 0.5565, |
|
"step": 95000 |
|
}, |
|
{ |
|
"epoch": 5.3, |
|
"learning_rate": 0.0002, |
|
"loss": 0.565, |
|
"step": 95100 |
|
}, |
|
{ |
|
"epoch": 5.31, |
|
"learning_rate": 0.0002, |
|
"loss": 0.5618, |
|
"step": 95200 |
|
}, |
|
{ |
|
"epoch": 5.31, |
|
"learning_rate": 0.0002, |
|
"loss": 0.5586, |
|
"step": 95300 |
|
}, |
|
{ |
|
"epoch": 5.32, |
|
"learning_rate": 0.0002, |
|
"loss": 0.5572, |
|
"step": 95400 |
|
}, |
|
{ |
|
"epoch": 5.32, |
|
"learning_rate": 0.0002, |
|
"loss": 0.5605, |
|
"step": 95500 |
|
}, |
|
{ |
|
"epoch": 5.33, |
|
"learning_rate": 0.0002, |
|
"loss": 0.563, |
|
"step": 95600 |
|
}, |
|
{ |
|
"epoch": 5.33, |
|
"learning_rate": 0.0002, |
|
"loss": 0.5688, |
|
"step": 95700 |
|
}, |
|
{ |
|
"epoch": 5.34, |
|
"learning_rate": 0.0002, |
|
"loss": 0.5562, |
|
"step": 95800 |
|
}, |
|
{ |
|
"epoch": 5.34, |
|
"learning_rate": 0.0002, |
|
"loss": 0.5529, |
|
"step": 95900 |
|
}, |
|
{ |
|
"epoch": 5.35, |
|
"learning_rate": 0.0002, |
|
"loss": 0.5564, |
|
"step": 96000 |
|
}, |
|
{ |
|
"epoch": 5.36, |
|
"learning_rate": 0.0002, |
|
"loss": 0.5708, |
|
"step": 96100 |
|
}, |
|
{ |
|
"epoch": 5.36, |
|
"learning_rate": 0.0002, |
|
"loss": 0.5667, |
|
"step": 96200 |
|
}, |
|
{ |
|
"epoch": 5.37, |
|
"learning_rate": 0.0002, |
|
"loss": 0.552, |
|
"step": 96300 |
|
}, |
|
{ |
|
"epoch": 5.37, |
|
"learning_rate": 0.0002, |
|
"loss": 0.5715, |
|
"step": 96400 |
|
}, |
|
{ |
|
"epoch": 5.38, |
|
"learning_rate": 0.0002, |
|
"loss": 0.5697, |
|
"step": 96500 |
|
}, |
|
{ |
|
"epoch": 5.38, |
|
"learning_rate": 0.0002, |
|
"loss": 0.5552, |
|
"step": 96600 |
|
}, |
|
{ |
|
"epoch": 5.39, |
|
"learning_rate": 0.0002, |
|
"loss": 0.5791, |
|
"step": 96700 |
|
}, |
|
{ |
|
"epoch": 5.39, |
|
"learning_rate": 0.0002, |
|
"loss": 0.5527, |
|
"step": 96800 |
|
}, |
|
{ |
|
"epoch": 5.4, |
|
"learning_rate": 0.0002, |
|
"loss": 0.5761, |
|
"step": 96900 |
|
}, |
|
{ |
|
"epoch": 5.41, |
|
"learning_rate": 0.0002, |
|
"loss": 0.5725, |
|
"step": 97000 |
|
}, |
|
{ |
|
"epoch": 5.41, |
|
"learning_rate": 0.0002, |
|
"loss": 0.5731, |
|
"step": 97100 |
|
}, |
|
{ |
|
"epoch": 5.42, |
|
"learning_rate": 0.0002, |
|
"loss": 0.5688, |
|
"step": 97200 |
|
}, |
|
{ |
|
"epoch": 5.42, |
|
"learning_rate": 0.0002, |
|
"loss": 0.555, |
|
"step": 97300 |
|
}, |
|
{ |
|
"epoch": 5.43, |
|
"learning_rate": 0.0002, |
|
"loss": 0.564, |
|
"step": 97400 |
|
}, |
|
{ |
|
"epoch": 5.43, |
|
"learning_rate": 0.0002, |
|
"loss": 0.5729, |
|
"step": 97500 |
|
}, |
|
{ |
|
"epoch": 5.44, |
|
"learning_rate": 0.0002, |
|
"loss": 0.5668, |
|
"step": 97600 |
|
}, |
|
{ |
|
"epoch": 5.44, |
|
"learning_rate": 0.0002, |
|
"loss": 0.5735, |
|
"step": 97700 |
|
}, |
|
{ |
|
"epoch": 5.45, |
|
"learning_rate": 0.0002, |
|
"loss": 0.566, |
|
"step": 97800 |
|
}, |
|
{ |
|
"epoch": 5.46, |
|
"learning_rate": 0.0002, |
|
"loss": 0.5525, |
|
"step": 97900 |
|
}, |
|
{ |
|
"epoch": 5.46, |
|
"learning_rate": 0.0002, |
|
"loss": 0.5592, |
|
"step": 98000 |
|
}, |
|
{ |
|
"epoch": 5.47, |
|
"learning_rate": 0.0002, |
|
"loss": 0.5664, |
|
"step": 98100 |
|
}, |
|
{ |
|
"epoch": 5.47, |
|
"learning_rate": 0.0002, |
|
"loss": 0.5622, |
|
"step": 98200 |
|
}, |
|
{ |
|
"epoch": 5.48, |
|
"learning_rate": 0.0002, |
|
"loss": 0.5678, |
|
"step": 98300 |
|
}, |
|
{ |
|
"epoch": 5.48, |
|
"learning_rate": 0.0002, |
|
"loss": 0.5802, |
|
"step": 98400 |
|
}, |
|
{ |
|
"epoch": 5.49, |
|
"learning_rate": 0.0002, |
|
"loss": 0.5661, |
|
"step": 98500 |
|
}, |
|
{ |
|
"epoch": 5.49, |
|
"learning_rate": 0.0002, |
|
"loss": 0.5603, |
|
"step": 98600 |
|
}, |
|
{ |
|
"epoch": 5.5, |
|
"learning_rate": 0.0002, |
|
"loss": 0.5653, |
|
"step": 98700 |
|
}, |
|
{ |
|
"epoch": 5.51, |
|
"learning_rate": 0.0002, |
|
"loss": 0.5568, |
|
"step": 98800 |
|
}, |
|
{ |
|
"epoch": 5.51, |
|
"learning_rate": 0.0002, |
|
"loss": 0.5695, |
|
"step": 98900 |
|
}, |
|
{ |
|
"epoch": 5.52, |
|
"learning_rate": 0.0002, |
|
"loss": 0.5688, |
|
"step": 99000 |
|
}, |
|
{ |
|
"epoch": 5.52, |
|
"learning_rate": 0.0002, |
|
"loss": 0.5689, |
|
"step": 99100 |
|
}, |
|
{ |
|
"epoch": 5.53, |
|
"learning_rate": 0.0002, |
|
"loss": 0.5637, |
|
"step": 99200 |
|
}, |
|
{ |
|
"epoch": 5.53, |
|
"learning_rate": 0.0002, |
|
"loss": 0.5671, |
|
"step": 99300 |
|
}, |
|
{ |
|
"epoch": 5.54, |
|
"learning_rate": 0.0002, |
|
"loss": 0.5667, |
|
"step": 99400 |
|
}, |
|
{ |
|
"epoch": 5.54, |
|
"learning_rate": 0.0002, |
|
"loss": 0.5797, |
|
"step": 99500 |
|
}, |
|
{ |
|
"epoch": 5.55, |
|
"learning_rate": 0.0002, |
|
"loss": 0.5687, |
|
"step": 99600 |
|
}, |
|
{ |
|
"epoch": 5.56, |
|
"learning_rate": 0.0002, |
|
"loss": 0.5743, |
|
"step": 99700 |
|
}, |
|
{ |
|
"epoch": 5.56, |
|
"learning_rate": 0.0002, |
|
"loss": 0.575, |
|
"step": 99800 |
|
}, |
|
{ |
|
"epoch": 5.57, |
|
"learning_rate": 0.0002, |
|
"loss": 0.5626, |
|
"step": 99900 |
|
}, |
|
{ |
|
"epoch": 5.57, |
|
"learning_rate": 0.0002, |
|
"loss": 0.5662, |
|
"step": 100000 |
|
}, |
|
{ |
|
"epoch": 5.57, |
|
"eval_gen_len": 18.99985038898863, |
|
"eval_loss": 2.673166275024414, |
|
"eval_rouge1": 25.3971, |
|
"eval_rouge2": 12.406, |
|
"eval_rougeL": 20.9243, |
|
"eval_rougeLsum": 23.9681, |
|
"eval_runtime": 474.5023, |
|
"eval_samples_per_second": 28.173, |
|
"eval_steps_per_second": 1.762, |
|
"step": 100000 |
|
}, |
|
{ |
|
"epoch": 5.58, |
|
"learning_rate": 0.0002, |
|
"loss": 0.5734, |
|
"step": 100100 |
|
}, |
|
{ |
|
"epoch": 5.58, |
|
"learning_rate": 0.0002, |
|
"loss": 0.562, |
|
"step": 100200 |
|
}, |
|
{ |
|
"epoch": 5.59, |
|
"learning_rate": 0.0002, |
|
"loss": 0.5777, |
|
"step": 100300 |
|
}, |
|
{ |
|
"epoch": 5.59, |
|
"learning_rate": 0.0002, |
|
"loss": 0.5653, |
|
"step": 100400 |
|
}, |
|
{ |
|
"epoch": 5.6, |
|
"learning_rate": 0.0002, |
|
"loss": 0.562, |
|
"step": 100500 |
|
}, |
|
{ |
|
"epoch": 5.61, |
|
"learning_rate": 0.0002, |
|
"loss": 0.5713, |
|
"step": 100600 |
|
}, |
|
{ |
|
"epoch": 5.61, |
|
"learning_rate": 0.0002, |
|
"loss": 0.5686, |
|
"step": 100700 |
|
}, |
|
{ |
|
"epoch": 5.62, |
|
"learning_rate": 0.0002, |
|
"loss": 0.5673, |
|
"step": 100800 |
|
}, |
|
{ |
|
"epoch": 5.62, |
|
"learning_rate": 0.0002, |
|
"loss": 0.5635, |
|
"step": 100900 |
|
}, |
|
{ |
|
"epoch": 5.63, |
|
"learning_rate": 0.0002, |
|
"loss": 0.5625, |
|
"step": 101000 |
|
}, |
|
{ |
|
"epoch": 5.63, |
|
"learning_rate": 0.0002, |
|
"loss": 0.5609, |
|
"step": 101100 |
|
}, |
|
{ |
|
"epoch": 5.64, |
|
"learning_rate": 0.0002, |
|
"loss": 0.5761, |
|
"step": 101200 |
|
}, |
|
{ |
|
"epoch": 5.65, |
|
"learning_rate": 0.0002, |
|
"loss": 0.5744, |
|
"step": 101300 |
|
}, |
|
{ |
|
"epoch": 5.65, |
|
"learning_rate": 0.0002, |
|
"loss": 0.5672, |
|
"step": 101400 |
|
}, |
|
{ |
|
"epoch": 5.66, |
|
"learning_rate": 0.0002, |
|
"loss": 0.5728, |
|
"step": 101500 |
|
}, |
|
{ |
|
"epoch": 5.66, |
|
"learning_rate": 0.0002, |
|
"loss": 0.5725, |
|
"step": 101600 |
|
}, |
|
{ |
|
"epoch": 5.67, |
|
"learning_rate": 0.0002, |
|
"loss": 0.5741, |
|
"step": 101700 |
|
}, |
|
{ |
|
"epoch": 5.67, |
|
"learning_rate": 0.0002, |
|
"loss": 0.5661, |
|
"step": 101800 |
|
}, |
|
{ |
|
"epoch": 5.68, |
|
"learning_rate": 0.0002, |
|
"loss": 0.5594, |
|
"step": 101900 |
|
}, |
|
{ |
|
"epoch": 5.68, |
|
"learning_rate": 0.0002, |
|
"loss": 0.5622, |
|
"step": 102000 |
|
}, |
|
{ |
|
"epoch": 5.69, |
|
"learning_rate": 0.0002, |
|
"loss": 0.5683, |
|
"step": 102100 |
|
}, |
|
{ |
|
"epoch": 5.7, |
|
"learning_rate": 0.0002, |
|
"loss": 0.5642, |
|
"step": 102200 |
|
}, |
|
{ |
|
"epoch": 5.7, |
|
"learning_rate": 0.0002, |
|
"loss": 0.5788, |
|
"step": 102300 |
|
}, |
|
{ |
|
"epoch": 5.71, |
|
"learning_rate": 0.0002, |
|
"loss": 0.564, |
|
"step": 102400 |
|
}, |
|
{ |
|
"epoch": 5.71, |
|
"learning_rate": 0.0002, |
|
"loss": 0.5563, |
|
"step": 102500 |
|
}, |
|
{ |
|
"epoch": 5.72, |
|
"learning_rate": 0.0002, |
|
"loss": 0.5587, |
|
"step": 102600 |
|
}, |
|
{ |
|
"epoch": 5.72, |
|
"learning_rate": 0.0002, |
|
"loss": 0.5744, |
|
"step": 102700 |
|
}, |
|
{ |
|
"epoch": 5.73, |
|
"learning_rate": 0.0002, |
|
"loss": 0.5776, |
|
"step": 102800 |
|
}, |
|
{ |
|
"epoch": 5.73, |
|
"learning_rate": 0.0002, |
|
"loss": 0.5711, |
|
"step": 102900 |
|
}, |
|
{ |
|
"epoch": 5.74, |
|
"learning_rate": 0.0002, |
|
"loss": 0.5646, |
|
"step": 103000 |
|
}, |
|
{ |
|
"epoch": 5.75, |
|
"learning_rate": 0.0002, |
|
"loss": 0.569, |
|
"step": 103100 |
|
}, |
|
{ |
|
"epoch": 5.75, |
|
"learning_rate": 0.0002, |
|
"loss": 0.5704, |
|
"step": 103200 |
|
}, |
|
{ |
|
"epoch": 5.76, |
|
"learning_rate": 0.0002, |
|
"loss": 0.5439, |
|
"step": 103300 |
|
}, |
|
{ |
|
"epoch": 5.76, |
|
"learning_rate": 0.0002, |
|
"loss": 0.5685, |
|
"step": 103400 |
|
}, |
|
{ |
|
"epoch": 5.77, |
|
"learning_rate": 0.0002, |
|
"loss": 0.5519, |
|
"step": 103500 |
|
}, |
|
{ |
|
"epoch": 5.77, |
|
"learning_rate": 0.0002, |
|
"loss": 0.5806, |
|
"step": 103600 |
|
}, |
|
{ |
|
"epoch": 5.78, |
|
"learning_rate": 0.0002, |
|
"loss": 0.5666, |
|
"step": 103700 |
|
}, |
|
{ |
|
"epoch": 5.78, |
|
"learning_rate": 0.0002, |
|
"loss": 0.5706, |
|
"step": 103800 |
|
}, |
|
{ |
|
"epoch": 5.79, |
|
"learning_rate": 0.0002, |
|
"loss": 0.5681, |
|
"step": 103900 |
|
}, |
|
{ |
|
"epoch": 5.8, |
|
"learning_rate": 0.0002, |
|
"loss": 0.5766, |
|
"step": 104000 |
|
}, |
|
{ |
|
"epoch": 5.8, |
|
"learning_rate": 0.0002, |
|
"loss": 0.584, |
|
"step": 104100 |
|
}, |
|
{ |
|
"epoch": 5.81, |
|
"learning_rate": 0.0002, |
|
"loss": 0.5765, |
|
"step": 104200 |
|
}, |
|
{ |
|
"epoch": 5.81, |
|
"learning_rate": 0.0002, |
|
"loss": 0.5586, |
|
"step": 104300 |
|
}, |
|
{ |
|
"epoch": 5.82, |
|
"learning_rate": 0.0002, |
|
"loss": 0.5694, |
|
"step": 104400 |
|
}, |
|
{ |
|
"epoch": 5.82, |
|
"learning_rate": 0.0002, |
|
"loss": 0.5556, |
|
"step": 104500 |
|
}, |
|
{ |
|
"epoch": 5.83, |
|
"learning_rate": 0.0002, |
|
"loss": 0.5757, |
|
"step": 104600 |
|
}, |
|
{ |
|
"epoch": 5.83, |
|
"learning_rate": 0.0002, |
|
"loss": 0.5501, |
|
"step": 104700 |
|
}, |
|
{ |
|
"epoch": 5.84, |
|
"learning_rate": 0.0002, |
|
"loss": 0.5656, |
|
"step": 104800 |
|
}, |
|
{ |
|
"epoch": 5.85, |
|
"learning_rate": 0.0002, |
|
"loss": 0.5775, |
|
"step": 104900 |
|
}, |
|
{ |
|
"epoch": 5.85, |
|
"learning_rate": 0.0002, |
|
"loss": 0.5772, |
|
"step": 105000 |
|
}, |
|
{ |
|
"epoch": 5.86, |
|
"learning_rate": 0.0002, |
|
"loss": 0.5664, |
|
"step": 105100 |
|
}, |
|
{ |
|
"epoch": 5.86, |
|
"learning_rate": 0.0002, |
|
"loss": 0.5648, |
|
"step": 105200 |
|
}, |
|
{ |
|
"epoch": 5.87, |
|
"learning_rate": 0.0002, |
|
"loss": 0.5565, |
|
"step": 105300 |
|
}, |
|
{ |
|
"epoch": 5.87, |
|
"learning_rate": 0.0002, |
|
"loss": 0.5608, |
|
"step": 105400 |
|
}, |
|
{ |
|
"epoch": 5.88, |
|
"learning_rate": 0.0002, |
|
"loss": 0.5614, |
|
"step": 105500 |
|
}, |
|
{ |
|
"epoch": 5.88, |
|
"learning_rate": 0.0002, |
|
"loss": 0.5664, |
|
"step": 105600 |
|
}, |
|
{ |
|
"epoch": 5.89, |
|
"learning_rate": 0.0002, |
|
"loss": 0.5822, |
|
"step": 105700 |
|
}, |
|
{ |
|
"epoch": 5.9, |
|
"learning_rate": 0.0002, |
|
"loss": 0.5655, |
|
"step": 105800 |
|
}, |
|
{ |
|
"epoch": 5.9, |
|
"learning_rate": 0.0002, |
|
"loss": 0.5814, |
|
"step": 105900 |
|
}, |
|
{ |
|
"epoch": 5.91, |
|
"learning_rate": 0.0002, |
|
"loss": 0.555, |
|
"step": 106000 |
|
}, |
|
{ |
|
"epoch": 5.91, |
|
"learning_rate": 0.0002, |
|
"loss": 0.5776, |
|
"step": 106100 |
|
}, |
|
{ |
|
"epoch": 5.92, |
|
"learning_rate": 0.0002, |
|
"loss": 0.5832, |
|
"step": 106200 |
|
}, |
|
{ |
|
"epoch": 5.92, |
|
"learning_rate": 0.0002, |
|
"loss": 0.5637, |
|
"step": 106300 |
|
}, |
|
{ |
|
"epoch": 5.93, |
|
"learning_rate": 0.0002, |
|
"loss": 0.563, |
|
"step": 106400 |
|
}, |
|
{ |
|
"epoch": 5.93, |
|
"learning_rate": 0.0002, |
|
"loss": 0.5648, |
|
"step": 106500 |
|
}, |
|
{ |
|
"epoch": 5.94, |
|
"learning_rate": 0.0002, |
|
"loss": 0.5727, |
|
"step": 106600 |
|
}, |
|
{ |
|
"epoch": 5.95, |
|
"learning_rate": 0.0002, |
|
"loss": 0.5745, |
|
"step": 106700 |
|
}, |
|
{ |
|
"epoch": 5.95, |
|
"learning_rate": 0.0002, |
|
"loss": 0.5797, |
|
"step": 106800 |
|
}, |
|
{ |
|
"epoch": 5.96, |
|
"learning_rate": 0.0002, |
|
"loss": 0.5714, |
|
"step": 106900 |
|
}, |
|
{ |
|
"epoch": 5.96, |
|
"learning_rate": 0.0002, |
|
"loss": 0.5603, |
|
"step": 107000 |
|
}, |
|
{ |
|
"epoch": 5.97, |
|
"learning_rate": 0.0002, |
|
"loss": 0.5671, |
|
"step": 107100 |
|
}, |
|
{ |
|
"epoch": 5.97, |
|
"learning_rate": 0.0002, |
|
"loss": 0.56, |
|
"step": 107200 |
|
}, |
|
{ |
|
"epoch": 5.98, |
|
"learning_rate": 0.0002, |
|
"loss": 0.5676, |
|
"step": 107300 |
|
}, |
|
{ |
|
"epoch": 5.98, |
|
"learning_rate": 0.0002, |
|
"loss": 0.5907, |
|
"step": 107400 |
|
}, |
|
{ |
|
"epoch": 5.99, |
|
"learning_rate": 0.0002, |
|
"loss": 0.5651, |
|
"step": 107500 |
|
}, |
|
{ |
|
"epoch": 6.0, |
|
"learning_rate": 0.0002, |
|
"loss": 0.5675, |
|
"step": 107600 |
|
}, |
|
{ |
|
"epoch": 6.0, |
|
"learning_rate": 0.0002, |
|
"loss": 0.5716, |
|
"step": 107700 |
|
}, |
|
{ |
|
"epoch": 6.01, |
|
"learning_rate": 0.0002, |
|
"loss": 0.5342, |
|
"step": 107800 |
|
}, |
|
{ |
|
"epoch": 6.01, |
|
"learning_rate": 0.0002, |
|
"loss": 0.5334, |
|
"step": 107900 |
|
}, |
|
{ |
|
"epoch": 6.02, |
|
"learning_rate": 0.0002, |
|
"loss": 0.5439, |
|
"step": 108000 |
|
}, |
|
{ |
|
"epoch": 6.02, |
|
"learning_rate": 0.0002, |
|
"loss": 0.5571, |
|
"step": 108100 |
|
}, |
|
{ |
|
"epoch": 6.03, |
|
"learning_rate": 0.0002, |
|
"loss": 0.5569, |
|
"step": 108200 |
|
}, |
|
{ |
|
"epoch": 6.04, |
|
"learning_rate": 0.0002, |
|
"loss": 0.5569, |
|
"step": 108300 |
|
}, |
|
{ |
|
"epoch": 6.04, |
|
"learning_rate": 0.0002, |
|
"loss": 0.5481, |
|
"step": 108400 |
|
}, |
|
{ |
|
"epoch": 6.05, |
|
"learning_rate": 0.0002, |
|
"loss": 0.5507, |
|
"step": 108500 |
|
}, |
|
{ |
|
"epoch": 6.05, |
|
"learning_rate": 0.0002, |
|
"loss": 0.5438, |
|
"step": 108600 |
|
}, |
|
{ |
|
"epoch": 6.06, |
|
"learning_rate": 0.0002, |
|
"loss": 0.5498, |
|
"step": 108700 |
|
}, |
|
{ |
|
"epoch": 6.06, |
|
"learning_rate": 0.0002, |
|
"loss": 0.55, |
|
"step": 108800 |
|
}, |
|
{ |
|
"epoch": 6.07, |
|
"learning_rate": 0.0002, |
|
"loss": 0.5625, |
|
"step": 108900 |
|
}, |
|
{ |
|
"epoch": 6.07, |
|
"learning_rate": 0.0002, |
|
"loss": 0.547, |
|
"step": 109000 |
|
}, |
|
{ |
|
"epoch": 6.08, |
|
"learning_rate": 0.0002, |
|
"loss": 0.5498, |
|
"step": 109100 |
|
}, |
|
{ |
|
"epoch": 6.09, |
|
"learning_rate": 0.0002, |
|
"loss": 0.5467, |
|
"step": 109200 |
|
}, |
|
{ |
|
"epoch": 6.09, |
|
"learning_rate": 0.0002, |
|
"loss": 0.5346, |
|
"step": 109300 |
|
}, |
|
{ |
|
"epoch": 6.1, |
|
"learning_rate": 0.0002, |
|
"loss": 0.5439, |
|
"step": 109400 |
|
}, |
|
{ |
|
"epoch": 6.1, |
|
"learning_rate": 0.0002, |
|
"loss": 0.5395, |
|
"step": 109500 |
|
}, |
|
{ |
|
"epoch": 6.11, |
|
"learning_rate": 0.0002, |
|
"loss": 0.543, |
|
"step": 109600 |
|
}, |
|
{ |
|
"epoch": 6.11, |
|
"learning_rate": 0.0002, |
|
"loss": 0.5494, |
|
"step": 109700 |
|
}, |
|
{ |
|
"epoch": 6.12, |
|
"learning_rate": 0.0002, |
|
"loss": 0.5471, |
|
"step": 109800 |
|
}, |
|
{ |
|
"epoch": 6.12, |
|
"learning_rate": 0.0002, |
|
"loss": 0.5491, |
|
"step": 109900 |
|
}, |
|
{ |
|
"epoch": 6.13, |
|
"learning_rate": 0.0002, |
|
"loss": 0.5515, |
|
"step": 110000 |
|
}, |
|
{ |
|
"epoch": 6.13, |
|
"eval_gen_len": 18.999925194494313, |
|
"eval_loss": 2.722730875015259, |
|
"eval_rouge1": 25.2536, |
|
"eval_rouge2": 12.3269, |
|
"eval_rougeL": 20.8448, |
|
"eval_rougeLsum": 23.8308, |
|
"eval_runtime": 477.19, |
|
"eval_samples_per_second": 28.014, |
|
"eval_steps_per_second": 1.752, |
|
"step": 110000 |
|
}, |
|
{ |
|
"epoch": 6.14, |
|
"learning_rate": 0.0002, |
|
"loss": 0.5518, |
|
"step": 110100 |
|
}, |
|
{ |
|
"epoch": 6.14, |
|
"learning_rate": 0.0002, |
|
"loss": 0.5394, |
|
"step": 110200 |
|
}, |
|
{ |
|
"epoch": 6.15, |
|
"learning_rate": 0.0002, |
|
"loss": 0.5362, |
|
"step": 110300 |
|
}, |
|
{ |
|
"epoch": 6.15, |
|
"learning_rate": 0.0002, |
|
"loss": 0.5357, |
|
"step": 110400 |
|
}, |
|
{ |
|
"epoch": 6.16, |
|
"learning_rate": 0.0002, |
|
"loss": 0.5489, |
|
"step": 110500 |
|
}, |
|
{ |
|
"epoch": 6.16, |
|
"learning_rate": 0.0002, |
|
"loss": 0.5506, |
|
"step": 110600 |
|
}, |
|
{ |
|
"epoch": 6.17, |
|
"learning_rate": 0.0002, |
|
"loss": 0.5659, |
|
"step": 110700 |
|
}, |
|
{ |
|
"epoch": 6.17, |
|
"learning_rate": 0.0002, |
|
"loss": 0.5502, |
|
"step": 110800 |
|
}, |
|
{ |
|
"epoch": 6.18, |
|
"learning_rate": 0.0002, |
|
"loss": 0.5564, |
|
"step": 110900 |
|
}, |
|
{ |
|
"epoch": 6.19, |
|
"learning_rate": 0.0002, |
|
"loss": 0.5411, |
|
"step": 111000 |
|
}, |
|
{ |
|
"epoch": 6.19, |
|
"learning_rate": 0.0002, |
|
"loss": 0.542, |
|
"step": 111100 |
|
}, |
|
{ |
|
"epoch": 6.2, |
|
"learning_rate": 0.0002, |
|
"loss": 0.5582, |
|
"step": 111200 |
|
}, |
|
{ |
|
"epoch": 6.2, |
|
"learning_rate": 0.0002, |
|
"loss": 0.535, |
|
"step": 111300 |
|
}, |
|
{ |
|
"epoch": 6.21, |
|
"learning_rate": 0.0002, |
|
"loss": 0.5458, |
|
"step": 111400 |
|
}, |
|
{ |
|
"epoch": 6.21, |
|
"learning_rate": 0.0002, |
|
"loss": 0.5683, |
|
"step": 111500 |
|
}, |
|
{ |
|
"epoch": 6.22, |
|
"learning_rate": 0.0002, |
|
"loss": 0.5659, |
|
"step": 111600 |
|
}, |
|
{ |
|
"epoch": 6.22, |
|
"learning_rate": 0.0002, |
|
"loss": 0.5536, |
|
"step": 111700 |
|
}, |
|
{ |
|
"epoch": 6.23, |
|
"learning_rate": 0.0002, |
|
"loss": 0.5594, |
|
"step": 111800 |
|
}, |
|
{ |
|
"epoch": 6.24, |
|
"learning_rate": 0.0002, |
|
"loss": 0.5633, |
|
"step": 111900 |
|
}, |
|
{ |
|
"epoch": 6.24, |
|
"learning_rate": 0.0002, |
|
"loss": 0.561, |
|
"step": 112000 |
|
}, |
|
{ |
|
"epoch": 6.25, |
|
"learning_rate": 0.0002, |
|
"loss": 0.5485, |
|
"step": 112100 |
|
}, |
|
{ |
|
"epoch": 6.25, |
|
"learning_rate": 0.0002, |
|
"loss": 0.5567, |
|
"step": 112200 |
|
}, |
|
{ |
|
"epoch": 6.26, |
|
"learning_rate": 0.0002, |
|
"loss": 0.5568, |
|
"step": 112300 |
|
}, |
|
{ |
|
"epoch": 6.26, |
|
"learning_rate": 0.0002, |
|
"loss": 0.5407, |
|
"step": 112400 |
|
}, |
|
{ |
|
"epoch": 6.27, |
|
"learning_rate": 0.0002, |
|
"loss": 0.5433, |
|
"step": 112500 |
|
}, |
|
{ |
|
"epoch": 6.27, |
|
"learning_rate": 0.0002, |
|
"loss": 0.5583, |
|
"step": 112600 |
|
}, |
|
{ |
|
"epoch": 6.28, |
|
"learning_rate": 0.0002, |
|
"loss": 0.5516, |
|
"step": 112700 |
|
}, |
|
{ |
|
"epoch": 6.29, |
|
"learning_rate": 0.0002, |
|
"loss": 0.5493, |
|
"step": 112800 |
|
}, |
|
{ |
|
"epoch": 6.29, |
|
"learning_rate": 0.0002, |
|
"loss": 0.5513, |
|
"step": 112900 |
|
}, |
|
{ |
|
"epoch": 6.3, |
|
"learning_rate": 0.0002, |
|
"loss": 0.5622, |
|
"step": 113000 |
|
}, |
|
{ |
|
"epoch": 6.3, |
|
"learning_rate": 0.0002, |
|
"loss": 0.5431, |
|
"step": 113100 |
|
}, |
|
{ |
|
"epoch": 6.31, |
|
"learning_rate": 0.0002, |
|
"loss": 0.5497, |
|
"step": 113200 |
|
}, |
|
{ |
|
"epoch": 6.31, |
|
"learning_rate": 0.0002, |
|
"loss": 0.5398, |
|
"step": 113300 |
|
}, |
|
{ |
|
"epoch": 6.32, |
|
"learning_rate": 0.0002, |
|
"loss": 0.5613, |
|
"step": 113400 |
|
}, |
|
{ |
|
"epoch": 6.32, |
|
"learning_rate": 0.0002, |
|
"loss": 0.5565, |
|
"step": 113500 |
|
}, |
|
{ |
|
"epoch": 6.33, |
|
"learning_rate": 0.0002, |
|
"loss": 0.5485, |
|
"step": 113600 |
|
}, |
|
{ |
|
"epoch": 6.34, |
|
"learning_rate": 0.0002, |
|
"loss": 0.554, |
|
"step": 113700 |
|
}, |
|
{ |
|
"epoch": 6.34, |
|
"learning_rate": 0.0002, |
|
"loss": 0.5594, |
|
"step": 113800 |
|
}, |
|
{ |
|
"epoch": 6.35, |
|
"learning_rate": 0.0002, |
|
"loss": 0.5483, |
|
"step": 113900 |
|
}, |
|
{ |
|
"epoch": 6.35, |
|
"learning_rate": 0.0002, |
|
"loss": 0.5468, |
|
"step": 114000 |
|
}, |
|
{ |
|
"epoch": 6.36, |
|
"learning_rate": 0.0002, |
|
"loss": 0.5549, |
|
"step": 114100 |
|
}, |
|
{ |
|
"epoch": 6.36, |
|
"learning_rate": 0.0002, |
|
"loss": 0.5605, |
|
"step": 114200 |
|
}, |
|
{ |
|
"epoch": 6.37, |
|
"learning_rate": 0.0002, |
|
"loss": 0.5541, |
|
"step": 114300 |
|
}, |
|
{ |
|
"epoch": 6.38, |
|
"learning_rate": 0.0002, |
|
"loss": 0.5553, |
|
"step": 114400 |
|
}, |
|
{ |
|
"epoch": 6.38, |
|
"learning_rate": 0.0002, |
|
"loss": 0.5566, |
|
"step": 114500 |
|
}, |
|
{ |
|
"epoch": 6.39, |
|
"learning_rate": 0.0002, |
|
"loss": 0.5599, |
|
"step": 114600 |
|
}, |
|
{ |
|
"epoch": 6.39, |
|
"learning_rate": 0.0002, |
|
"loss": 0.5422, |
|
"step": 114700 |
|
}, |
|
{ |
|
"epoch": 6.4, |
|
"learning_rate": 0.0002, |
|
"loss": 0.5648, |
|
"step": 114800 |
|
}, |
|
{ |
|
"epoch": 6.4, |
|
"learning_rate": 0.0002, |
|
"loss": 0.5511, |
|
"step": 114900 |
|
}, |
|
{ |
|
"epoch": 6.41, |
|
"learning_rate": 0.0002, |
|
"loss": 0.553, |
|
"step": 115000 |
|
}, |
|
{ |
|
"epoch": 6.41, |
|
"learning_rate": 0.0002, |
|
"loss": 0.5412, |
|
"step": 115100 |
|
}, |
|
{ |
|
"epoch": 6.42, |
|
"learning_rate": 0.0002, |
|
"loss": 0.5762, |
|
"step": 115200 |
|
}, |
|
{ |
|
"epoch": 6.43, |
|
"learning_rate": 0.0002, |
|
"loss": 0.5528, |
|
"step": 115300 |
|
}, |
|
{ |
|
"epoch": 6.43, |
|
"learning_rate": 0.0002, |
|
"loss": 0.5404, |
|
"step": 115400 |
|
}, |
|
{ |
|
"epoch": 6.44, |
|
"learning_rate": 0.0002, |
|
"loss": 0.5462, |
|
"step": 115500 |
|
}, |
|
{ |
|
"epoch": 6.44, |
|
"learning_rate": 0.0002, |
|
"loss": 0.5516, |
|
"step": 115600 |
|
}, |
|
{ |
|
"epoch": 6.45, |
|
"learning_rate": 0.0002, |
|
"loss": 0.5538, |
|
"step": 115700 |
|
}, |
|
{ |
|
"epoch": 6.45, |
|
"learning_rate": 0.0002, |
|
"loss": 0.5509, |
|
"step": 115800 |
|
}, |
|
{ |
|
"epoch": 6.46, |
|
"learning_rate": 0.0002, |
|
"loss": 0.5516, |
|
"step": 115900 |
|
}, |
|
{ |
|
"epoch": 6.46, |
|
"learning_rate": 0.0002, |
|
"loss": 0.5522, |
|
"step": 116000 |
|
}, |
|
{ |
|
"epoch": 6.47, |
|
"learning_rate": 0.0002, |
|
"loss": 0.556, |
|
"step": 116100 |
|
}, |
|
{ |
|
"epoch": 6.48, |
|
"learning_rate": 0.0002, |
|
"loss": 0.5657, |
|
"step": 116200 |
|
}, |
|
{ |
|
"epoch": 6.48, |
|
"learning_rate": 0.0002, |
|
"loss": 0.5643, |
|
"step": 116300 |
|
}, |
|
{ |
|
"epoch": 6.49, |
|
"learning_rate": 0.0002, |
|
"loss": 0.5467, |
|
"step": 116400 |
|
}, |
|
{ |
|
"epoch": 6.49, |
|
"learning_rate": 0.0002, |
|
"loss": 0.5562, |
|
"step": 116500 |
|
}, |
|
{ |
|
"epoch": 6.5, |
|
"learning_rate": 0.0002, |
|
"loss": 0.5572, |
|
"step": 116600 |
|
}, |
|
{ |
|
"epoch": 6.5, |
|
"learning_rate": 0.0002, |
|
"loss": 0.5423, |
|
"step": 116700 |
|
}, |
|
{ |
|
"epoch": 6.51, |
|
"learning_rate": 0.0002, |
|
"loss": 0.5541, |
|
"step": 116800 |
|
}, |
|
{ |
|
"epoch": 6.51, |
|
"learning_rate": 0.0002, |
|
"loss": 0.5601, |
|
"step": 116900 |
|
}, |
|
{ |
|
"epoch": 6.52, |
|
"learning_rate": 0.0002, |
|
"loss": 0.5675, |
|
"step": 117000 |
|
}, |
|
{ |
|
"epoch": 6.53, |
|
"learning_rate": 0.0002, |
|
"loss": 0.5427, |
|
"step": 117100 |
|
}, |
|
{ |
|
"epoch": 6.53, |
|
"learning_rate": 0.0002, |
|
"loss": 0.554, |
|
"step": 117200 |
|
}, |
|
{ |
|
"epoch": 6.54, |
|
"learning_rate": 0.0002, |
|
"loss": 0.5649, |
|
"step": 117300 |
|
}, |
|
{ |
|
"epoch": 6.54, |
|
"learning_rate": 0.0002, |
|
"loss": 0.556, |
|
"step": 117400 |
|
}, |
|
{ |
|
"epoch": 6.55, |
|
"learning_rate": 0.0002, |
|
"loss": 0.5549, |
|
"step": 117500 |
|
}, |
|
{ |
|
"epoch": 6.55, |
|
"learning_rate": 0.0002, |
|
"loss": 0.5614, |
|
"step": 117600 |
|
}, |
|
{ |
|
"epoch": 6.56, |
|
"learning_rate": 0.0002, |
|
"loss": 0.5721, |
|
"step": 117700 |
|
}, |
|
{ |
|
"epoch": 6.56, |
|
"learning_rate": 0.0002, |
|
"loss": 0.5596, |
|
"step": 117800 |
|
}, |
|
{ |
|
"epoch": 6.57, |
|
"learning_rate": 0.0002, |
|
"loss": 0.5656, |
|
"step": 117900 |
|
}, |
|
{ |
|
"epoch": 6.58, |
|
"learning_rate": 0.0002, |
|
"loss": 0.5459, |
|
"step": 118000 |
|
}, |
|
{ |
|
"epoch": 6.58, |
|
"learning_rate": 0.0002, |
|
"loss": 0.552, |
|
"step": 118100 |
|
}, |
|
{ |
|
"epoch": 6.59, |
|
"learning_rate": 0.0002, |
|
"loss": 0.5642, |
|
"step": 118200 |
|
}, |
|
{ |
|
"epoch": 6.59, |
|
"learning_rate": 0.0002, |
|
"loss": 0.5729, |
|
"step": 118300 |
|
}, |
|
{ |
|
"epoch": 6.6, |
|
"learning_rate": 0.0002, |
|
"loss": 0.5577, |
|
"step": 118400 |
|
}, |
|
{ |
|
"epoch": 6.6, |
|
"learning_rate": 0.0002, |
|
"loss": 0.5576, |
|
"step": 118500 |
|
}, |
|
{ |
|
"epoch": 6.61, |
|
"learning_rate": 0.0002, |
|
"loss": 0.5848, |
|
"step": 118600 |
|
}, |
|
{ |
|
"epoch": 6.61, |
|
"learning_rate": 0.0002, |
|
"loss": 0.5571, |
|
"step": 118700 |
|
}, |
|
{ |
|
"epoch": 6.62, |
|
"learning_rate": 0.0002, |
|
"loss": 0.5502, |
|
"step": 118800 |
|
}, |
|
{ |
|
"epoch": 6.63, |
|
"learning_rate": 0.0002, |
|
"loss": 0.5676, |
|
"step": 118900 |
|
}, |
|
{ |
|
"epoch": 6.63, |
|
"learning_rate": 0.0002, |
|
"loss": 0.5556, |
|
"step": 119000 |
|
}, |
|
{ |
|
"epoch": 6.64, |
|
"learning_rate": 0.0002, |
|
"loss": 0.5538, |
|
"step": 119100 |
|
}, |
|
{ |
|
"epoch": 6.64, |
|
"learning_rate": 0.0002, |
|
"loss": 0.5527, |
|
"step": 119200 |
|
}, |
|
{ |
|
"epoch": 6.65, |
|
"learning_rate": 0.0002, |
|
"loss": 0.5675, |
|
"step": 119300 |
|
}, |
|
{ |
|
"epoch": 6.65, |
|
"learning_rate": 0.0002, |
|
"loss": 0.543, |
|
"step": 119400 |
|
}, |
|
{ |
|
"epoch": 6.66, |
|
"learning_rate": 0.0002, |
|
"loss": 0.5614, |
|
"step": 119500 |
|
}, |
|
{ |
|
"epoch": 6.66, |
|
"learning_rate": 0.0002, |
|
"loss": 0.5577, |
|
"step": 119600 |
|
}, |
|
{ |
|
"epoch": 6.67, |
|
"learning_rate": 0.0002, |
|
"loss": 0.5507, |
|
"step": 119700 |
|
}, |
|
{ |
|
"epoch": 6.68, |
|
"learning_rate": 0.0002, |
|
"loss": 0.562, |
|
"step": 119800 |
|
}, |
|
{ |
|
"epoch": 6.68, |
|
"learning_rate": 0.0002, |
|
"loss": 0.554, |
|
"step": 119900 |
|
}, |
|
{ |
|
"epoch": 6.69, |
|
"learning_rate": 0.0002, |
|
"loss": 0.5651, |
|
"step": 120000 |
|
}, |
|
{ |
|
"epoch": 6.69, |
|
"eval_gen_len": 18.99955116696589, |
|
"eval_loss": 2.6814467906951904, |
|
"eval_rouge1": 25.3277, |
|
"eval_rouge2": 12.4166, |
|
"eval_rougeL": 20.9334, |
|
"eval_rougeLsum": 23.9166, |
|
"eval_runtime": 477.4979, |
|
"eval_samples_per_second": 27.996, |
|
"eval_steps_per_second": 1.751, |
|
"step": 120000 |
|
}, |
|
{ |
|
"epoch": 6.69, |
|
"learning_rate": 0.0002, |
|
"loss": 0.5714, |
|
"step": 120100 |
|
}, |
|
{ |
|
"epoch": 6.7, |
|
"learning_rate": 0.0002, |
|
"loss": 0.5739, |
|
"step": 120200 |
|
}, |
|
{ |
|
"epoch": 6.7, |
|
"learning_rate": 0.0002, |
|
"loss": 0.552, |
|
"step": 120300 |
|
}, |
|
{ |
|
"epoch": 6.71, |
|
"learning_rate": 0.0002, |
|
"loss": 0.5548, |
|
"step": 120400 |
|
}, |
|
{ |
|
"epoch": 6.71, |
|
"learning_rate": 0.0002, |
|
"loss": 0.5658, |
|
"step": 120500 |
|
}, |
|
{ |
|
"epoch": 6.72, |
|
"learning_rate": 0.0002, |
|
"loss": 0.5729, |
|
"step": 120600 |
|
}, |
|
{ |
|
"epoch": 6.73, |
|
"learning_rate": 0.0002, |
|
"loss": 0.5614, |
|
"step": 120700 |
|
}, |
|
{ |
|
"epoch": 6.73, |
|
"learning_rate": 0.0002, |
|
"loss": 0.5745, |
|
"step": 120800 |
|
}, |
|
{ |
|
"epoch": 6.74, |
|
"learning_rate": 0.0002, |
|
"loss": 0.5595, |
|
"step": 120900 |
|
}, |
|
{ |
|
"epoch": 6.74, |
|
"learning_rate": 0.0002, |
|
"loss": 0.5583, |
|
"step": 121000 |
|
}, |
|
{ |
|
"epoch": 6.75, |
|
"learning_rate": 0.0002, |
|
"loss": 0.553, |
|
"step": 121100 |
|
}, |
|
{ |
|
"epoch": 6.75, |
|
"learning_rate": 0.0002, |
|
"loss": 0.5788, |
|
"step": 121200 |
|
}, |
|
{ |
|
"epoch": 6.76, |
|
"learning_rate": 0.0002, |
|
"loss": 0.5535, |
|
"step": 121300 |
|
}, |
|
{ |
|
"epoch": 6.77, |
|
"learning_rate": 0.0002, |
|
"loss": 0.5666, |
|
"step": 121400 |
|
}, |
|
{ |
|
"epoch": 6.77, |
|
"learning_rate": 0.0002, |
|
"loss": 0.5616, |
|
"step": 121500 |
|
}, |
|
{ |
|
"epoch": 6.78, |
|
"learning_rate": 0.0002, |
|
"loss": 0.5619, |
|
"step": 121600 |
|
}, |
|
{ |
|
"epoch": 6.78, |
|
"learning_rate": 0.0002, |
|
"loss": 0.559, |
|
"step": 121700 |
|
}, |
|
{ |
|
"epoch": 6.79, |
|
"learning_rate": 0.0002, |
|
"loss": 0.5806, |
|
"step": 121800 |
|
}, |
|
{ |
|
"epoch": 6.79, |
|
"learning_rate": 0.0002, |
|
"loss": 0.5696, |
|
"step": 121900 |
|
}, |
|
{ |
|
"epoch": 6.8, |
|
"learning_rate": 0.0002, |
|
"loss": 0.5667, |
|
"step": 122000 |
|
}, |
|
{ |
|
"epoch": 6.8, |
|
"learning_rate": 0.0002, |
|
"loss": 0.5452, |
|
"step": 122100 |
|
}, |
|
{ |
|
"epoch": 6.81, |
|
"learning_rate": 0.0002, |
|
"loss": 0.5609, |
|
"step": 122200 |
|
}, |
|
{ |
|
"epoch": 6.82, |
|
"learning_rate": 0.0002, |
|
"loss": 0.5703, |
|
"step": 122300 |
|
}, |
|
{ |
|
"epoch": 6.82, |
|
"learning_rate": 0.0002, |
|
"loss": 0.5568, |
|
"step": 122400 |
|
}, |
|
{ |
|
"epoch": 6.83, |
|
"learning_rate": 0.0002, |
|
"loss": 0.565, |
|
"step": 122500 |
|
}, |
|
{ |
|
"epoch": 6.83, |
|
"learning_rate": 0.0002, |
|
"loss": 0.5556, |
|
"step": 122600 |
|
}, |
|
{ |
|
"epoch": 6.84, |
|
"learning_rate": 0.0002, |
|
"loss": 0.5577, |
|
"step": 122700 |
|
}, |
|
{ |
|
"epoch": 6.84, |
|
"learning_rate": 0.0002, |
|
"loss": 0.5625, |
|
"step": 122800 |
|
}, |
|
{ |
|
"epoch": 6.85, |
|
"learning_rate": 0.0002, |
|
"loss": 0.5589, |
|
"step": 122900 |
|
}, |
|
{ |
|
"epoch": 6.85, |
|
"learning_rate": 0.0002, |
|
"loss": 0.5728, |
|
"step": 123000 |
|
}, |
|
{ |
|
"epoch": 6.86, |
|
"learning_rate": 0.0002, |
|
"loss": 0.5534, |
|
"step": 123100 |
|
}, |
|
{ |
|
"epoch": 6.87, |
|
"learning_rate": 0.0002, |
|
"loss": 0.5592, |
|
"step": 123200 |
|
}, |
|
{ |
|
"epoch": 6.87, |
|
"learning_rate": 0.0002, |
|
"loss": 0.5646, |
|
"step": 123300 |
|
}, |
|
{ |
|
"epoch": 6.88, |
|
"learning_rate": 0.0002, |
|
"loss": 0.5774, |
|
"step": 123400 |
|
}, |
|
{ |
|
"epoch": 6.88, |
|
"learning_rate": 0.0002, |
|
"loss": 0.5629, |
|
"step": 123500 |
|
}, |
|
{ |
|
"epoch": 6.89, |
|
"learning_rate": 0.0002, |
|
"loss": 0.5676, |
|
"step": 123600 |
|
}, |
|
{ |
|
"epoch": 6.89, |
|
"learning_rate": 0.0002, |
|
"loss": 0.5734, |
|
"step": 123700 |
|
}, |
|
{ |
|
"epoch": 6.9, |
|
"learning_rate": 0.0002, |
|
"loss": 0.5566, |
|
"step": 123800 |
|
}, |
|
{ |
|
"epoch": 6.9, |
|
"learning_rate": 0.0002, |
|
"loss": 0.5603, |
|
"step": 123900 |
|
}, |
|
{ |
|
"epoch": 6.91, |
|
"learning_rate": 0.0002, |
|
"loss": 0.5758, |
|
"step": 124000 |
|
}, |
|
{ |
|
"epoch": 6.92, |
|
"learning_rate": 0.0002, |
|
"loss": 0.57, |
|
"step": 124100 |
|
}, |
|
{ |
|
"epoch": 6.92, |
|
"learning_rate": 0.0002, |
|
"loss": 0.5823, |
|
"step": 124200 |
|
}, |
|
{ |
|
"epoch": 6.93, |
|
"learning_rate": 0.0002, |
|
"loss": 0.5699, |
|
"step": 124300 |
|
}, |
|
{ |
|
"epoch": 6.93, |
|
"learning_rate": 0.0002, |
|
"loss": 0.5487, |
|
"step": 124400 |
|
}, |
|
{ |
|
"epoch": 6.94, |
|
"learning_rate": 0.0002, |
|
"loss": 0.561, |
|
"step": 124500 |
|
}, |
|
{ |
|
"epoch": 6.94, |
|
"learning_rate": 0.0002, |
|
"loss": 0.585, |
|
"step": 124600 |
|
}, |
|
{ |
|
"epoch": 6.95, |
|
"learning_rate": 0.0002, |
|
"loss": 0.5693, |
|
"step": 124700 |
|
}, |
|
{ |
|
"epoch": 6.95, |
|
"learning_rate": 0.0002, |
|
"loss": 0.5728, |
|
"step": 124800 |
|
}, |
|
{ |
|
"epoch": 6.96, |
|
"learning_rate": 0.0002, |
|
"loss": 0.5794, |
|
"step": 124900 |
|
}, |
|
{ |
|
"epoch": 6.97, |
|
"learning_rate": 0.0002, |
|
"loss": 0.5483, |
|
"step": 125000 |
|
}, |
|
{ |
|
"epoch": 6.97, |
|
"learning_rate": 0.0002, |
|
"loss": 0.56, |
|
"step": 125100 |
|
}, |
|
{ |
|
"epoch": 6.98, |
|
"learning_rate": 0.0002, |
|
"loss": 0.5635, |
|
"step": 125200 |
|
}, |
|
{ |
|
"epoch": 6.98, |
|
"learning_rate": 0.0002, |
|
"loss": 0.5735, |
|
"step": 125300 |
|
}, |
|
{ |
|
"epoch": 6.99, |
|
"learning_rate": 0.0002, |
|
"loss": 0.5811, |
|
"step": 125400 |
|
}, |
|
{ |
|
"epoch": 6.99, |
|
"learning_rate": 0.0002, |
|
"loss": 0.5757, |
|
"step": 125500 |
|
}, |
|
{ |
|
"epoch": 7.0, |
|
"learning_rate": 0.0002, |
|
"loss": 0.5567, |
|
"step": 125600 |
|
}, |
|
{ |
|
"epoch": 7.0, |
|
"learning_rate": 0.0002, |
|
"loss": 0.5306, |
|
"step": 125700 |
|
}, |
|
{ |
|
"epoch": 7.01, |
|
"learning_rate": 0.0002, |
|
"loss": 0.5365, |
|
"step": 125800 |
|
}, |
|
{ |
|
"epoch": 7.02, |
|
"learning_rate": 0.0002, |
|
"loss": 0.5386, |
|
"step": 125900 |
|
}, |
|
{ |
|
"epoch": 7.02, |
|
"learning_rate": 0.0002, |
|
"loss": 0.5402, |
|
"step": 126000 |
|
}, |
|
{ |
|
"epoch": 7.03, |
|
"learning_rate": 0.0002, |
|
"loss": 0.5341, |
|
"step": 126100 |
|
}, |
|
{ |
|
"epoch": 7.03, |
|
"learning_rate": 0.0002, |
|
"loss": 0.5384, |
|
"step": 126200 |
|
}, |
|
{ |
|
"epoch": 7.04, |
|
"learning_rate": 0.0002, |
|
"loss": 0.5285, |
|
"step": 126300 |
|
}, |
|
{ |
|
"epoch": 7.04, |
|
"learning_rate": 0.0002, |
|
"loss": 0.553, |
|
"step": 126400 |
|
}, |
|
{ |
|
"epoch": 7.05, |
|
"learning_rate": 0.0002, |
|
"loss": 0.5334, |
|
"step": 126500 |
|
}, |
|
{ |
|
"epoch": 7.05, |
|
"learning_rate": 0.0002, |
|
"loss": 0.5277, |
|
"step": 126600 |
|
}, |
|
{ |
|
"epoch": 7.06, |
|
"learning_rate": 0.0002, |
|
"loss": 0.56, |
|
"step": 126700 |
|
}, |
|
{ |
|
"epoch": 7.07, |
|
"learning_rate": 0.0002, |
|
"loss": 0.5326, |
|
"step": 126800 |
|
}, |
|
{ |
|
"epoch": 7.07, |
|
"learning_rate": 0.0002, |
|
"loss": 0.5438, |
|
"step": 126900 |
|
}, |
|
{ |
|
"epoch": 7.08, |
|
"learning_rate": 0.0002, |
|
"loss": 0.551, |
|
"step": 127000 |
|
}, |
|
{ |
|
"epoch": 7.08, |
|
"learning_rate": 0.0002, |
|
"loss": 0.5461, |
|
"step": 127100 |
|
}, |
|
{ |
|
"epoch": 7.09, |
|
"learning_rate": 0.0002, |
|
"loss": 0.5438, |
|
"step": 127200 |
|
}, |
|
{ |
|
"epoch": 7.09, |
|
"learning_rate": 0.0002, |
|
"loss": 0.5459, |
|
"step": 127300 |
|
}, |
|
{ |
|
"epoch": 7.1, |
|
"learning_rate": 0.0002, |
|
"loss": 0.5509, |
|
"step": 127400 |
|
}, |
|
{ |
|
"epoch": 7.11, |
|
"learning_rate": 0.0002, |
|
"loss": 0.5329, |
|
"step": 127500 |
|
}, |
|
{ |
|
"epoch": 7.11, |
|
"learning_rate": 0.0002, |
|
"loss": 0.5525, |
|
"step": 127600 |
|
}, |
|
{ |
|
"epoch": 7.12, |
|
"learning_rate": 0.0002, |
|
"loss": 0.5337, |
|
"step": 127700 |
|
}, |
|
{ |
|
"epoch": 7.12, |
|
"learning_rate": 0.0002, |
|
"loss": 0.541, |
|
"step": 127800 |
|
}, |
|
{ |
|
"epoch": 7.13, |
|
"learning_rate": 0.0002, |
|
"loss": 0.5577, |
|
"step": 127900 |
|
}, |
|
{ |
|
"epoch": 7.13, |
|
"learning_rate": 0.0002, |
|
"loss": 0.546, |
|
"step": 128000 |
|
}, |
|
{ |
|
"epoch": 7.14, |
|
"learning_rate": 0.0002, |
|
"loss": 0.5478, |
|
"step": 128100 |
|
}, |
|
{ |
|
"epoch": 7.14, |
|
"learning_rate": 0.0002, |
|
"loss": 0.5394, |
|
"step": 128200 |
|
}, |
|
{ |
|
"epoch": 7.15, |
|
"learning_rate": 0.0002, |
|
"loss": 0.5302, |
|
"step": 128300 |
|
}, |
|
{ |
|
"epoch": 7.16, |
|
"learning_rate": 0.0002, |
|
"loss": 0.5545, |
|
"step": 128400 |
|
}, |
|
{ |
|
"epoch": 7.16, |
|
"learning_rate": 0.0002, |
|
"loss": 0.532, |
|
"step": 128500 |
|
}, |
|
{ |
|
"epoch": 7.17, |
|
"learning_rate": 0.0002, |
|
"loss": 0.5468, |
|
"step": 128600 |
|
}, |
|
{ |
|
"epoch": 7.17, |
|
"learning_rate": 0.0002, |
|
"loss": 0.5394, |
|
"step": 128700 |
|
}, |
|
{ |
|
"epoch": 7.18, |
|
"learning_rate": 0.0002, |
|
"loss": 0.542, |
|
"step": 128800 |
|
}, |
|
{ |
|
"epoch": 7.18, |
|
"learning_rate": 0.0002, |
|
"loss": 0.5319, |
|
"step": 128900 |
|
}, |
|
{ |
|
"epoch": 7.19, |
|
"learning_rate": 0.0002, |
|
"loss": 0.5431, |
|
"step": 129000 |
|
}, |
|
{ |
|
"epoch": 7.19, |
|
"learning_rate": 0.0002, |
|
"loss": 0.5413, |
|
"step": 129100 |
|
}, |
|
{ |
|
"epoch": 7.2, |
|
"learning_rate": 0.0002, |
|
"loss": 0.5368, |
|
"step": 129200 |
|
}, |
|
{ |
|
"epoch": 7.21, |
|
"learning_rate": 0.0002, |
|
"loss": 0.5458, |
|
"step": 129300 |
|
}, |
|
{ |
|
"epoch": 7.21, |
|
"learning_rate": 0.0002, |
|
"loss": 0.5405, |
|
"step": 129400 |
|
}, |
|
{ |
|
"epoch": 7.22, |
|
"learning_rate": 0.0002, |
|
"loss": 0.5543, |
|
"step": 129500 |
|
}, |
|
{ |
|
"epoch": 7.22, |
|
"learning_rate": 0.0002, |
|
"loss": 0.5318, |
|
"step": 129600 |
|
}, |
|
{ |
|
"epoch": 7.23, |
|
"learning_rate": 0.0002, |
|
"loss": 0.5489, |
|
"step": 129700 |
|
}, |
|
{ |
|
"epoch": 7.23, |
|
"learning_rate": 0.0002, |
|
"loss": 0.5346, |
|
"step": 129800 |
|
}, |
|
{ |
|
"epoch": 7.24, |
|
"learning_rate": 0.0002, |
|
"loss": 0.5403, |
|
"step": 129900 |
|
}, |
|
{ |
|
"epoch": 7.24, |
|
"learning_rate": 0.0002, |
|
"loss": 0.5442, |
|
"step": 130000 |
|
}, |
|
{ |
|
"epoch": 7.24, |
|
"eval_gen_len": 18.99970077797726, |
|
"eval_loss": 2.7132627964019775, |
|
"eval_rouge1": 25.3009, |
|
"eval_rouge2": 12.3948, |
|
"eval_rougeL": 20.9085, |
|
"eval_rougeLsum": 23.8773, |
|
"eval_runtime": 477.2595, |
|
"eval_samples_per_second": 28.01, |
|
"eval_steps_per_second": 1.752, |
|
"step": 130000 |
|
}, |
|
{ |
|
"epoch": 7.25, |
|
"learning_rate": 0.0002, |
|
"loss": 0.5588, |
|
"step": 130100 |
|
}, |
|
{ |
|
"epoch": 7.26, |
|
"learning_rate": 0.0002, |
|
"loss": 0.5397, |
|
"step": 130200 |
|
}, |
|
{ |
|
"epoch": 7.26, |
|
"learning_rate": 0.0002, |
|
"loss": 0.5392, |
|
"step": 130300 |
|
}, |
|
{ |
|
"epoch": 7.27, |
|
"learning_rate": 0.0002, |
|
"loss": 0.538, |
|
"step": 130400 |
|
}, |
|
{ |
|
"epoch": 7.27, |
|
"learning_rate": 0.0002, |
|
"loss": 0.5464, |
|
"step": 130500 |
|
}, |
|
{ |
|
"epoch": 7.28, |
|
"learning_rate": 0.0002, |
|
"loss": 0.5475, |
|
"step": 130600 |
|
}, |
|
{ |
|
"epoch": 7.28, |
|
"learning_rate": 0.0002, |
|
"loss": 0.547, |
|
"step": 130700 |
|
}, |
|
{ |
|
"epoch": 7.29, |
|
"learning_rate": 0.0002, |
|
"loss": 0.5494, |
|
"step": 130800 |
|
}, |
|
{ |
|
"epoch": 7.29, |
|
"learning_rate": 0.0002, |
|
"loss": 0.5357, |
|
"step": 130900 |
|
}, |
|
{ |
|
"epoch": 7.3, |
|
"learning_rate": 0.0002, |
|
"loss": 0.5518, |
|
"step": 131000 |
|
}, |
|
{ |
|
"epoch": 7.31, |
|
"learning_rate": 0.0002, |
|
"loss": 0.546, |
|
"step": 131100 |
|
}, |
|
{ |
|
"epoch": 7.31, |
|
"learning_rate": 0.0002, |
|
"loss": 0.5416, |
|
"step": 131200 |
|
}, |
|
{ |
|
"epoch": 7.32, |
|
"learning_rate": 0.0002, |
|
"loss": 0.5489, |
|
"step": 131300 |
|
}, |
|
{ |
|
"epoch": 7.32, |
|
"learning_rate": 0.0002, |
|
"loss": 0.5483, |
|
"step": 131400 |
|
}, |
|
{ |
|
"epoch": 7.33, |
|
"learning_rate": 0.0002, |
|
"loss": 0.5557, |
|
"step": 131500 |
|
}, |
|
{ |
|
"epoch": 7.33, |
|
"learning_rate": 0.0002, |
|
"loss": 0.5605, |
|
"step": 131600 |
|
}, |
|
{ |
|
"epoch": 7.34, |
|
"learning_rate": 0.0002, |
|
"loss": 0.5532, |
|
"step": 131700 |
|
}, |
|
{ |
|
"epoch": 7.34, |
|
"learning_rate": 0.0002, |
|
"loss": 0.5482, |
|
"step": 131800 |
|
}, |
|
{ |
|
"epoch": 7.35, |
|
"learning_rate": 0.0002, |
|
"loss": 0.5482, |
|
"step": 131900 |
|
}, |
|
{ |
|
"epoch": 7.36, |
|
"learning_rate": 0.0002, |
|
"loss": 0.5458, |
|
"step": 132000 |
|
}, |
|
{ |
|
"epoch": 7.36, |
|
"learning_rate": 0.0002, |
|
"loss": 0.5499, |
|
"step": 132100 |
|
}, |
|
{ |
|
"epoch": 7.37, |
|
"learning_rate": 0.0002, |
|
"loss": 0.5537, |
|
"step": 132200 |
|
}, |
|
{ |
|
"epoch": 7.37, |
|
"learning_rate": 0.0002, |
|
"loss": 0.5375, |
|
"step": 132300 |
|
}, |
|
{ |
|
"epoch": 7.38, |
|
"learning_rate": 0.0002, |
|
"loss": 0.5515, |
|
"step": 132400 |
|
}, |
|
{ |
|
"epoch": 7.38, |
|
"learning_rate": 0.0002, |
|
"loss": 0.5499, |
|
"step": 132500 |
|
}, |
|
{ |
|
"epoch": 7.39, |
|
"learning_rate": 0.0002, |
|
"loss": 0.535, |
|
"step": 132600 |
|
}, |
|
{ |
|
"epoch": 7.39, |
|
"learning_rate": 0.0002, |
|
"loss": 0.5537, |
|
"step": 132700 |
|
}, |
|
{ |
|
"epoch": 7.4, |
|
"learning_rate": 0.0002, |
|
"loss": 0.5498, |
|
"step": 132800 |
|
}, |
|
{ |
|
"epoch": 7.41, |
|
"learning_rate": 0.0002, |
|
"loss": 0.5648, |
|
"step": 132900 |
|
}, |
|
{ |
|
"epoch": 7.41, |
|
"learning_rate": 0.0002, |
|
"loss": 0.5372, |
|
"step": 133000 |
|
}, |
|
{ |
|
"epoch": 7.42, |
|
"learning_rate": 0.0002, |
|
"loss": 0.5622, |
|
"step": 133100 |
|
}, |
|
{ |
|
"epoch": 7.42, |
|
"learning_rate": 0.0002, |
|
"loss": 0.5463, |
|
"step": 133200 |
|
}, |
|
{ |
|
"epoch": 7.43, |
|
"learning_rate": 0.0002, |
|
"loss": 0.5639, |
|
"step": 133300 |
|
}, |
|
{ |
|
"epoch": 7.43, |
|
"learning_rate": 0.0002, |
|
"loss": 0.5437, |
|
"step": 133400 |
|
}, |
|
{ |
|
"epoch": 7.44, |
|
"learning_rate": 0.0002, |
|
"loss": 0.542, |
|
"step": 133500 |
|
}, |
|
{ |
|
"epoch": 7.44, |
|
"learning_rate": 0.0002, |
|
"loss": 0.5435, |
|
"step": 133600 |
|
}, |
|
{ |
|
"epoch": 7.45, |
|
"learning_rate": 0.0002, |
|
"loss": 0.5531, |
|
"step": 133700 |
|
}, |
|
{ |
|
"epoch": 7.46, |
|
"learning_rate": 0.0002, |
|
"loss": 0.5522, |
|
"step": 133800 |
|
}, |
|
{ |
|
"epoch": 7.46, |
|
"learning_rate": 0.0002, |
|
"loss": 0.5569, |
|
"step": 133900 |
|
}, |
|
{ |
|
"epoch": 7.47, |
|
"learning_rate": 0.0002, |
|
"loss": 0.5489, |
|
"step": 134000 |
|
}, |
|
{ |
|
"epoch": 7.47, |
|
"learning_rate": 0.0002, |
|
"loss": 0.5459, |
|
"step": 134100 |
|
}, |
|
{ |
|
"epoch": 7.48, |
|
"learning_rate": 0.0002, |
|
"loss": 0.5584, |
|
"step": 134200 |
|
}, |
|
{ |
|
"epoch": 7.48, |
|
"learning_rate": 0.0002, |
|
"loss": 0.5563, |
|
"step": 134300 |
|
}, |
|
{ |
|
"epoch": 7.49, |
|
"learning_rate": 0.0002, |
|
"loss": 0.5459, |
|
"step": 134400 |
|
}, |
|
{ |
|
"epoch": 7.5, |
|
"learning_rate": 0.0002, |
|
"loss": 0.5389, |
|
"step": 134500 |
|
}, |
|
{ |
|
"epoch": 7.5, |
|
"learning_rate": 0.0002, |
|
"loss": 0.5451, |
|
"step": 134600 |
|
}, |
|
{ |
|
"epoch": 7.51, |
|
"learning_rate": 0.0002, |
|
"loss": 0.5435, |
|
"step": 134700 |
|
}, |
|
{ |
|
"epoch": 7.51, |
|
"learning_rate": 0.0002, |
|
"loss": 0.5538, |
|
"step": 134800 |
|
}, |
|
{ |
|
"epoch": 7.52, |
|
"learning_rate": 0.0002, |
|
"loss": 0.5466, |
|
"step": 134900 |
|
}, |
|
{ |
|
"epoch": 7.52, |
|
"learning_rate": 0.0002, |
|
"loss": 0.5522, |
|
"step": 135000 |
|
}, |
|
{ |
|
"epoch": 7.53, |
|
"learning_rate": 0.0002, |
|
"loss": 0.5564, |
|
"step": 135100 |
|
}, |
|
{ |
|
"epoch": 7.53, |
|
"learning_rate": 0.0002, |
|
"loss": 0.5531, |
|
"step": 135200 |
|
}, |
|
{ |
|
"epoch": 7.54, |
|
"learning_rate": 0.0002, |
|
"loss": 0.5478, |
|
"step": 135300 |
|
}, |
|
{ |
|
"epoch": 7.55, |
|
"learning_rate": 0.0002, |
|
"loss": 0.5715, |
|
"step": 135400 |
|
}, |
|
{ |
|
"epoch": 7.55, |
|
"learning_rate": 0.0002, |
|
"loss": 0.5547, |
|
"step": 135500 |
|
}, |
|
{ |
|
"epoch": 7.56, |
|
"learning_rate": 0.0002, |
|
"loss": 0.5514, |
|
"step": 135600 |
|
}, |
|
{ |
|
"epoch": 7.56, |
|
"learning_rate": 0.0002, |
|
"loss": 0.5506, |
|
"step": 135700 |
|
}, |
|
{ |
|
"epoch": 7.57, |
|
"learning_rate": 0.0002, |
|
"loss": 0.547, |
|
"step": 135800 |
|
}, |
|
{ |
|
"epoch": 7.57, |
|
"learning_rate": 0.0002, |
|
"loss": 0.5566, |
|
"step": 135900 |
|
}, |
|
{ |
|
"epoch": 7.58, |
|
"learning_rate": 0.0002, |
|
"loss": 0.5594, |
|
"step": 136000 |
|
}, |
|
{ |
|
"epoch": 7.58, |
|
"learning_rate": 0.0002, |
|
"loss": 0.5524, |
|
"step": 136100 |
|
}, |
|
{ |
|
"epoch": 7.59, |
|
"learning_rate": 0.0002, |
|
"loss": 0.5635, |
|
"step": 136200 |
|
}, |
|
{ |
|
"epoch": 7.6, |
|
"learning_rate": 0.0002, |
|
"loss": 0.5551, |
|
"step": 136300 |
|
}, |
|
{ |
|
"epoch": 7.6, |
|
"learning_rate": 0.0002, |
|
"loss": 0.553, |
|
"step": 136400 |
|
}, |
|
{ |
|
"epoch": 7.61, |
|
"learning_rate": 0.0002, |
|
"loss": 0.5664, |
|
"step": 136500 |
|
}, |
|
{ |
|
"epoch": 7.61, |
|
"learning_rate": 0.0002, |
|
"loss": 0.5394, |
|
"step": 136600 |
|
}, |
|
{ |
|
"epoch": 7.62, |
|
"learning_rate": 0.0002, |
|
"loss": 0.5626, |
|
"step": 136700 |
|
}, |
|
{ |
|
"epoch": 7.62, |
|
"learning_rate": 0.0002, |
|
"loss": 0.5541, |
|
"step": 136800 |
|
}, |
|
{ |
|
"epoch": 7.63, |
|
"learning_rate": 0.0002, |
|
"loss": 0.5575, |
|
"step": 136900 |
|
}, |
|
{ |
|
"epoch": 7.63, |
|
"learning_rate": 0.0002, |
|
"loss": 0.5567, |
|
"step": 137000 |
|
}, |
|
{ |
|
"epoch": 7.64, |
|
"learning_rate": 0.0002, |
|
"loss": 0.5554, |
|
"step": 137100 |
|
}, |
|
{ |
|
"epoch": 7.65, |
|
"learning_rate": 0.0002, |
|
"loss": 0.5423, |
|
"step": 137200 |
|
}, |
|
{ |
|
"epoch": 7.65, |
|
"learning_rate": 0.0002, |
|
"loss": 0.5524, |
|
"step": 137300 |
|
}, |
|
{ |
|
"epoch": 7.66, |
|
"learning_rate": 0.0002, |
|
"loss": 0.5636, |
|
"step": 137400 |
|
}, |
|
{ |
|
"epoch": 7.66, |
|
"learning_rate": 0.0002, |
|
"loss": 0.5629, |
|
"step": 137500 |
|
}, |
|
{ |
|
"epoch": 7.67, |
|
"learning_rate": 0.0002, |
|
"loss": 0.5536, |
|
"step": 137600 |
|
}, |
|
{ |
|
"epoch": 7.67, |
|
"learning_rate": 0.0002, |
|
"loss": 0.5506, |
|
"step": 137700 |
|
}, |
|
{ |
|
"epoch": 7.68, |
|
"learning_rate": 0.0002, |
|
"loss": 0.5601, |
|
"step": 137800 |
|
}, |
|
{ |
|
"epoch": 7.68, |
|
"learning_rate": 0.0002, |
|
"loss": 0.5649, |
|
"step": 137900 |
|
}, |
|
{ |
|
"epoch": 7.69, |
|
"learning_rate": 0.0002, |
|
"loss": 0.5438, |
|
"step": 138000 |
|
}, |
|
{ |
|
"epoch": 7.7, |
|
"learning_rate": 0.0002, |
|
"loss": 0.5511, |
|
"step": 138100 |
|
}, |
|
{ |
|
"epoch": 7.7, |
|
"learning_rate": 0.0002, |
|
"loss": 0.5453, |
|
"step": 138200 |
|
}, |
|
{ |
|
"epoch": 7.71, |
|
"learning_rate": 0.0002, |
|
"loss": 0.5592, |
|
"step": 138300 |
|
}, |
|
{ |
|
"epoch": 7.71, |
|
"learning_rate": 0.0002, |
|
"loss": 0.5518, |
|
"step": 138400 |
|
}, |
|
{ |
|
"epoch": 7.72, |
|
"learning_rate": 0.0002, |
|
"loss": 0.5637, |
|
"step": 138500 |
|
}, |
|
{ |
|
"epoch": 7.72, |
|
"learning_rate": 0.0002, |
|
"loss": 0.5533, |
|
"step": 138600 |
|
}, |
|
{ |
|
"epoch": 7.73, |
|
"learning_rate": 0.0002, |
|
"loss": 0.5615, |
|
"step": 138700 |
|
}, |
|
{ |
|
"epoch": 7.73, |
|
"learning_rate": 0.0002, |
|
"loss": 0.549, |
|
"step": 138800 |
|
}, |
|
{ |
|
"epoch": 7.74, |
|
"learning_rate": 0.0002, |
|
"loss": 0.5433, |
|
"step": 138900 |
|
}, |
|
{ |
|
"epoch": 7.75, |
|
"learning_rate": 0.0002, |
|
"loss": 0.5614, |
|
"step": 139000 |
|
}, |
|
{ |
|
"epoch": 7.75, |
|
"learning_rate": 0.0002, |
|
"loss": 0.5621, |
|
"step": 139100 |
|
}, |
|
{ |
|
"epoch": 7.76, |
|
"learning_rate": 0.0002, |
|
"loss": 0.5581, |
|
"step": 139200 |
|
}, |
|
{ |
|
"epoch": 7.76, |
|
"learning_rate": 0.0002, |
|
"loss": 0.5498, |
|
"step": 139300 |
|
}, |
|
{ |
|
"epoch": 7.77, |
|
"learning_rate": 0.0002, |
|
"loss": 0.5725, |
|
"step": 139400 |
|
}, |
|
{ |
|
"epoch": 7.77, |
|
"learning_rate": 0.0002, |
|
"loss": 0.5553, |
|
"step": 139500 |
|
}, |
|
{ |
|
"epoch": 7.78, |
|
"learning_rate": 0.0002, |
|
"loss": 0.556, |
|
"step": 139600 |
|
}, |
|
{ |
|
"epoch": 7.78, |
|
"learning_rate": 0.0002, |
|
"loss": 0.5517, |
|
"step": 139700 |
|
}, |
|
{ |
|
"epoch": 7.79, |
|
"learning_rate": 0.0002, |
|
"loss": 0.5554, |
|
"step": 139800 |
|
}, |
|
{ |
|
"epoch": 7.8, |
|
"learning_rate": 0.0002, |
|
"loss": 0.5647, |
|
"step": 139900 |
|
}, |
|
{ |
|
"epoch": 7.8, |
|
"learning_rate": 0.0002, |
|
"loss": 0.5574, |
|
"step": 140000 |
|
}, |
|
{ |
|
"epoch": 7.8, |
|
"eval_gen_len": 18.999027528426094, |
|
"eval_loss": 2.6958916187286377, |
|
"eval_rouge1": 25.3491, |
|
"eval_rouge2": 12.3925, |
|
"eval_rougeL": 20.911, |
|
"eval_rougeLsum": 23.9331, |
|
"eval_runtime": 475.0144, |
|
"eval_samples_per_second": 28.142, |
|
"eval_steps_per_second": 1.76, |
|
"step": 140000 |
|
}, |
|
{ |
|
"epoch": 7.81, |
|
"learning_rate": 0.0002, |
|
"loss": 0.5604, |
|
"step": 140100 |
|
}, |
|
{ |
|
"epoch": 7.81, |
|
"learning_rate": 0.0002, |
|
"loss": 0.5708, |
|
"step": 140200 |
|
}, |
|
{ |
|
"epoch": 7.82, |
|
"learning_rate": 0.0002, |
|
"loss": 0.5585, |
|
"step": 140300 |
|
}, |
|
{ |
|
"epoch": 7.82, |
|
"learning_rate": 0.0002, |
|
"loss": 0.5554, |
|
"step": 140400 |
|
}, |
|
{ |
|
"epoch": 7.83, |
|
"learning_rate": 0.0002, |
|
"loss": 0.5517, |
|
"step": 140500 |
|
}, |
|
{ |
|
"epoch": 7.84, |
|
"learning_rate": 0.0002, |
|
"loss": 0.5564, |
|
"step": 140600 |
|
}, |
|
{ |
|
"epoch": 7.84, |
|
"learning_rate": 0.0002, |
|
"loss": 0.5588, |
|
"step": 140700 |
|
}, |
|
{ |
|
"epoch": 7.85, |
|
"learning_rate": 0.0002, |
|
"loss": 0.5506, |
|
"step": 140800 |
|
}, |
|
{ |
|
"epoch": 7.85, |
|
"learning_rate": 0.0002, |
|
"loss": 0.55, |
|
"step": 140900 |
|
}, |
|
{ |
|
"epoch": 7.86, |
|
"learning_rate": 0.0002, |
|
"loss": 0.5656, |
|
"step": 141000 |
|
}, |
|
{ |
|
"epoch": 7.86, |
|
"learning_rate": 0.0002, |
|
"loss": 0.5566, |
|
"step": 141100 |
|
}, |
|
{ |
|
"epoch": 7.87, |
|
"learning_rate": 0.0002, |
|
"loss": 0.5513, |
|
"step": 141200 |
|
}, |
|
{ |
|
"epoch": 7.87, |
|
"learning_rate": 0.0002, |
|
"loss": 0.5572, |
|
"step": 141300 |
|
}, |
|
{ |
|
"epoch": 7.88, |
|
"learning_rate": 0.0002, |
|
"loss": 0.5448, |
|
"step": 141400 |
|
}, |
|
{ |
|
"epoch": 7.89, |
|
"learning_rate": 0.0002, |
|
"loss": 0.5676, |
|
"step": 141500 |
|
}, |
|
{ |
|
"epoch": 7.89, |
|
"learning_rate": 0.0002, |
|
"loss": 0.5592, |
|
"step": 141600 |
|
}, |
|
{ |
|
"epoch": 7.9, |
|
"learning_rate": 0.0002, |
|
"loss": 0.5619, |
|
"step": 141700 |
|
}, |
|
{ |
|
"epoch": 7.9, |
|
"learning_rate": 0.0002, |
|
"loss": 0.5511, |
|
"step": 141800 |
|
}, |
|
{ |
|
"epoch": 7.91, |
|
"learning_rate": 0.0002, |
|
"loss": 0.557, |
|
"step": 141900 |
|
}, |
|
{ |
|
"epoch": 7.91, |
|
"learning_rate": 0.0002, |
|
"loss": 0.5537, |
|
"step": 142000 |
|
}, |
|
{ |
|
"epoch": 7.92, |
|
"learning_rate": 0.0002, |
|
"loss": 0.5544, |
|
"step": 142100 |
|
}, |
|
{ |
|
"epoch": 7.92, |
|
"learning_rate": 0.0002, |
|
"loss": 0.5628, |
|
"step": 142200 |
|
}, |
|
{ |
|
"epoch": 7.93, |
|
"learning_rate": 0.0002, |
|
"loss": 0.5547, |
|
"step": 142300 |
|
}, |
|
{ |
|
"epoch": 7.94, |
|
"learning_rate": 0.0002, |
|
"loss": 0.5657, |
|
"step": 142400 |
|
}, |
|
{ |
|
"epoch": 7.94, |
|
"learning_rate": 0.0002, |
|
"loss": 0.5575, |
|
"step": 142500 |
|
}, |
|
{ |
|
"epoch": 7.95, |
|
"learning_rate": 0.0002, |
|
"loss": 0.5755, |
|
"step": 142600 |
|
}, |
|
{ |
|
"epoch": 7.95, |
|
"learning_rate": 0.0002, |
|
"loss": 0.5491, |
|
"step": 142700 |
|
}, |
|
{ |
|
"epoch": 7.96, |
|
"learning_rate": 0.0002, |
|
"loss": 0.5622, |
|
"step": 142800 |
|
}, |
|
{ |
|
"epoch": 7.96, |
|
"learning_rate": 0.0002, |
|
"loss": 0.5463, |
|
"step": 142900 |
|
}, |
|
{ |
|
"epoch": 7.97, |
|
"learning_rate": 0.0002, |
|
"loss": 0.5626, |
|
"step": 143000 |
|
}, |
|
{ |
|
"epoch": 7.97, |
|
"learning_rate": 0.0002, |
|
"loss": 0.5753, |
|
"step": 143100 |
|
}, |
|
{ |
|
"epoch": 7.98, |
|
"learning_rate": 0.0002, |
|
"loss": 0.5586, |
|
"step": 143200 |
|
}, |
|
{ |
|
"epoch": 7.99, |
|
"learning_rate": 0.0002, |
|
"loss": 0.5518, |
|
"step": 143300 |
|
}, |
|
{ |
|
"epoch": 7.99, |
|
"learning_rate": 0.0002, |
|
"loss": 0.5666, |
|
"step": 143400 |
|
}, |
|
{ |
|
"epoch": 8.0, |
|
"learning_rate": 0.0002, |
|
"loss": 0.5732, |
|
"step": 143500 |
|
}, |
|
{ |
|
"epoch": 8.0, |
|
"learning_rate": 0.0002, |
|
"loss": 0.5422, |
|
"step": 143600 |
|
}, |
|
{ |
|
"epoch": 8.01, |
|
"learning_rate": 0.0002, |
|
"loss": 0.5353, |
|
"step": 143700 |
|
}, |
|
{ |
|
"epoch": 8.01, |
|
"learning_rate": 0.0002, |
|
"loss": 0.5337, |
|
"step": 143800 |
|
}, |
|
{ |
|
"epoch": 8.02, |
|
"learning_rate": 0.0002, |
|
"loss": 0.5239, |
|
"step": 143900 |
|
}, |
|
{ |
|
"epoch": 8.02, |
|
"learning_rate": 0.0002, |
|
"loss": 0.526, |
|
"step": 144000 |
|
}, |
|
{ |
|
"epoch": 8.03, |
|
"learning_rate": 0.0002, |
|
"loss": 0.5331, |
|
"step": 144100 |
|
}, |
|
{ |
|
"epoch": 8.04, |
|
"learning_rate": 0.0002, |
|
"loss": 0.537, |
|
"step": 144200 |
|
}, |
|
{ |
|
"epoch": 8.04, |
|
"learning_rate": 0.0002, |
|
"loss": 0.5256, |
|
"step": 144300 |
|
}, |
|
{ |
|
"epoch": 8.05, |
|
"learning_rate": 0.0002, |
|
"loss": 0.5276, |
|
"step": 144400 |
|
}, |
|
{ |
|
"epoch": 8.05, |
|
"learning_rate": 0.0002, |
|
"loss": 0.5404, |
|
"step": 144500 |
|
}, |
|
{ |
|
"epoch": 8.06, |
|
"learning_rate": 0.0002, |
|
"loss": 0.53, |
|
"step": 144600 |
|
}, |
|
{ |
|
"epoch": 8.06, |
|
"learning_rate": 0.0002, |
|
"loss": 0.5364, |
|
"step": 144700 |
|
}, |
|
{ |
|
"epoch": 8.07, |
|
"learning_rate": 0.0002, |
|
"loss": 0.5441, |
|
"step": 144800 |
|
}, |
|
{ |
|
"epoch": 8.07, |
|
"learning_rate": 0.0002, |
|
"loss": 0.5423, |
|
"step": 144900 |
|
}, |
|
{ |
|
"epoch": 8.08, |
|
"learning_rate": 0.0002, |
|
"loss": 0.5399, |
|
"step": 145000 |
|
}, |
|
{ |
|
"epoch": 8.09, |
|
"learning_rate": 0.0002, |
|
"loss": 0.5367, |
|
"step": 145100 |
|
}, |
|
{ |
|
"epoch": 8.09, |
|
"learning_rate": 0.0002, |
|
"loss": 0.5313, |
|
"step": 145200 |
|
}, |
|
{ |
|
"epoch": 8.1, |
|
"learning_rate": 0.0002, |
|
"loss": 0.5339, |
|
"step": 145300 |
|
}, |
|
{ |
|
"epoch": 8.1, |
|
"learning_rate": 0.0002, |
|
"loss": 0.5247, |
|
"step": 145400 |
|
}, |
|
{ |
|
"epoch": 8.11, |
|
"learning_rate": 0.0002, |
|
"loss": 0.545, |
|
"step": 145500 |
|
}, |
|
{ |
|
"epoch": 8.11, |
|
"learning_rate": 0.0002, |
|
"loss": 0.53, |
|
"step": 145600 |
|
}, |
|
{ |
|
"epoch": 8.12, |
|
"learning_rate": 0.0002, |
|
"loss": 0.5461, |
|
"step": 145700 |
|
}, |
|
{ |
|
"epoch": 8.12, |
|
"learning_rate": 0.0002, |
|
"loss": 0.5278, |
|
"step": 145800 |
|
}, |
|
{ |
|
"epoch": 8.13, |
|
"learning_rate": 0.0002, |
|
"loss": 0.5479, |
|
"step": 145900 |
|
}, |
|
{ |
|
"epoch": 8.14, |
|
"learning_rate": 0.0002, |
|
"loss": 0.5442, |
|
"step": 146000 |
|
}, |
|
{ |
|
"epoch": 8.14, |
|
"learning_rate": 0.0002, |
|
"loss": 0.5381, |
|
"step": 146100 |
|
}, |
|
{ |
|
"epoch": 8.15, |
|
"learning_rate": 0.0002, |
|
"loss": 0.5421, |
|
"step": 146200 |
|
}, |
|
{ |
|
"epoch": 8.15, |
|
"learning_rate": 0.0002, |
|
"loss": 0.5307, |
|
"step": 146300 |
|
}, |
|
{ |
|
"epoch": 8.16, |
|
"learning_rate": 0.0002, |
|
"loss": 0.5392, |
|
"step": 146400 |
|
}, |
|
{ |
|
"epoch": 8.16, |
|
"learning_rate": 0.0002, |
|
"loss": 0.5243, |
|
"step": 146500 |
|
}, |
|
{ |
|
"epoch": 8.17, |
|
"learning_rate": 0.0002, |
|
"loss": 0.5124, |
|
"step": 146600 |
|
}, |
|
{ |
|
"epoch": 8.17, |
|
"learning_rate": 0.0002, |
|
"loss": 0.5479, |
|
"step": 146700 |
|
}, |
|
{ |
|
"epoch": 8.18, |
|
"learning_rate": 0.0002, |
|
"loss": 0.5614, |
|
"step": 146800 |
|
}, |
|
{ |
|
"epoch": 8.19, |
|
"learning_rate": 0.0002, |
|
"loss": 0.5362, |
|
"step": 146900 |
|
}, |
|
{ |
|
"epoch": 8.19, |
|
"learning_rate": 0.0002, |
|
"loss": 0.5495, |
|
"step": 147000 |
|
}, |
|
{ |
|
"epoch": 8.2, |
|
"learning_rate": 0.0002, |
|
"loss": 0.5425, |
|
"step": 147100 |
|
}, |
|
{ |
|
"epoch": 8.2, |
|
"learning_rate": 0.0002, |
|
"loss": 0.541, |
|
"step": 147200 |
|
}, |
|
{ |
|
"epoch": 8.21, |
|
"learning_rate": 0.0002, |
|
"loss": 0.5362, |
|
"step": 147300 |
|
}, |
|
{ |
|
"epoch": 8.21, |
|
"learning_rate": 0.0002, |
|
"loss": 0.542, |
|
"step": 147400 |
|
}, |
|
{ |
|
"epoch": 8.22, |
|
"learning_rate": 0.0002, |
|
"loss": 0.5384, |
|
"step": 147500 |
|
}, |
|
{ |
|
"epoch": 8.23, |
|
"learning_rate": 0.0002, |
|
"loss": 0.5356, |
|
"step": 147600 |
|
}, |
|
{ |
|
"epoch": 8.23, |
|
"learning_rate": 0.0002, |
|
"loss": 0.5508, |
|
"step": 147700 |
|
}, |
|
{ |
|
"epoch": 8.24, |
|
"learning_rate": 0.0002, |
|
"loss": 0.528, |
|
"step": 147800 |
|
}, |
|
{ |
|
"epoch": 8.24, |
|
"learning_rate": 0.0002, |
|
"loss": 0.5416, |
|
"step": 147900 |
|
}, |
|
{ |
|
"epoch": 8.25, |
|
"learning_rate": 0.0002, |
|
"loss": 0.5389, |
|
"step": 148000 |
|
}, |
|
{ |
|
"epoch": 8.25, |
|
"learning_rate": 0.0002, |
|
"loss": 0.5429, |
|
"step": 148100 |
|
}, |
|
{ |
|
"epoch": 8.26, |
|
"learning_rate": 0.0002, |
|
"loss": 0.5313, |
|
"step": 148200 |
|
}, |
|
{ |
|
"epoch": 8.26, |
|
"learning_rate": 0.0002, |
|
"loss": 0.5464, |
|
"step": 148300 |
|
}, |
|
{ |
|
"epoch": 8.27, |
|
"learning_rate": 0.0002, |
|
"loss": 0.5389, |
|
"step": 148400 |
|
}, |
|
{ |
|
"epoch": 8.28, |
|
"learning_rate": 0.0002, |
|
"loss": 0.541, |
|
"step": 148500 |
|
}, |
|
{ |
|
"epoch": 8.28, |
|
"learning_rate": 0.0002, |
|
"loss": 0.5405, |
|
"step": 148600 |
|
}, |
|
{ |
|
"epoch": 8.29, |
|
"learning_rate": 0.0002, |
|
"loss": 0.5413, |
|
"step": 148700 |
|
}, |
|
{ |
|
"epoch": 8.29, |
|
"learning_rate": 0.0002, |
|
"loss": 0.5379, |
|
"step": 148800 |
|
}, |
|
{ |
|
"epoch": 8.3, |
|
"learning_rate": 0.0002, |
|
"loss": 0.5388, |
|
"step": 148900 |
|
}, |
|
{ |
|
"epoch": 8.3, |
|
"learning_rate": 0.0002, |
|
"loss": 0.5401, |
|
"step": 149000 |
|
}, |
|
{ |
|
"epoch": 8.31, |
|
"learning_rate": 0.0002, |
|
"loss": 0.5521, |
|
"step": 149100 |
|
}, |
|
{ |
|
"epoch": 8.31, |
|
"learning_rate": 0.0002, |
|
"loss": 0.5362, |
|
"step": 149200 |
|
}, |
|
{ |
|
"epoch": 8.32, |
|
"learning_rate": 0.0002, |
|
"loss": 0.5296, |
|
"step": 149300 |
|
}, |
|
{ |
|
"epoch": 8.33, |
|
"learning_rate": 0.0002, |
|
"loss": 0.5261, |
|
"step": 149400 |
|
}, |
|
{ |
|
"epoch": 8.33, |
|
"learning_rate": 0.0002, |
|
"loss": 0.5475, |
|
"step": 149500 |
|
}, |
|
{ |
|
"epoch": 8.34, |
|
"learning_rate": 0.0002, |
|
"loss": 0.5374, |
|
"step": 149600 |
|
}, |
|
{ |
|
"epoch": 8.34, |
|
"learning_rate": 0.0002, |
|
"loss": 0.5505, |
|
"step": 149700 |
|
}, |
|
{ |
|
"epoch": 8.35, |
|
"learning_rate": 0.0002, |
|
"loss": 0.5434, |
|
"step": 149800 |
|
}, |
|
{ |
|
"epoch": 8.35, |
|
"learning_rate": 0.0002, |
|
"loss": 0.5448, |
|
"step": 149900 |
|
}, |
|
{ |
|
"epoch": 8.36, |
|
"learning_rate": 0.0002, |
|
"loss": 0.5518, |
|
"step": 150000 |
|
}, |
|
{ |
|
"epoch": 8.36, |
|
"eval_gen_len": 18.999251944943147, |
|
"eval_loss": 2.71974515914917, |
|
"eval_rouge1": 25.2364, |
|
"eval_rouge2": 12.3372, |
|
"eval_rougeL": 20.8569, |
|
"eval_rougeLsum": 23.8285, |
|
"eval_runtime": 474.6128, |
|
"eval_samples_per_second": 28.166, |
|
"eval_steps_per_second": 1.761, |
|
"step": 150000 |
|
}, |
|
{ |
|
"epoch": 8.36, |
|
"learning_rate": 0.0002, |
|
"loss": 0.5358, |
|
"step": 150100 |
|
}, |
|
{ |
|
"epoch": 8.37, |
|
"learning_rate": 0.0002, |
|
"loss": 0.5428, |
|
"step": 150200 |
|
}, |
|
{ |
|
"epoch": 8.38, |
|
"learning_rate": 0.0002, |
|
"loss": 0.5452, |
|
"step": 150300 |
|
}, |
|
{ |
|
"epoch": 8.38, |
|
"learning_rate": 0.0002, |
|
"loss": 0.5507, |
|
"step": 150400 |
|
}, |
|
{ |
|
"epoch": 8.39, |
|
"learning_rate": 0.0002, |
|
"loss": 0.5443, |
|
"step": 150500 |
|
}, |
|
{ |
|
"epoch": 8.39, |
|
"learning_rate": 0.0002, |
|
"loss": 0.533, |
|
"step": 150600 |
|
}, |
|
{ |
|
"epoch": 8.4, |
|
"learning_rate": 0.0002, |
|
"loss": 0.5419, |
|
"step": 150700 |
|
}, |
|
{ |
|
"epoch": 8.4, |
|
"learning_rate": 0.0002, |
|
"loss": 0.5339, |
|
"step": 150800 |
|
}, |
|
{ |
|
"epoch": 8.41, |
|
"learning_rate": 0.0002, |
|
"loss": 0.5349, |
|
"step": 150900 |
|
}, |
|
{ |
|
"epoch": 8.41, |
|
"learning_rate": 0.0002, |
|
"loss": 0.5347, |
|
"step": 151000 |
|
}, |
|
{ |
|
"epoch": 8.42, |
|
"learning_rate": 0.0002, |
|
"loss": 0.5357, |
|
"step": 151100 |
|
}, |
|
{ |
|
"epoch": 8.43, |
|
"learning_rate": 0.0002, |
|
"loss": 0.5466, |
|
"step": 151200 |
|
}, |
|
{ |
|
"epoch": 8.43, |
|
"learning_rate": 0.0002, |
|
"loss": 0.5467, |
|
"step": 151300 |
|
}, |
|
{ |
|
"epoch": 8.44, |
|
"learning_rate": 0.0002, |
|
"loss": 0.5596, |
|
"step": 151400 |
|
}, |
|
{ |
|
"epoch": 8.44, |
|
"learning_rate": 0.0002, |
|
"loss": 0.552, |
|
"step": 151500 |
|
}, |
|
{ |
|
"epoch": 8.45, |
|
"learning_rate": 0.0002, |
|
"loss": 0.5514, |
|
"step": 151600 |
|
}, |
|
{ |
|
"epoch": 8.45, |
|
"learning_rate": 0.0002, |
|
"loss": 0.5506, |
|
"step": 151700 |
|
}, |
|
{ |
|
"epoch": 8.46, |
|
"learning_rate": 0.0002, |
|
"loss": 0.5414, |
|
"step": 151800 |
|
}, |
|
{ |
|
"epoch": 8.46, |
|
"learning_rate": 0.0002, |
|
"loss": 0.5394, |
|
"step": 151900 |
|
}, |
|
{ |
|
"epoch": 8.47, |
|
"learning_rate": 0.0002, |
|
"loss": 0.548, |
|
"step": 152000 |
|
}, |
|
{ |
|
"epoch": 8.48, |
|
"learning_rate": 0.0002, |
|
"loss": 0.5334, |
|
"step": 152100 |
|
}, |
|
{ |
|
"epoch": 8.48, |
|
"learning_rate": 0.0002, |
|
"loss": 0.5516, |
|
"step": 152200 |
|
}, |
|
{ |
|
"epoch": 8.49, |
|
"learning_rate": 0.0002, |
|
"loss": 0.5587, |
|
"step": 152300 |
|
}, |
|
{ |
|
"epoch": 8.49, |
|
"learning_rate": 0.0002, |
|
"loss": 0.5423, |
|
"step": 152400 |
|
}, |
|
{ |
|
"epoch": 8.5, |
|
"learning_rate": 0.0002, |
|
"loss": 0.5519, |
|
"step": 152500 |
|
}, |
|
{ |
|
"epoch": 8.5, |
|
"learning_rate": 0.0002, |
|
"loss": 0.5395, |
|
"step": 152600 |
|
}, |
|
{ |
|
"epoch": 8.51, |
|
"learning_rate": 0.0002, |
|
"loss": 0.5368, |
|
"step": 152700 |
|
}, |
|
{ |
|
"epoch": 8.51, |
|
"learning_rate": 0.0002, |
|
"loss": 0.554, |
|
"step": 152800 |
|
}, |
|
{ |
|
"epoch": 8.52, |
|
"learning_rate": 0.0002, |
|
"loss": 0.5437, |
|
"step": 152900 |
|
}, |
|
{ |
|
"epoch": 8.53, |
|
"learning_rate": 0.0002, |
|
"loss": 0.5485, |
|
"step": 153000 |
|
}, |
|
{ |
|
"epoch": 8.53, |
|
"learning_rate": 0.0002, |
|
"loss": 0.5463, |
|
"step": 153100 |
|
}, |
|
{ |
|
"epoch": 8.54, |
|
"learning_rate": 0.0002, |
|
"loss": 0.5429, |
|
"step": 153200 |
|
}, |
|
{ |
|
"epoch": 8.54, |
|
"learning_rate": 0.0002, |
|
"loss": 0.5436, |
|
"step": 153300 |
|
}, |
|
{ |
|
"epoch": 8.55, |
|
"learning_rate": 0.0002, |
|
"loss": 0.5445, |
|
"step": 153400 |
|
}, |
|
{ |
|
"epoch": 8.55, |
|
"learning_rate": 0.0002, |
|
"loss": 0.5458, |
|
"step": 153500 |
|
}, |
|
{ |
|
"epoch": 8.56, |
|
"learning_rate": 0.0002, |
|
"loss": 0.5417, |
|
"step": 153600 |
|
}, |
|
{ |
|
"epoch": 8.57, |
|
"learning_rate": 0.0002, |
|
"loss": 0.5412, |
|
"step": 153700 |
|
}, |
|
{ |
|
"epoch": 8.57, |
|
"learning_rate": 0.0002, |
|
"loss": 0.54, |
|
"step": 153800 |
|
}, |
|
{ |
|
"epoch": 8.58, |
|
"learning_rate": 0.0002, |
|
"loss": 0.5561, |
|
"step": 153900 |
|
}, |
|
{ |
|
"epoch": 8.58, |
|
"learning_rate": 0.0002, |
|
"loss": 0.5431, |
|
"step": 154000 |
|
}, |
|
{ |
|
"epoch": 8.59, |
|
"learning_rate": 0.0002, |
|
"loss": 0.5445, |
|
"step": 154100 |
|
}, |
|
{ |
|
"epoch": 8.59, |
|
"learning_rate": 0.0002, |
|
"loss": 0.5549, |
|
"step": 154200 |
|
}, |
|
{ |
|
"epoch": 8.6, |
|
"learning_rate": 0.0002, |
|
"loss": 0.5478, |
|
"step": 154300 |
|
}, |
|
{ |
|
"epoch": 8.6, |
|
"learning_rate": 0.0002, |
|
"loss": 0.5493, |
|
"step": 154400 |
|
}, |
|
{ |
|
"epoch": 8.61, |
|
"learning_rate": 0.0002, |
|
"loss": 0.5445, |
|
"step": 154500 |
|
}, |
|
{ |
|
"epoch": 8.62, |
|
"learning_rate": 0.0002, |
|
"loss": 0.55, |
|
"step": 154600 |
|
}, |
|
{ |
|
"epoch": 8.62, |
|
"learning_rate": 0.0002, |
|
"loss": 0.5478, |
|
"step": 154700 |
|
}, |
|
{ |
|
"epoch": 8.63, |
|
"learning_rate": 0.0002, |
|
"loss": 0.5339, |
|
"step": 154800 |
|
}, |
|
{ |
|
"epoch": 8.63, |
|
"learning_rate": 0.0002, |
|
"loss": 0.5501, |
|
"step": 154900 |
|
}, |
|
{ |
|
"epoch": 8.64, |
|
"learning_rate": 0.0002, |
|
"loss": 0.5357, |
|
"step": 155000 |
|
}, |
|
{ |
|
"epoch": 8.64, |
|
"learning_rate": 0.0002, |
|
"loss": 0.5444, |
|
"step": 155100 |
|
}, |
|
{ |
|
"epoch": 8.65, |
|
"learning_rate": 0.0002, |
|
"loss": 0.5563, |
|
"step": 155200 |
|
}, |
|
{ |
|
"epoch": 8.65, |
|
"learning_rate": 0.0002, |
|
"loss": 0.5588, |
|
"step": 155300 |
|
}, |
|
{ |
|
"epoch": 8.66, |
|
"learning_rate": 0.0002, |
|
"loss": 0.5416, |
|
"step": 155400 |
|
}, |
|
{ |
|
"epoch": 8.67, |
|
"learning_rate": 0.0002, |
|
"loss": 0.5581, |
|
"step": 155500 |
|
}, |
|
{ |
|
"epoch": 8.67, |
|
"learning_rate": 0.0002, |
|
"loss": 0.5446, |
|
"step": 155600 |
|
}, |
|
{ |
|
"epoch": 8.68, |
|
"learning_rate": 0.0002, |
|
"loss": 0.5425, |
|
"step": 155700 |
|
}, |
|
{ |
|
"epoch": 8.68, |
|
"learning_rate": 0.0002, |
|
"loss": 0.551, |
|
"step": 155800 |
|
}, |
|
{ |
|
"epoch": 8.69, |
|
"learning_rate": 0.0002, |
|
"loss": 0.5342, |
|
"step": 155900 |
|
}, |
|
{ |
|
"epoch": 8.69, |
|
"learning_rate": 0.0002, |
|
"loss": 0.5547, |
|
"step": 156000 |
|
}, |
|
{ |
|
"epoch": 8.7, |
|
"learning_rate": 0.0002, |
|
"loss": 0.555, |
|
"step": 156100 |
|
}, |
|
{ |
|
"epoch": 8.7, |
|
"learning_rate": 0.0002, |
|
"loss": 0.5554, |
|
"step": 156200 |
|
}, |
|
{ |
|
"epoch": 8.71, |
|
"learning_rate": 0.0002, |
|
"loss": 0.5408, |
|
"step": 156300 |
|
}, |
|
{ |
|
"epoch": 8.72, |
|
"learning_rate": 0.0002, |
|
"loss": 0.5596, |
|
"step": 156400 |
|
}, |
|
{ |
|
"epoch": 8.72, |
|
"learning_rate": 0.0002, |
|
"loss": 0.555, |
|
"step": 156500 |
|
}, |
|
{ |
|
"epoch": 8.73, |
|
"learning_rate": 0.0002, |
|
"loss": 0.555, |
|
"step": 156600 |
|
}, |
|
{ |
|
"epoch": 8.73, |
|
"learning_rate": 0.0002, |
|
"loss": 0.54, |
|
"step": 156700 |
|
}, |
|
{ |
|
"epoch": 8.74, |
|
"learning_rate": 0.0002, |
|
"loss": 0.5525, |
|
"step": 156800 |
|
}, |
|
{ |
|
"epoch": 8.74, |
|
"learning_rate": 0.0002, |
|
"loss": 0.548, |
|
"step": 156900 |
|
}, |
|
{ |
|
"epoch": 8.75, |
|
"learning_rate": 0.0002, |
|
"loss": 0.5516, |
|
"step": 157000 |
|
}, |
|
{ |
|
"epoch": 8.75, |
|
"learning_rate": 0.0002, |
|
"loss": 0.5529, |
|
"step": 157100 |
|
}, |
|
{ |
|
"epoch": 8.76, |
|
"learning_rate": 0.0002, |
|
"loss": 0.5571, |
|
"step": 157200 |
|
}, |
|
{ |
|
"epoch": 8.77, |
|
"learning_rate": 0.0002, |
|
"loss": 0.5417, |
|
"step": 157300 |
|
}, |
|
{ |
|
"epoch": 8.77, |
|
"learning_rate": 0.0002, |
|
"loss": 0.5547, |
|
"step": 157400 |
|
}, |
|
{ |
|
"epoch": 8.78, |
|
"learning_rate": 0.0002, |
|
"loss": 0.5384, |
|
"step": 157500 |
|
}, |
|
{ |
|
"epoch": 8.78, |
|
"learning_rate": 0.0002, |
|
"loss": 0.5475, |
|
"step": 157600 |
|
}, |
|
{ |
|
"epoch": 8.79, |
|
"learning_rate": 0.0002, |
|
"loss": 0.5433, |
|
"step": 157700 |
|
}, |
|
{ |
|
"epoch": 8.79, |
|
"learning_rate": 0.0002, |
|
"loss": 0.5399, |
|
"step": 157800 |
|
}, |
|
{ |
|
"epoch": 8.8, |
|
"learning_rate": 0.0002, |
|
"loss": 0.5513, |
|
"step": 157900 |
|
}, |
|
{ |
|
"epoch": 8.8, |
|
"learning_rate": 0.0002, |
|
"loss": 0.5509, |
|
"step": 158000 |
|
}, |
|
{ |
|
"epoch": 8.81, |
|
"learning_rate": 0.0002, |
|
"loss": 0.5535, |
|
"step": 158100 |
|
}, |
|
{ |
|
"epoch": 8.82, |
|
"learning_rate": 0.0002, |
|
"loss": 0.5473, |
|
"step": 158200 |
|
}, |
|
{ |
|
"epoch": 8.82, |
|
"learning_rate": 0.0002, |
|
"loss": 0.5421, |
|
"step": 158300 |
|
}, |
|
{ |
|
"epoch": 8.83, |
|
"learning_rate": 0.0002, |
|
"loss": 0.5507, |
|
"step": 158400 |
|
}, |
|
{ |
|
"epoch": 8.83, |
|
"learning_rate": 0.0002, |
|
"loss": 0.5534, |
|
"step": 158500 |
|
}, |
|
{ |
|
"epoch": 8.84, |
|
"learning_rate": 0.0002, |
|
"loss": 0.5578, |
|
"step": 158600 |
|
}, |
|
{ |
|
"epoch": 8.84, |
|
"learning_rate": 0.0002, |
|
"loss": 0.5521, |
|
"step": 158700 |
|
}, |
|
{ |
|
"epoch": 8.85, |
|
"learning_rate": 0.0002, |
|
"loss": 0.5543, |
|
"step": 158800 |
|
}, |
|
{ |
|
"epoch": 8.85, |
|
"learning_rate": 0.0002, |
|
"loss": 0.5527, |
|
"step": 158900 |
|
}, |
|
{ |
|
"epoch": 8.86, |
|
"learning_rate": 0.0002, |
|
"loss": 0.564, |
|
"step": 159000 |
|
}, |
|
{ |
|
"epoch": 8.87, |
|
"learning_rate": 0.0002, |
|
"loss": 0.5587, |
|
"step": 159100 |
|
}, |
|
{ |
|
"epoch": 8.87, |
|
"learning_rate": 0.0002, |
|
"loss": 0.5576, |
|
"step": 159200 |
|
}, |
|
{ |
|
"epoch": 8.88, |
|
"learning_rate": 0.0002, |
|
"loss": 0.5505, |
|
"step": 159300 |
|
}, |
|
{ |
|
"epoch": 8.88, |
|
"learning_rate": 0.0002, |
|
"loss": 0.5528, |
|
"step": 159400 |
|
}, |
|
{ |
|
"epoch": 8.89, |
|
"learning_rate": 0.0002, |
|
"loss": 0.5384, |
|
"step": 159500 |
|
}, |
|
{ |
|
"epoch": 8.89, |
|
"learning_rate": 0.0002, |
|
"loss": 0.5531, |
|
"step": 159600 |
|
}, |
|
{ |
|
"epoch": 8.9, |
|
"learning_rate": 0.0002, |
|
"loss": 0.5595, |
|
"step": 159700 |
|
}, |
|
{ |
|
"epoch": 8.9, |
|
"learning_rate": 0.0002, |
|
"loss": 0.5588, |
|
"step": 159800 |
|
}, |
|
{ |
|
"epoch": 8.91, |
|
"learning_rate": 0.0002, |
|
"loss": 0.5555, |
|
"step": 159900 |
|
}, |
|
{ |
|
"epoch": 8.92, |
|
"learning_rate": 0.0002, |
|
"loss": 0.5573, |
|
"step": 160000 |
|
}, |
|
{ |
|
"epoch": 8.92, |
|
"eval_gen_len": 18.99955116696589, |
|
"eval_loss": 2.7115721702575684, |
|
"eval_rouge1": 25.334, |
|
"eval_rouge2": 12.363, |
|
"eval_rougeL": 20.8863, |
|
"eval_rougeLsum": 23.93, |
|
"eval_runtime": 474.6063, |
|
"eval_samples_per_second": 28.167, |
|
"eval_steps_per_second": 1.761, |
|
"step": 160000 |
|
}, |
|
{ |
|
"epoch": 8.92, |
|
"learning_rate": 0.0002, |
|
"loss": 0.5481, |
|
"step": 160100 |
|
}, |
|
{ |
|
"epoch": 8.93, |
|
"learning_rate": 0.0002, |
|
"loss": 0.5629, |
|
"step": 160200 |
|
}, |
|
{ |
|
"epoch": 8.93, |
|
"learning_rate": 0.0002, |
|
"loss": 0.5519, |
|
"step": 160300 |
|
}, |
|
{ |
|
"epoch": 8.94, |
|
"learning_rate": 0.0002, |
|
"loss": 0.5581, |
|
"step": 160400 |
|
}, |
|
{ |
|
"epoch": 8.94, |
|
"learning_rate": 0.0002, |
|
"loss": 0.5452, |
|
"step": 160500 |
|
}, |
|
{ |
|
"epoch": 8.95, |
|
"learning_rate": 0.0002, |
|
"loss": 0.55, |
|
"step": 160600 |
|
}, |
|
{ |
|
"epoch": 8.96, |
|
"learning_rate": 0.0002, |
|
"loss": 0.5602, |
|
"step": 160700 |
|
}, |
|
{ |
|
"epoch": 8.96, |
|
"learning_rate": 0.0002, |
|
"loss": 0.5394, |
|
"step": 160800 |
|
}, |
|
{ |
|
"epoch": 8.97, |
|
"learning_rate": 0.0002, |
|
"loss": 0.5509, |
|
"step": 160900 |
|
}, |
|
{ |
|
"epoch": 8.97, |
|
"learning_rate": 0.0002, |
|
"loss": 0.5535, |
|
"step": 161000 |
|
}, |
|
{ |
|
"epoch": 8.98, |
|
"learning_rate": 0.0002, |
|
"loss": 0.5718, |
|
"step": 161100 |
|
}, |
|
{ |
|
"epoch": 8.98, |
|
"learning_rate": 0.0002, |
|
"loss": 0.5674, |
|
"step": 161200 |
|
}, |
|
{ |
|
"epoch": 8.99, |
|
"learning_rate": 0.0002, |
|
"loss": 0.547, |
|
"step": 161300 |
|
}, |
|
{ |
|
"epoch": 8.99, |
|
"learning_rate": 0.0002, |
|
"loss": 0.5581, |
|
"step": 161400 |
|
}, |
|
{ |
|
"epoch": 9.0, |
|
"learning_rate": 0.0002, |
|
"loss": 0.5519, |
|
"step": 161500 |
|
}, |
|
{ |
|
"epoch": 9.01, |
|
"learning_rate": 0.0002, |
|
"loss": 0.5362, |
|
"step": 161600 |
|
}, |
|
{ |
|
"epoch": 9.01, |
|
"learning_rate": 0.0002, |
|
"loss": 0.5365, |
|
"step": 161700 |
|
}, |
|
{ |
|
"epoch": 9.02, |
|
"learning_rate": 0.0002, |
|
"loss": 0.5084, |
|
"step": 161800 |
|
}, |
|
{ |
|
"epoch": 9.02, |
|
"learning_rate": 0.0002, |
|
"loss": 0.5278, |
|
"step": 161900 |
|
}, |
|
{ |
|
"epoch": 9.03, |
|
"learning_rate": 0.0002, |
|
"loss": 0.5293, |
|
"step": 162000 |
|
}, |
|
{ |
|
"epoch": 9.03, |
|
"learning_rate": 0.0002, |
|
"loss": 0.5331, |
|
"step": 162100 |
|
}, |
|
{ |
|
"epoch": 9.04, |
|
"learning_rate": 0.0002, |
|
"loss": 0.5307, |
|
"step": 162200 |
|
}, |
|
{ |
|
"epoch": 9.04, |
|
"learning_rate": 0.0002, |
|
"loss": 0.5459, |
|
"step": 162300 |
|
}, |
|
{ |
|
"epoch": 9.05, |
|
"learning_rate": 0.0002, |
|
"loss": 0.5259, |
|
"step": 162400 |
|
}, |
|
{ |
|
"epoch": 9.06, |
|
"learning_rate": 0.0002, |
|
"loss": 0.532, |
|
"step": 162500 |
|
}, |
|
{ |
|
"epoch": 9.06, |
|
"learning_rate": 0.0002, |
|
"loss": 0.5166, |
|
"step": 162600 |
|
}, |
|
{ |
|
"epoch": 9.07, |
|
"learning_rate": 0.0002, |
|
"loss": 0.5336, |
|
"step": 162700 |
|
}, |
|
{ |
|
"epoch": 9.07, |
|
"learning_rate": 0.0002, |
|
"loss": 0.5253, |
|
"step": 162800 |
|
}, |
|
{ |
|
"epoch": 9.08, |
|
"learning_rate": 0.0002, |
|
"loss": 0.5324, |
|
"step": 162900 |
|
}, |
|
{ |
|
"epoch": 9.08, |
|
"learning_rate": 0.0002, |
|
"loss": 0.5249, |
|
"step": 163000 |
|
}, |
|
{ |
|
"epoch": 9.09, |
|
"learning_rate": 0.0002, |
|
"loss": 0.5111, |
|
"step": 163100 |
|
}, |
|
{ |
|
"epoch": 9.09, |
|
"learning_rate": 0.0002, |
|
"loss": 0.5426, |
|
"step": 163200 |
|
}, |
|
{ |
|
"epoch": 9.1, |
|
"learning_rate": 0.0002, |
|
"loss": 0.531, |
|
"step": 163300 |
|
}, |
|
{ |
|
"epoch": 9.11, |
|
"learning_rate": 0.0002, |
|
"loss": 0.5279, |
|
"step": 163400 |
|
}, |
|
{ |
|
"epoch": 9.11, |
|
"learning_rate": 0.0002, |
|
"loss": 0.5135, |
|
"step": 163500 |
|
}, |
|
{ |
|
"epoch": 9.12, |
|
"learning_rate": 0.0002, |
|
"loss": 0.5335, |
|
"step": 163600 |
|
}, |
|
{ |
|
"epoch": 9.12, |
|
"learning_rate": 0.0002, |
|
"loss": 0.5267, |
|
"step": 163700 |
|
}, |
|
{ |
|
"epoch": 9.13, |
|
"learning_rate": 0.0002, |
|
"loss": 0.5336, |
|
"step": 163800 |
|
}, |
|
{ |
|
"epoch": 9.13, |
|
"learning_rate": 0.0002, |
|
"loss": 0.5383, |
|
"step": 163900 |
|
}, |
|
{ |
|
"epoch": 9.14, |
|
"learning_rate": 0.0002, |
|
"loss": 0.5256, |
|
"step": 164000 |
|
}, |
|
{ |
|
"epoch": 9.14, |
|
"learning_rate": 0.0002, |
|
"loss": 0.5333, |
|
"step": 164100 |
|
}, |
|
{ |
|
"epoch": 9.15, |
|
"learning_rate": 0.0002, |
|
"loss": 0.5335, |
|
"step": 164200 |
|
}, |
|
{ |
|
"epoch": 9.16, |
|
"learning_rate": 0.0002, |
|
"loss": 0.5282, |
|
"step": 164300 |
|
}, |
|
{ |
|
"epoch": 9.16, |
|
"learning_rate": 0.0002, |
|
"loss": 0.535, |
|
"step": 164400 |
|
}, |
|
{ |
|
"epoch": 9.17, |
|
"learning_rate": 0.0002, |
|
"loss": 0.5298, |
|
"step": 164500 |
|
}, |
|
{ |
|
"epoch": 9.17, |
|
"learning_rate": 0.0002, |
|
"loss": 0.5353, |
|
"step": 164600 |
|
}, |
|
{ |
|
"epoch": 9.18, |
|
"learning_rate": 0.0002, |
|
"loss": 0.5209, |
|
"step": 164700 |
|
}, |
|
{ |
|
"epoch": 9.18, |
|
"learning_rate": 0.0002, |
|
"loss": 0.5476, |
|
"step": 164800 |
|
}, |
|
{ |
|
"epoch": 9.19, |
|
"learning_rate": 0.0002, |
|
"loss": 0.5329, |
|
"step": 164900 |
|
}, |
|
{ |
|
"epoch": 9.19, |
|
"learning_rate": 0.0002, |
|
"loss": 0.5145, |
|
"step": 165000 |
|
}, |
|
{ |
|
"epoch": 9.2, |
|
"learning_rate": 0.0002, |
|
"loss": 0.5455, |
|
"step": 165100 |
|
}, |
|
{ |
|
"epoch": 9.21, |
|
"learning_rate": 0.0002, |
|
"loss": 0.5414, |
|
"step": 165200 |
|
}, |
|
{ |
|
"epoch": 9.21, |
|
"learning_rate": 0.0002, |
|
"loss": 0.5301, |
|
"step": 165300 |
|
}, |
|
{ |
|
"epoch": 9.22, |
|
"learning_rate": 0.0002, |
|
"loss": 0.5325, |
|
"step": 165400 |
|
}, |
|
{ |
|
"epoch": 9.22, |
|
"learning_rate": 0.0002, |
|
"loss": 0.5183, |
|
"step": 165500 |
|
}, |
|
{ |
|
"epoch": 9.23, |
|
"learning_rate": 0.0002, |
|
"loss": 0.5309, |
|
"step": 165600 |
|
}, |
|
{ |
|
"epoch": 9.23, |
|
"learning_rate": 0.0002, |
|
"loss": 0.5325, |
|
"step": 165700 |
|
}, |
|
{ |
|
"epoch": 9.24, |
|
"learning_rate": 0.0002, |
|
"loss": 0.5233, |
|
"step": 165800 |
|
}, |
|
{ |
|
"epoch": 9.24, |
|
"learning_rate": 0.0002, |
|
"loss": 0.5404, |
|
"step": 165900 |
|
}, |
|
{ |
|
"epoch": 9.25, |
|
"learning_rate": 0.0002, |
|
"loss": 0.5505, |
|
"step": 166000 |
|
}, |
|
{ |
|
"epoch": 9.26, |
|
"learning_rate": 0.0002, |
|
"loss": 0.5324, |
|
"step": 166100 |
|
}, |
|
{ |
|
"epoch": 9.26, |
|
"learning_rate": 0.0002, |
|
"loss": 0.5407, |
|
"step": 166200 |
|
}, |
|
{ |
|
"epoch": 9.27, |
|
"learning_rate": 0.0002, |
|
"loss": 0.525, |
|
"step": 166300 |
|
}, |
|
{ |
|
"epoch": 9.27, |
|
"learning_rate": 0.0002, |
|
"loss": 0.5371, |
|
"step": 166400 |
|
}, |
|
{ |
|
"epoch": 9.28, |
|
"learning_rate": 0.0002, |
|
"loss": 0.5302, |
|
"step": 166500 |
|
}, |
|
{ |
|
"epoch": 9.28, |
|
"learning_rate": 0.0002, |
|
"loss": 0.5252, |
|
"step": 166600 |
|
}, |
|
{ |
|
"epoch": 9.29, |
|
"learning_rate": 0.0002, |
|
"loss": 0.5418, |
|
"step": 166700 |
|
}, |
|
{ |
|
"epoch": 9.3, |
|
"learning_rate": 0.0002, |
|
"loss": 0.5428, |
|
"step": 166800 |
|
}, |
|
{ |
|
"epoch": 9.3, |
|
"learning_rate": 0.0002, |
|
"loss": 0.5319, |
|
"step": 166900 |
|
}, |
|
{ |
|
"epoch": 9.31, |
|
"learning_rate": 0.0002, |
|
"loss": 0.5379, |
|
"step": 167000 |
|
}, |
|
{ |
|
"epoch": 9.31, |
|
"learning_rate": 0.0002, |
|
"loss": 0.5344, |
|
"step": 167100 |
|
}, |
|
{ |
|
"epoch": 9.32, |
|
"learning_rate": 0.0002, |
|
"loss": 0.5416, |
|
"step": 167200 |
|
}, |
|
{ |
|
"epoch": 9.32, |
|
"learning_rate": 0.0002, |
|
"loss": 0.5404, |
|
"step": 167300 |
|
}, |
|
{ |
|
"epoch": 9.33, |
|
"learning_rate": 0.0002, |
|
"loss": 0.5353, |
|
"step": 167400 |
|
}, |
|
{ |
|
"epoch": 9.33, |
|
"learning_rate": 0.0002, |
|
"loss": 0.5403, |
|
"step": 167500 |
|
}, |
|
{ |
|
"epoch": 9.34, |
|
"learning_rate": 0.0002, |
|
"loss": 0.5418, |
|
"step": 167600 |
|
}, |
|
{ |
|
"epoch": 9.35, |
|
"learning_rate": 0.0002, |
|
"loss": 0.535, |
|
"step": 167700 |
|
}, |
|
{ |
|
"epoch": 9.35, |
|
"learning_rate": 0.0002, |
|
"loss": 0.5351, |
|
"step": 167800 |
|
}, |
|
{ |
|
"epoch": 9.36, |
|
"learning_rate": 0.0002, |
|
"loss": 0.5278, |
|
"step": 167900 |
|
}, |
|
{ |
|
"epoch": 9.36, |
|
"learning_rate": 0.0002, |
|
"loss": 0.5318, |
|
"step": 168000 |
|
}, |
|
{ |
|
"epoch": 9.37, |
|
"learning_rate": 0.0002, |
|
"loss": 0.5521, |
|
"step": 168100 |
|
}, |
|
{ |
|
"epoch": 9.37, |
|
"learning_rate": 0.0002, |
|
"loss": 0.5358, |
|
"step": 168200 |
|
}, |
|
{ |
|
"epoch": 9.38, |
|
"learning_rate": 0.0002, |
|
"loss": 0.5364, |
|
"step": 168300 |
|
}, |
|
{ |
|
"epoch": 9.38, |
|
"learning_rate": 0.0002, |
|
"loss": 0.5399, |
|
"step": 168400 |
|
}, |
|
{ |
|
"epoch": 9.39, |
|
"learning_rate": 0.0002, |
|
"loss": 0.558, |
|
"step": 168500 |
|
}, |
|
{ |
|
"epoch": 9.4, |
|
"learning_rate": 0.0002, |
|
"loss": 0.5534, |
|
"step": 168600 |
|
}, |
|
{ |
|
"epoch": 9.4, |
|
"learning_rate": 0.0002, |
|
"loss": 0.526, |
|
"step": 168700 |
|
}, |
|
{ |
|
"epoch": 9.41, |
|
"learning_rate": 0.0002, |
|
"loss": 0.5424, |
|
"step": 168800 |
|
}, |
|
{ |
|
"epoch": 9.41, |
|
"learning_rate": 0.0002, |
|
"loss": 0.5253, |
|
"step": 168900 |
|
}, |
|
{ |
|
"epoch": 9.42, |
|
"learning_rate": 0.0002, |
|
"loss": 0.5365, |
|
"step": 169000 |
|
}, |
|
{ |
|
"epoch": 9.42, |
|
"learning_rate": 0.0002, |
|
"loss": 0.5369, |
|
"step": 169100 |
|
}, |
|
{ |
|
"epoch": 9.43, |
|
"learning_rate": 0.0002, |
|
"loss": 0.5361, |
|
"step": 169200 |
|
}, |
|
{ |
|
"epoch": 9.43, |
|
"learning_rate": 0.0002, |
|
"loss": 0.5415, |
|
"step": 169300 |
|
}, |
|
{ |
|
"epoch": 9.44, |
|
"learning_rate": 0.0002, |
|
"loss": 0.5368, |
|
"step": 169400 |
|
}, |
|
{ |
|
"epoch": 9.45, |
|
"learning_rate": 0.0002, |
|
"loss": 0.5502, |
|
"step": 169500 |
|
}, |
|
{ |
|
"epoch": 9.45, |
|
"learning_rate": 0.0002, |
|
"loss": 0.5378, |
|
"step": 169600 |
|
}, |
|
{ |
|
"epoch": 9.46, |
|
"learning_rate": 0.0002, |
|
"loss": 0.524, |
|
"step": 169700 |
|
}, |
|
{ |
|
"epoch": 9.46, |
|
"learning_rate": 0.0002, |
|
"loss": 0.5337, |
|
"step": 169800 |
|
}, |
|
{ |
|
"epoch": 9.47, |
|
"learning_rate": 0.0002, |
|
"loss": 0.5375, |
|
"step": 169900 |
|
}, |
|
{ |
|
"epoch": 9.47, |
|
"learning_rate": 0.0002, |
|
"loss": 0.5336, |
|
"step": 170000 |
|
}, |
|
{ |
|
"epoch": 9.47, |
|
"eval_gen_len": 18.99970077797726, |
|
"eval_loss": 2.7171103954315186, |
|
"eval_rouge1": 25.2508, |
|
"eval_rouge2": 12.3825, |
|
"eval_rougeL": 20.8692, |
|
"eval_rougeLsum": 23.8414, |
|
"eval_runtime": 472.9738, |
|
"eval_samples_per_second": 28.264, |
|
"eval_steps_per_second": 1.768, |
|
"step": 170000 |
|
}, |
|
{ |
|
"epoch": 9.48, |
|
"learning_rate": 0.0002, |
|
"loss": 0.547, |
|
"step": 170100 |
|
}, |
|
{ |
|
"epoch": 9.48, |
|
"learning_rate": 0.0002, |
|
"loss": 0.5296, |
|
"step": 170200 |
|
}, |
|
{ |
|
"epoch": 9.49, |
|
"learning_rate": 0.0002, |
|
"loss": 0.5416, |
|
"step": 170300 |
|
}, |
|
{ |
|
"epoch": 9.5, |
|
"learning_rate": 0.0002, |
|
"loss": 0.5479, |
|
"step": 170400 |
|
}, |
|
{ |
|
"epoch": 9.5, |
|
"learning_rate": 0.0002, |
|
"loss": 0.5469, |
|
"step": 170500 |
|
}, |
|
{ |
|
"epoch": 9.51, |
|
"learning_rate": 0.0002, |
|
"loss": 0.534, |
|
"step": 170600 |
|
}, |
|
{ |
|
"epoch": 9.51, |
|
"learning_rate": 0.0002, |
|
"loss": 0.5279, |
|
"step": 170700 |
|
}, |
|
{ |
|
"epoch": 9.52, |
|
"learning_rate": 0.0002, |
|
"loss": 0.5364, |
|
"step": 170800 |
|
}, |
|
{ |
|
"epoch": 9.52, |
|
"learning_rate": 0.0002, |
|
"loss": 0.5243, |
|
"step": 170900 |
|
}, |
|
{ |
|
"epoch": 9.53, |
|
"learning_rate": 0.0002, |
|
"loss": 0.52, |
|
"step": 171000 |
|
}, |
|
{ |
|
"epoch": 9.53, |
|
"learning_rate": 0.0002, |
|
"loss": 0.5427, |
|
"step": 171100 |
|
}, |
|
{ |
|
"epoch": 9.54, |
|
"learning_rate": 0.0002, |
|
"loss": 0.5384, |
|
"step": 171200 |
|
}, |
|
{ |
|
"epoch": 9.55, |
|
"learning_rate": 0.0002, |
|
"loss": 0.5301, |
|
"step": 171300 |
|
}, |
|
{ |
|
"epoch": 9.55, |
|
"learning_rate": 0.0002, |
|
"loss": 0.5386, |
|
"step": 171400 |
|
}, |
|
{ |
|
"epoch": 9.56, |
|
"learning_rate": 0.0002, |
|
"loss": 0.5555, |
|
"step": 171500 |
|
}, |
|
{ |
|
"epoch": 9.56, |
|
"learning_rate": 0.0002, |
|
"loss": 0.5462, |
|
"step": 171600 |
|
}, |
|
{ |
|
"epoch": 9.57, |
|
"learning_rate": 0.0002, |
|
"loss": 0.5394, |
|
"step": 171700 |
|
}, |
|
{ |
|
"epoch": 9.57, |
|
"learning_rate": 0.0002, |
|
"loss": 0.5438, |
|
"step": 171800 |
|
}, |
|
{ |
|
"epoch": 9.58, |
|
"learning_rate": 0.0002, |
|
"loss": 0.5386, |
|
"step": 171900 |
|
}, |
|
{ |
|
"epoch": 9.58, |
|
"learning_rate": 0.0002, |
|
"loss": 0.5469, |
|
"step": 172000 |
|
}, |
|
{ |
|
"epoch": 9.59, |
|
"learning_rate": 0.0002, |
|
"loss": 0.5511, |
|
"step": 172100 |
|
}, |
|
{ |
|
"epoch": 9.6, |
|
"learning_rate": 0.0002, |
|
"loss": 0.5418, |
|
"step": 172200 |
|
}, |
|
{ |
|
"epoch": 9.6, |
|
"learning_rate": 0.0002, |
|
"loss": 0.5376, |
|
"step": 172300 |
|
}, |
|
{ |
|
"epoch": 9.61, |
|
"learning_rate": 0.0002, |
|
"loss": 0.5583, |
|
"step": 172400 |
|
}, |
|
{ |
|
"epoch": 9.61, |
|
"learning_rate": 0.0002, |
|
"loss": 0.5459, |
|
"step": 172500 |
|
}, |
|
{ |
|
"epoch": 9.62, |
|
"learning_rate": 0.0002, |
|
"loss": 0.5393, |
|
"step": 172600 |
|
}, |
|
{ |
|
"epoch": 9.62, |
|
"learning_rate": 0.0002, |
|
"loss": 0.5434, |
|
"step": 172700 |
|
}, |
|
{ |
|
"epoch": 9.63, |
|
"learning_rate": 0.0002, |
|
"loss": 0.5472, |
|
"step": 172800 |
|
}, |
|
{ |
|
"epoch": 9.63, |
|
"learning_rate": 0.0002, |
|
"loss": 0.5535, |
|
"step": 172900 |
|
}, |
|
{ |
|
"epoch": 9.64, |
|
"learning_rate": 0.0002, |
|
"loss": 0.5424, |
|
"step": 173000 |
|
}, |
|
{ |
|
"epoch": 9.65, |
|
"learning_rate": 0.0002, |
|
"loss": 0.5458, |
|
"step": 173100 |
|
}, |
|
{ |
|
"epoch": 9.65, |
|
"learning_rate": 0.0002, |
|
"loss": 0.5552, |
|
"step": 173200 |
|
}, |
|
{ |
|
"epoch": 9.66, |
|
"learning_rate": 0.0002, |
|
"loss": 0.5353, |
|
"step": 173300 |
|
}, |
|
{ |
|
"epoch": 9.66, |
|
"learning_rate": 0.0002, |
|
"loss": 0.5362, |
|
"step": 173400 |
|
}, |
|
{ |
|
"epoch": 9.67, |
|
"learning_rate": 0.0002, |
|
"loss": 0.5329, |
|
"step": 173500 |
|
}, |
|
{ |
|
"epoch": 9.67, |
|
"learning_rate": 0.0002, |
|
"loss": 0.544, |
|
"step": 173600 |
|
}, |
|
{ |
|
"epoch": 9.68, |
|
"learning_rate": 0.0002, |
|
"loss": 0.5365, |
|
"step": 173700 |
|
}, |
|
{ |
|
"epoch": 9.69, |
|
"learning_rate": 0.0002, |
|
"loss": 0.5506, |
|
"step": 173800 |
|
}, |
|
{ |
|
"epoch": 9.69, |
|
"learning_rate": 0.0002, |
|
"loss": 0.5403, |
|
"step": 173900 |
|
}, |
|
{ |
|
"epoch": 9.7, |
|
"learning_rate": 0.0002, |
|
"loss": 0.5453, |
|
"step": 174000 |
|
}, |
|
{ |
|
"epoch": 9.7, |
|
"learning_rate": 0.0002, |
|
"loss": 0.5467, |
|
"step": 174100 |
|
}, |
|
{ |
|
"epoch": 9.71, |
|
"learning_rate": 0.0002, |
|
"loss": 0.5368, |
|
"step": 174200 |
|
}, |
|
{ |
|
"epoch": 9.71, |
|
"learning_rate": 0.0002, |
|
"loss": 0.5376, |
|
"step": 174300 |
|
}, |
|
{ |
|
"epoch": 9.72, |
|
"learning_rate": 0.0002, |
|
"loss": 0.5534, |
|
"step": 174400 |
|
}, |
|
{ |
|
"epoch": 9.72, |
|
"learning_rate": 0.0002, |
|
"loss": 0.5407, |
|
"step": 174500 |
|
}, |
|
{ |
|
"epoch": 9.73, |
|
"learning_rate": 0.0002, |
|
"loss": 0.5508, |
|
"step": 174600 |
|
}, |
|
{ |
|
"epoch": 9.74, |
|
"learning_rate": 0.0002, |
|
"loss": 0.5367, |
|
"step": 174700 |
|
}, |
|
{ |
|
"epoch": 9.74, |
|
"learning_rate": 0.0002, |
|
"loss": 0.5506, |
|
"step": 174800 |
|
}, |
|
{ |
|
"epoch": 9.75, |
|
"learning_rate": 0.0002, |
|
"loss": 0.5397, |
|
"step": 174900 |
|
}, |
|
{ |
|
"epoch": 9.75, |
|
"learning_rate": 0.0002, |
|
"loss": 0.5403, |
|
"step": 175000 |
|
}, |
|
{ |
|
"epoch": 9.76, |
|
"learning_rate": 0.0002, |
|
"loss": 0.5395, |
|
"step": 175100 |
|
}, |
|
{ |
|
"epoch": 9.76, |
|
"learning_rate": 0.0002, |
|
"loss": 0.5429, |
|
"step": 175200 |
|
}, |
|
{ |
|
"epoch": 9.77, |
|
"learning_rate": 0.0002, |
|
"loss": 0.554, |
|
"step": 175300 |
|
}, |
|
{ |
|
"epoch": 9.77, |
|
"learning_rate": 0.0002, |
|
"loss": 0.5445, |
|
"step": 175400 |
|
}, |
|
{ |
|
"epoch": 9.78, |
|
"learning_rate": 0.0002, |
|
"loss": 0.5426, |
|
"step": 175500 |
|
}, |
|
{ |
|
"epoch": 9.79, |
|
"learning_rate": 0.0002, |
|
"loss": 0.5454, |
|
"step": 175600 |
|
}, |
|
{ |
|
"epoch": 9.79, |
|
"learning_rate": 0.0002, |
|
"loss": 0.5651, |
|
"step": 175700 |
|
}, |
|
{ |
|
"epoch": 9.8, |
|
"learning_rate": 0.0002, |
|
"loss": 0.5494, |
|
"step": 175800 |
|
}, |
|
{ |
|
"epoch": 9.8, |
|
"learning_rate": 0.0002, |
|
"loss": 0.5408, |
|
"step": 175900 |
|
}, |
|
{ |
|
"epoch": 9.81, |
|
"learning_rate": 0.0002, |
|
"loss": 0.5528, |
|
"step": 176000 |
|
}, |
|
{ |
|
"epoch": 9.81, |
|
"learning_rate": 0.0002, |
|
"loss": 0.5511, |
|
"step": 176100 |
|
}, |
|
{ |
|
"epoch": 9.82, |
|
"learning_rate": 0.0002, |
|
"loss": 0.5406, |
|
"step": 176200 |
|
}, |
|
{ |
|
"epoch": 9.82, |
|
"learning_rate": 0.0002, |
|
"loss": 0.5588, |
|
"step": 176300 |
|
}, |
|
{ |
|
"epoch": 9.83, |
|
"learning_rate": 0.0002, |
|
"loss": 0.56, |
|
"step": 176400 |
|
}, |
|
{ |
|
"epoch": 9.84, |
|
"learning_rate": 0.0002, |
|
"loss": 0.5472, |
|
"step": 176500 |
|
}, |
|
{ |
|
"epoch": 9.84, |
|
"learning_rate": 0.0002, |
|
"loss": 0.5445, |
|
"step": 176600 |
|
}, |
|
{ |
|
"epoch": 9.85, |
|
"learning_rate": 0.0002, |
|
"loss": 0.5527, |
|
"step": 176700 |
|
}, |
|
{ |
|
"epoch": 9.85, |
|
"learning_rate": 0.0002, |
|
"loss": 0.5385, |
|
"step": 176800 |
|
}, |
|
{ |
|
"epoch": 9.86, |
|
"learning_rate": 0.0002, |
|
"loss": 0.5327, |
|
"step": 176900 |
|
}, |
|
{ |
|
"epoch": 9.86, |
|
"learning_rate": 0.0002, |
|
"loss": 0.55, |
|
"step": 177000 |
|
}, |
|
{ |
|
"epoch": 9.87, |
|
"learning_rate": 0.0002, |
|
"loss": 0.5555, |
|
"step": 177100 |
|
}, |
|
{ |
|
"epoch": 9.87, |
|
"learning_rate": 0.0002, |
|
"loss": 0.5413, |
|
"step": 177200 |
|
}, |
|
{ |
|
"epoch": 9.88, |
|
"learning_rate": 0.0002, |
|
"loss": 0.5533, |
|
"step": 177300 |
|
}, |
|
{ |
|
"epoch": 9.89, |
|
"learning_rate": 0.0002, |
|
"loss": 0.5504, |
|
"step": 177400 |
|
}, |
|
{ |
|
"epoch": 9.89, |
|
"learning_rate": 0.0002, |
|
"loss": 0.559, |
|
"step": 177500 |
|
}, |
|
{ |
|
"epoch": 9.9, |
|
"learning_rate": 0.0002, |
|
"loss": 0.5433, |
|
"step": 177600 |
|
}, |
|
{ |
|
"epoch": 9.9, |
|
"learning_rate": 0.0002, |
|
"loss": 0.5495, |
|
"step": 177700 |
|
}, |
|
{ |
|
"epoch": 9.91, |
|
"learning_rate": 0.0002, |
|
"loss": 0.5308, |
|
"step": 177800 |
|
}, |
|
{ |
|
"epoch": 9.91, |
|
"learning_rate": 0.0002, |
|
"loss": 0.5479, |
|
"step": 177900 |
|
}, |
|
{ |
|
"epoch": 9.92, |
|
"learning_rate": 0.0002, |
|
"loss": 0.5562, |
|
"step": 178000 |
|
}, |
|
{ |
|
"epoch": 9.92, |
|
"learning_rate": 0.0002, |
|
"loss": 0.5418, |
|
"step": 178100 |
|
}, |
|
{ |
|
"epoch": 9.93, |
|
"learning_rate": 0.0002, |
|
"loss": 0.5496, |
|
"step": 178200 |
|
}, |
|
{ |
|
"epoch": 9.94, |
|
"learning_rate": 0.0002, |
|
"loss": 0.5532, |
|
"step": 178300 |
|
}, |
|
{ |
|
"epoch": 9.94, |
|
"learning_rate": 0.0002, |
|
"loss": 0.54, |
|
"step": 178400 |
|
}, |
|
{ |
|
"epoch": 9.95, |
|
"learning_rate": 0.0002, |
|
"loss": 0.549, |
|
"step": 178500 |
|
}, |
|
{ |
|
"epoch": 9.95, |
|
"learning_rate": 0.0002, |
|
"loss": 0.5341, |
|
"step": 178600 |
|
}, |
|
{ |
|
"epoch": 9.96, |
|
"learning_rate": 0.0002, |
|
"loss": 0.5487, |
|
"step": 178700 |
|
}, |
|
{ |
|
"epoch": 9.96, |
|
"learning_rate": 0.0002, |
|
"loss": 0.5403, |
|
"step": 178800 |
|
}, |
|
{ |
|
"epoch": 9.97, |
|
"learning_rate": 0.0002, |
|
"loss": 0.5369, |
|
"step": 178900 |
|
}, |
|
{ |
|
"epoch": 9.97, |
|
"learning_rate": 0.0002, |
|
"loss": 0.5506, |
|
"step": 179000 |
|
}, |
|
{ |
|
"epoch": 9.98, |
|
"learning_rate": 0.0002, |
|
"loss": 0.5423, |
|
"step": 179100 |
|
}, |
|
{ |
|
"epoch": 9.99, |
|
"learning_rate": 0.0002, |
|
"loss": 0.5442, |
|
"step": 179200 |
|
}, |
|
{ |
|
"epoch": 9.99, |
|
"learning_rate": 0.0002, |
|
"loss": 0.5455, |
|
"step": 179300 |
|
}, |
|
{ |
|
"epoch": 10.0, |
|
"learning_rate": 0.0002, |
|
"loss": 0.565, |
|
"step": 179400 |
|
}, |
|
{ |
|
"epoch": 10.0, |
|
"learning_rate": 0.0002, |
|
"loss": 0.5376, |
|
"step": 179500 |
|
}, |
|
{ |
|
"epoch": 10.01, |
|
"learning_rate": 0.0002, |
|
"loss": 0.5155, |
|
"step": 179600 |
|
}, |
|
{ |
|
"epoch": 10.01, |
|
"learning_rate": 0.0002, |
|
"loss": 0.5199, |
|
"step": 179700 |
|
}, |
|
{ |
|
"epoch": 10.02, |
|
"learning_rate": 0.0002, |
|
"loss": 0.5276, |
|
"step": 179800 |
|
}, |
|
{ |
|
"epoch": 10.03, |
|
"learning_rate": 0.0002, |
|
"loss": 0.5285, |
|
"step": 179900 |
|
}, |
|
{ |
|
"epoch": 10.03, |
|
"learning_rate": 0.0002, |
|
"loss": 0.513, |
|
"step": 180000 |
|
}, |
|
{ |
|
"epoch": 10.03, |
|
"eval_gen_len": 18.99970077797726, |
|
"eval_loss": 2.771777391433716, |
|
"eval_rouge1": 25.3651, |
|
"eval_rouge2": 12.4707, |
|
"eval_rougeL": 20.9642, |
|
"eval_rougeLsum": 23.9479, |
|
"eval_runtime": 473.669, |
|
"eval_samples_per_second": 28.222, |
|
"eval_steps_per_second": 1.765, |
|
"step": 180000 |
|
}, |
|
{ |
|
"epoch": 10.04, |
|
"learning_rate": 0.0002, |
|
"loss": 0.531, |
|
"step": 180100 |
|
}, |
|
{ |
|
"epoch": 10.04, |
|
"learning_rate": 0.0002, |
|
"loss": 0.5042, |
|
"step": 180200 |
|
}, |
|
{ |
|
"epoch": 10.05, |
|
"learning_rate": 0.0002, |
|
"loss": 0.5018, |
|
"step": 180300 |
|
}, |
|
{ |
|
"epoch": 10.05, |
|
"learning_rate": 0.0002, |
|
"loss": 0.5239, |
|
"step": 180400 |
|
}, |
|
{ |
|
"epoch": 10.06, |
|
"learning_rate": 0.0002, |
|
"loss": 0.5154, |
|
"step": 180500 |
|
}, |
|
{ |
|
"epoch": 10.06, |
|
"learning_rate": 0.0002, |
|
"loss": 0.5311, |
|
"step": 180600 |
|
}, |
|
{ |
|
"epoch": 10.07, |
|
"learning_rate": 0.0002, |
|
"loss": 0.5206, |
|
"step": 180700 |
|
}, |
|
{ |
|
"epoch": 10.08, |
|
"learning_rate": 0.0002, |
|
"loss": 0.5234, |
|
"step": 180800 |
|
}, |
|
{ |
|
"epoch": 10.08, |
|
"learning_rate": 0.0002, |
|
"loss": 0.5209, |
|
"step": 180900 |
|
}, |
|
{ |
|
"epoch": 10.09, |
|
"learning_rate": 0.0002, |
|
"loss": 0.533, |
|
"step": 181000 |
|
}, |
|
{ |
|
"epoch": 10.09, |
|
"learning_rate": 0.0002, |
|
"loss": 0.5323, |
|
"step": 181100 |
|
}, |
|
{ |
|
"epoch": 10.1, |
|
"learning_rate": 0.0002, |
|
"loss": 0.5174, |
|
"step": 181200 |
|
}, |
|
{ |
|
"epoch": 10.1, |
|
"learning_rate": 0.0002, |
|
"loss": 0.5279, |
|
"step": 181300 |
|
}, |
|
{ |
|
"epoch": 10.11, |
|
"learning_rate": 0.0002, |
|
"loss": 0.5141, |
|
"step": 181400 |
|
}, |
|
{ |
|
"epoch": 10.11, |
|
"learning_rate": 0.0002, |
|
"loss": 0.533, |
|
"step": 181500 |
|
}, |
|
{ |
|
"epoch": 10.12, |
|
"learning_rate": 0.0002, |
|
"loss": 0.5359, |
|
"step": 181600 |
|
}, |
|
{ |
|
"epoch": 10.13, |
|
"learning_rate": 0.0002, |
|
"loss": 0.5312, |
|
"step": 181700 |
|
}, |
|
{ |
|
"epoch": 10.13, |
|
"learning_rate": 0.0002, |
|
"loss": 0.51, |
|
"step": 181800 |
|
}, |
|
{ |
|
"epoch": 10.14, |
|
"learning_rate": 0.0002, |
|
"loss": 0.5172, |
|
"step": 181900 |
|
}, |
|
{ |
|
"epoch": 10.14, |
|
"learning_rate": 0.0002, |
|
"loss": 0.5128, |
|
"step": 182000 |
|
}, |
|
{ |
|
"epoch": 10.15, |
|
"learning_rate": 0.0002, |
|
"loss": 0.5275, |
|
"step": 182100 |
|
}, |
|
{ |
|
"epoch": 10.15, |
|
"learning_rate": 0.0002, |
|
"loss": 0.5317, |
|
"step": 182200 |
|
}, |
|
{ |
|
"epoch": 10.16, |
|
"learning_rate": 0.0002, |
|
"loss": 0.5378, |
|
"step": 182300 |
|
}, |
|
{ |
|
"epoch": 10.16, |
|
"learning_rate": 0.0002, |
|
"loss": 0.5298, |
|
"step": 182400 |
|
}, |
|
{ |
|
"epoch": 10.17, |
|
"learning_rate": 0.0002, |
|
"loss": 0.5181, |
|
"step": 182500 |
|
}, |
|
{ |
|
"epoch": 10.18, |
|
"learning_rate": 0.0002, |
|
"loss": 0.5086, |
|
"step": 182600 |
|
}, |
|
{ |
|
"epoch": 10.18, |
|
"learning_rate": 0.0002, |
|
"loss": 0.5257, |
|
"step": 182700 |
|
}, |
|
{ |
|
"epoch": 10.19, |
|
"learning_rate": 0.0002, |
|
"loss": 0.5237, |
|
"step": 182800 |
|
}, |
|
{ |
|
"epoch": 10.19, |
|
"learning_rate": 0.0002, |
|
"loss": 0.5226, |
|
"step": 182900 |
|
}, |
|
{ |
|
"epoch": 10.2, |
|
"learning_rate": 0.0002, |
|
"loss": 0.526, |
|
"step": 183000 |
|
}, |
|
{ |
|
"epoch": 10.2, |
|
"learning_rate": 0.0002, |
|
"loss": 0.5257, |
|
"step": 183100 |
|
}, |
|
{ |
|
"epoch": 10.21, |
|
"learning_rate": 0.0002, |
|
"loss": 0.5188, |
|
"step": 183200 |
|
}, |
|
{ |
|
"epoch": 10.21, |
|
"learning_rate": 0.0002, |
|
"loss": 0.5265, |
|
"step": 183300 |
|
}, |
|
{ |
|
"epoch": 10.22, |
|
"learning_rate": 0.0002, |
|
"loss": 0.5292, |
|
"step": 183400 |
|
}, |
|
{ |
|
"epoch": 10.23, |
|
"learning_rate": 0.0002, |
|
"loss": 0.5394, |
|
"step": 183500 |
|
}, |
|
{ |
|
"epoch": 10.23, |
|
"learning_rate": 0.0002, |
|
"loss": 0.533, |
|
"step": 183600 |
|
}, |
|
{ |
|
"epoch": 10.24, |
|
"learning_rate": 0.0002, |
|
"loss": 0.5327, |
|
"step": 183700 |
|
}, |
|
{ |
|
"epoch": 10.24, |
|
"learning_rate": 0.0002, |
|
"loss": 0.5291, |
|
"step": 183800 |
|
}, |
|
{ |
|
"epoch": 10.25, |
|
"learning_rate": 0.0002, |
|
"loss": 0.5398, |
|
"step": 183900 |
|
}, |
|
{ |
|
"epoch": 10.25, |
|
"learning_rate": 0.0002, |
|
"loss": 0.5236, |
|
"step": 184000 |
|
}, |
|
{ |
|
"epoch": 10.26, |
|
"learning_rate": 0.0002, |
|
"loss": 0.5248, |
|
"step": 184100 |
|
}, |
|
{ |
|
"epoch": 10.26, |
|
"learning_rate": 0.0002, |
|
"loss": 0.5148, |
|
"step": 184200 |
|
}, |
|
{ |
|
"epoch": 10.27, |
|
"learning_rate": 0.0002, |
|
"loss": 0.5402, |
|
"step": 184300 |
|
}, |
|
{ |
|
"epoch": 10.28, |
|
"learning_rate": 0.0002, |
|
"loss": 0.5241, |
|
"step": 184400 |
|
}, |
|
{ |
|
"epoch": 10.28, |
|
"learning_rate": 0.0002, |
|
"loss": 0.5356, |
|
"step": 184500 |
|
}, |
|
{ |
|
"epoch": 10.29, |
|
"learning_rate": 0.0002, |
|
"loss": 0.5171, |
|
"step": 184600 |
|
}, |
|
{ |
|
"epoch": 10.29, |
|
"learning_rate": 0.0002, |
|
"loss": 0.5339, |
|
"step": 184700 |
|
}, |
|
{ |
|
"epoch": 10.3, |
|
"learning_rate": 0.0002, |
|
"loss": 0.5228, |
|
"step": 184800 |
|
}, |
|
{ |
|
"epoch": 10.3, |
|
"learning_rate": 0.0002, |
|
"loss": 0.5405, |
|
"step": 184900 |
|
}, |
|
{ |
|
"epoch": 10.31, |
|
"learning_rate": 0.0002, |
|
"loss": 0.5441, |
|
"step": 185000 |
|
}, |
|
{ |
|
"epoch": 10.31, |
|
"learning_rate": 0.0002, |
|
"loss": 0.5335, |
|
"step": 185100 |
|
}, |
|
{ |
|
"epoch": 10.32, |
|
"learning_rate": 0.0002, |
|
"loss": 0.5289, |
|
"step": 185200 |
|
}, |
|
{ |
|
"epoch": 10.33, |
|
"learning_rate": 0.0002, |
|
"loss": 0.5374, |
|
"step": 185300 |
|
}, |
|
{ |
|
"epoch": 10.33, |
|
"learning_rate": 0.0002, |
|
"loss": 0.5185, |
|
"step": 185400 |
|
}, |
|
{ |
|
"epoch": 10.34, |
|
"learning_rate": 0.0002, |
|
"loss": 0.5413, |
|
"step": 185500 |
|
}, |
|
{ |
|
"epoch": 10.34, |
|
"learning_rate": 0.0002, |
|
"loss": 0.5399, |
|
"step": 185600 |
|
}, |
|
{ |
|
"epoch": 10.35, |
|
"learning_rate": 0.0002, |
|
"loss": 0.521, |
|
"step": 185700 |
|
}, |
|
{ |
|
"epoch": 10.35, |
|
"learning_rate": 0.0002, |
|
"loss": 0.5314, |
|
"step": 185800 |
|
}, |
|
{ |
|
"epoch": 10.36, |
|
"learning_rate": 0.0002, |
|
"loss": 0.5355, |
|
"step": 185900 |
|
}, |
|
{ |
|
"epoch": 10.37, |
|
"learning_rate": 0.0002, |
|
"loss": 0.537, |
|
"step": 186000 |
|
}, |
|
{ |
|
"epoch": 10.37, |
|
"learning_rate": 0.0002, |
|
"loss": 0.5304, |
|
"step": 186100 |
|
}, |
|
{ |
|
"epoch": 10.38, |
|
"learning_rate": 0.0002, |
|
"loss": 0.532, |
|
"step": 186200 |
|
}, |
|
{ |
|
"epoch": 10.38, |
|
"learning_rate": 0.0002, |
|
"loss": 0.5198, |
|
"step": 186300 |
|
}, |
|
{ |
|
"epoch": 10.39, |
|
"learning_rate": 0.0002, |
|
"loss": 0.5347, |
|
"step": 186400 |
|
}, |
|
{ |
|
"epoch": 10.39, |
|
"learning_rate": 0.0002, |
|
"loss": 0.5288, |
|
"step": 186500 |
|
}, |
|
{ |
|
"epoch": 10.4, |
|
"learning_rate": 0.0002, |
|
"loss": 0.5347, |
|
"step": 186600 |
|
}, |
|
{ |
|
"epoch": 10.4, |
|
"learning_rate": 0.0002, |
|
"loss": 0.5324, |
|
"step": 186700 |
|
}, |
|
{ |
|
"epoch": 10.41, |
|
"learning_rate": 0.0002, |
|
"loss": 0.5292, |
|
"step": 186800 |
|
}, |
|
{ |
|
"epoch": 10.42, |
|
"learning_rate": 0.0002, |
|
"loss": 0.5339, |
|
"step": 186900 |
|
}, |
|
{ |
|
"epoch": 10.42, |
|
"learning_rate": 0.0002, |
|
"loss": 0.5327, |
|
"step": 187000 |
|
}, |
|
{ |
|
"epoch": 10.43, |
|
"learning_rate": 0.0002, |
|
"loss": 0.5309, |
|
"step": 187100 |
|
}, |
|
{ |
|
"epoch": 10.43, |
|
"learning_rate": 0.0002, |
|
"loss": 0.5216, |
|
"step": 187200 |
|
}, |
|
{ |
|
"epoch": 10.44, |
|
"learning_rate": 0.0002, |
|
"loss": 0.5325, |
|
"step": 187300 |
|
}, |
|
{ |
|
"epoch": 10.44, |
|
"learning_rate": 0.0002, |
|
"loss": 0.5251, |
|
"step": 187400 |
|
}, |
|
{ |
|
"epoch": 10.45, |
|
"learning_rate": 0.0002, |
|
"loss": 0.5321, |
|
"step": 187500 |
|
}, |
|
{ |
|
"epoch": 10.45, |
|
"learning_rate": 0.0002, |
|
"loss": 0.5425, |
|
"step": 187600 |
|
}, |
|
{ |
|
"epoch": 10.46, |
|
"learning_rate": 0.0002, |
|
"loss": 0.5305, |
|
"step": 187700 |
|
}, |
|
{ |
|
"epoch": 10.47, |
|
"learning_rate": 0.0002, |
|
"loss": 0.5306, |
|
"step": 187800 |
|
}, |
|
{ |
|
"epoch": 10.47, |
|
"learning_rate": 0.0002, |
|
"loss": 0.5345, |
|
"step": 187900 |
|
}, |
|
{ |
|
"epoch": 10.48, |
|
"learning_rate": 0.0002, |
|
"loss": 0.5374, |
|
"step": 188000 |
|
}, |
|
{ |
|
"epoch": 10.48, |
|
"learning_rate": 0.0002, |
|
"loss": 0.5142, |
|
"step": 188100 |
|
}, |
|
{ |
|
"epoch": 10.49, |
|
"learning_rate": 0.0002, |
|
"loss": 0.5354, |
|
"step": 188200 |
|
}, |
|
{ |
|
"epoch": 10.49, |
|
"learning_rate": 0.0002, |
|
"loss": 0.5265, |
|
"step": 188300 |
|
}, |
|
{ |
|
"epoch": 10.5, |
|
"learning_rate": 0.0002, |
|
"loss": 0.5264, |
|
"step": 188400 |
|
}, |
|
{ |
|
"epoch": 10.5, |
|
"learning_rate": 0.0002, |
|
"loss": 0.5377, |
|
"step": 188500 |
|
}, |
|
{ |
|
"epoch": 10.51, |
|
"learning_rate": 0.0002, |
|
"loss": 0.5393, |
|
"step": 188600 |
|
}, |
|
{ |
|
"epoch": 10.52, |
|
"learning_rate": 0.0002, |
|
"loss": 0.532, |
|
"step": 188700 |
|
}, |
|
{ |
|
"epoch": 10.52, |
|
"learning_rate": 0.0002, |
|
"loss": 0.5439, |
|
"step": 188800 |
|
}, |
|
{ |
|
"epoch": 10.53, |
|
"learning_rate": 0.0002, |
|
"loss": 0.5392, |
|
"step": 188900 |
|
}, |
|
{ |
|
"epoch": 10.53, |
|
"learning_rate": 0.0002, |
|
"loss": 0.5407, |
|
"step": 189000 |
|
}, |
|
{ |
|
"epoch": 10.54, |
|
"learning_rate": 0.0002, |
|
"loss": 0.5352, |
|
"step": 189100 |
|
}, |
|
{ |
|
"epoch": 10.54, |
|
"learning_rate": 0.0002, |
|
"loss": 0.5384, |
|
"step": 189200 |
|
}, |
|
{ |
|
"epoch": 10.55, |
|
"learning_rate": 0.0002, |
|
"loss": 0.5453, |
|
"step": 189300 |
|
}, |
|
{ |
|
"epoch": 10.55, |
|
"learning_rate": 0.0002, |
|
"loss": 0.5289, |
|
"step": 189400 |
|
}, |
|
{ |
|
"epoch": 10.56, |
|
"learning_rate": 0.0002, |
|
"loss": 0.5363, |
|
"step": 189500 |
|
}, |
|
{ |
|
"epoch": 10.57, |
|
"learning_rate": 0.0002, |
|
"loss": 0.5311, |
|
"step": 189600 |
|
}, |
|
{ |
|
"epoch": 10.57, |
|
"learning_rate": 0.0002, |
|
"loss": 0.5483, |
|
"step": 189700 |
|
}, |
|
{ |
|
"epoch": 10.58, |
|
"learning_rate": 0.0002, |
|
"loss": 0.5446, |
|
"step": 189800 |
|
}, |
|
{ |
|
"epoch": 10.58, |
|
"learning_rate": 0.0002, |
|
"loss": 0.5417, |
|
"step": 189900 |
|
}, |
|
{ |
|
"epoch": 10.59, |
|
"learning_rate": 0.0002, |
|
"loss": 0.5348, |
|
"step": 190000 |
|
}, |
|
{ |
|
"epoch": 10.59, |
|
"eval_gen_len": 18.99970077797726, |
|
"eval_loss": 2.70188045501709, |
|
"eval_rouge1": 25.274, |
|
"eval_rouge2": 12.3837, |
|
"eval_rougeL": 20.8714, |
|
"eval_rougeLsum": 23.8766, |
|
"eval_runtime": 474.7941, |
|
"eval_samples_per_second": 28.155, |
|
"eval_steps_per_second": 1.761, |
|
"step": 190000 |
|
}, |
|
{ |
|
"epoch": 10.59, |
|
"learning_rate": 0.0002, |
|
"loss": 0.549, |
|
"step": 190100 |
|
}, |
|
{ |
|
"epoch": 10.6, |
|
"learning_rate": 0.0002, |
|
"loss": 0.5398, |
|
"step": 190200 |
|
}, |
|
{ |
|
"epoch": 10.6, |
|
"learning_rate": 0.0002, |
|
"loss": 0.5492, |
|
"step": 190300 |
|
}, |
|
{ |
|
"epoch": 10.61, |
|
"learning_rate": 0.0002, |
|
"loss": 0.5443, |
|
"step": 190400 |
|
}, |
|
{ |
|
"epoch": 10.62, |
|
"learning_rate": 0.0002, |
|
"loss": 0.5407, |
|
"step": 190500 |
|
}, |
|
{ |
|
"epoch": 10.62, |
|
"learning_rate": 0.0002, |
|
"loss": 0.5348, |
|
"step": 190600 |
|
}, |
|
{ |
|
"epoch": 10.63, |
|
"learning_rate": 0.0002, |
|
"loss": 0.5335, |
|
"step": 190700 |
|
}, |
|
{ |
|
"epoch": 10.63, |
|
"learning_rate": 0.0002, |
|
"loss": 0.5479, |
|
"step": 190800 |
|
}, |
|
{ |
|
"epoch": 10.64, |
|
"learning_rate": 0.0002, |
|
"loss": 0.5297, |
|
"step": 190900 |
|
}, |
|
{ |
|
"epoch": 10.64, |
|
"learning_rate": 0.0002, |
|
"loss": 0.5317, |
|
"step": 191000 |
|
}, |
|
{ |
|
"epoch": 10.65, |
|
"learning_rate": 0.0002, |
|
"loss": 0.5418, |
|
"step": 191100 |
|
}, |
|
{ |
|
"epoch": 10.65, |
|
"learning_rate": 0.0002, |
|
"loss": 0.5397, |
|
"step": 191200 |
|
}, |
|
{ |
|
"epoch": 10.66, |
|
"learning_rate": 0.0002, |
|
"loss": 0.5381, |
|
"step": 191300 |
|
}, |
|
{ |
|
"epoch": 10.67, |
|
"learning_rate": 0.0002, |
|
"loss": 0.5266, |
|
"step": 191400 |
|
}, |
|
{ |
|
"epoch": 10.67, |
|
"learning_rate": 0.0002, |
|
"loss": 0.5321, |
|
"step": 191500 |
|
}, |
|
{ |
|
"epoch": 10.68, |
|
"learning_rate": 0.0002, |
|
"loss": 0.5549, |
|
"step": 191600 |
|
}, |
|
{ |
|
"epoch": 10.68, |
|
"learning_rate": 0.0002, |
|
"loss": 0.5473, |
|
"step": 191700 |
|
}, |
|
{ |
|
"epoch": 10.69, |
|
"learning_rate": 0.0002, |
|
"loss": 0.5292, |
|
"step": 191800 |
|
}, |
|
{ |
|
"epoch": 10.69, |
|
"learning_rate": 0.0002, |
|
"loss": 0.5326, |
|
"step": 191900 |
|
}, |
|
{ |
|
"epoch": 10.7, |
|
"learning_rate": 0.0002, |
|
"loss": 0.5344, |
|
"step": 192000 |
|
}, |
|
{ |
|
"epoch": 10.7, |
|
"learning_rate": 0.0002, |
|
"loss": 0.5493, |
|
"step": 192100 |
|
}, |
|
{ |
|
"epoch": 10.71, |
|
"learning_rate": 0.0002, |
|
"loss": 0.5407, |
|
"step": 192200 |
|
}, |
|
{ |
|
"epoch": 10.72, |
|
"learning_rate": 0.0002, |
|
"loss": 0.5373, |
|
"step": 192300 |
|
}, |
|
{ |
|
"epoch": 10.72, |
|
"learning_rate": 0.0002, |
|
"loss": 0.5357, |
|
"step": 192400 |
|
}, |
|
{ |
|
"epoch": 10.73, |
|
"learning_rate": 0.0002, |
|
"loss": 0.5288, |
|
"step": 192500 |
|
}, |
|
{ |
|
"epoch": 10.73, |
|
"learning_rate": 0.0002, |
|
"loss": 0.5439, |
|
"step": 192600 |
|
}, |
|
{ |
|
"epoch": 10.74, |
|
"learning_rate": 0.0002, |
|
"loss": 0.5521, |
|
"step": 192700 |
|
}, |
|
{ |
|
"epoch": 10.74, |
|
"learning_rate": 0.0002, |
|
"loss": 0.551, |
|
"step": 192800 |
|
}, |
|
{ |
|
"epoch": 10.75, |
|
"learning_rate": 0.0002, |
|
"loss": 0.5298, |
|
"step": 192900 |
|
}, |
|
{ |
|
"epoch": 10.76, |
|
"learning_rate": 0.0002, |
|
"loss": 0.5484, |
|
"step": 193000 |
|
}, |
|
{ |
|
"epoch": 10.76, |
|
"learning_rate": 0.0002, |
|
"loss": 0.5351, |
|
"step": 193100 |
|
}, |
|
{ |
|
"epoch": 10.77, |
|
"learning_rate": 0.0002, |
|
"loss": 0.5399, |
|
"step": 193200 |
|
}, |
|
{ |
|
"epoch": 10.77, |
|
"learning_rate": 0.0002, |
|
"loss": 0.5424, |
|
"step": 193300 |
|
}, |
|
{ |
|
"epoch": 10.78, |
|
"learning_rate": 0.0002, |
|
"loss": 0.5401, |
|
"step": 193400 |
|
}, |
|
{ |
|
"epoch": 10.78, |
|
"learning_rate": 0.0002, |
|
"loss": 0.5417, |
|
"step": 193500 |
|
}, |
|
{ |
|
"epoch": 10.79, |
|
"learning_rate": 0.0002, |
|
"loss": 0.5265, |
|
"step": 193600 |
|
}, |
|
{ |
|
"epoch": 10.79, |
|
"learning_rate": 0.0002, |
|
"loss": 0.5315, |
|
"step": 193700 |
|
}, |
|
{ |
|
"epoch": 10.8, |
|
"learning_rate": 0.0002, |
|
"loss": 0.5348, |
|
"step": 193800 |
|
}, |
|
{ |
|
"epoch": 10.81, |
|
"learning_rate": 0.0002, |
|
"loss": 0.5475, |
|
"step": 193900 |
|
}, |
|
{ |
|
"epoch": 10.81, |
|
"learning_rate": 0.0002, |
|
"loss": 0.5381, |
|
"step": 194000 |
|
}, |
|
{ |
|
"epoch": 10.82, |
|
"learning_rate": 0.0002, |
|
"loss": 0.5387, |
|
"step": 194100 |
|
}, |
|
{ |
|
"epoch": 10.82, |
|
"learning_rate": 0.0002, |
|
"loss": 0.5442, |
|
"step": 194200 |
|
}, |
|
{ |
|
"epoch": 10.83, |
|
"learning_rate": 0.0002, |
|
"loss": 0.5332, |
|
"step": 194300 |
|
}, |
|
{ |
|
"epoch": 10.83, |
|
"learning_rate": 0.0002, |
|
"loss": 0.5377, |
|
"step": 194400 |
|
}, |
|
{ |
|
"epoch": 10.84, |
|
"learning_rate": 0.0002, |
|
"loss": 0.5426, |
|
"step": 194500 |
|
}, |
|
{ |
|
"epoch": 10.84, |
|
"learning_rate": 0.0002, |
|
"loss": 0.5512, |
|
"step": 194600 |
|
}, |
|
{ |
|
"epoch": 10.85, |
|
"learning_rate": 0.0002, |
|
"loss": 0.5364, |
|
"step": 194700 |
|
}, |
|
{ |
|
"epoch": 10.86, |
|
"learning_rate": 0.0002, |
|
"loss": 0.5418, |
|
"step": 194800 |
|
}, |
|
{ |
|
"epoch": 10.86, |
|
"learning_rate": 0.0002, |
|
"loss": 0.5566, |
|
"step": 194900 |
|
}, |
|
{ |
|
"epoch": 10.87, |
|
"learning_rate": 0.0002, |
|
"loss": 0.5395, |
|
"step": 195000 |
|
}, |
|
{ |
|
"epoch": 10.87, |
|
"learning_rate": 0.0002, |
|
"loss": 0.5356, |
|
"step": 195100 |
|
}, |
|
{ |
|
"epoch": 10.88, |
|
"learning_rate": 0.0002, |
|
"loss": 0.5351, |
|
"step": 195200 |
|
}, |
|
{ |
|
"epoch": 10.88, |
|
"learning_rate": 0.0002, |
|
"loss": 0.5411, |
|
"step": 195300 |
|
}, |
|
{ |
|
"epoch": 10.89, |
|
"learning_rate": 0.0002, |
|
"loss": 0.5446, |
|
"step": 195400 |
|
}, |
|
{ |
|
"epoch": 10.89, |
|
"learning_rate": 0.0002, |
|
"loss": 0.5496, |
|
"step": 195500 |
|
}, |
|
{ |
|
"epoch": 10.9, |
|
"learning_rate": 0.0002, |
|
"loss": 0.5284, |
|
"step": 195600 |
|
}, |
|
{ |
|
"epoch": 10.91, |
|
"learning_rate": 0.0002, |
|
"loss": 0.5406, |
|
"step": 195700 |
|
}, |
|
{ |
|
"epoch": 10.91, |
|
"learning_rate": 0.0002, |
|
"loss": 0.5461, |
|
"step": 195800 |
|
}, |
|
{ |
|
"epoch": 10.92, |
|
"learning_rate": 0.0002, |
|
"loss": 0.5376, |
|
"step": 195900 |
|
}, |
|
{ |
|
"epoch": 10.92, |
|
"learning_rate": 0.0002, |
|
"loss": 0.5552, |
|
"step": 196000 |
|
}, |
|
{ |
|
"epoch": 10.93, |
|
"learning_rate": 0.0002, |
|
"loss": 0.5493, |
|
"step": 196100 |
|
}, |
|
{ |
|
"epoch": 10.93, |
|
"learning_rate": 0.0002, |
|
"loss": 0.5328, |
|
"step": 196200 |
|
}, |
|
{ |
|
"epoch": 10.94, |
|
"learning_rate": 0.0002, |
|
"loss": 0.5427, |
|
"step": 196300 |
|
}, |
|
{ |
|
"epoch": 10.94, |
|
"learning_rate": 0.0002, |
|
"loss": 0.5296, |
|
"step": 196400 |
|
}, |
|
{ |
|
"epoch": 10.95, |
|
"learning_rate": 0.0002, |
|
"loss": 0.5362, |
|
"step": 196500 |
|
}, |
|
{ |
|
"epoch": 10.96, |
|
"learning_rate": 0.0002, |
|
"loss": 0.5336, |
|
"step": 196600 |
|
}, |
|
{ |
|
"epoch": 10.96, |
|
"learning_rate": 0.0002, |
|
"loss": 0.5329, |
|
"step": 196700 |
|
}, |
|
{ |
|
"epoch": 10.97, |
|
"learning_rate": 0.0002, |
|
"loss": 0.5411, |
|
"step": 196800 |
|
}, |
|
{ |
|
"epoch": 10.97, |
|
"learning_rate": 0.0002, |
|
"loss": 0.5385, |
|
"step": 196900 |
|
}, |
|
{ |
|
"epoch": 10.98, |
|
"learning_rate": 0.0002, |
|
"loss": 0.5469, |
|
"step": 197000 |
|
}, |
|
{ |
|
"epoch": 10.98, |
|
"learning_rate": 0.0002, |
|
"loss": 0.5579, |
|
"step": 197100 |
|
}, |
|
{ |
|
"epoch": 10.99, |
|
"learning_rate": 0.0002, |
|
"loss": 0.548, |
|
"step": 197200 |
|
}, |
|
{ |
|
"epoch": 10.99, |
|
"learning_rate": 0.0002, |
|
"loss": 0.5447, |
|
"step": 197300 |
|
}, |
|
{ |
|
"epoch": 11.0, |
|
"learning_rate": 0.0002, |
|
"loss": 0.5431, |
|
"step": 197400 |
|
}, |
|
{ |
|
"epoch": 11.01, |
|
"learning_rate": 0.0002, |
|
"loss": 0.5184, |
|
"step": 197500 |
|
}, |
|
{ |
|
"epoch": 11.01, |
|
"learning_rate": 0.0002, |
|
"loss": 0.5116, |
|
"step": 197600 |
|
}, |
|
{ |
|
"epoch": 11.02, |
|
"learning_rate": 0.0002, |
|
"loss": 0.5287, |
|
"step": 197700 |
|
}, |
|
{ |
|
"epoch": 11.02, |
|
"learning_rate": 0.0002, |
|
"loss": 0.5241, |
|
"step": 197800 |
|
}, |
|
{ |
|
"epoch": 11.03, |
|
"learning_rate": 0.0002, |
|
"loss": 0.5229, |
|
"step": 197900 |
|
}, |
|
{ |
|
"epoch": 11.03, |
|
"learning_rate": 0.0002, |
|
"loss": 0.5249, |
|
"step": 198000 |
|
}, |
|
{ |
|
"epoch": 11.04, |
|
"learning_rate": 0.0002, |
|
"loss": 0.5093, |
|
"step": 198100 |
|
}, |
|
{ |
|
"epoch": 11.04, |
|
"learning_rate": 0.0002, |
|
"loss": 0.5304, |
|
"step": 198200 |
|
}, |
|
{ |
|
"epoch": 11.05, |
|
"learning_rate": 0.0002, |
|
"loss": 0.5147, |
|
"step": 198300 |
|
}, |
|
{ |
|
"epoch": 11.06, |
|
"learning_rate": 0.0002, |
|
"loss": 0.5274, |
|
"step": 198400 |
|
}, |
|
{ |
|
"epoch": 11.06, |
|
"learning_rate": 0.0002, |
|
"loss": 0.5266, |
|
"step": 198500 |
|
}, |
|
{ |
|
"epoch": 11.07, |
|
"learning_rate": 0.0002, |
|
"loss": 0.507, |
|
"step": 198600 |
|
}, |
|
{ |
|
"epoch": 11.07, |
|
"learning_rate": 0.0002, |
|
"loss": 0.5189, |
|
"step": 198700 |
|
}, |
|
{ |
|
"epoch": 11.08, |
|
"learning_rate": 0.0002, |
|
"loss": 0.5388, |
|
"step": 198800 |
|
}, |
|
{ |
|
"epoch": 11.08, |
|
"learning_rate": 0.0002, |
|
"loss": 0.5069, |
|
"step": 198900 |
|
}, |
|
{ |
|
"epoch": 11.09, |
|
"learning_rate": 0.0002, |
|
"loss": 0.5212, |
|
"step": 199000 |
|
}, |
|
{ |
|
"epoch": 11.1, |
|
"learning_rate": 0.0002, |
|
"loss": 0.5088, |
|
"step": 199100 |
|
}, |
|
{ |
|
"epoch": 11.1, |
|
"learning_rate": 0.0002, |
|
"loss": 0.5189, |
|
"step": 199200 |
|
}, |
|
{ |
|
"epoch": 11.11, |
|
"learning_rate": 0.0002, |
|
"loss": 0.5218, |
|
"step": 199300 |
|
}, |
|
{ |
|
"epoch": 11.11, |
|
"learning_rate": 0.0002, |
|
"loss": 0.5197, |
|
"step": 199400 |
|
}, |
|
{ |
|
"epoch": 11.12, |
|
"learning_rate": 0.0002, |
|
"loss": 0.5246, |
|
"step": 199500 |
|
}, |
|
{ |
|
"epoch": 11.12, |
|
"learning_rate": 0.0002, |
|
"loss": 0.5255, |
|
"step": 199600 |
|
}, |
|
{ |
|
"epoch": 11.13, |
|
"learning_rate": 0.0002, |
|
"loss": 0.5164, |
|
"step": 199700 |
|
}, |
|
{ |
|
"epoch": 11.13, |
|
"learning_rate": 0.0002, |
|
"loss": 0.5201, |
|
"step": 199800 |
|
}, |
|
{ |
|
"epoch": 11.14, |
|
"learning_rate": 0.0002, |
|
"loss": 0.5277, |
|
"step": 199900 |
|
}, |
|
{ |
|
"epoch": 11.15, |
|
"learning_rate": 0.0002, |
|
"loss": 0.5277, |
|
"step": 200000 |
|
}, |
|
{ |
|
"epoch": 11.15, |
|
"eval_gen_len": 18.999476361460204, |
|
"eval_loss": 2.7242300510406494, |
|
"eval_rouge1": 25.376, |
|
"eval_rouge2": 12.4944, |
|
"eval_rougeL": 20.9774, |
|
"eval_rougeLsum": 23.985, |
|
"eval_runtime": 476.9451, |
|
"eval_samples_per_second": 28.028, |
|
"eval_steps_per_second": 1.753, |
|
"step": 200000 |
|
} |
|
], |
|
"max_steps": 897250, |
|
"num_train_epochs": 50, |
|
"total_flos": 8.661379023313306e+17, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|