{ "best_metric": null, "best_model_checkpoint": null, "epoch": 11.145165784341042, "global_step": 200000, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.01, "learning_rate": 0.0002, "loss": 0.5805, "step": 100 }, { "epoch": 0.01, "learning_rate": 0.0002, "loss": 0.5732, "step": 200 }, { "epoch": 0.02, "learning_rate": 0.0002, "loss": 0.5959, "step": 300 }, { "epoch": 0.02, "learning_rate": 0.0002, "loss": 0.6017, "step": 400 }, { "epoch": 0.03, "learning_rate": 0.0002, "loss": 0.6004, "step": 500 }, { "epoch": 0.03, "learning_rate": 0.0002, "loss": 0.5835, "step": 600 }, { "epoch": 0.04, "learning_rate": 0.0002, "loss": 0.5751, "step": 700 }, { "epoch": 0.04, "learning_rate": 0.0002, "loss": 0.611, "step": 800 }, { "epoch": 0.05, "learning_rate": 0.0002, "loss": 0.6164, "step": 900 }, { "epoch": 0.06, "learning_rate": 0.0002, "loss": 0.5925, "step": 1000 }, { "epoch": 0.06, "learning_rate": 0.0002, "loss": 0.6045, "step": 1100 }, { "epoch": 0.07, "learning_rate": 0.0002, "loss": 0.6015, "step": 1200 }, { "epoch": 0.07, "learning_rate": 0.0002, "loss": 0.594, "step": 1300 }, { "epoch": 0.08, "learning_rate": 0.0002, "loss": 0.594, "step": 1400 }, { "epoch": 0.08, "learning_rate": 0.0002, "loss": 0.5996, "step": 1500 }, { "epoch": 0.09, "learning_rate": 0.0002, "loss": 0.6044, "step": 1600 }, { "epoch": 0.09, "learning_rate": 0.0002, "loss": 0.6066, "step": 1700 }, { "epoch": 0.1, "learning_rate": 0.0002, "loss": 0.5974, "step": 1800 }, { "epoch": 0.11, "learning_rate": 0.0002, "loss": 0.5914, "step": 1900 }, { "epoch": 0.11, "learning_rate": 0.0002, "loss": 0.5953, "step": 2000 }, { "epoch": 0.12, "learning_rate": 0.0002, "loss": 0.5899, "step": 2100 }, { "epoch": 0.12, "learning_rate": 0.0002, "loss": 0.5944, "step": 2200 }, { "epoch": 0.13, "learning_rate": 0.0002, "loss": 0.5942, "step": 2300 }, { "epoch": 0.13, "learning_rate": 0.0002, "loss": 0.5967, "step": 2400 }, { "epoch": 0.14, "learning_rate": 0.0002, "loss": 0.583, "step": 2500 }, { "epoch": 0.14, "learning_rate": 0.0002, "loss": 0.6051, "step": 2600 }, { "epoch": 0.15, "learning_rate": 0.0002, "loss": 0.5985, "step": 2700 }, { "epoch": 0.16, "learning_rate": 0.0002, "loss": 0.5988, "step": 2800 }, { "epoch": 0.16, "learning_rate": 0.0002, "loss": 0.5909, "step": 2900 }, { "epoch": 0.17, "learning_rate": 0.0002, "loss": 0.5862, "step": 3000 }, { "epoch": 0.17, "learning_rate": 0.0002, "loss": 0.5955, "step": 3100 }, { "epoch": 0.18, "learning_rate": 0.0002, "loss": 0.6082, "step": 3200 }, { "epoch": 0.18, "learning_rate": 0.0002, "loss": 0.5945, "step": 3300 }, { "epoch": 0.19, "learning_rate": 0.0002, "loss": 0.5902, "step": 3400 }, { "epoch": 0.2, "learning_rate": 0.0002, "loss": 0.5972, "step": 3500 }, { "epoch": 0.2, "learning_rate": 0.0002, "loss": 0.6033, "step": 3600 }, { "epoch": 0.21, "learning_rate": 0.0002, "loss": 0.5833, "step": 3700 }, { "epoch": 0.21, "learning_rate": 0.0002, "loss": 0.6024, "step": 3800 }, { "epoch": 0.22, "learning_rate": 0.0002, "loss": 0.5846, "step": 3900 }, { "epoch": 0.22, "learning_rate": 0.0002, "loss": 0.608, "step": 4000 }, { "epoch": 0.23, "learning_rate": 0.0002, "loss": 0.6056, "step": 4100 }, { "epoch": 0.23, "learning_rate": 0.0002, "loss": 0.5992, "step": 4200 }, { "epoch": 0.24, "learning_rate": 0.0002, "loss": 0.5884, "step": 4300 }, { "epoch": 0.25, "learning_rate": 0.0002, "loss": 0.611, "step": 4400 }, { "epoch": 0.25, "learning_rate": 0.0002, "loss": 0.6021, "step": 4500 }, { "epoch": 0.26, "learning_rate": 0.0002, "loss": 0.6078, "step": 4600 }, { "epoch": 0.26, "learning_rate": 0.0002, "loss": 0.6079, "step": 4700 }, { "epoch": 0.27, "learning_rate": 0.0002, "loss": 0.5802, "step": 4800 }, { "epoch": 0.27, "learning_rate": 0.0002, "loss": 0.5957, "step": 4900 }, { "epoch": 0.28, "learning_rate": 0.0002, "loss": 0.5927, "step": 5000 }, { "epoch": 0.28, "learning_rate": 0.0002, "loss": 0.601, "step": 5100 }, { "epoch": 0.29, "learning_rate": 0.0002, "loss": 0.6067, "step": 5200 }, { "epoch": 0.3, "learning_rate": 0.0002, "loss": 0.6032, "step": 5300 }, { "epoch": 0.3, "learning_rate": 0.0002, "loss": 0.5967, "step": 5400 }, { "epoch": 0.31, "learning_rate": 0.0002, "loss": 0.6086, "step": 5500 }, { "epoch": 0.31, "learning_rate": 0.0002, "loss": 0.6088, "step": 5600 }, { "epoch": 0.32, "learning_rate": 0.0002, "loss": 0.6004, "step": 5700 }, { "epoch": 0.32, "learning_rate": 0.0002, "loss": 0.6081, "step": 5800 }, { "epoch": 0.33, "learning_rate": 0.0002, "loss": 0.5881, "step": 5900 }, { "epoch": 0.33, "learning_rate": 0.0002, "loss": 0.6046, "step": 6000 }, { "epoch": 0.34, "learning_rate": 0.0002, "loss": 0.5831, "step": 6100 }, { "epoch": 0.35, "learning_rate": 0.0002, "loss": 0.5986, "step": 6200 }, { "epoch": 0.35, "learning_rate": 0.0002, "loss": 0.5978, "step": 6300 }, { "epoch": 0.36, "learning_rate": 0.0002, "loss": 0.6047, "step": 6400 }, { "epoch": 0.36, "learning_rate": 0.0002, "loss": 0.6051, "step": 6500 }, { "epoch": 0.37, "learning_rate": 0.0002, "loss": 0.5896, "step": 6600 }, { "epoch": 0.37, "learning_rate": 0.0002, "loss": 0.5985, "step": 6700 }, { "epoch": 0.38, "learning_rate": 0.0002, "loss": 0.6038, "step": 6800 }, { "epoch": 0.38, "learning_rate": 0.0002, "loss": 0.6022, "step": 6900 }, { "epoch": 0.39, "learning_rate": 0.0002, "loss": 0.6061, "step": 7000 }, { "epoch": 0.4, "learning_rate": 0.0002, "loss": 0.5962, "step": 7100 }, { "epoch": 0.4, "learning_rate": 0.0002, "loss": 0.6063, "step": 7200 }, { "epoch": 0.41, "learning_rate": 0.0002, "loss": 0.5968, "step": 7300 }, { "epoch": 0.41, "learning_rate": 0.0002, "loss": 0.6073, "step": 7400 }, { "epoch": 0.42, "learning_rate": 0.0002, "loss": 0.5951, "step": 7500 }, { "epoch": 0.42, "learning_rate": 0.0002, "loss": 0.596, "step": 7600 }, { "epoch": 0.43, "learning_rate": 0.0002, "loss": 0.5992, "step": 7700 }, { "epoch": 0.43, "learning_rate": 0.0002, "loss": 0.6136, "step": 7800 }, { "epoch": 0.44, "learning_rate": 0.0002, "loss": 0.6, "step": 7900 }, { "epoch": 0.45, "learning_rate": 0.0002, "loss": 0.6036, "step": 8000 }, { "epoch": 0.45, "learning_rate": 0.0002, "loss": 0.6085, "step": 8100 }, { "epoch": 0.46, "learning_rate": 0.0002, "loss": 0.5925, "step": 8200 }, { "epoch": 0.46, "learning_rate": 0.0002, "loss": 0.6132, "step": 8300 }, { "epoch": 0.47, "learning_rate": 0.0002, "loss": 0.5988, "step": 8400 }, { "epoch": 0.47, "learning_rate": 0.0002, "loss": 0.6097, "step": 8500 }, { "epoch": 0.48, "learning_rate": 0.0002, "loss": 0.5982, "step": 8600 }, { "epoch": 0.48, "learning_rate": 0.0002, "loss": 0.6047, "step": 8700 }, { "epoch": 0.49, "learning_rate": 0.0002, "loss": 0.614, "step": 8800 }, { "epoch": 0.5, "learning_rate": 0.0002, "loss": 0.6009, "step": 8900 }, { "epoch": 0.5, "learning_rate": 0.0002, "loss": 0.6006, "step": 9000 }, { "epoch": 0.51, "learning_rate": 0.0002, "loss": 0.5976, "step": 9100 }, { "epoch": 0.51, "learning_rate": 0.0002, "loss": 0.5978, "step": 9200 }, { "epoch": 0.52, "learning_rate": 0.0002, "loss": 0.6068, "step": 9300 }, { "epoch": 0.52, "learning_rate": 0.0002, "loss": 0.6083, "step": 9400 }, { "epoch": 0.53, "learning_rate": 0.0002, "loss": 0.6061, "step": 9500 }, { "epoch": 0.53, "learning_rate": 0.0002, "loss": 0.6045, "step": 9600 }, { "epoch": 0.54, "learning_rate": 0.0002, "loss": 0.5928, "step": 9700 }, { "epoch": 0.55, "learning_rate": 0.0002, "loss": 0.5981, "step": 9800 }, { "epoch": 0.55, "learning_rate": 0.0002, "loss": 0.6171, "step": 9900 }, { "epoch": 0.56, "learning_rate": 0.0002, "loss": 0.6079, "step": 10000 }, { "epoch": 0.56, "eval_gen_len": 18.999476361460204, "eval_loss": 2.610891819000244, "eval_rouge1": 25.2905, "eval_rouge2": 12.3803, "eval_rougeL": 20.8964, "eval_rougeLsum": 23.9052, "eval_runtime": 491.8614, "eval_samples_per_second": 27.178, "eval_steps_per_second": 1.7, "step": 10000 }, { "epoch": 0.56, "learning_rate": 0.0002, "loss": 0.6068, "step": 10100 }, { "epoch": 0.57, "learning_rate": 0.0002, "loss": 0.5903, "step": 10200 }, { "epoch": 0.57, "learning_rate": 0.0002, "loss": 0.6039, "step": 10300 }, { "epoch": 0.58, "learning_rate": 0.0002, "loss": 0.6014, "step": 10400 }, { "epoch": 0.59, "learning_rate": 0.0002, "loss": 0.6076, "step": 10500 }, { "epoch": 0.59, "learning_rate": 0.0002, "loss": 0.6062, "step": 10600 }, { "epoch": 0.6, "learning_rate": 0.0002, "loss": 0.6013, "step": 10700 }, { "epoch": 0.6, "learning_rate": 0.0002, "loss": 0.615, "step": 10800 }, { "epoch": 0.61, "learning_rate": 0.0002, "loss": 0.6003, "step": 10900 }, { "epoch": 0.61, "learning_rate": 0.0002, "loss": 0.6059, "step": 11000 }, { "epoch": 0.62, "learning_rate": 0.0002, "loss": 0.6144, "step": 11100 }, { "epoch": 0.62, "learning_rate": 0.0002, "loss": 0.626, "step": 11200 }, { "epoch": 0.63, "learning_rate": 0.0002, "loss": 0.5908, "step": 11300 }, { "epoch": 0.64, "learning_rate": 0.0002, "loss": 0.6052, "step": 11400 }, { "epoch": 0.64, "learning_rate": 0.0002, "loss": 0.6159, "step": 11500 }, { "epoch": 0.65, "learning_rate": 0.0002, "loss": 0.6093, "step": 11600 }, { "epoch": 0.65, "learning_rate": 0.0002, "loss": 0.6048, "step": 11700 }, { "epoch": 0.66, "learning_rate": 0.0002, "loss": 0.6134, "step": 11800 }, { "epoch": 0.66, "learning_rate": 0.0002, "loss": 0.603, "step": 11900 }, { "epoch": 0.67, "learning_rate": 0.0002, "loss": 0.5971, "step": 12000 }, { "epoch": 0.67, "learning_rate": 0.0002, "loss": 0.6122, "step": 12100 }, { "epoch": 0.68, "learning_rate": 0.0002, "loss": 0.603, "step": 12200 }, { "epoch": 0.69, "learning_rate": 0.0002, "loss": 0.6016, "step": 12300 }, { "epoch": 0.69, "learning_rate": 0.0002, "loss": 0.5888, "step": 12400 }, { "epoch": 0.7, "learning_rate": 0.0002, "loss": 0.6016, "step": 12500 }, { "epoch": 0.7, "learning_rate": 0.0002, "loss": 0.6155, "step": 12600 }, { "epoch": 0.71, "learning_rate": 0.0002, "loss": 0.6121, "step": 12700 }, { "epoch": 0.71, "learning_rate": 0.0002, "loss": 0.6147, "step": 12800 }, { "epoch": 0.72, "learning_rate": 0.0002, "loss": 0.5936, "step": 12900 }, { "epoch": 0.72, "learning_rate": 0.0002, "loss": 0.5999, "step": 13000 }, { "epoch": 0.73, "learning_rate": 0.0002, "loss": 0.6058, "step": 13100 }, { "epoch": 0.74, "learning_rate": 0.0002, "loss": 0.6123, "step": 13200 }, { "epoch": 0.74, "learning_rate": 0.0002, "loss": 0.5933, "step": 13300 }, { "epoch": 0.75, "learning_rate": 0.0002, "loss": 0.5945, "step": 13400 }, { "epoch": 0.75, "learning_rate": 0.0002, "loss": 0.6042, "step": 13500 }, { "epoch": 0.76, "learning_rate": 0.0002, "loss": 0.6169, "step": 13600 }, { "epoch": 0.76, "learning_rate": 0.0002, "loss": 0.6122, "step": 13700 }, { "epoch": 0.77, "learning_rate": 0.0002, "loss": 0.6122, "step": 13800 }, { "epoch": 0.77, "learning_rate": 0.0002, "loss": 0.6034, "step": 13900 }, { "epoch": 0.78, "learning_rate": 0.0002, "loss": 0.5984, "step": 14000 }, { "epoch": 0.79, "learning_rate": 0.0002, "loss": 0.6166, "step": 14100 }, { "epoch": 0.79, "learning_rate": 0.0002, "loss": 0.623, "step": 14200 }, { "epoch": 0.8, "learning_rate": 0.0002, "loss": 0.6052, "step": 14300 }, { "epoch": 0.8, "learning_rate": 0.0002, "loss": 0.6056, "step": 14400 }, { "epoch": 0.81, "learning_rate": 0.0002, "loss": 0.6185, "step": 14500 }, { "epoch": 0.81, "learning_rate": 0.0002, "loss": 0.6204, "step": 14600 }, { "epoch": 0.82, "learning_rate": 0.0002, "loss": 0.6194, "step": 14700 }, { "epoch": 0.82, "learning_rate": 0.0002, "loss": 0.6042, "step": 14800 }, { "epoch": 0.83, "learning_rate": 0.0002, "loss": 0.5974, "step": 14900 }, { "epoch": 0.84, "learning_rate": 0.0002, "loss": 0.6052, "step": 15000 }, { "epoch": 0.84, "learning_rate": 0.0002, "loss": 0.597, "step": 15100 }, { "epoch": 0.85, "learning_rate": 0.0002, "loss": 0.6095, "step": 15200 }, { "epoch": 0.85, "learning_rate": 0.0002, "loss": 0.6187, "step": 15300 }, { "epoch": 0.86, "learning_rate": 0.0002, "loss": 0.6066, "step": 15400 }, { "epoch": 0.86, "learning_rate": 0.0002, "loss": 0.6132, "step": 15500 }, { "epoch": 0.87, "learning_rate": 0.0002, "loss": 0.6156, "step": 15600 }, { "epoch": 0.87, "learning_rate": 0.0002, "loss": 0.6081, "step": 15700 }, { "epoch": 0.88, "learning_rate": 0.0002, "loss": 0.6079, "step": 15800 }, { "epoch": 0.89, "learning_rate": 0.0002, "loss": 0.6253, "step": 15900 }, { "epoch": 0.89, "learning_rate": 0.0002, "loss": 0.6029, "step": 16000 }, { "epoch": 0.9, "learning_rate": 0.0002, "loss": 0.6056, "step": 16100 }, { "epoch": 0.9, "learning_rate": 0.0002, "loss": 0.6165, "step": 16200 }, { "epoch": 0.91, "learning_rate": 0.0002, "loss": 0.6223, "step": 16300 }, { "epoch": 0.91, "learning_rate": 0.0002, "loss": 0.5993, "step": 16400 }, { "epoch": 0.92, "learning_rate": 0.0002, "loss": 0.6096, "step": 16500 }, { "epoch": 0.93, "learning_rate": 0.0002, "loss": 0.613, "step": 16600 }, { "epoch": 0.93, "learning_rate": 0.0002, "loss": 0.5931, "step": 16700 }, { "epoch": 0.94, "learning_rate": 0.0002, "loss": 0.6158, "step": 16800 }, { "epoch": 0.94, "learning_rate": 0.0002, "loss": 0.6129, "step": 16900 }, { "epoch": 0.95, "learning_rate": 0.0002, "loss": 0.6097, "step": 17000 }, { "epoch": 0.95, "learning_rate": 0.0002, "loss": 0.6102, "step": 17100 }, { "epoch": 0.96, "learning_rate": 0.0002, "loss": 0.6232, "step": 17200 }, { "epoch": 0.96, "learning_rate": 0.0002, "loss": 0.6099, "step": 17300 }, { "epoch": 0.97, "learning_rate": 0.0002, "loss": 0.6182, "step": 17400 }, { "epoch": 0.98, "learning_rate": 0.0002, "loss": 0.6115, "step": 17500 }, { "epoch": 0.98, "learning_rate": 0.0002, "loss": 0.6156, "step": 17600 }, { "epoch": 0.99, "learning_rate": 0.0002, "loss": 0.6217, "step": 17700 }, { "epoch": 0.99, "learning_rate": 0.0002, "loss": 0.6155, "step": 17800 }, { "epoch": 1.0, "learning_rate": 0.0002, "loss": 0.5991, "step": 17900 }, { "epoch": 1.0, "learning_rate": 0.0002, "loss": 0.5913, "step": 18000 }, { "epoch": 1.01, "learning_rate": 0.0002, "loss": 0.5782, "step": 18100 }, { "epoch": 1.01, "learning_rate": 0.0002, "loss": 0.5648, "step": 18200 }, { "epoch": 1.02, "learning_rate": 0.0002, "loss": 0.5837, "step": 18300 }, { "epoch": 1.03, "learning_rate": 0.0002, "loss": 0.582, "step": 18400 }, { "epoch": 1.03, "learning_rate": 0.0002, "loss": 0.5695, "step": 18500 }, { "epoch": 1.04, "learning_rate": 0.0002, "loss": 0.59, "step": 18600 }, { "epoch": 1.04, "learning_rate": 0.0002, "loss": 0.5753, "step": 18700 }, { "epoch": 1.05, "learning_rate": 0.0002, "loss": 0.5902, "step": 18800 }, { "epoch": 1.05, "learning_rate": 0.0002, "loss": 0.5841, "step": 18900 }, { "epoch": 1.06, "learning_rate": 0.0002, "loss": 0.579, "step": 19000 }, { "epoch": 1.06, "learning_rate": 0.0002, "loss": 0.5733, "step": 19100 }, { "epoch": 1.07, "learning_rate": 0.0002, "loss": 0.5818, "step": 19200 }, { "epoch": 1.08, "learning_rate": 0.0002, "loss": 0.5821, "step": 19300 }, { "epoch": 1.08, "learning_rate": 0.0002, "loss": 0.5814, "step": 19400 }, { "epoch": 1.09, "learning_rate": 0.0002, "loss": 0.5849, "step": 19500 }, { "epoch": 1.09, "learning_rate": 0.0002, "loss": 0.5966, "step": 19600 }, { "epoch": 1.1, "learning_rate": 0.0002, "loss": 0.5895, "step": 19700 }, { "epoch": 1.1, "learning_rate": 0.0002, "loss": 0.5865, "step": 19800 }, { "epoch": 1.11, "learning_rate": 0.0002, "loss": 0.5904, "step": 19900 }, { "epoch": 1.11, "learning_rate": 0.0002, "loss": 0.578, "step": 20000 }, { "epoch": 1.11, "eval_gen_len": 18.99970077797726, "eval_loss": 2.692544460296631, "eval_rouge1": 25.2837, "eval_rouge2": 12.3389, "eval_rougeL": 20.91, "eval_rougeLsum": 23.8923, "eval_runtime": 488.8383, "eval_samples_per_second": 27.346, "eval_steps_per_second": 1.71, "step": 20000 }, { "epoch": 1.12, "learning_rate": 0.0002, "loss": 0.5816, "step": 20100 }, { "epoch": 1.13, "learning_rate": 0.0002, "loss": 0.602, "step": 20200 }, { "epoch": 1.13, "learning_rate": 0.0002, "loss": 0.594, "step": 20300 }, { "epoch": 1.14, "learning_rate": 0.0002, "loss": 0.5886, "step": 20400 }, { "epoch": 1.14, "learning_rate": 0.0002, "loss": 0.5985, "step": 20500 }, { "epoch": 1.15, "learning_rate": 0.0002, "loss": 0.5962, "step": 20600 }, { "epoch": 1.15, "learning_rate": 0.0002, "loss": 0.5733, "step": 20700 }, { "epoch": 1.16, "learning_rate": 0.0002, "loss": 0.5889, "step": 20800 }, { "epoch": 1.16, "learning_rate": 0.0002, "loss": 0.6017, "step": 20900 }, { "epoch": 1.17, "learning_rate": 0.0002, "loss": 0.5867, "step": 21000 }, { "epoch": 1.18, "learning_rate": 0.0002, "loss": 0.5847, "step": 21100 }, { "epoch": 1.18, "learning_rate": 0.0002, "loss": 0.5918, "step": 21200 }, { "epoch": 1.19, "learning_rate": 0.0002, "loss": 0.5794, "step": 21300 }, { "epoch": 1.19, "learning_rate": 0.0002, "loss": 0.5806, "step": 21400 }, { "epoch": 1.2, "learning_rate": 0.0002, "loss": 0.5855, "step": 21500 }, { "epoch": 1.2, "learning_rate": 0.0002, "loss": 0.5821, "step": 21600 }, { "epoch": 1.21, "learning_rate": 0.0002, "loss": 0.5735, "step": 21700 }, { "epoch": 1.21, "learning_rate": 0.0002, "loss": 0.5953, "step": 21800 }, { "epoch": 1.22, "learning_rate": 0.0002, "loss": 0.5904, "step": 21900 }, { "epoch": 1.23, "learning_rate": 0.0002, "loss": 0.5962, "step": 22000 }, { "epoch": 1.23, "learning_rate": 0.0002, "loss": 0.5942, "step": 22100 }, { "epoch": 1.24, "learning_rate": 0.0002, "loss": 0.5871, "step": 22200 }, { "epoch": 1.24, "learning_rate": 0.0002, "loss": 0.5851, "step": 22300 }, { "epoch": 1.25, "learning_rate": 0.0002, "loss": 0.5921, "step": 22400 }, { "epoch": 1.25, "learning_rate": 0.0002, "loss": 0.5971, "step": 22500 }, { "epoch": 1.26, "learning_rate": 0.0002, "loss": 0.5882, "step": 22600 }, { "epoch": 1.26, "learning_rate": 0.0002, "loss": 0.5813, "step": 22700 }, { "epoch": 1.27, "learning_rate": 0.0002, "loss": 0.5916, "step": 22800 }, { "epoch": 1.28, "learning_rate": 0.0002, "loss": 0.5959, "step": 22900 }, { "epoch": 1.28, "learning_rate": 0.0002, "loss": 0.5926, "step": 23000 }, { "epoch": 1.29, "learning_rate": 0.0002, "loss": 0.5883, "step": 23100 }, { "epoch": 1.29, "learning_rate": 0.0002, "loss": 0.5929, "step": 23200 }, { "epoch": 1.3, "learning_rate": 0.0002, "loss": 0.584, "step": 23300 }, { "epoch": 1.3, "learning_rate": 0.0002, "loss": 0.5957, "step": 23400 }, { "epoch": 1.31, "learning_rate": 0.0002, "loss": 0.5954, "step": 23500 }, { "epoch": 1.32, "learning_rate": 0.0002, "loss": 0.5938, "step": 23600 }, { "epoch": 1.32, "learning_rate": 0.0002, "loss": 0.5961, "step": 23700 }, { "epoch": 1.33, "learning_rate": 0.0002, "loss": 0.6012, "step": 23800 }, { "epoch": 1.33, "learning_rate": 0.0002, "loss": 0.5899, "step": 23900 }, { "epoch": 1.34, "learning_rate": 0.0002, "loss": 0.5941, "step": 24000 }, { "epoch": 1.34, "learning_rate": 0.0002, "loss": 0.5936, "step": 24100 }, { "epoch": 1.35, "learning_rate": 0.0002, "loss": 0.5912, "step": 24200 }, { "epoch": 1.35, "learning_rate": 0.0002, "loss": 0.5873, "step": 24300 }, { "epoch": 1.36, "learning_rate": 0.0002, "loss": 0.578, "step": 24400 }, { "epoch": 1.37, "learning_rate": 0.0002, "loss": 0.5942, "step": 24500 }, { "epoch": 1.37, "learning_rate": 0.0002, "loss": 0.5918, "step": 24600 }, { "epoch": 1.38, "learning_rate": 0.0002, "loss": 0.5923, "step": 24700 }, { "epoch": 1.38, "learning_rate": 0.0002, "loss": 0.5818, "step": 24800 }, { "epoch": 1.39, "learning_rate": 0.0002, "loss": 0.5904, "step": 24900 }, { "epoch": 1.39, "learning_rate": 0.0002, "loss": 0.5991, "step": 25000 }, { "epoch": 1.4, "learning_rate": 0.0002, "loss": 0.601, "step": 25100 }, { "epoch": 1.4, "learning_rate": 0.0002, "loss": 0.5919, "step": 25200 }, { "epoch": 1.41, "learning_rate": 0.0002, "loss": 0.5836, "step": 25300 }, { "epoch": 1.42, "learning_rate": 0.0002, "loss": 0.592, "step": 25400 }, { "epoch": 1.42, "learning_rate": 0.0002, "loss": 0.5917, "step": 25500 }, { "epoch": 1.43, "learning_rate": 0.0002, "loss": 0.5796, "step": 25600 }, { "epoch": 1.43, "learning_rate": 0.0002, "loss": 0.5914, "step": 25700 }, { "epoch": 1.44, "learning_rate": 0.0002, "loss": 0.5897, "step": 25800 }, { "epoch": 1.44, "learning_rate": 0.0002, "loss": 0.5851, "step": 25900 }, { "epoch": 1.45, "learning_rate": 0.0002, "loss": 0.5803, "step": 26000 }, { "epoch": 1.45, "learning_rate": 0.0002, "loss": 0.5905, "step": 26100 }, { "epoch": 1.46, "learning_rate": 0.0002, "loss": 0.598, "step": 26200 }, { "epoch": 1.47, "learning_rate": 0.0002, "loss": 0.5915, "step": 26300 }, { "epoch": 1.47, "learning_rate": 0.0002, "loss": 0.6016, "step": 26400 }, { "epoch": 1.48, "learning_rate": 0.0002, "loss": 0.593, "step": 26500 }, { "epoch": 1.48, "learning_rate": 0.0002, "loss": 0.5911, "step": 26600 }, { "epoch": 1.49, "learning_rate": 0.0002, "loss": 0.5933, "step": 26700 }, { "epoch": 1.49, "learning_rate": 0.0002, "loss": 0.613, "step": 26800 }, { "epoch": 1.5, "learning_rate": 0.0002, "loss": 0.5972, "step": 26900 }, { "epoch": 1.5, "learning_rate": 0.0002, "loss": 0.5806, "step": 27000 }, { "epoch": 1.51, "learning_rate": 0.0002, "loss": 0.5964, "step": 27100 }, { "epoch": 1.52, "learning_rate": 0.0002, "loss": 0.5815, "step": 27200 }, { "epoch": 1.52, "learning_rate": 0.0002, "loss": 0.6144, "step": 27300 }, { "epoch": 1.53, "learning_rate": 0.0002, "loss": 0.6142, "step": 27400 }, { "epoch": 1.53, "learning_rate": 0.0002, "loss": 0.583, "step": 27500 }, { "epoch": 1.54, "learning_rate": 0.0002, "loss": 0.5924, "step": 27600 }, { "epoch": 1.54, "learning_rate": 0.0002, "loss": 0.586, "step": 27700 }, { "epoch": 1.55, "learning_rate": 0.0002, "loss": 0.5985, "step": 27800 }, { "epoch": 1.55, "learning_rate": 0.0002, "loss": 0.5974, "step": 27900 }, { "epoch": 1.56, "learning_rate": 0.0002, "loss": 0.5967, "step": 28000 }, { "epoch": 1.57, "learning_rate": 0.0002, "loss": 0.5927, "step": 28100 }, { "epoch": 1.57, "learning_rate": 0.0002, "loss": 0.6018, "step": 28200 }, { "epoch": 1.58, "learning_rate": 0.0002, "loss": 0.5816, "step": 28300 }, { "epoch": 1.58, "learning_rate": 0.0002, "loss": 0.601, "step": 28400 }, { "epoch": 1.59, "learning_rate": 0.0002, "loss": 0.6021, "step": 28500 }, { "epoch": 1.59, "learning_rate": 0.0002, "loss": 0.5941, "step": 28600 }, { "epoch": 1.6, "learning_rate": 0.0002, "loss": 0.5921, "step": 28700 }, { "epoch": 1.6, "learning_rate": 0.0002, "loss": 0.6102, "step": 28800 }, { "epoch": 1.61, "learning_rate": 0.0002, "loss": 0.5957, "step": 28900 }, { "epoch": 1.62, "learning_rate": 0.0002, "loss": 0.5884, "step": 29000 }, { "epoch": 1.62, "learning_rate": 0.0002, "loss": 0.596, "step": 29100 }, { "epoch": 1.63, "learning_rate": 0.0002, "loss": 0.5879, "step": 29200 }, { "epoch": 1.63, "learning_rate": 0.0002, "loss": 0.5907, "step": 29300 }, { "epoch": 1.64, "learning_rate": 0.0002, "loss": 0.586, "step": 29400 }, { "epoch": 1.64, "learning_rate": 0.0002, "loss": 0.5921, "step": 29500 }, { "epoch": 1.65, "learning_rate": 0.0002, "loss": 0.5971, "step": 29600 }, { "epoch": 1.66, "learning_rate": 0.0002, "loss": 0.6035, "step": 29700 }, { "epoch": 1.66, "learning_rate": 0.0002, "loss": 0.6047, "step": 29800 }, { "epoch": 1.67, "learning_rate": 0.0002, "loss": 0.5819, "step": 29900 }, { "epoch": 1.67, "learning_rate": 0.0002, "loss": 0.6072, "step": 30000 }, { "epoch": 1.67, "eval_gen_len": 18.99970077797726, "eval_loss": 2.629517078399658, "eval_rouge1": 25.3476, "eval_rouge2": 12.4149, "eval_rougeL": 20.9269, "eval_rougeLsum": 23.9015, "eval_runtime": 487.904, "eval_samples_per_second": 27.399, "eval_steps_per_second": 1.713, "step": 30000 }, { "epoch": 1.68, "learning_rate": 0.0002, "loss": 0.6078, "step": 30100 }, { "epoch": 1.68, "learning_rate": 0.0002, "loss": 0.5932, "step": 30200 }, { "epoch": 1.69, "learning_rate": 0.0002, "loss": 0.5958, "step": 30300 }, { "epoch": 1.69, "learning_rate": 0.0002, "loss": 0.5965, "step": 30400 }, { "epoch": 1.7, "learning_rate": 0.0002, "loss": 0.5863, "step": 30500 }, { "epoch": 1.71, "learning_rate": 0.0002, "loss": 0.5901, "step": 30600 }, { "epoch": 1.71, "learning_rate": 0.0002, "loss": 0.604, "step": 30700 }, { "epoch": 1.72, "learning_rate": 0.0002, "loss": 0.5981, "step": 30800 }, { "epoch": 1.72, "learning_rate": 0.0002, "loss": 0.5928, "step": 30900 }, { "epoch": 1.73, "learning_rate": 0.0002, "loss": 0.5911, "step": 31000 }, { "epoch": 1.73, "learning_rate": 0.0002, "loss": 0.5957, "step": 31100 }, { "epoch": 1.74, "learning_rate": 0.0002, "loss": 0.5924, "step": 31200 }, { "epoch": 1.74, "learning_rate": 0.0002, "loss": 0.5909, "step": 31300 }, { "epoch": 1.75, "learning_rate": 0.0002, "loss": 0.6015, "step": 31400 }, { "epoch": 1.76, "learning_rate": 0.0002, "loss": 0.5902, "step": 31500 }, { "epoch": 1.76, "learning_rate": 0.0002, "loss": 0.5853, "step": 31600 }, { "epoch": 1.77, "learning_rate": 0.0002, "loss": 0.5808, "step": 31700 }, { "epoch": 1.77, "learning_rate": 0.0002, "loss": 0.6019, "step": 31800 }, { "epoch": 1.78, "learning_rate": 0.0002, "loss": 0.6043, "step": 31900 }, { "epoch": 1.78, "learning_rate": 0.0002, "loss": 0.5967, "step": 32000 }, { "epoch": 1.79, "learning_rate": 0.0002, "loss": 0.6074, "step": 32100 }, { "epoch": 1.79, "learning_rate": 0.0002, "loss": 0.5957, "step": 32200 }, { "epoch": 1.8, "learning_rate": 0.0002, "loss": 0.5929, "step": 32300 }, { "epoch": 1.81, "learning_rate": 0.0002, "loss": 0.6048, "step": 32400 }, { "epoch": 1.81, "learning_rate": 0.0002, "loss": 0.5983, "step": 32500 }, { "epoch": 1.82, "learning_rate": 0.0002, "loss": 0.6049, "step": 32600 }, { "epoch": 1.82, "learning_rate": 0.0002, "loss": 0.5919, "step": 32700 }, { "epoch": 1.83, "learning_rate": 0.0002, "loss": 0.5978, "step": 32800 }, { "epoch": 1.83, "learning_rate": 0.0002, "loss": 0.6035, "step": 32900 }, { "epoch": 1.84, "learning_rate": 0.0002, "loss": 0.5923, "step": 33000 }, { "epoch": 1.84, "learning_rate": 0.0002, "loss": 0.599, "step": 33100 }, { "epoch": 1.85, "learning_rate": 0.0002, "loss": 0.6021, "step": 33200 }, { "epoch": 1.86, "learning_rate": 0.0002, "loss": 0.6071, "step": 33300 }, { "epoch": 1.86, "learning_rate": 0.0002, "loss": 0.6054, "step": 33400 }, { "epoch": 1.87, "learning_rate": 0.0002, "loss": 0.6068, "step": 33500 }, { "epoch": 1.87, "learning_rate": 0.0002, "loss": 0.6015, "step": 33600 }, { "epoch": 1.88, "learning_rate": 0.0002, "loss": 0.6029, "step": 33700 }, { "epoch": 1.88, "learning_rate": 0.0002, "loss": 0.6054, "step": 33800 }, { "epoch": 1.89, "learning_rate": 0.0002, "loss": 0.6003, "step": 33900 }, { "epoch": 1.89, "learning_rate": 0.0002, "loss": 0.6049, "step": 34000 }, { "epoch": 1.9, "learning_rate": 0.0002, "loss": 0.5992, "step": 34100 }, { "epoch": 1.91, "learning_rate": 0.0002, "loss": 0.605, "step": 34200 }, { "epoch": 1.91, "learning_rate": 0.0002, "loss": 0.604, "step": 34300 }, { "epoch": 1.92, "learning_rate": 0.0002, "loss": 0.6024, "step": 34400 }, { "epoch": 1.92, "learning_rate": 0.0002, "loss": 0.6052, "step": 34500 }, { "epoch": 1.93, "learning_rate": 0.0002, "loss": 0.6082, "step": 34600 }, { "epoch": 1.93, "learning_rate": 0.0002, "loss": 0.5998, "step": 34700 }, { "epoch": 1.94, "learning_rate": 0.0002, "loss": 0.5957, "step": 34800 }, { "epoch": 1.94, "learning_rate": 0.0002, "loss": 0.6004, "step": 34900 }, { "epoch": 1.95, "learning_rate": 0.0002, "loss": 0.6019, "step": 35000 }, { "epoch": 1.96, "learning_rate": 0.0002, "loss": 0.5966, "step": 35100 }, { "epoch": 1.96, "learning_rate": 0.0002, "loss": 0.6017, "step": 35200 }, { "epoch": 1.97, "learning_rate": 0.0002, "loss": 0.5934, "step": 35300 }, { "epoch": 1.97, "learning_rate": 0.0002, "loss": 0.6139, "step": 35400 }, { "epoch": 1.98, "learning_rate": 0.0002, "loss": 0.6122, "step": 35500 }, { "epoch": 1.98, "learning_rate": 0.0002, "loss": 0.6056, "step": 35600 }, { "epoch": 1.99, "learning_rate": 0.0002, "loss": 0.6016, "step": 35700 }, { "epoch": 1.99, "learning_rate": 0.0002, "loss": 0.5992, "step": 35800 }, { "epoch": 2.0, "learning_rate": 0.0002, "loss": 0.6011, "step": 35900 }, { "epoch": 2.01, "learning_rate": 0.0002, "loss": 0.5693, "step": 36000 }, { "epoch": 2.01, "learning_rate": 0.0002, "loss": 0.58, "step": 36100 }, { "epoch": 2.02, "learning_rate": 0.0002, "loss": 0.5761, "step": 36200 }, { "epoch": 2.02, "learning_rate": 0.0002, "loss": 0.5743, "step": 36300 }, { "epoch": 2.03, "learning_rate": 0.0002, "loss": 0.596, "step": 36400 }, { "epoch": 2.03, "learning_rate": 0.0002, "loss": 0.5819, "step": 36500 }, { "epoch": 2.04, "learning_rate": 0.0002, "loss": 0.5774, "step": 36600 }, { "epoch": 2.05, "learning_rate": 0.0002, "loss": 0.5728, "step": 36700 }, { "epoch": 2.05, "learning_rate": 0.0002, "loss": 0.5731, "step": 36800 }, { "epoch": 2.06, "learning_rate": 0.0002, "loss": 0.5581, "step": 36900 }, { "epoch": 2.06, "learning_rate": 0.0002, "loss": 0.5765, "step": 37000 }, { "epoch": 2.07, "learning_rate": 0.0002, "loss": 0.5834, "step": 37100 }, { "epoch": 2.07, "learning_rate": 0.0002, "loss": 0.5782, "step": 37200 }, { "epoch": 2.08, "learning_rate": 0.0002, "loss": 0.5666, "step": 37300 }, { "epoch": 2.08, "learning_rate": 0.0002, "loss": 0.5876, "step": 37400 }, { "epoch": 2.09, "learning_rate": 0.0002, "loss": 0.5814, "step": 37500 }, { "epoch": 2.1, "learning_rate": 0.0002, "loss": 0.5725, "step": 37600 }, { "epoch": 2.1, "learning_rate": 0.0002, "loss": 0.577, "step": 37700 }, { "epoch": 2.11, "learning_rate": 0.0002, "loss": 0.5717, "step": 37800 }, { "epoch": 2.11, "learning_rate": 0.0002, "loss": 0.5775, "step": 37900 }, { "epoch": 2.12, "learning_rate": 0.0002, "loss": 0.5777, "step": 38000 }, { "epoch": 2.12, "learning_rate": 0.0002, "loss": 0.5907, "step": 38100 }, { "epoch": 2.13, "learning_rate": 0.0002, "loss": 0.5656, "step": 38200 }, { "epoch": 2.13, "learning_rate": 0.0002, "loss": 0.574, "step": 38300 }, { "epoch": 2.14, "learning_rate": 0.0002, "loss": 0.5883, "step": 38400 }, { "epoch": 2.15, "learning_rate": 0.0002, "loss": 0.5836, "step": 38500 }, { "epoch": 2.15, "learning_rate": 0.0002, "loss": 0.5699, "step": 38600 }, { "epoch": 2.16, "learning_rate": 0.0002, "loss": 0.5808, "step": 38700 }, { "epoch": 2.16, "learning_rate": 0.0002, "loss": 0.5871, "step": 38800 }, { "epoch": 2.17, "learning_rate": 0.0002, "loss": 0.5876, "step": 38900 }, { "epoch": 2.17, "learning_rate": 0.0002, "loss": 0.5783, "step": 39000 }, { "epoch": 2.18, "learning_rate": 0.0002, "loss": 0.5787, "step": 39100 }, { "epoch": 2.18, "learning_rate": 0.0002, "loss": 0.5748, "step": 39200 }, { "epoch": 2.19, "learning_rate": 0.0002, "loss": 0.5892, "step": 39300 }, { "epoch": 2.2, "learning_rate": 0.0002, "loss": 0.574, "step": 39400 }, { "epoch": 2.2, "learning_rate": 0.0002, "loss": 0.5739, "step": 39500 }, { "epoch": 2.21, "learning_rate": 0.0002, "loss": 0.5854, "step": 39600 }, { "epoch": 2.21, "learning_rate": 0.0002, "loss": 0.5805, "step": 39700 }, { "epoch": 2.22, "learning_rate": 0.0002, "loss": 0.583, "step": 39800 }, { "epoch": 2.22, "learning_rate": 0.0002, "loss": 0.5755, "step": 39900 }, { "epoch": 2.23, "learning_rate": 0.0002, "loss": 0.5823, "step": 40000 }, { "epoch": 2.23, "eval_gen_len": 18.999326750448834, "eval_loss": 2.674076557159424, "eval_rouge1": 25.2523, "eval_rouge2": 12.374, "eval_rougeL": 20.8567, "eval_rougeLsum": 23.8483, "eval_runtime": 489.0303, "eval_samples_per_second": 27.336, "eval_steps_per_second": 1.71, "step": 40000 }, { "epoch": 2.23, "learning_rate": 0.0002, "loss": 0.5809, "step": 40100 }, { "epoch": 2.24, "learning_rate": 0.0002, "loss": 0.584, "step": 40200 }, { "epoch": 2.25, "learning_rate": 0.0002, "loss": 0.5734, "step": 40300 }, { "epoch": 2.25, "learning_rate": 0.0002, "loss": 0.5707, "step": 40400 }, { "epoch": 2.26, "learning_rate": 0.0002, "loss": 0.5797, "step": 40500 }, { "epoch": 2.26, "learning_rate": 0.0002, "loss": 0.5752, "step": 40600 }, { "epoch": 2.27, "learning_rate": 0.0002, "loss": 0.5887, "step": 40700 }, { "epoch": 2.27, "learning_rate": 0.0002, "loss": 0.5814, "step": 40800 }, { "epoch": 2.28, "learning_rate": 0.0002, "loss": 0.5852, "step": 40900 }, { "epoch": 2.28, "learning_rate": 0.0002, "loss": 0.5796, "step": 41000 }, { "epoch": 2.29, "learning_rate": 0.0002, "loss": 0.5758, "step": 41100 }, { "epoch": 2.3, "learning_rate": 0.0002, "loss": 0.5844, "step": 41200 }, { "epoch": 2.3, "learning_rate": 0.0002, "loss": 0.5859, "step": 41300 }, { "epoch": 2.31, "learning_rate": 0.0002, "loss": 0.5693, "step": 41400 }, { "epoch": 2.31, "learning_rate": 0.0002, "loss": 0.5805, "step": 41500 }, { "epoch": 2.32, "learning_rate": 0.0002, "loss": 0.5677, "step": 41600 }, { "epoch": 2.32, "learning_rate": 0.0002, "loss": 0.5764, "step": 41700 }, { "epoch": 2.33, "learning_rate": 0.0002, "loss": 0.568, "step": 41800 }, { "epoch": 2.33, "learning_rate": 0.0002, "loss": 0.573, "step": 41900 }, { "epoch": 2.34, "learning_rate": 0.0002, "loss": 0.5856, "step": 42000 }, { "epoch": 2.35, "learning_rate": 0.0002, "loss": 0.5831, "step": 42100 }, { "epoch": 2.35, "learning_rate": 0.0002, "loss": 0.5815, "step": 42200 }, { "epoch": 2.36, "learning_rate": 0.0002, "loss": 0.5834, "step": 42300 }, { "epoch": 2.36, "learning_rate": 0.0002, "loss": 0.5786, "step": 42400 }, { "epoch": 2.37, "learning_rate": 0.0002, "loss": 0.5914, "step": 42500 }, { "epoch": 2.37, "learning_rate": 0.0002, "loss": 0.5717, "step": 42600 }, { "epoch": 2.38, "learning_rate": 0.0002, "loss": 0.5857, "step": 42700 }, { "epoch": 2.39, "learning_rate": 0.0002, "loss": 0.591, "step": 42800 }, { "epoch": 2.39, "learning_rate": 0.0002, "loss": 0.59, "step": 42900 }, { "epoch": 2.4, "learning_rate": 0.0002, "loss": 0.5756, "step": 43000 }, { "epoch": 2.4, "learning_rate": 0.0002, "loss": 0.5975, "step": 43100 }, { "epoch": 2.41, "learning_rate": 0.0002, "loss": 0.5751, "step": 43200 }, { "epoch": 2.41, "learning_rate": 0.0002, "loss": 0.5824, "step": 43300 }, { "epoch": 2.42, "learning_rate": 0.0002, "loss": 0.5916, "step": 43400 }, { "epoch": 2.42, "learning_rate": 0.0002, "loss": 0.575, "step": 43500 }, { "epoch": 2.43, "learning_rate": 0.0002, "loss": 0.5894, "step": 43600 }, { "epoch": 2.44, "learning_rate": 0.0002, "loss": 0.5894, "step": 43700 }, { "epoch": 2.44, "learning_rate": 0.0002, "loss": 0.5753, "step": 43800 }, { "epoch": 2.45, "learning_rate": 0.0002, "loss": 0.5857, "step": 43900 }, { "epoch": 2.45, "learning_rate": 0.0002, "loss": 0.5766, "step": 44000 }, { "epoch": 2.46, "learning_rate": 0.0002, "loss": 0.5763, "step": 44100 }, { "epoch": 2.46, "learning_rate": 0.0002, "loss": 0.576, "step": 44200 }, { "epoch": 2.47, "learning_rate": 0.0002, "loss": 0.5874, "step": 44300 }, { "epoch": 2.47, "learning_rate": 0.0002, "loss": 0.5772, "step": 44400 }, { "epoch": 2.48, "learning_rate": 0.0002, "loss": 0.5825, "step": 44500 }, { "epoch": 2.49, "learning_rate": 0.0002, "loss": 0.5925, "step": 44600 }, { "epoch": 2.49, "learning_rate": 0.0002, "loss": 0.5797, "step": 44700 }, { "epoch": 2.5, "learning_rate": 0.0002, "loss": 0.5805, "step": 44800 }, { "epoch": 2.5, "learning_rate": 0.0002, "loss": 0.5747, "step": 44900 }, { "epoch": 2.51, "learning_rate": 0.0002, "loss": 0.591, "step": 45000 }, { "epoch": 2.51, "learning_rate": 0.0002, "loss": 0.5858, "step": 45100 }, { "epoch": 2.52, "learning_rate": 0.0002, "loss": 0.5865, "step": 45200 }, { "epoch": 2.52, "learning_rate": 0.0002, "loss": 0.5849, "step": 45300 }, { "epoch": 2.53, "learning_rate": 0.0002, "loss": 0.5829, "step": 45400 }, { "epoch": 2.54, "learning_rate": 0.0002, "loss": 0.5974, "step": 45500 }, { "epoch": 2.54, "learning_rate": 0.0002, "loss": 0.5808, "step": 45600 }, { "epoch": 2.55, "learning_rate": 0.0002, "loss": 0.582, "step": 45700 }, { "epoch": 2.55, "learning_rate": 0.0002, "loss": 0.5809, "step": 45800 }, { "epoch": 2.56, "learning_rate": 0.0002, "loss": 0.5842, "step": 45900 }, { "epoch": 2.56, "learning_rate": 0.0002, "loss": 0.5924, "step": 46000 }, { "epoch": 2.57, "learning_rate": 0.0002, "loss": 0.5927, "step": 46100 }, { "epoch": 2.57, "learning_rate": 0.0002, "loss": 0.6064, "step": 46200 }, { "epoch": 2.58, "learning_rate": 0.0002, "loss": 0.5891, "step": 46300 }, { "epoch": 2.59, "learning_rate": 0.0002, "loss": 0.5968, "step": 46400 }, { "epoch": 2.59, "learning_rate": 0.0002, "loss": 0.5984, "step": 46500 }, { "epoch": 2.6, "learning_rate": 0.0002, "loss": 0.5988, "step": 46600 }, { "epoch": 2.6, "learning_rate": 0.0002, "loss": 0.5838, "step": 46700 }, { "epoch": 2.61, "learning_rate": 0.0002, "loss": 0.5851, "step": 46800 }, { "epoch": 2.61, "learning_rate": 0.0002, "loss": 0.5889, "step": 46900 }, { "epoch": 2.62, "learning_rate": 0.0002, "loss": 0.5814, "step": 47000 }, { "epoch": 2.62, "learning_rate": 0.0002, "loss": 0.5874, "step": 47100 }, { "epoch": 2.63, "learning_rate": 0.0002, "loss": 0.5747, "step": 47200 }, { "epoch": 2.64, "learning_rate": 0.0002, "loss": 0.5752, "step": 47300 }, { "epoch": 2.64, "learning_rate": 0.0002, "loss": 0.5863, "step": 47400 }, { "epoch": 2.65, "learning_rate": 0.0002, "loss": 0.5961, "step": 47500 }, { "epoch": 2.65, "learning_rate": 0.0002, "loss": 0.5942, "step": 47600 }, { "epoch": 2.66, "learning_rate": 0.0002, "loss": 0.5969, "step": 47700 }, { "epoch": 2.66, "learning_rate": 0.0002, "loss": 0.5686, "step": 47800 }, { "epoch": 2.67, "learning_rate": 0.0002, "loss": 0.5889, "step": 47900 }, { "epoch": 2.67, "learning_rate": 0.0002, "loss": 0.5823, "step": 48000 }, { "epoch": 2.68, "learning_rate": 0.0002, "loss": 0.5915, "step": 48100 }, { "epoch": 2.69, "learning_rate": 0.0002, "loss": 0.5911, "step": 48200 }, { "epoch": 2.69, "learning_rate": 0.0002, "loss": 0.5729, "step": 48300 }, { "epoch": 2.7, "learning_rate": 0.0002, "loss": 0.5869, "step": 48400 }, { "epoch": 2.7, "learning_rate": 0.0002, "loss": 0.5831, "step": 48500 }, { "epoch": 2.71, "learning_rate": 0.0002, "loss": 0.5893, "step": 48600 }, { "epoch": 2.71, "learning_rate": 0.0002, "loss": 0.5912, "step": 48700 }, { "epoch": 2.72, "learning_rate": 0.0002, "loss": 0.5927, "step": 48800 }, { "epoch": 2.72, "learning_rate": 0.0002, "loss": 0.5816, "step": 48900 }, { "epoch": 2.73, "learning_rate": 0.0002, "loss": 0.5849, "step": 49000 }, { "epoch": 2.74, "learning_rate": 0.0002, "loss": 0.5918, "step": 49100 }, { "epoch": 2.74, "learning_rate": 0.0002, "loss": 0.5974, "step": 49200 }, { "epoch": 2.75, "learning_rate": 0.0002, "loss": 0.5887, "step": 49300 }, { "epoch": 2.75, "learning_rate": 0.0002, "loss": 0.5906, "step": 49400 }, { "epoch": 2.76, "learning_rate": 0.0002, "loss": 0.5889, "step": 49500 }, { "epoch": 2.76, "learning_rate": 0.0002, "loss": 0.5868, "step": 49600 }, { "epoch": 2.77, "learning_rate": 0.0002, "loss": 0.5709, "step": 49700 }, { "epoch": 2.78, "learning_rate": 0.0002, "loss": 0.5891, "step": 49800 }, { "epoch": 2.78, "learning_rate": 0.0002, "loss": 0.6005, "step": 49900 }, { "epoch": 2.79, "learning_rate": 0.0002, "loss": 0.6005, "step": 50000 }, { "epoch": 2.79, "eval_gen_len": 18.999476361460204, "eval_loss": 2.6170895099639893, "eval_rouge1": 25.4354, "eval_rouge2": 12.4785, "eval_rougeL": 20.9991, "eval_rougeLsum": 24.0222, "eval_runtime": 478.2398, "eval_samples_per_second": 27.953, "eval_steps_per_second": 1.748, "step": 50000 }, { "epoch": 2.79, "learning_rate": 0.0002, "loss": 0.5748, "step": 50100 }, { "epoch": 2.8, "learning_rate": 0.0002, "loss": 0.6024, "step": 50200 }, { "epoch": 2.8, "learning_rate": 0.0002, "loss": 0.5859, "step": 50300 }, { "epoch": 2.81, "learning_rate": 0.0002, "loss": 0.5833, "step": 50400 }, { "epoch": 2.81, "learning_rate": 0.0002, "loss": 0.5993, "step": 50500 }, { "epoch": 2.82, "learning_rate": 0.0002, "loss": 0.5915, "step": 50600 }, { "epoch": 2.83, "learning_rate": 0.0002, "loss": 0.6065, "step": 50700 }, { "epoch": 2.83, "learning_rate": 0.0002, "loss": 0.588, "step": 50800 }, { "epoch": 2.84, "learning_rate": 0.0002, "loss": 0.5919, "step": 50900 }, { "epoch": 2.84, "learning_rate": 0.0002, "loss": 0.602, "step": 51000 }, { "epoch": 2.85, "learning_rate": 0.0002, "loss": 0.5956, "step": 51100 }, { "epoch": 2.85, "learning_rate": 0.0002, "loss": 0.5902, "step": 51200 }, { "epoch": 2.86, "learning_rate": 0.0002, "loss": 0.5957, "step": 51300 }, { "epoch": 2.86, "learning_rate": 0.0002, "loss": 0.59, "step": 51400 }, { "epoch": 2.87, "learning_rate": 0.0002, "loss": 0.5979, "step": 51500 }, { "epoch": 2.88, "learning_rate": 0.0002, "loss": 0.5866, "step": 51600 }, { "epoch": 2.88, "learning_rate": 0.0002, "loss": 0.6044, "step": 51700 }, { "epoch": 2.89, "learning_rate": 0.0002, "loss": 0.6081, "step": 51800 }, { "epoch": 2.89, "learning_rate": 0.0002, "loss": 0.5913, "step": 51900 }, { "epoch": 2.9, "learning_rate": 0.0002, "loss": 0.5963, "step": 52000 }, { "epoch": 2.9, "learning_rate": 0.0002, "loss": 0.5866, "step": 52100 }, { "epoch": 2.91, "learning_rate": 0.0002, "loss": 0.5958, "step": 52200 }, { "epoch": 2.91, "learning_rate": 0.0002, "loss": 0.5928, "step": 52300 }, { "epoch": 2.92, "learning_rate": 0.0002, "loss": 0.598, "step": 52400 }, { "epoch": 2.93, "learning_rate": 0.0002, "loss": 0.5956, "step": 52500 }, { "epoch": 2.93, "learning_rate": 0.0002, "loss": 0.5936, "step": 52600 }, { "epoch": 2.94, "learning_rate": 0.0002, "loss": 0.5993, "step": 52700 }, { "epoch": 2.94, "learning_rate": 0.0002, "loss": 0.595, "step": 52800 }, { "epoch": 2.95, "learning_rate": 0.0002, "loss": 0.5832, "step": 52900 }, { "epoch": 2.95, "learning_rate": 0.0002, "loss": 0.5852, "step": 53000 }, { "epoch": 2.96, "learning_rate": 0.0002, "loss": 0.6021, "step": 53100 }, { "epoch": 2.96, "learning_rate": 0.0002, "loss": 0.5775, "step": 53200 }, { "epoch": 2.97, "learning_rate": 0.0002, "loss": 0.5884, "step": 53300 }, { "epoch": 2.98, "learning_rate": 0.0002, "loss": 0.5869, "step": 53400 }, { "epoch": 2.98, "learning_rate": 0.0002, "loss": 0.591, "step": 53500 }, { "epoch": 2.99, "learning_rate": 0.0002, "loss": 0.6016, "step": 53600 }, { "epoch": 2.99, "learning_rate": 0.0002, "loss": 0.5903, "step": 53700 }, { "epoch": 3.0, "learning_rate": 0.0002, "loss": 0.6011, "step": 53800 }, { "epoch": 3.0, "learning_rate": 0.0002, "loss": 0.5817, "step": 53900 }, { "epoch": 3.01, "learning_rate": 0.0002, "loss": 0.5661, "step": 54000 }, { "epoch": 3.01, "learning_rate": 0.0002, "loss": 0.5718, "step": 54100 }, { "epoch": 3.02, "learning_rate": 0.0002, "loss": 0.5666, "step": 54200 }, { "epoch": 3.03, "learning_rate": 0.0002, "loss": 0.5652, "step": 54300 }, { "epoch": 3.03, "learning_rate": 0.0002, "loss": 0.5642, "step": 54400 }, { "epoch": 3.04, "learning_rate": 0.0002, "loss": 0.5686, "step": 54500 }, { "epoch": 3.04, "learning_rate": 0.0002, "loss": 0.5641, "step": 54600 }, { "epoch": 3.05, "learning_rate": 0.0002, "loss": 0.5638, "step": 54700 }, { "epoch": 3.05, "learning_rate": 0.0002, "loss": 0.5696, "step": 54800 }, { "epoch": 3.06, "learning_rate": 0.0002, "loss": 0.5741, "step": 54900 }, { "epoch": 3.06, "learning_rate": 0.0002, "loss": 0.5615, "step": 55000 }, { "epoch": 3.07, "learning_rate": 0.0002, "loss": 0.5784, "step": 55100 }, { "epoch": 3.08, "learning_rate": 0.0002, "loss": 0.5653, "step": 55200 }, { "epoch": 3.08, "learning_rate": 0.0002, "loss": 0.5747, "step": 55300 }, { "epoch": 3.09, "learning_rate": 0.0002, "loss": 0.5537, "step": 55400 }, { "epoch": 3.09, "learning_rate": 0.0002, "loss": 0.5715, "step": 55500 }, { "epoch": 3.1, "learning_rate": 0.0002, "loss": 0.5653, "step": 55600 }, { "epoch": 3.1, "learning_rate": 0.0002, "loss": 0.5816, "step": 55700 }, { "epoch": 3.11, "learning_rate": 0.0002, "loss": 0.5714, "step": 55800 }, { "epoch": 3.12, "learning_rate": 0.0002, "loss": 0.5657, "step": 55900 }, { "epoch": 3.12, "learning_rate": 0.0002, "loss": 0.5628, "step": 56000 }, { "epoch": 3.13, "learning_rate": 0.0002, "loss": 0.5718, "step": 56100 }, { "epoch": 3.13, "learning_rate": 0.0002, "loss": 0.5609, "step": 56200 }, { "epoch": 3.14, "learning_rate": 0.0002, "loss": 0.567, "step": 56300 }, { "epoch": 3.14, "learning_rate": 0.0002, "loss": 0.5624, "step": 56400 }, { "epoch": 3.15, "learning_rate": 0.0002, "loss": 0.5781, "step": 56500 }, { "epoch": 3.15, "learning_rate": 0.0002, "loss": 0.5673, "step": 56600 }, { "epoch": 3.16, "learning_rate": 0.0002, "loss": 0.5561, "step": 56700 }, { "epoch": 3.17, "learning_rate": 0.0002, "loss": 0.5635, "step": 56800 }, { "epoch": 3.17, "learning_rate": 0.0002, "loss": 0.5719, "step": 56900 }, { "epoch": 3.18, "learning_rate": 0.0002, "loss": 0.5809, "step": 57000 }, { "epoch": 3.18, "learning_rate": 0.0002, "loss": 0.577, "step": 57100 }, { "epoch": 3.19, "learning_rate": 0.0002, "loss": 0.5801, "step": 57200 }, { "epoch": 3.19, "learning_rate": 0.0002, "loss": 0.5742, "step": 57300 }, { "epoch": 3.2, "learning_rate": 0.0002, "loss": 0.5615, "step": 57400 }, { "epoch": 3.2, "learning_rate": 0.0002, "loss": 0.5636, "step": 57500 }, { "epoch": 3.21, "learning_rate": 0.0002, "loss": 0.5679, "step": 57600 }, { "epoch": 3.22, "learning_rate": 0.0002, "loss": 0.5935, "step": 57700 }, { "epoch": 3.22, "learning_rate": 0.0002, "loss": 0.5807, "step": 57800 }, { "epoch": 3.23, "learning_rate": 0.0002, "loss": 0.564, "step": 57900 }, { "epoch": 3.23, "learning_rate": 0.0002, "loss": 0.5651, "step": 58000 }, { "epoch": 3.24, "learning_rate": 0.0002, "loss": 0.5757, "step": 58100 }, { "epoch": 3.24, "learning_rate": 0.0002, "loss": 0.5674, "step": 58200 }, { "epoch": 3.25, "learning_rate": 0.0002, "loss": 0.576, "step": 58300 }, { "epoch": 3.25, "learning_rate": 0.0002, "loss": 0.5661, "step": 58400 }, { "epoch": 3.26, "learning_rate": 0.0002, "loss": 0.5748, "step": 58500 }, { "epoch": 3.27, "learning_rate": 0.0002, "loss": 0.5732, "step": 58600 }, { "epoch": 3.27, "learning_rate": 0.0002, "loss": 0.5791, "step": 58700 }, { "epoch": 3.28, "learning_rate": 0.0002, "loss": 0.5658, "step": 58800 }, { "epoch": 3.28, "learning_rate": 0.0002, "loss": 0.577, "step": 58900 }, { "epoch": 3.29, "learning_rate": 0.0002, "loss": 0.5809, "step": 59000 }, { "epoch": 3.29, "learning_rate": 0.0002, "loss": 0.5807, "step": 59100 }, { "epoch": 3.3, "learning_rate": 0.0002, "loss": 0.5686, "step": 59200 }, { "epoch": 3.3, "learning_rate": 0.0002, "loss": 0.5679, "step": 59300 }, { "epoch": 3.31, "learning_rate": 0.0002, "loss": 0.5861, "step": 59400 }, { "epoch": 3.32, "learning_rate": 0.0002, "loss": 0.5753, "step": 59500 }, { "epoch": 3.32, "learning_rate": 0.0002, "loss": 0.5636, "step": 59600 }, { "epoch": 3.33, "learning_rate": 0.0002, "loss": 0.5711, "step": 59700 }, { "epoch": 3.33, "learning_rate": 0.0002, "loss": 0.5762, "step": 59800 }, { "epoch": 3.34, "learning_rate": 0.0002, "loss": 0.5712, "step": 59900 }, { "epoch": 3.34, "learning_rate": 0.0002, "loss": 0.5757, "step": 60000 }, { "epoch": 3.34, "eval_gen_len": 18.999925194494313, "eval_loss": 2.659139394760132, "eval_rouge1": 25.3502, "eval_rouge2": 12.4437, "eval_rougeL": 20.9208, "eval_rougeLsum": 23.9305, "eval_runtime": 475.715, "eval_samples_per_second": 28.101, "eval_steps_per_second": 1.757, "step": 60000 }, { "epoch": 3.35, "learning_rate": 0.0002, "loss": 0.5747, "step": 60100 }, { "epoch": 3.35, "learning_rate": 0.0002, "loss": 0.5769, "step": 60200 }, { "epoch": 3.36, "learning_rate": 0.0002, "loss": 0.5794, "step": 60300 }, { "epoch": 3.37, "learning_rate": 0.0002, "loss": 0.581, "step": 60400 }, { "epoch": 3.37, "learning_rate": 0.0002, "loss": 0.5785, "step": 60500 }, { "epoch": 3.38, "learning_rate": 0.0002, "loss": 0.5871, "step": 60600 }, { "epoch": 3.38, "learning_rate": 0.0002, "loss": 0.5682, "step": 60700 }, { "epoch": 3.39, "learning_rate": 0.0002, "loss": 0.5752, "step": 60800 }, { "epoch": 3.39, "learning_rate": 0.0002, "loss": 0.5789, "step": 60900 }, { "epoch": 3.4, "learning_rate": 0.0002, "loss": 0.5722, "step": 61000 }, { "epoch": 3.4, "learning_rate": 0.0002, "loss": 0.5862, "step": 61100 }, { "epoch": 3.41, "learning_rate": 0.0002, "loss": 0.5649, "step": 61200 }, { "epoch": 3.42, "learning_rate": 0.0002, "loss": 0.5761, "step": 61300 }, { "epoch": 3.42, "learning_rate": 0.0002, "loss": 0.5655, "step": 61400 }, { "epoch": 3.43, "learning_rate": 0.0002, "loss": 0.5642, "step": 61500 }, { "epoch": 3.43, "learning_rate": 0.0002, "loss": 0.573, "step": 61600 }, { "epoch": 3.44, "learning_rate": 0.0002, "loss": 0.5891, "step": 61700 }, { "epoch": 3.44, "learning_rate": 0.0002, "loss": 0.579, "step": 61800 }, { "epoch": 3.45, "learning_rate": 0.0002, "loss": 0.5825, "step": 61900 }, { "epoch": 3.46, "learning_rate": 0.0002, "loss": 0.5834, "step": 62000 }, { "epoch": 3.46, "learning_rate": 0.0002, "loss": 0.5807, "step": 62100 }, { "epoch": 3.47, "learning_rate": 0.0002, "loss": 0.5752, "step": 62200 }, { "epoch": 3.47, "learning_rate": 0.0002, "loss": 0.5666, "step": 62300 }, { "epoch": 3.48, "learning_rate": 0.0002, "loss": 0.5886, "step": 62400 }, { "epoch": 3.48, "learning_rate": 0.0002, "loss": 0.5913, "step": 62500 }, { "epoch": 3.49, "learning_rate": 0.0002, "loss": 0.5877, "step": 62600 }, { "epoch": 3.49, "learning_rate": 0.0002, "loss": 0.5842, "step": 62700 }, { "epoch": 3.5, "learning_rate": 0.0002, "loss": 0.5786, "step": 62800 }, { "epoch": 3.51, "learning_rate": 0.0002, "loss": 0.5848, "step": 62900 }, { "epoch": 3.51, "learning_rate": 0.0002, "loss": 0.5735, "step": 63000 }, { "epoch": 3.52, "learning_rate": 0.0002, "loss": 0.5793, "step": 63100 }, { "epoch": 3.52, "learning_rate": 0.0002, "loss": 0.5799, "step": 63200 }, { "epoch": 3.53, "learning_rate": 0.0002, "loss": 0.5792, "step": 63300 }, { "epoch": 3.53, "learning_rate": 0.0002, "loss": 0.5679, "step": 63400 }, { "epoch": 3.54, "learning_rate": 0.0002, "loss": 0.579, "step": 63500 }, { "epoch": 3.54, "learning_rate": 0.0002, "loss": 0.5686, "step": 63600 }, { "epoch": 3.55, "learning_rate": 0.0002, "loss": 0.5718, "step": 63700 }, { "epoch": 3.56, "learning_rate": 0.0002, "loss": 0.5702, "step": 63800 }, { "epoch": 3.56, "learning_rate": 0.0002, "loss": 0.5801, "step": 63900 }, { "epoch": 3.57, "learning_rate": 0.0002, "loss": 0.5804, "step": 64000 }, { "epoch": 3.57, "learning_rate": 0.0002, "loss": 0.568, "step": 64100 }, { "epoch": 3.58, "learning_rate": 0.0002, "loss": 0.5687, "step": 64200 }, { "epoch": 3.58, "learning_rate": 0.0002, "loss": 0.5794, "step": 64300 }, { "epoch": 3.59, "learning_rate": 0.0002, "loss": 0.5774, "step": 64400 }, { "epoch": 3.59, "learning_rate": 0.0002, "loss": 0.5788, "step": 64500 }, { "epoch": 3.6, "learning_rate": 0.0002, "loss": 0.596, "step": 64600 }, { "epoch": 3.61, "learning_rate": 0.0002, "loss": 0.5814, "step": 64700 }, { "epoch": 3.61, "learning_rate": 0.0002, "loss": 0.5777, "step": 64800 }, { "epoch": 3.62, "learning_rate": 0.0002, "loss": 0.5811, "step": 64900 }, { "epoch": 3.62, "learning_rate": 0.0002, "loss": 0.584, "step": 65000 }, { "epoch": 3.63, "learning_rate": 0.0002, "loss": 0.5806, "step": 65100 }, { "epoch": 3.63, "learning_rate": 0.0002, "loss": 0.5709, "step": 65200 }, { "epoch": 3.64, "learning_rate": 0.0002, "loss": 0.5723, "step": 65300 }, { "epoch": 3.64, "learning_rate": 0.0002, "loss": 0.5644, "step": 65400 }, { "epoch": 3.65, "learning_rate": 0.0002, "loss": 0.579, "step": 65500 }, { "epoch": 3.66, "learning_rate": 0.0002, "loss": 0.574, "step": 65600 }, { "epoch": 3.66, "learning_rate": 0.0002, "loss": 0.5685, "step": 65700 }, { "epoch": 3.67, "learning_rate": 0.0002, "loss": 0.5842, "step": 65800 }, { "epoch": 3.67, "learning_rate": 0.0002, "loss": 0.585, "step": 65900 }, { "epoch": 3.68, "learning_rate": 0.0002, "loss": 0.5808, "step": 66000 }, { "epoch": 3.68, "learning_rate": 0.0002, "loss": 0.5851, "step": 66100 }, { "epoch": 3.69, "learning_rate": 0.0002, "loss": 0.5732, "step": 66200 }, { "epoch": 3.69, "learning_rate": 0.0002, "loss": 0.5807, "step": 66300 }, { "epoch": 3.7, "learning_rate": 0.0002, "loss": 0.5777, "step": 66400 }, { "epoch": 3.71, "learning_rate": 0.0002, "loss": 0.5786, "step": 66500 }, { "epoch": 3.71, "learning_rate": 0.0002, "loss": 0.5753, "step": 66600 }, { "epoch": 3.72, "learning_rate": 0.0002, "loss": 0.5784, "step": 66700 }, { "epoch": 3.72, "learning_rate": 0.0002, "loss": 0.5821, "step": 66800 }, { "epoch": 3.73, "learning_rate": 0.0002, "loss": 0.5891, "step": 66900 }, { "epoch": 3.73, "learning_rate": 0.0002, "loss": 0.5765, "step": 67000 }, { "epoch": 3.74, "learning_rate": 0.0002, "loss": 0.58, "step": 67100 }, { "epoch": 3.74, "learning_rate": 0.0002, "loss": 0.5853, "step": 67200 }, { "epoch": 3.75, "learning_rate": 0.0002, "loss": 0.5872, "step": 67300 }, { "epoch": 3.76, "learning_rate": 0.0002, "loss": 0.5754, "step": 67400 }, { "epoch": 3.76, "learning_rate": 0.0002, "loss": 0.5762, "step": 67500 }, { "epoch": 3.77, "learning_rate": 0.0002, "loss": 0.5862, "step": 67600 }, { "epoch": 3.77, "learning_rate": 0.0002, "loss": 0.5955, "step": 67700 }, { "epoch": 3.78, "learning_rate": 0.0002, "loss": 0.5846, "step": 67800 }, { "epoch": 3.78, "learning_rate": 0.0002, "loss": 0.5797, "step": 67900 }, { "epoch": 3.79, "learning_rate": 0.0002, "loss": 0.5913, "step": 68000 }, { "epoch": 3.79, "learning_rate": 0.0002, "loss": 0.5758, "step": 68100 }, { "epoch": 3.8, "learning_rate": 0.0002, "loss": 0.586, "step": 68200 }, { "epoch": 3.81, "learning_rate": 0.0002, "loss": 0.5766, "step": 68300 }, { "epoch": 3.81, "learning_rate": 0.0002, "loss": 0.5887, "step": 68400 }, { "epoch": 3.82, "learning_rate": 0.0002, "loss": 0.5903, "step": 68500 }, { "epoch": 3.82, "learning_rate": 0.0002, "loss": 0.5825, "step": 68600 }, { "epoch": 3.83, "learning_rate": 0.0002, "loss": 0.5884, "step": 68700 }, { "epoch": 3.83, "learning_rate": 0.0002, "loss": 0.5784, "step": 68800 }, { "epoch": 3.84, "learning_rate": 0.0002, "loss": 0.5985, "step": 68900 }, { "epoch": 3.85, "learning_rate": 0.0002, "loss": 0.6003, "step": 69000 }, { "epoch": 3.85, "learning_rate": 0.0002, "loss": 0.5723, "step": 69100 }, { "epoch": 3.86, "learning_rate": 0.0002, "loss": 0.5924, "step": 69200 }, { "epoch": 3.86, "learning_rate": 0.0002, "loss": 0.5921, "step": 69300 }, { "epoch": 3.87, "learning_rate": 0.0002, "loss": 0.5882, "step": 69400 }, { "epoch": 3.87, "learning_rate": 0.0002, "loss": 0.5845, "step": 69500 }, { "epoch": 3.88, "learning_rate": 0.0002, "loss": 0.5883, "step": 69600 }, { "epoch": 3.88, "learning_rate": 0.0002, "loss": 0.5887, "step": 69700 }, { "epoch": 3.89, "learning_rate": 0.0002, "loss": 0.5837, "step": 69800 }, { "epoch": 3.9, "learning_rate": 0.0002, "loss": 0.5772, "step": 69900 }, { "epoch": 3.9, "learning_rate": 0.0002, "loss": 0.5861, "step": 70000 }, { "epoch": 3.9, "eval_gen_len": 18.99985038898863, "eval_loss": 2.6559603214263916, "eval_rouge1": 25.3295, "eval_rouge2": 12.3962, "eval_rougeL": 20.8838, "eval_rougeLsum": 23.8838, "eval_runtime": 478.6986, "eval_samples_per_second": 27.926, "eval_steps_per_second": 1.746, "step": 70000 }, { "epoch": 3.91, "learning_rate": 0.0002, "loss": 0.5741, "step": 70100 }, { "epoch": 3.91, "learning_rate": 0.0002, "loss": 0.5985, "step": 70200 }, { "epoch": 3.92, "learning_rate": 0.0002, "loss": 0.5858, "step": 70300 }, { "epoch": 3.92, "learning_rate": 0.0002, "loss": 0.5875, "step": 70400 }, { "epoch": 3.93, "learning_rate": 0.0002, "loss": 0.5933, "step": 70500 }, { "epoch": 3.93, "learning_rate": 0.0002, "loss": 0.5907, "step": 70600 }, { "epoch": 3.94, "learning_rate": 0.0002, "loss": 0.5862, "step": 70700 }, { "epoch": 3.95, "learning_rate": 0.0002, "loss": 0.581, "step": 70800 }, { "epoch": 3.95, "learning_rate": 0.0002, "loss": 0.5837, "step": 70900 }, { "epoch": 3.96, "learning_rate": 0.0002, "loss": 0.5949, "step": 71000 }, { "epoch": 3.96, "learning_rate": 0.0002, "loss": 0.5737, "step": 71100 }, { "epoch": 3.97, "learning_rate": 0.0002, "loss": 0.5864, "step": 71200 }, { "epoch": 3.97, "learning_rate": 0.0002, "loss": 0.5771, "step": 71300 }, { "epoch": 3.98, "learning_rate": 0.0002, "loss": 0.5926, "step": 71400 }, { "epoch": 3.98, "learning_rate": 0.0002, "loss": 0.572, "step": 71500 }, { "epoch": 3.99, "learning_rate": 0.0002, "loss": 0.5779, "step": 71600 }, { "epoch": 4.0, "learning_rate": 0.0002, "loss": 0.5775, "step": 71700 }, { "epoch": 4.0, "learning_rate": 0.0002, "loss": 0.5602, "step": 71800 }, { "epoch": 4.01, "learning_rate": 0.0002, "loss": 0.5671, "step": 71900 }, { "epoch": 4.01, "learning_rate": 0.0002, "loss": 0.5618, "step": 72000 }, { "epoch": 4.02, "learning_rate": 0.0002, "loss": 0.5457, "step": 72100 }, { "epoch": 4.02, "learning_rate": 0.0002, "loss": 0.5569, "step": 72200 }, { "epoch": 4.03, "learning_rate": 0.0002, "loss": 0.5486, "step": 72300 }, { "epoch": 4.03, "learning_rate": 0.0002, "loss": 0.563, "step": 72400 }, { "epoch": 4.04, "learning_rate": 0.0002, "loss": 0.562, "step": 72500 }, { "epoch": 4.05, "learning_rate": 0.0002, "loss": 0.5526, "step": 72600 }, { "epoch": 4.05, "learning_rate": 0.0002, "loss": 0.5637, "step": 72700 }, { "epoch": 4.06, "learning_rate": 0.0002, "loss": 0.5603, "step": 72800 }, { "epoch": 4.06, "learning_rate": 0.0002, "loss": 0.5511, "step": 72900 }, { "epoch": 4.07, "learning_rate": 0.0002, "loss": 0.5513, "step": 73000 }, { "epoch": 4.07, "learning_rate": 0.0002, "loss": 0.563, "step": 73100 }, { "epoch": 4.08, "learning_rate": 0.0002, "loss": 0.5532, "step": 73200 }, { "epoch": 4.08, "learning_rate": 0.0002, "loss": 0.5475, "step": 73300 }, { "epoch": 4.09, "learning_rate": 0.0002, "loss": 0.5603, "step": 73400 }, { "epoch": 4.1, "learning_rate": 0.0002, "loss": 0.5531, "step": 73500 }, { "epoch": 4.1, "learning_rate": 0.0002, "loss": 0.5607, "step": 73600 }, { "epoch": 4.11, "learning_rate": 0.0002, "loss": 0.5731, "step": 73700 }, { "epoch": 4.11, "learning_rate": 0.0002, "loss": 0.575, "step": 73800 }, { "epoch": 4.12, "learning_rate": 0.0002, "loss": 0.5583, "step": 73900 }, { "epoch": 4.12, "learning_rate": 0.0002, "loss": 0.5499, "step": 74000 }, { "epoch": 4.13, "learning_rate": 0.0002, "loss": 0.5669, "step": 74100 }, { "epoch": 4.13, "learning_rate": 0.0002, "loss": 0.5584, "step": 74200 }, { "epoch": 4.14, "learning_rate": 0.0002, "loss": 0.5679, "step": 74300 }, { "epoch": 4.15, "learning_rate": 0.0002, "loss": 0.5709, "step": 74400 }, { "epoch": 4.15, "learning_rate": 0.0002, "loss": 0.5598, "step": 74500 }, { "epoch": 4.16, "learning_rate": 0.0002, "loss": 0.5663, "step": 74600 }, { "epoch": 4.16, "learning_rate": 0.0002, "loss": 0.5663, "step": 74700 }, { "epoch": 4.17, "learning_rate": 0.0002, "loss": 0.5692, "step": 74800 }, { "epoch": 4.17, "learning_rate": 0.0002, "loss": 0.5686, "step": 74900 }, { "epoch": 4.18, "learning_rate": 0.0002, "loss": 0.5649, "step": 75000 }, { "epoch": 4.19, "learning_rate": 0.0002, "loss": 0.5628, "step": 75100 }, { "epoch": 4.19, "learning_rate": 0.0002, "loss": 0.5688, "step": 75200 }, { "epoch": 4.2, "learning_rate": 0.0002, "loss": 0.5705, "step": 75300 }, { "epoch": 4.2, "learning_rate": 0.0002, "loss": 0.549, "step": 75400 }, { "epoch": 4.21, "learning_rate": 0.0002, "loss": 0.5568, "step": 75500 }, { "epoch": 4.21, "learning_rate": 0.0002, "loss": 0.5649, "step": 75600 }, { "epoch": 4.22, "learning_rate": 0.0002, "loss": 0.5766, "step": 75700 }, { "epoch": 4.22, "learning_rate": 0.0002, "loss": 0.5709, "step": 75800 }, { "epoch": 4.23, "learning_rate": 0.0002, "loss": 0.5595, "step": 75900 }, { "epoch": 4.24, "learning_rate": 0.0002, "loss": 0.5636, "step": 76000 }, { "epoch": 4.24, "learning_rate": 0.0002, "loss": 0.5823, "step": 76100 }, { "epoch": 4.25, "learning_rate": 0.0002, "loss": 0.5677, "step": 76200 }, { "epoch": 4.25, "learning_rate": 0.0002, "loss": 0.5667, "step": 76300 }, { "epoch": 4.26, "learning_rate": 0.0002, "loss": 0.5578, "step": 76400 }, { "epoch": 4.26, "learning_rate": 0.0002, "loss": 0.5755, "step": 76500 }, { "epoch": 4.27, "learning_rate": 0.0002, "loss": 0.564, "step": 76600 }, { "epoch": 4.27, "learning_rate": 0.0002, "loss": 0.5604, "step": 76700 }, { "epoch": 4.28, "learning_rate": 0.0002, "loss": 0.5689, "step": 76800 }, { "epoch": 4.29, "learning_rate": 0.0002, "loss": 0.5689, "step": 76900 }, { "epoch": 4.29, "learning_rate": 0.0002, "loss": 0.5676, "step": 77000 }, { "epoch": 4.3, "learning_rate": 0.0002, "loss": 0.5589, "step": 77100 }, { "epoch": 4.3, "learning_rate": 0.0002, "loss": 0.5697, "step": 77200 }, { "epoch": 4.31, "learning_rate": 0.0002, "loss": 0.5785, "step": 77300 }, { "epoch": 4.31, "learning_rate": 0.0002, "loss": 0.5593, "step": 77400 }, { "epoch": 4.32, "learning_rate": 0.0002, "loss": 0.5769, "step": 77500 }, { "epoch": 4.32, "learning_rate": 0.0002, "loss": 0.5705, "step": 77600 }, { "epoch": 4.33, "learning_rate": 0.0002, "loss": 0.5497, "step": 77700 }, { "epoch": 4.34, "learning_rate": 0.0002, "loss": 0.5697, "step": 77800 }, { "epoch": 4.34, "learning_rate": 0.0002, "loss": 0.5683, "step": 77900 }, { "epoch": 4.35, "learning_rate": 0.0002, "loss": 0.5571, "step": 78000 }, { "epoch": 4.35, "learning_rate": 0.0002, "loss": 0.5651, "step": 78100 }, { "epoch": 4.36, "learning_rate": 0.0002, "loss": 0.5605, "step": 78200 }, { "epoch": 4.36, "learning_rate": 0.0002, "loss": 0.5736, "step": 78300 }, { "epoch": 4.37, "learning_rate": 0.0002, "loss": 0.5736, "step": 78400 }, { "epoch": 4.37, "learning_rate": 0.0002, "loss": 0.5658, "step": 78500 }, { "epoch": 4.38, "learning_rate": 0.0002, "loss": 0.5583, "step": 78600 }, { "epoch": 4.39, "learning_rate": 0.0002, "loss": 0.5612, "step": 78700 }, { "epoch": 4.39, "learning_rate": 0.0002, "loss": 0.5549, "step": 78800 }, { "epoch": 4.4, "learning_rate": 0.0002, "loss": 0.5716, "step": 78900 }, { "epoch": 4.4, "learning_rate": 0.0002, "loss": 0.5598, "step": 79000 }, { "epoch": 4.41, "learning_rate": 0.0002, "loss": 0.5757, "step": 79100 }, { "epoch": 4.41, "learning_rate": 0.0002, "loss": 0.5601, "step": 79200 }, { "epoch": 4.42, "learning_rate": 0.0002, "loss": 0.5804, "step": 79300 }, { "epoch": 4.42, "learning_rate": 0.0002, "loss": 0.572, "step": 79400 }, { "epoch": 4.43, "learning_rate": 0.0002, "loss": 0.588, "step": 79500 }, { "epoch": 4.44, "learning_rate": 0.0002, "loss": 0.5641, "step": 79600 }, { "epoch": 4.44, "learning_rate": 0.0002, "loss": 0.5719, "step": 79700 }, { "epoch": 4.45, "learning_rate": 0.0002, "loss": 0.5689, "step": 79800 }, { "epoch": 4.45, "learning_rate": 0.0002, "loss": 0.5786, "step": 79900 }, { "epoch": 4.46, "learning_rate": 0.0002, "loss": 0.5615, "step": 80000 }, { "epoch": 4.46, "eval_gen_len": 18.99985038898863, "eval_loss": 2.6867384910583496, "eval_rouge1": 25.3508, "eval_rouge2": 12.4212, "eval_rougeL": 20.9341, "eval_rougeLsum": 23.949, "eval_runtime": 474.7006, "eval_samples_per_second": 28.161, "eval_steps_per_second": 1.761, "step": 80000 }, { "epoch": 4.46, "learning_rate": 0.0002, "loss": 0.5773, "step": 80100 }, { "epoch": 4.47, "learning_rate": 0.0002, "loss": 0.5652, "step": 80200 }, { "epoch": 4.47, "learning_rate": 0.0002, "loss": 0.575, "step": 80300 }, { "epoch": 4.48, "learning_rate": 0.0002, "loss": 0.5599, "step": 80400 }, { "epoch": 4.49, "learning_rate": 0.0002, "loss": 0.5621, "step": 80500 }, { "epoch": 4.49, "learning_rate": 0.0002, "loss": 0.5787, "step": 80600 }, { "epoch": 4.5, "learning_rate": 0.0002, "loss": 0.573, "step": 80700 }, { "epoch": 4.5, "learning_rate": 0.0002, "loss": 0.5698, "step": 80800 }, { "epoch": 4.51, "learning_rate": 0.0002, "loss": 0.5748, "step": 80900 }, { "epoch": 4.51, "learning_rate": 0.0002, "loss": 0.577, "step": 81000 }, { "epoch": 4.52, "learning_rate": 0.0002, "loss": 0.5789, "step": 81100 }, { "epoch": 4.52, "learning_rate": 0.0002, "loss": 0.5693, "step": 81200 }, { "epoch": 4.53, "learning_rate": 0.0002, "loss": 0.567, "step": 81300 }, { "epoch": 4.54, "learning_rate": 0.0002, "loss": 0.5708, "step": 81400 }, { "epoch": 4.54, "learning_rate": 0.0002, "loss": 0.566, "step": 81500 }, { "epoch": 4.55, "learning_rate": 0.0002, "loss": 0.5643, "step": 81600 }, { "epoch": 4.55, "learning_rate": 0.0002, "loss": 0.5774, "step": 81700 }, { "epoch": 4.56, "learning_rate": 0.0002, "loss": 0.5571, "step": 81800 }, { "epoch": 4.56, "learning_rate": 0.0002, "loss": 0.5739, "step": 81900 }, { "epoch": 4.57, "learning_rate": 0.0002, "loss": 0.5811, "step": 82000 }, { "epoch": 4.58, "learning_rate": 0.0002, "loss": 0.5809, "step": 82100 }, { "epoch": 4.58, "learning_rate": 0.0002, "loss": 0.5672, "step": 82200 }, { "epoch": 4.59, "learning_rate": 0.0002, "loss": 0.5883, "step": 82300 }, { "epoch": 4.59, "learning_rate": 0.0002, "loss": 0.5576, "step": 82400 }, { "epoch": 4.6, "learning_rate": 0.0002, "loss": 0.5705, "step": 82500 }, { "epoch": 4.6, "learning_rate": 0.0002, "loss": 0.5699, "step": 82600 }, { "epoch": 4.61, "learning_rate": 0.0002, "loss": 0.5786, "step": 82700 }, { "epoch": 4.61, "learning_rate": 0.0002, "loss": 0.5674, "step": 82800 }, { "epoch": 4.62, "learning_rate": 0.0002, "loss": 0.5753, "step": 82900 }, { "epoch": 4.63, "learning_rate": 0.0002, "loss": 0.5733, "step": 83000 }, { "epoch": 4.63, "learning_rate": 0.0002, "loss": 0.5706, "step": 83100 }, { "epoch": 4.64, "learning_rate": 0.0002, "loss": 0.5635, "step": 83200 }, { "epoch": 4.64, "learning_rate": 0.0002, "loss": 0.5721, "step": 83300 }, { "epoch": 4.65, "learning_rate": 0.0002, "loss": 0.5665, "step": 83400 }, { "epoch": 4.65, "learning_rate": 0.0002, "loss": 0.5839, "step": 83500 }, { "epoch": 4.66, "learning_rate": 0.0002, "loss": 0.5831, "step": 83600 }, { "epoch": 4.66, "learning_rate": 0.0002, "loss": 0.5699, "step": 83700 }, { "epoch": 4.67, "learning_rate": 0.0002, "loss": 0.5777, "step": 83800 }, { "epoch": 4.68, "learning_rate": 0.0002, "loss": 0.5809, "step": 83900 }, { "epoch": 4.68, "learning_rate": 0.0002, "loss": 0.5846, "step": 84000 }, { "epoch": 4.69, "learning_rate": 0.0002, "loss": 0.5711, "step": 84100 }, { "epoch": 4.69, "learning_rate": 0.0002, "loss": 0.5668, "step": 84200 }, { "epoch": 4.7, "learning_rate": 0.0002, "loss": 0.5747, "step": 84300 }, { "epoch": 4.7, "learning_rate": 0.0002, "loss": 0.5752, "step": 84400 }, { "epoch": 4.71, "learning_rate": 0.0002, "loss": 0.5707, "step": 84500 }, { "epoch": 4.71, "learning_rate": 0.0002, "loss": 0.5777, "step": 84600 }, { "epoch": 4.72, "learning_rate": 0.0002, "loss": 0.5725, "step": 84700 }, { "epoch": 4.73, "learning_rate": 0.0002, "loss": 0.5749, "step": 84800 }, { "epoch": 4.73, "learning_rate": 0.0002, "loss": 0.5892, "step": 84900 }, { "epoch": 4.74, "learning_rate": 0.0002, "loss": 0.5735, "step": 85000 }, { "epoch": 4.74, "learning_rate": 0.0002, "loss": 0.5753, "step": 85100 }, { "epoch": 4.75, "learning_rate": 0.0002, "loss": 0.5699, "step": 85200 }, { "epoch": 4.75, "learning_rate": 0.0002, "loss": 0.5702, "step": 85300 }, { "epoch": 4.76, "learning_rate": 0.0002, "loss": 0.576, "step": 85400 }, { "epoch": 4.76, "learning_rate": 0.0002, "loss": 0.5835, "step": 85500 }, { "epoch": 4.77, "learning_rate": 0.0002, "loss": 0.5764, "step": 85600 }, { "epoch": 4.78, "learning_rate": 0.0002, "loss": 0.5746, "step": 85700 }, { "epoch": 4.78, "learning_rate": 0.0002, "loss": 0.5814, "step": 85800 }, { "epoch": 4.79, "learning_rate": 0.0002, "loss": 0.5689, "step": 85900 }, { "epoch": 4.79, "learning_rate": 0.0002, "loss": 0.5669, "step": 86000 }, { "epoch": 4.8, "learning_rate": 0.0002, "loss": 0.5775, "step": 86100 }, { "epoch": 4.8, "learning_rate": 0.0002, "loss": 0.5753, "step": 86200 }, { "epoch": 4.81, "learning_rate": 0.0002, "loss": 0.5978, "step": 86300 }, { "epoch": 4.81, "learning_rate": 0.0002, "loss": 0.5812, "step": 86400 }, { "epoch": 4.82, "learning_rate": 0.0002, "loss": 0.5726, "step": 86500 }, { "epoch": 4.83, "learning_rate": 0.0002, "loss": 0.5865, "step": 86600 }, { "epoch": 4.83, "learning_rate": 0.0002, "loss": 0.5809, "step": 86700 }, { "epoch": 4.84, "learning_rate": 0.0002, "loss": 0.5846, "step": 86800 }, { "epoch": 4.84, "learning_rate": 0.0002, "loss": 0.5694, "step": 86900 }, { "epoch": 4.85, "learning_rate": 0.0002, "loss": 0.5694, "step": 87000 }, { "epoch": 4.85, "learning_rate": 0.0002, "loss": 0.581, "step": 87100 }, { "epoch": 4.86, "learning_rate": 0.0002, "loss": 0.5801, "step": 87200 }, { "epoch": 4.86, "learning_rate": 0.0002, "loss": 0.596, "step": 87300 }, { "epoch": 4.87, "learning_rate": 0.0002, "loss": 0.5683, "step": 87400 }, { "epoch": 4.88, "learning_rate": 0.0002, "loss": 0.5771, "step": 87500 }, { "epoch": 4.88, "learning_rate": 0.0002, "loss": 0.5772, "step": 87600 }, { "epoch": 4.89, "learning_rate": 0.0002, "loss": 0.5719, "step": 87700 }, { "epoch": 4.89, "learning_rate": 0.0002, "loss": 0.5911, "step": 87800 }, { "epoch": 4.9, "learning_rate": 0.0002, "loss": 0.5747, "step": 87900 }, { "epoch": 4.9, "learning_rate": 0.0002, "loss": 0.5804, "step": 88000 }, { "epoch": 4.91, "learning_rate": 0.0002, "loss": 0.5736, "step": 88100 }, { "epoch": 4.92, "learning_rate": 0.0002, "loss": 0.5621, "step": 88200 }, { "epoch": 4.92, "learning_rate": 0.0002, "loss": 0.5716, "step": 88300 }, { "epoch": 4.93, "learning_rate": 0.0002, "loss": 0.582, "step": 88400 }, { "epoch": 4.93, "learning_rate": 0.0002, "loss": 0.5743, "step": 88500 }, { "epoch": 4.94, "learning_rate": 0.0002, "loss": 0.5756, "step": 88600 }, { "epoch": 4.94, "learning_rate": 0.0002, "loss": 0.5729, "step": 88700 }, { "epoch": 4.95, "learning_rate": 0.0002, "loss": 0.5546, "step": 88800 }, { "epoch": 4.95, "learning_rate": 0.0002, "loss": 0.5688, "step": 88900 }, { "epoch": 4.96, "learning_rate": 0.0002, "loss": 0.5702, "step": 89000 }, { "epoch": 4.97, "learning_rate": 0.0002, "loss": 0.5811, "step": 89100 }, { "epoch": 4.97, "learning_rate": 0.0002, "loss": 0.565, "step": 89200 }, { "epoch": 4.98, "learning_rate": 0.0002, "loss": 0.5643, "step": 89300 }, { "epoch": 4.98, "learning_rate": 0.0002, "loss": 0.5915, "step": 89400 }, { "epoch": 4.99, "learning_rate": 0.0002, "loss": 0.5978, "step": 89500 }, { "epoch": 4.99, "learning_rate": 0.0002, "loss": 0.5888, "step": 89600 }, { "epoch": 5.0, "learning_rate": 0.0002, "loss": 0.5706, "step": 89700 }, { "epoch": 5.0, "learning_rate": 0.0002, "loss": 0.5537, "step": 89800 }, { "epoch": 5.01, "learning_rate": 0.0002, "loss": 0.5571, "step": 89900 }, { "epoch": 5.02, "learning_rate": 0.0002, "loss": 0.5387, "step": 90000 }, { "epoch": 5.02, "eval_gen_len": 18.99955116696589, "eval_loss": 2.7078752517700195, "eval_rouge1": 25.2864, "eval_rouge2": 12.3885, "eval_rougeL": 20.9095, "eval_rougeLsum": 23.8569, "eval_runtime": 479.4466, "eval_samples_per_second": 27.882, "eval_steps_per_second": 1.744, "step": 90000 }, { "epoch": 5.02, "learning_rate": 0.0002, "loss": 0.5436, "step": 90100 }, { "epoch": 5.03, "learning_rate": 0.0002, "loss": 0.55, "step": 90200 }, { "epoch": 5.03, "learning_rate": 0.0002, "loss": 0.5454, "step": 90300 }, { "epoch": 5.04, "learning_rate": 0.0002, "loss": 0.5504, "step": 90400 }, { "epoch": 5.04, "learning_rate": 0.0002, "loss": 0.5469, "step": 90500 }, { "epoch": 5.05, "learning_rate": 0.0002, "loss": 0.5441, "step": 90600 }, { "epoch": 5.05, "learning_rate": 0.0002, "loss": 0.5517, "step": 90700 }, { "epoch": 5.06, "learning_rate": 0.0002, "loss": 0.5517, "step": 90800 }, { "epoch": 5.07, "learning_rate": 0.0002, "loss": 0.5419, "step": 90900 }, { "epoch": 5.07, "learning_rate": 0.0002, "loss": 0.5447, "step": 91000 }, { "epoch": 5.08, "learning_rate": 0.0002, "loss": 0.5425, "step": 91100 }, { "epoch": 5.08, "learning_rate": 0.0002, "loss": 0.5549, "step": 91200 }, { "epoch": 5.09, "learning_rate": 0.0002, "loss": 0.5595, "step": 91300 }, { "epoch": 5.09, "learning_rate": 0.0002, "loss": 0.5442, "step": 91400 }, { "epoch": 5.1, "learning_rate": 0.0002, "loss": 0.552, "step": 91500 }, { "epoch": 5.1, "learning_rate": 0.0002, "loss": 0.5665, "step": 91600 }, { "epoch": 5.11, "learning_rate": 0.0002, "loss": 0.5503, "step": 91700 }, { "epoch": 5.12, "learning_rate": 0.0002, "loss": 0.5589, "step": 91800 }, { "epoch": 5.12, "learning_rate": 0.0002, "loss": 0.5792, "step": 91900 }, { "epoch": 5.13, "learning_rate": 0.0002, "loss": 0.5517, "step": 92000 }, { "epoch": 5.13, "learning_rate": 0.0002, "loss": 0.5693, "step": 92100 }, { "epoch": 5.14, "learning_rate": 0.0002, "loss": 0.5496, "step": 92200 }, { "epoch": 5.14, "learning_rate": 0.0002, "loss": 0.5557, "step": 92300 }, { "epoch": 5.15, "learning_rate": 0.0002, "loss": 0.5545, "step": 92400 }, { "epoch": 5.15, "learning_rate": 0.0002, "loss": 0.5533, "step": 92500 }, { "epoch": 5.16, "learning_rate": 0.0002, "loss": 0.544, "step": 92600 }, { "epoch": 5.17, "learning_rate": 0.0002, "loss": 0.5582, "step": 92700 }, { "epoch": 5.17, "learning_rate": 0.0002, "loss": 0.5517, "step": 92800 }, { "epoch": 5.18, "learning_rate": 0.0002, "loss": 0.5585, "step": 92900 }, { "epoch": 5.18, "learning_rate": 0.0002, "loss": 0.5561, "step": 93000 }, { "epoch": 5.19, "learning_rate": 0.0002, "loss": 0.5446, "step": 93100 }, { "epoch": 5.19, "learning_rate": 0.0002, "loss": 0.5587, "step": 93200 }, { "epoch": 5.2, "learning_rate": 0.0002, "loss": 0.5644, "step": 93300 }, { "epoch": 5.2, "learning_rate": 0.0002, "loss": 0.5549, "step": 93400 }, { "epoch": 5.21, "learning_rate": 0.0002, "loss": 0.5679, "step": 93500 }, { "epoch": 5.22, "learning_rate": 0.0002, "loss": 0.5663, "step": 93600 }, { "epoch": 5.22, "learning_rate": 0.0002, "loss": 0.5499, "step": 93700 }, { "epoch": 5.23, "learning_rate": 0.0002, "loss": 0.561, "step": 93800 }, { "epoch": 5.23, "learning_rate": 0.0002, "loss": 0.5538, "step": 93900 }, { "epoch": 5.24, "learning_rate": 0.0002, "loss": 0.5563, "step": 94000 }, { "epoch": 5.24, "learning_rate": 0.0002, "loss": 0.5515, "step": 94100 }, { "epoch": 5.25, "learning_rate": 0.0002, "loss": 0.5626, "step": 94200 }, { "epoch": 5.25, "learning_rate": 0.0002, "loss": 0.5562, "step": 94300 }, { "epoch": 5.26, "learning_rate": 0.0002, "loss": 0.5474, "step": 94400 }, { "epoch": 5.27, "learning_rate": 0.0002, "loss": 0.5698, "step": 94500 }, { "epoch": 5.27, "learning_rate": 0.0002, "loss": 0.5642, "step": 94600 }, { "epoch": 5.28, "learning_rate": 0.0002, "loss": 0.5564, "step": 94700 }, { "epoch": 5.28, "learning_rate": 0.0002, "loss": 0.5562, "step": 94800 }, { "epoch": 5.29, "learning_rate": 0.0002, "loss": 0.5623, "step": 94900 }, { "epoch": 5.29, "learning_rate": 0.0002, "loss": 0.5565, "step": 95000 }, { "epoch": 5.3, "learning_rate": 0.0002, "loss": 0.565, "step": 95100 }, { "epoch": 5.31, "learning_rate": 0.0002, "loss": 0.5618, "step": 95200 }, { "epoch": 5.31, "learning_rate": 0.0002, "loss": 0.5586, "step": 95300 }, { "epoch": 5.32, "learning_rate": 0.0002, "loss": 0.5572, "step": 95400 }, { "epoch": 5.32, "learning_rate": 0.0002, "loss": 0.5605, "step": 95500 }, { "epoch": 5.33, "learning_rate": 0.0002, "loss": 0.563, "step": 95600 }, { "epoch": 5.33, "learning_rate": 0.0002, "loss": 0.5688, "step": 95700 }, { "epoch": 5.34, "learning_rate": 0.0002, "loss": 0.5562, "step": 95800 }, { "epoch": 5.34, "learning_rate": 0.0002, "loss": 0.5529, "step": 95900 }, { "epoch": 5.35, "learning_rate": 0.0002, "loss": 0.5564, "step": 96000 }, { "epoch": 5.36, "learning_rate": 0.0002, "loss": 0.5708, "step": 96100 }, { "epoch": 5.36, "learning_rate": 0.0002, "loss": 0.5667, "step": 96200 }, { "epoch": 5.37, "learning_rate": 0.0002, "loss": 0.552, "step": 96300 }, { "epoch": 5.37, "learning_rate": 0.0002, "loss": 0.5715, "step": 96400 }, { "epoch": 5.38, "learning_rate": 0.0002, "loss": 0.5697, "step": 96500 }, { "epoch": 5.38, "learning_rate": 0.0002, "loss": 0.5552, "step": 96600 }, { "epoch": 5.39, "learning_rate": 0.0002, "loss": 0.5791, "step": 96700 }, { "epoch": 5.39, "learning_rate": 0.0002, "loss": 0.5527, "step": 96800 }, { "epoch": 5.4, "learning_rate": 0.0002, "loss": 0.5761, "step": 96900 }, { "epoch": 5.41, "learning_rate": 0.0002, "loss": 0.5725, "step": 97000 }, { "epoch": 5.41, "learning_rate": 0.0002, "loss": 0.5731, "step": 97100 }, { "epoch": 5.42, "learning_rate": 0.0002, "loss": 0.5688, "step": 97200 }, { "epoch": 5.42, "learning_rate": 0.0002, "loss": 0.555, "step": 97300 }, { "epoch": 5.43, "learning_rate": 0.0002, "loss": 0.564, "step": 97400 }, { "epoch": 5.43, "learning_rate": 0.0002, "loss": 0.5729, "step": 97500 }, { "epoch": 5.44, "learning_rate": 0.0002, "loss": 0.5668, "step": 97600 }, { "epoch": 5.44, "learning_rate": 0.0002, "loss": 0.5735, "step": 97700 }, { "epoch": 5.45, "learning_rate": 0.0002, "loss": 0.566, "step": 97800 }, { "epoch": 5.46, "learning_rate": 0.0002, "loss": 0.5525, "step": 97900 }, { "epoch": 5.46, "learning_rate": 0.0002, "loss": 0.5592, "step": 98000 }, { "epoch": 5.47, "learning_rate": 0.0002, "loss": 0.5664, "step": 98100 }, { "epoch": 5.47, "learning_rate": 0.0002, "loss": 0.5622, "step": 98200 }, { "epoch": 5.48, "learning_rate": 0.0002, "loss": 0.5678, "step": 98300 }, { "epoch": 5.48, "learning_rate": 0.0002, "loss": 0.5802, "step": 98400 }, { "epoch": 5.49, "learning_rate": 0.0002, "loss": 0.5661, "step": 98500 }, { "epoch": 5.49, "learning_rate": 0.0002, "loss": 0.5603, "step": 98600 }, { "epoch": 5.5, "learning_rate": 0.0002, "loss": 0.5653, "step": 98700 }, { "epoch": 5.51, "learning_rate": 0.0002, "loss": 0.5568, "step": 98800 }, { "epoch": 5.51, "learning_rate": 0.0002, "loss": 0.5695, "step": 98900 }, { "epoch": 5.52, "learning_rate": 0.0002, "loss": 0.5688, "step": 99000 }, { "epoch": 5.52, "learning_rate": 0.0002, "loss": 0.5689, "step": 99100 }, { "epoch": 5.53, "learning_rate": 0.0002, "loss": 0.5637, "step": 99200 }, { "epoch": 5.53, "learning_rate": 0.0002, "loss": 0.5671, "step": 99300 }, { "epoch": 5.54, "learning_rate": 0.0002, "loss": 0.5667, "step": 99400 }, { "epoch": 5.54, "learning_rate": 0.0002, "loss": 0.5797, "step": 99500 }, { "epoch": 5.55, "learning_rate": 0.0002, "loss": 0.5687, "step": 99600 }, { "epoch": 5.56, "learning_rate": 0.0002, "loss": 0.5743, "step": 99700 }, { "epoch": 5.56, "learning_rate": 0.0002, "loss": 0.575, "step": 99800 }, { "epoch": 5.57, "learning_rate": 0.0002, "loss": 0.5626, "step": 99900 }, { "epoch": 5.57, "learning_rate": 0.0002, "loss": 0.5662, "step": 100000 }, { "epoch": 5.57, "eval_gen_len": 18.99985038898863, "eval_loss": 2.673166275024414, "eval_rouge1": 25.3971, "eval_rouge2": 12.406, "eval_rougeL": 20.9243, "eval_rougeLsum": 23.9681, "eval_runtime": 474.5023, "eval_samples_per_second": 28.173, "eval_steps_per_second": 1.762, "step": 100000 }, { "epoch": 5.58, "learning_rate": 0.0002, "loss": 0.5734, "step": 100100 }, { "epoch": 5.58, "learning_rate": 0.0002, "loss": 0.562, "step": 100200 }, { "epoch": 5.59, "learning_rate": 0.0002, "loss": 0.5777, "step": 100300 }, { "epoch": 5.59, "learning_rate": 0.0002, "loss": 0.5653, "step": 100400 }, { "epoch": 5.6, "learning_rate": 0.0002, "loss": 0.562, "step": 100500 }, { "epoch": 5.61, "learning_rate": 0.0002, "loss": 0.5713, "step": 100600 }, { "epoch": 5.61, "learning_rate": 0.0002, "loss": 0.5686, "step": 100700 }, { "epoch": 5.62, "learning_rate": 0.0002, "loss": 0.5673, "step": 100800 }, { "epoch": 5.62, "learning_rate": 0.0002, "loss": 0.5635, "step": 100900 }, { "epoch": 5.63, "learning_rate": 0.0002, "loss": 0.5625, "step": 101000 }, { "epoch": 5.63, "learning_rate": 0.0002, "loss": 0.5609, "step": 101100 }, { "epoch": 5.64, "learning_rate": 0.0002, "loss": 0.5761, "step": 101200 }, { "epoch": 5.65, "learning_rate": 0.0002, "loss": 0.5744, "step": 101300 }, { "epoch": 5.65, "learning_rate": 0.0002, "loss": 0.5672, "step": 101400 }, { "epoch": 5.66, "learning_rate": 0.0002, "loss": 0.5728, "step": 101500 }, { "epoch": 5.66, "learning_rate": 0.0002, "loss": 0.5725, "step": 101600 }, { "epoch": 5.67, "learning_rate": 0.0002, "loss": 0.5741, "step": 101700 }, { "epoch": 5.67, "learning_rate": 0.0002, "loss": 0.5661, "step": 101800 }, { "epoch": 5.68, "learning_rate": 0.0002, "loss": 0.5594, "step": 101900 }, { "epoch": 5.68, "learning_rate": 0.0002, "loss": 0.5622, "step": 102000 }, { "epoch": 5.69, "learning_rate": 0.0002, "loss": 0.5683, "step": 102100 }, { "epoch": 5.7, "learning_rate": 0.0002, "loss": 0.5642, "step": 102200 }, { "epoch": 5.7, "learning_rate": 0.0002, "loss": 0.5788, "step": 102300 }, { "epoch": 5.71, "learning_rate": 0.0002, "loss": 0.564, "step": 102400 }, { "epoch": 5.71, "learning_rate": 0.0002, "loss": 0.5563, "step": 102500 }, { "epoch": 5.72, "learning_rate": 0.0002, "loss": 0.5587, "step": 102600 }, { "epoch": 5.72, "learning_rate": 0.0002, "loss": 0.5744, "step": 102700 }, { "epoch": 5.73, "learning_rate": 0.0002, "loss": 0.5776, "step": 102800 }, { "epoch": 5.73, "learning_rate": 0.0002, "loss": 0.5711, "step": 102900 }, { "epoch": 5.74, "learning_rate": 0.0002, "loss": 0.5646, "step": 103000 }, { "epoch": 5.75, "learning_rate": 0.0002, "loss": 0.569, "step": 103100 }, { "epoch": 5.75, "learning_rate": 0.0002, "loss": 0.5704, "step": 103200 }, { "epoch": 5.76, "learning_rate": 0.0002, "loss": 0.5439, "step": 103300 }, { "epoch": 5.76, "learning_rate": 0.0002, "loss": 0.5685, "step": 103400 }, { "epoch": 5.77, "learning_rate": 0.0002, "loss": 0.5519, "step": 103500 }, { "epoch": 5.77, "learning_rate": 0.0002, "loss": 0.5806, "step": 103600 }, { "epoch": 5.78, "learning_rate": 0.0002, "loss": 0.5666, "step": 103700 }, { "epoch": 5.78, "learning_rate": 0.0002, "loss": 0.5706, "step": 103800 }, { "epoch": 5.79, "learning_rate": 0.0002, "loss": 0.5681, "step": 103900 }, { "epoch": 5.8, "learning_rate": 0.0002, "loss": 0.5766, "step": 104000 }, { "epoch": 5.8, "learning_rate": 0.0002, "loss": 0.584, "step": 104100 }, { "epoch": 5.81, "learning_rate": 0.0002, "loss": 0.5765, "step": 104200 }, { "epoch": 5.81, "learning_rate": 0.0002, "loss": 0.5586, "step": 104300 }, { "epoch": 5.82, "learning_rate": 0.0002, "loss": 0.5694, "step": 104400 }, { "epoch": 5.82, "learning_rate": 0.0002, "loss": 0.5556, "step": 104500 }, { "epoch": 5.83, "learning_rate": 0.0002, "loss": 0.5757, "step": 104600 }, { "epoch": 5.83, "learning_rate": 0.0002, "loss": 0.5501, "step": 104700 }, { "epoch": 5.84, "learning_rate": 0.0002, "loss": 0.5656, "step": 104800 }, { "epoch": 5.85, "learning_rate": 0.0002, "loss": 0.5775, "step": 104900 }, { "epoch": 5.85, "learning_rate": 0.0002, "loss": 0.5772, "step": 105000 }, { "epoch": 5.86, "learning_rate": 0.0002, "loss": 0.5664, "step": 105100 }, { "epoch": 5.86, "learning_rate": 0.0002, "loss": 0.5648, "step": 105200 }, { "epoch": 5.87, "learning_rate": 0.0002, "loss": 0.5565, "step": 105300 }, { "epoch": 5.87, "learning_rate": 0.0002, "loss": 0.5608, "step": 105400 }, { "epoch": 5.88, "learning_rate": 0.0002, "loss": 0.5614, "step": 105500 }, { "epoch": 5.88, "learning_rate": 0.0002, "loss": 0.5664, "step": 105600 }, { "epoch": 5.89, "learning_rate": 0.0002, "loss": 0.5822, "step": 105700 }, { "epoch": 5.9, "learning_rate": 0.0002, "loss": 0.5655, "step": 105800 }, { "epoch": 5.9, "learning_rate": 0.0002, "loss": 0.5814, "step": 105900 }, { "epoch": 5.91, "learning_rate": 0.0002, "loss": 0.555, "step": 106000 }, { "epoch": 5.91, "learning_rate": 0.0002, "loss": 0.5776, "step": 106100 }, { "epoch": 5.92, "learning_rate": 0.0002, "loss": 0.5832, "step": 106200 }, { "epoch": 5.92, "learning_rate": 0.0002, "loss": 0.5637, "step": 106300 }, { "epoch": 5.93, "learning_rate": 0.0002, "loss": 0.563, "step": 106400 }, { "epoch": 5.93, "learning_rate": 0.0002, "loss": 0.5648, "step": 106500 }, { "epoch": 5.94, "learning_rate": 0.0002, "loss": 0.5727, "step": 106600 }, { "epoch": 5.95, "learning_rate": 0.0002, "loss": 0.5745, "step": 106700 }, { "epoch": 5.95, "learning_rate": 0.0002, "loss": 0.5797, "step": 106800 }, { "epoch": 5.96, "learning_rate": 0.0002, "loss": 0.5714, "step": 106900 }, { "epoch": 5.96, "learning_rate": 0.0002, "loss": 0.5603, "step": 107000 }, { "epoch": 5.97, "learning_rate": 0.0002, "loss": 0.5671, "step": 107100 }, { "epoch": 5.97, "learning_rate": 0.0002, "loss": 0.56, "step": 107200 }, { "epoch": 5.98, "learning_rate": 0.0002, "loss": 0.5676, "step": 107300 }, { "epoch": 5.98, "learning_rate": 0.0002, "loss": 0.5907, "step": 107400 }, { "epoch": 5.99, "learning_rate": 0.0002, "loss": 0.5651, "step": 107500 }, { "epoch": 6.0, "learning_rate": 0.0002, "loss": 0.5675, "step": 107600 }, { "epoch": 6.0, "learning_rate": 0.0002, "loss": 0.5716, "step": 107700 }, { "epoch": 6.01, "learning_rate": 0.0002, "loss": 0.5342, "step": 107800 }, { "epoch": 6.01, "learning_rate": 0.0002, "loss": 0.5334, "step": 107900 }, { "epoch": 6.02, "learning_rate": 0.0002, "loss": 0.5439, "step": 108000 }, { "epoch": 6.02, "learning_rate": 0.0002, "loss": 0.5571, "step": 108100 }, { "epoch": 6.03, "learning_rate": 0.0002, "loss": 0.5569, "step": 108200 }, { "epoch": 6.04, "learning_rate": 0.0002, "loss": 0.5569, "step": 108300 }, { "epoch": 6.04, "learning_rate": 0.0002, "loss": 0.5481, "step": 108400 }, { "epoch": 6.05, "learning_rate": 0.0002, "loss": 0.5507, "step": 108500 }, { "epoch": 6.05, "learning_rate": 0.0002, "loss": 0.5438, "step": 108600 }, { "epoch": 6.06, "learning_rate": 0.0002, "loss": 0.5498, "step": 108700 }, { "epoch": 6.06, "learning_rate": 0.0002, "loss": 0.55, "step": 108800 }, { "epoch": 6.07, "learning_rate": 0.0002, "loss": 0.5625, "step": 108900 }, { "epoch": 6.07, "learning_rate": 0.0002, "loss": 0.547, "step": 109000 }, { "epoch": 6.08, "learning_rate": 0.0002, "loss": 0.5498, "step": 109100 }, { "epoch": 6.09, "learning_rate": 0.0002, "loss": 0.5467, "step": 109200 }, { "epoch": 6.09, "learning_rate": 0.0002, "loss": 0.5346, "step": 109300 }, { "epoch": 6.1, "learning_rate": 0.0002, "loss": 0.5439, "step": 109400 }, { "epoch": 6.1, "learning_rate": 0.0002, "loss": 0.5395, "step": 109500 }, { "epoch": 6.11, "learning_rate": 0.0002, "loss": 0.543, "step": 109600 }, { "epoch": 6.11, "learning_rate": 0.0002, "loss": 0.5494, "step": 109700 }, { "epoch": 6.12, "learning_rate": 0.0002, "loss": 0.5471, "step": 109800 }, { "epoch": 6.12, "learning_rate": 0.0002, "loss": 0.5491, "step": 109900 }, { "epoch": 6.13, "learning_rate": 0.0002, "loss": 0.5515, "step": 110000 }, { "epoch": 6.13, "eval_gen_len": 18.999925194494313, "eval_loss": 2.722730875015259, "eval_rouge1": 25.2536, "eval_rouge2": 12.3269, "eval_rougeL": 20.8448, "eval_rougeLsum": 23.8308, "eval_runtime": 477.19, "eval_samples_per_second": 28.014, "eval_steps_per_second": 1.752, "step": 110000 }, { "epoch": 6.14, "learning_rate": 0.0002, "loss": 0.5518, "step": 110100 }, { "epoch": 6.14, "learning_rate": 0.0002, "loss": 0.5394, "step": 110200 }, { "epoch": 6.15, "learning_rate": 0.0002, "loss": 0.5362, "step": 110300 }, { "epoch": 6.15, "learning_rate": 0.0002, "loss": 0.5357, "step": 110400 }, { "epoch": 6.16, "learning_rate": 0.0002, "loss": 0.5489, "step": 110500 }, { "epoch": 6.16, "learning_rate": 0.0002, "loss": 0.5506, "step": 110600 }, { "epoch": 6.17, "learning_rate": 0.0002, "loss": 0.5659, "step": 110700 }, { "epoch": 6.17, "learning_rate": 0.0002, "loss": 0.5502, "step": 110800 }, { "epoch": 6.18, "learning_rate": 0.0002, "loss": 0.5564, "step": 110900 }, { "epoch": 6.19, "learning_rate": 0.0002, "loss": 0.5411, "step": 111000 }, { "epoch": 6.19, "learning_rate": 0.0002, "loss": 0.542, "step": 111100 }, { "epoch": 6.2, "learning_rate": 0.0002, "loss": 0.5582, "step": 111200 }, { "epoch": 6.2, "learning_rate": 0.0002, "loss": 0.535, "step": 111300 }, { "epoch": 6.21, "learning_rate": 0.0002, "loss": 0.5458, "step": 111400 }, { "epoch": 6.21, "learning_rate": 0.0002, "loss": 0.5683, "step": 111500 }, { "epoch": 6.22, "learning_rate": 0.0002, "loss": 0.5659, "step": 111600 }, { "epoch": 6.22, "learning_rate": 0.0002, "loss": 0.5536, "step": 111700 }, { "epoch": 6.23, "learning_rate": 0.0002, "loss": 0.5594, "step": 111800 }, { "epoch": 6.24, "learning_rate": 0.0002, "loss": 0.5633, "step": 111900 }, { "epoch": 6.24, "learning_rate": 0.0002, "loss": 0.561, "step": 112000 }, { "epoch": 6.25, "learning_rate": 0.0002, "loss": 0.5485, "step": 112100 }, { "epoch": 6.25, "learning_rate": 0.0002, "loss": 0.5567, "step": 112200 }, { "epoch": 6.26, "learning_rate": 0.0002, "loss": 0.5568, "step": 112300 }, { "epoch": 6.26, "learning_rate": 0.0002, "loss": 0.5407, "step": 112400 }, { "epoch": 6.27, "learning_rate": 0.0002, "loss": 0.5433, "step": 112500 }, { "epoch": 6.27, "learning_rate": 0.0002, "loss": 0.5583, "step": 112600 }, { "epoch": 6.28, "learning_rate": 0.0002, "loss": 0.5516, "step": 112700 }, { "epoch": 6.29, "learning_rate": 0.0002, "loss": 0.5493, "step": 112800 }, { "epoch": 6.29, "learning_rate": 0.0002, "loss": 0.5513, "step": 112900 }, { "epoch": 6.3, "learning_rate": 0.0002, "loss": 0.5622, "step": 113000 }, { "epoch": 6.3, "learning_rate": 0.0002, "loss": 0.5431, "step": 113100 }, { "epoch": 6.31, "learning_rate": 0.0002, "loss": 0.5497, "step": 113200 }, { "epoch": 6.31, "learning_rate": 0.0002, "loss": 0.5398, "step": 113300 }, { "epoch": 6.32, "learning_rate": 0.0002, "loss": 0.5613, "step": 113400 }, { "epoch": 6.32, "learning_rate": 0.0002, "loss": 0.5565, "step": 113500 }, { "epoch": 6.33, "learning_rate": 0.0002, "loss": 0.5485, "step": 113600 }, { "epoch": 6.34, "learning_rate": 0.0002, "loss": 0.554, "step": 113700 }, { "epoch": 6.34, "learning_rate": 0.0002, "loss": 0.5594, "step": 113800 }, { "epoch": 6.35, "learning_rate": 0.0002, "loss": 0.5483, "step": 113900 }, { "epoch": 6.35, "learning_rate": 0.0002, "loss": 0.5468, "step": 114000 }, { "epoch": 6.36, "learning_rate": 0.0002, "loss": 0.5549, "step": 114100 }, { "epoch": 6.36, "learning_rate": 0.0002, "loss": 0.5605, "step": 114200 }, { "epoch": 6.37, "learning_rate": 0.0002, "loss": 0.5541, "step": 114300 }, { "epoch": 6.38, "learning_rate": 0.0002, "loss": 0.5553, "step": 114400 }, { "epoch": 6.38, "learning_rate": 0.0002, "loss": 0.5566, "step": 114500 }, { "epoch": 6.39, "learning_rate": 0.0002, "loss": 0.5599, "step": 114600 }, { "epoch": 6.39, "learning_rate": 0.0002, "loss": 0.5422, "step": 114700 }, { "epoch": 6.4, "learning_rate": 0.0002, "loss": 0.5648, "step": 114800 }, { "epoch": 6.4, "learning_rate": 0.0002, "loss": 0.5511, "step": 114900 }, { "epoch": 6.41, "learning_rate": 0.0002, "loss": 0.553, "step": 115000 }, { "epoch": 6.41, "learning_rate": 0.0002, "loss": 0.5412, "step": 115100 }, { "epoch": 6.42, "learning_rate": 0.0002, "loss": 0.5762, "step": 115200 }, { "epoch": 6.43, "learning_rate": 0.0002, "loss": 0.5528, "step": 115300 }, { "epoch": 6.43, "learning_rate": 0.0002, "loss": 0.5404, "step": 115400 }, { "epoch": 6.44, "learning_rate": 0.0002, "loss": 0.5462, "step": 115500 }, { "epoch": 6.44, "learning_rate": 0.0002, "loss": 0.5516, "step": 115600 }, { "epoch": 6.45, "learning_rate": 0.0002, "loss": 0.5538, "step": 115700 }, { "epoch": 6.45, "learning_rate": 0.0002, "loss": 0.5509, "step": 115800 }, { "epoch": 6.46, "learning_rate": 0.0002, "loss": 0.5516, "step": 115900 }, { "epoch": 6.46, "learning_rate": 0.0002, "loss": 0.5522, "step": 116000 }, { "epoch": 6.47, "learning_rate": 0.0002, "loss": 0.556, "step": 116100 }, { "epoch": 6.48, "learning_rate": 0.0002, "loss": 0.5657, "step": 116200 }, { "epoch": 6.48, "learning_rate": 0.0002, "loss": 0.5643, "step": 116300 }, { "epoch": 6.49, "learning_rate": 0.0002, "loss": 0.5467, "step": 116400 }, { "epoch": 6.49, "learning_rate": 0.0002, "loss": 0.5562, "step": 116500 }, { "epoch": 6.5, "learning_rate": 0.0002, "loss": 0.5572, "step": 116600 }, { "epoch": 6.5, "learning_rate": 0.0002, "loss": 0.5423, "step": 116700 }, { "epoch": 6.51, "learning_rate": 0.0002, "loss": 0.5541, "step": 116800 }, { "epoch": 6.51, "learning_rate": 0.0002, "loss": 0.5601, "step": 116900 }, { "epoch": 6.52, "learning_rate": 0.0002, "loss": 0.5675, "step": 117000 }, { "epoch": 6.53, "learning_rate": 0.0002, "loss": 0.5427, "step": 117100 }, { "epoch": 6.53, "learning_rate": 0.0002, "loss": 0.554, "step": 117200 }, { "epoch": 6.54, "learning_rate": 0.0002, "loss": 0.5649, "step": 117300 }, { "epoch": 6.54, "learning_rate": 0.0002, "loss": 0.556, "step": 117400 }, { "epoch": 6.55, "learning_rate": 0.0002, "loss": 0.5549, "step": 117500 }, { "epoch": 6.55, "learning_rate": 0.0002, "loss": 0.5614, "step": 117600 }, { "epoch": 6.56, "learning_rate": 0.0002, "loss": 0.5721, "step": 117700 }, { "epoch": 6.56, "learning_rate": 0.0002, "loss": 0.5596, "step": 117800 }, { "epoch": 6.57, "learning_rate": 0.0002, "loss": 0.5656, "step": 117900 }, { "epoch": 6.58, "learning_rate": 0.0002, "loss": 0.5459, "step": 118000 }, { "epoch": 6.58, "learning_rate": 0.0002, "loss": 0.552, "step": 118100 }, { "epoch": 6.59, "learning_rate": 0.0002, "loss": 0.5642, "step": 118200 }, { "epoch": 6.59, "learning_rate": 0.0002, "loss": 0.5729, "step": 118300 }, { "epoch": 6.6, "learning_rate": 0.0002, "loss": 0.5577, "step": 118400 }, { "epoch": 6.6, "learning_rate": 0.0002, "loss": 0.5576, "step": 118500 }, { "epoch": 6.61, "learning_rate": 0.0002, "loss": 0.5848, "step": 118600 }, { "epoch": 6.61, "learning_rate": 0.0002, "loss": 0.5571, "step": 118700 }, { "epoch": 6.62, "learning_rate": 0.0002, "loss": 0.5502, "step": 118800 }, { "epoch": 6.63, "learning_rate": 0.0002, "loss": 0.5676, "step": 118900 }, { "epoch": 6.63, "learning_rate": 0.0002, "loss": 0.5556, "step": 119000 }, { "epoch": 6.64, "learning_rate": 0.0002, "loss": 0.5538, "step": 119100 }, { "epoch": 6.64, "learning_rate": 0.0002, "loss": 0.5527, "step": 119200 }, { "epoch": 6.65, "learning_rate": 0.0002, "loss": 0.5675, "step": 119300 }, { "epoch": 6.65, "learning_rate": 0.0002, "loss": 0.543, "step": 119400 }, { "epoch": 6.66, "learning_rate": 0.0002, "loss": 0.5614, "step": 119500 }, { "epoch": 6.66, "learning_rate": 0.0002, "loss": 0.5577, "step": 119600 }, { "epoch": 6.67, "learning_rate": 0.0002, "loss": 0.5507, "step": 119700 }, { "epoch": 6.68, "learning_rate": 0.0002, "loss": 0.562, "step": 119800 }, { "epoch": 6.68, "learning_rate": 0.0002, "loss": 0.554, "step": 119900 }, { "epoch": 6.69, "learning_rate": 0.0002, "loss": 0.5651, "step": 120000 }, { "epoch": 6.69, "eval_gen_len": 18.99955116696589, "eval_loss": 2.6814467906951904, "eval_rouge1": 25.3277, "eval_rouge2": 12.4166, "eval_rougeL": 20.9334, "eval_rougeLsum": 23.9166, "eval_runtime": 477.4979, "eval_samples_per_second": 27.996, "eval_steps_per_second": 1.751, "step": 120000 }, { "epoch": 6.69, "learning_rate": 0.0002, "loss": 0.5714, "step": 120100 }, { "epoch": 6.7, "learning_rate": 0.0002, "loss": 0.5739, "step": 120200 }, { "epoch": 6.7, "learning_rate": 0.0002, "loss": 0.552, "step": 120300 }, { "epoch": 6.71, "learning_rate": 0.0002, "loss": 0.5548, "step": 120400 }, { "epoch": 6.71, "learning_rate": 0.0002, "loss": 0.5658, "step": 120500 }, { "epoch": 6.72, "learning_rate": 0.0002, "loss": 0.5729, "step": 120600 }, { "epoch": 6.73, "learning_rate": 0.0002, "loss": 0.5614, "step": 120700 }, { "epoch": 6.73, "learning_rate": 0.0002, "loss": 0.5745, "step": 120800 }, { "epoch": 6.74, "learning_rate": 0.0002, "loss": 0.5595, "step": 120900 }, { "epoch": 6.74, "learning_rate": 0.0002, "loss": 0.5583, "step": 121000 }, { "epoch": 6.75, "learning_rate": 0.0002, "loss": 0.553, "step": 121100 }, { "epoch": 6.75, "learning_rate": 0.0002, "loss": 0.5788, "step": 121200 }, { "epoch": 6.76, "learning_rate": 0.0002, "loss": 0.5535, "step": 121300 }, { "epoch": 6.77, "learning_rate": 0.0002, "loss": 0.5666, "step": 121400 }, { "epoch": 6.77, "learning_rate": 0.0002, "loss": 0.5616, "step": 121500 }, { "epoch": 6.78, "learning_rate": 0.0002, "loss": 0.5619, "step": 121600 }, { "epoch": 6.78, "learning_rate": 0.0002, "loss": 0.559, "step": 121700 }, { "epoch": 6.79, "learning_rate": 0.0002, "loss": 0.5806, "step": 121800 }, { "epoch": 6.79, "learning_rate": 0.0002, "loss": 0.5696, "step": 121900 }, { "epoch": 6.8, "learning_rate": 0.0002, "loss": 0.5667, "step": 122000 }, { "epoch": 6.8, "learning_rate": 0.0002, "loss": 0.5452, "step": 122100 }, { "epoch": 6.81, "learning_rate": 0.0002, "loss": 0.5609, "step": 122200 }, { "epoch": 6.82, "learning_rate": 0.0002, "loss": 0.5703, "step": 122300 }, { "epoch": 6.82, "learning_rate": 0.0002, "loss": 0.5568, "step": 122400 }, { "epoch": 6.83, "learning_rate": 0.0002, "loss": 0.565, "step": 122500 }, { "epoch": 6.83, "learning_rate": 0.0002, "loss": 0.5556, "step": 122600 }, { "epoch": 6.84, "learning_rate": 0.0002, "loss": 0.5577, "step": 122700 }, { "epoch": 6.84, "learning_rate": 0.0002, "loss": 0.5625, "step": 122800 }, { "epoch": 6.85, "learning_rate": 0.0002, "loss": 0.5589, "step": 122900 }, { "epoch": 6.85, "learning_rate": 0.0002, "loss": 0.5728, "step": 123000 }, { "epoch": 6.86, "learning_rate": 0.0002, "loss": 0.5534, "step": 123100 }, { "epoch": 6.87, "learning_rate": 0.0002, "loss": 0.5592, "step": 123200 }, { "epoch": 6.87, "learning_rate": 0.0002, "loss": 0.5646, "step": 123300 }, { "epoch": 6.88, "learning_rate": 0.0002, "loss": 0.5774, "step": 123400 }, { "epoch": 6.88, "learning_rate": 0.0002, "loss": 0.5629, "step": 123500 }, { "epoch": 6.89, "learning_rate": 0.0002, "loss": 0.5676, "step": 123600 }, { "epoch": 6.89, "learning_rate": 0.0002, "loss": 0.5734, "step": 123700 }, { "epoch": 6.9, "learning_rate": 0.0002, "loss": 0.5566, "step": 123800 }, { "epoch": 6.9, "learning_rate": 0.0002, "loss": 0.5603, "step": 123900 }, { "epoch": 6.91, "learning_rate": 0.0002, "loss": 0.5758, "step": 124000 }, { "epoch": 6.92, "learning_rate": 0.0002, "loss": 0.57, "step": 124100 }, { "epoch": 6.92, "learning_rate": 0.0002, "loss": 0.5823, "step": 124200 }, { "epoch": 6.93, "learning_rate": 0.0002, "loss": 0.5699, "step": 124300 }, { "epoch": 6.93, "learning_rate": 0.0002, "loss": 0.5487, "step": 124400 }, { "epoch": 6.94, "learning_rate": 0.0002, "loss": 0.561, "step": 124500 }, { "epoch": 6.94, "learning_rate": 0.0002, "loss": 0.585, "step": 124600 }, { "epoch": 6.95, "learning_rate": 0.0002, "loss": 0.5693, "step": 124700 }, { "epoch": 6.95, "learning_rate": 0.0002, "loss": 0.5728, "step": 124800 }, { "epoch": 6.96, "learning_rate": 0.0002, "loss": 0.5794, "step": 124900 }, { "epoch": 6.97, "learning_rate": 0.0002, "loss": 0.5483, "step": 125000 }, { "epoch": 6.97, "learning_rate": 0.0002, "loss": 0.56, "step": 125100 }, { "epoch": 6.98, "learning_rate": 0.0002, "loss": 0.5635, "step": 125200 }, { "epoch": 6.98, "learning_rate": 0.0002, "loss": 0.5735, "step": 125300 }, { "epoch": 6.99, "learning_rate": 0.0002, "loss": 0.5811, "step": 125400 }, { "epoch": 6.99, "learning_rate": 0.0002, "loss": 0.5757, "step": 125500 }, { "epoch": 7.0, "learning_rate": 0.0002, "loss": 0.5567, "step": 125600 }, { "epoch": 7.0, "learning_rate": 0.0002, "loss": 0.5306, "step": 125700 }, { "epoch": 7.01, "learning_rate": 0.0002, "loss": 0.5365, "step": 125800 }, { "epoch": 7.02, "learning_rate": 0.0002, "loss": 0.5386, "step": 125900 }, { "epoch": 7.02, "learning_rate": 0.0002, "loss": 0.5402, "step": 126000 }, { "epoch": 7.03, "learning_rate": 0.0002, "loss": 0.5341, "step": 126100 }, { "epoch": 7.03, "learning_rate": 0.0002, "loss": 0.5384, "step": 126200 }, { "epoch": 7.04, "learning_rate": 0.0002, "loss": 0.5285, "step": 126300 }, { "epoch": 7.04, "learning_rate": 0.0002, "loss": 0.553, "step": 126400 }, { "epoch": 7.05, "learning_rate": 0.0002, "loss": 0.5334, "step": 126500 }, { "epoch": 7.05, "learning_rate": 0.0002, "loss": 0.5277, "step": 126600 }, { "epoch": 7.06, "learning_rate": 0.0002, "loss": 0.56, "step": 126700 }, { "epoch": 7.07, "learning_rate": 0.0002, "loss": 0.5326, "step": 126800 }, { "epoch": 7.07, "learning_rate": 0.0002, "loss": 0.5438, "step": 126900 }, { "epoch": 7.08, "learning_rate": 0.0002, "loss": 0.551, "step": 127000 }, { "epoch": 7.08, "learning_rate": 0.0002, "loss": 0.5461, "step": 127100 }, { "epoch": 7.09, "learning_rate": 0.0002, "loss": 0.5438, "step": 127200 }, { "epoch": 7.09, "learning_rate": 0.0002, "loss": 0.5459, "step": 127300 }, { "epoch": 7.1, "learning_rate": 0.0002, "loss": 0.5509, "step": 127400 }, { "epoch": 7.11, "learning_rate": 0.0002, "loss": 0.5329, "step": 127500 }, { "epoch": 7.11, "learning_rate": 0.0002, "loss": 0.5525, "step": 127600 }, { "epoch": 7.12, "learning_rate": 0.0002, "loss": 0.5337, "step": 127700 }, { "epoch": 7.12, "learning_rate": 0.0002, "loss": 0.541, "step": 127800 }, { "epoch": 7.13, "learning_rate": 0.0002, "loss": 0.5577, "step": 127900 }, { "epoch": 7.13, "learning_rate": 0.0002, "loss": 0.546, "step": 128000 }, { "epoch": 7.14, "learning_rate": 0.0002, "loss": 0.5478, "step": 128100 }, { "epoch": 7.14, "learning_rate": 0.0002, "loss": 0.5394, "step": 128200 }, { "epoch": 7.15, "learning_rate": 0.0002, "loss": 0.5302, "step": 128300 }, { "epoch": 7.16, "learning_rate": 0.0002, "loss": 0.5545, "step": 128400 }, { "epoch": 7.16, "learning_rate": 0.0002, "loss": 0.532, "step": 128500 }, { "epoch": 7.17, "learning_rate": 0.0002, "loss": 0.5468, "step": 128600 }, { "epoch": 7.17, "learning_rate": 0.0002, "loss": 0.5394, "step": 128700 }, { "epoch": 7.18, "learning_rate": 0.0002, "loss": 0.542, "step": 128800 }, { "epoch": 7.18, "learning_rate": 0.0002, "loss": 0.5319, "step": 128900 }, { "epoch": 7.19, "learning_rate": 0.0002, "loss": 0.5431, "step": 129000 }, { "epoch": 7.19, "learning_rate": 0.0002, "loss": 0.5413, "step": 129100 }, { "epoch": 7.2, "learning_rate": 0.0002, "loss": 0.5368, "step": 129200 }, { "epoch": 7.21, "learning_rate": 0.0002, "loss": 0.5458, "step": 129300 }, { "epoch": 7.21, "learning_rate": 0.0002, "loss": 0.5405, "step": 129400 }, { "epoch": 7.22, "learning_rate": 0.0002, "loss": 0.5543, "step": 129500 }, { "epoch": 7.22, "learning_rate": 0.0002, "loss": 0.5318, "step": 129600 }, { "epoch": 7.23, "learning_rate": 0.0002, "loss": 0.5489, "step": 129700 }, { "epoch": 7.23, "learning_rate": 0.0002, "loss": 0.5346, "step": 129800 }, { "epoch": 7.24, "learning_rate": 0.0002, "loss": 0.5403, "step": 129900 }, { "epoch": 7.24, "learning_rate": 0.0002, "loss": 0.5442, "step": 130000 }, { "epoch": 7.24, "eval_gen_len": 18.99970077797726, "eval_loss": 2.7132627964019775, "eval_rouge1": 25.3009, "eval_rouge2": 12.3948, "eval_rougeL": 20.9085, "eval_rougeLsum": 23.8773, "eval_runtime": 477.2595, "eval_samples_per_second": 28.01, "eval_steps_per_second": 1.752, "step": 130000 }, { "epoch": 7.25, "learning_rate": 0.0002, "loss": 0.5588, "step": 130100 }, { "epoch": 7.26, "learning_rate": 0.0002, "loss": 0.5397, "step": 130200 }, { "epoch": 7.26, "learning_rate": 0.0002, "loss": 0.5392, "step": 130300 }, { "epoch": 7.27, "learning_rate": 0.0002, "loss": 0.538, "step": 130400 }, { "epoch": 7.27, "learning_rate": 0.0002, "loss": 0.5464, "step": 130500 }, { "epoch": 7.28, "learning_rate": 0.0002, "loss": 0.5475, "step": 130600 }, { "epoch": 7.28, "learning_rate": 0.0002, "loss": 0.547, "step": 130700 }, { "epoch": 7.29, "learning_rate": 0.0002, "loss": 0.5494, "step": 130800 }, { "epoch": 7.29, "learning_rate": 0.0002, "loss": 0.5357, "step": 130900 }, { "epoch": 7.3, "learning_rate": 0.0002, "loss": 0.5518, "step": 131000 }, { "epoch": 7.31, "learning_rate": 0.0002, "loss": 0.546, "step": 131100 }, { "epoch": 7.31, "learning_rate": 0.0002, "loss": 0.5416, "step": 131200 }, { "epoch": 7.32, "learning_rate": 0.0002, "loss": 0.5489, "step": 131300 }, { "epoch": 7.32, "learning_rate": 0.0002, "loss": 0.5483, "step": 131400 }, { "epoch": 7.33, "learning_rate": 0.0002, "loss": 0.5557, "step": 131500 }, { "epoch": 7.33, "learning_rate": 0.0002, "loss": 0.5605, "step": 131600 }, { "epoch": 7.34, "learning_rate": 0.0002, "loss": 0.5532, "step": 131700 }, { "epoch": 7.34, "learning_rate": 0.0002, "loss": 0.5482, "step": 131800 }, { "epoch": 7.35, "learning_rate": 0.0002, "loss": 0.5482, "step": 131900 }, { "epoch": 7.36, "learning_rate": 0.0002, "loss": 0.5458, "step": 132000 }, { "epoch": 7.36, "learning_rate": 0.0002, "loss": 0.5499, "step": 132100 }, { "epoch": 7.37, "learning_rate": 0.0002, "loss": 0.5537, "step": 132200 }, { "epoch": 7.37, "learning_rate": 0.0002, "loss": 0.5375, "step": 132300 }, { "epoch": 7.38, "learning_rate": 0.0002, "loss": 0.5515, "step": 132400 }, { "epoch": 7.38, "learning_rate": 0.0002, "loss": 0.5499, "step": 132500 }, { "epoch": 7.39, "learning_rate": 0.0002, "loss": 0.535, "step": 132600 }, { "epoch": 7.39, "learning_rate": 0.0002, "loss": 0.5537, "step": 132700 }, { "epoch": 7.4, "learning_rate": 0.0002, "loss": 0.5498, "step": 132800 }, { "epoch": 7.41, "learning_rate": 0.0002, "loss": 0.5648, "step": 132900 }, { "epoch": 7.41, "learning_rate": 0.0002, "loss": 0.5372, "step": 133000 }, { "epoch": 7.42, "learning_rate": 0.0002, "loss": 0.5622, "step": 133100 }, { "epoch": 7.42, "learning_rate": 0.0002, "loss": 0.5463, "step": 133200 }, { "epoch": 7.43, "learning_rate": 0.0002, "loss": 0.5639, "step": 133300 }, { "epoch": 7.43, "learning_rate": 0.0002, "loss": 0.5437, "step": 133400 }, { "epoch": 7.44, "learning_rate": 0.0002, "loss": 0.542, "step": 133500 }, { "epoch": 7.44, "learning_rate": 0.0002, "loss": 0.5435, "step": 133600 }, { "epoch": 7.45, "learning_rate": 0.0002, "loss": 0.5531, "step": 133700 }, { "epoch": 7.46, "learning_rate": 0.0002, "loss": 0.5522, "step": 133800 }, { "epoch": 7.46, "learning_rate": 0.0002, "loss": 0.5569, "step": 133900 }, { "epoch": 7.47, "learning_rate": 0.0002, "loss": 0.5489, "step": 134000 }, { "epoch": 7.47, "learning_rate": 0.0002, "loss": 0.5459, "step": 134100 }, { "epoch": 7.48, "learning_rate": 0.0002, "loss": 0.5584, "step": 134200 }, { "epoch": 7.48, "learning_rate": 0.0002, "loss": 0.5563, "step": 134300 }, { "epoch": 7.49, "learning_rate": 0.0002, "loss": 0.5459, "step": 134400 }, { "epoch": 7.5, "learning_rate": 0.0002, "loss": 0.5389, "step": 134500 }, { "epoch": 7.5, "learning_rate": 0.0002, "loss": 0.5451, "step": 134600 }, { "epoch": 7.51, "learning_rate": 0.0002, "loss": 0.5435, "step": 134700 }, { "epoch": 7.51, "learning_rate": 0.0002, "loss": 0.5538, "step": 134800 }, { "epoch": 7.52, "learning_rate": 0.0002, "loss": 0.5466, "step": 134900 }, { "epoch": 7.52, "learning_rate": 0.0002, "loss": 0.5522, "step": 135000 }, { "epoch": 7.53, "learning_rate": 0.0002, "loss": 0.5564, "step": 135100 }, { "epoch": 7.53, "learning_rate": 0.0002, "loss": 0.5531, "step": 135200 }, { "epoch": 7.54, "learning_rate": 0.0002, "loss": 0.5478, "step": 135300 }, { "epoch": 7.55, "learning_rate": 0.0002, "loss": 0.5715, "step": 135400 }, { "epoch": 7.55, "learning_rate": 0.0002, "loss": 0.5547, "step": 135500 }, { "epoch": 7.56, "learning_rate": 0.0002, "loss": 0.5514, "step": 135600 }, { "epoch": 7.56, "learning_rate": 0.0002, "loss": 0.5506, "step": 135700 }, { "epoch": 7.57, "learning_rate": 0.0002, "loss": 0.547, "step": 135800 }, { "epoch": 7.57, "learning_rate": 0.0002, "loss": 0.5566, "step": 135900 }, { "epoch": 7.58, "learning_rate": 0.0002, "loss": 0.5594, "step": 136000 }, { "epoch": 7.58, "learning_rate": 0.0002, "loss": 0.5524, "step": 136100 }, { "epoch": 7.59, "learning_rate": 0.0002, "loss": 0.5635, "step": 136200 }, { "epoch": 7.6, "learning_rate": 0.0002, "loss": 0.5551, "step": 136300 }, { "epoch": 7.6, "learning_rate": 0.0002, "loss": 0.553, "step": 136400 }, { "epoch": 7.61, "learning_rate": 0.0002, "loss": 0.5664, "step": 136500 }, { "epoch": 7.61, "learning_rate": 0.0002, "loss": 0.5394, "step": 136600 }, { "epoch": 7.62, "learning_rate": 0.0002, "loss": 0.5626, "step": 136700 }, { "epoch": 7.62, "learning_rate": 0.0002, "loss": 0.5541, "step": 136800 }, { "epoch": 7.63, "learning_rate": 0.0002, "loss": 0.5575, "step": 136900 }, { "epoch": 7.63, "learning_rate": 0.0002, "loss": 0.5567, "step": 137000 }, { "epoch": 7.64, "learning_rate": 0.0002, "loss": 0.5554, "step": 137100 }, { "epoch": 7.65, "learning_rate": 0.0002, "loss": 0.5423, "step": 137200 }, { "epoch": 7.65, "learning_rate": 0.0002, "loss": 0.5524, "step": 137300 }, { "epoch": 7.66, "learning_rate": 0.0002, "loss": 0.5636, "step": 137400 }, { "epoch": 7.66, "learning_rate": 0.0002, "loss": 0.5629, "step": 137500 }, { "epoch": 7.67, "learning_rate": 0.0002, "loss": 0.5536, "step": 137600 }, { "epoch": 7.67, "learning_rate": 0.0002, "loss": 0.5506, "step": 137700 }, { "epoch": 7.68, "learning_rate": 0.0002, "loss": 0.5601, "step": 137800 }, { "epoch": 7.68, "learning_rate": 0.0002, "loss": 0.5649, "step": 137900 }, { "epoch": 7.69, "learning_rate": 0.0002, "loss": 0.5438, "step": 138000 }, { "epoch": 7.7, "learning_rate": 0.0002, "loss": 0.5511, "step": 138100 }, { "epoch": 7.7, "learning_rate": 0.0002, "loss": 0.5453, "step": 138200 }, { "epoch": 7.71, "learning_rate": 0.0002, "loss": 0.5592, "step": 138300 }, { "epoch": 7.71, "learning_rate": 0.0002, "loss": 0.5518, "step": 138400 }, { "epoch": 7.72, "learning_rate": 0.0002, "loss": 0.5637, "step": 138500 }, { "epoch": 7.72, "learning_rate": 0.0002, "loss": 0.5533, "step": 138600 }, { "epoch": 7.73, "learning_rate": 0.0002, "loss": 0.5615, "step": 138700 }, { "epoch": 7.73, "learning_rate": 0.0002, "loss": 0.549, "step": 138800 }, { "epoch": 7.74, "learning_rate": 0.0002, "loss": 0.5433, "step": 138900 }, { "epoch": 7.75, "learning_rate": 0.0002, "loss": 0.5614, "step": 139000 }, { "epoch": 7.75, "learning_rate": 0.0002, "loss": 0.5621, "step": 139100 }, { "epoch": 7.76, "learning_rate": 0.0002, "loss": 0.5581, "step": 139200 }, { "epoch": 7.76, "learning_rate": 0.0002, "loss": 0.5498, "step": 139300 }, { "epoch": 7.77, "learning_rate": 0.0002, "loss": 0.5725, "step": 139400 }, { "epoch": 7.77, "learning_rate": 0.0002, "loss": 0.5553, "step": 139500 }, { "epoch": 7.78, "learning_rate": 0.0002, "loss": 0.556, "step": 139600 }, { "epoch": 7.78, "learning_rate": 0.0002, "loss": 0.5517, "step": 139700 }, { "epoch": 7.79, "learning_rate": 0.0002, "loss": 0.5554, "step": 139800 }, { "epoch": 7.8, "learning_rate": 0.0002, "loss": 0.5647, "step": 139900 }, { "epoch": 7.8, "learning_rate": 0.0002, "loss": 0.5574, "step": 140000 }, { "epoch": 7.8, "eval_gen_len": 18.999027528426094, "eval_loss": 2.6958916187286377, "eval_rouge1": 25.3491, "eval_rouge2": 12.3925, "eval_rougeL": 20.911, "eval_rougeLsum": 23.9331, "eval_runtime": 475.0144, "eval_samples_per_second": 28.142, "eval_steps_per_second": 1.76, "step": 140000 }, { "epoch": 7.81, "learning_rate": 0.0002, "loss": 0.5604, "step": 140100 }, { "epoch": 7.81, "learning_rate": 0.0002, "loss": 0.5708, "step": 140200 }, { "epoch": 7.82, "learning_rate": 0.0002, "loss": 0.5585, "step": 140300 }, { "epoch": 7.82, "learning_rate": 0.0002, "loss": 0.5554, "step": 140400 }, { "epoch": 7.83, "learning_rate": 0.0002, "loss": 0.5517, "step": 140500 }, { "epoch": 7.84, "learning_rate": 0.0002, "loss": 0.5564, "step": 140600 }, { "epoch": 7.84, "learning_rate": 0.0002, "loss": 0.5588, "step": 140700 }, { "epoch": 7.85, "learning_rate": 0.0002, "loss": 0.5506, "step": 140800 }, { "epoch": 7.85, "learning_rate": 0.0002, "loss": 0.55, "step": 140900 }, { "epoch": 7.86, "learning_rate": 0.0002, "loss": 0.5656, "step": 141000 }, { "epoch": 7.86, "learning_rate": 0.0002, "loss": 0.5566, "step": 141100 }, { "epoch": 7.87, "learning_rate": 0.0002, "loss": 0.5513, "step": 141200 }, { "epoch": 7.87, "learning_rate": 0.0002, "loss": 0.5572, "step": 141300 }, { "epoch": 7.88, "learning_rate": 0.0002, "loss": 0.5448, "step": 141400 }, { "epoch": 7.89, "learning_rate": 0.0002, "loss": 0.5676, "step": 141500 }, { "epoch": 7.89, "learning_rate": 0.0002, "loss": 0.5592, "step": 141600 }, { "epoch": 7.9, "learning_rate": 0.0002, "loss": 0.5619, "step": 141700 }, { "epoch": 7.9, "learning_rate": 0.0002, "loss": 0.5511, "step": 141800 }, { "epoch": 7.91, "learning_rate": 0.0002, "loss": 0.557, "step": 141900 }, { "epoch": 7.91, "learning_rate": 0.0002, "loss": 0.5537, "step": 142000 }, { "epoch": 7.92, "learning_rate": 0.0002, "loss": 0.5544, "step": 142100 }, { "epoch": 7.92, "learning_rate": 0.0002, "loss": 0.5628, "step": 142200 }, { "epoch": 7.93, "learning_rate": 0.0002, "loss": 0.5547, "step": 142300 }, { "epoch": 7.94, "learning_rate": 0.0002, "loss": 0.5657, "step": 142400 }, { "epoch": 7.94, "learning_rate": 0.0002, "loss": 0.5575, "step": 142500 }, { "epoch": 7.95, "learning_rate": 0.0002, "loss": 0.5755, "step": 142600 }, { "epoch": 7.95, "learning_rate": 0.0002, "loss": 0.5491, "step": 142700 }, { "epoch": 7.96, "learning_rate": 0.0002, "loss": 0.5622, "step": 142800 }, { "epoch": 7.96, "learning_rate": 0.0002, "loss": 0.5463, "step": 142900 }, { "epoch": 7.97, "learning_rate": 0.0002, "loss": 0.5626, "step": 143000 }, { "epoch": 7.97, "learning_rate": 0.0002, "loss": 0.5753, "step": 143100 }, { "epoch": 7.98, "learning_rate": 0.0002, "loss": 0.5586, "step": 143200 }, { "epoch": 7.99, "learning_rate": 0.0002, "loss": 0.5518, "step": 143300 }, { "epoch": 7.99, "learning_rate": 0.0002, "loss": 0.5666, "step": 143400 }, { "epoch": 8.0, "learning_rate": 0.0002, "loss": 0.5732, "step": 143500 }, { "epoch": 8.0, "learning_rate": 0.0002, "loss": 0.5422, "step": 143600 }, { "epoch": 8.01, "learning_rate": 0.0002, "loss": 0.5353, "step": 143700 }, { "epoch": 8.01, "learning_rate": 0.0002, "loss": 0.5337, "step": 143800 }, { "epoch": 8.02, "learning_rate": 0.0002, "loss": 0.5239, "step": 143900 }, { "epoch": 8.02, "learning_rate": 0.0002, "loss": 0.526, "step": 144000 }, { "epoch": 8.03, "learning_rate": 0.0002, "loss": 0.5331, "step": 144100 }, { "epoch": 8.04, "learning_rate": 0.0002, "loss": 0.537, "step": 144200 }, { "epoch": 8.04, "learning_rate": 0.0002, "loss": 0.5256, "step": 144300 }, { "epoch": 8.05, "learning_rate": 0.0002, "loss": 0.5276, "step": 144400 }, { "epoch": 8.05, "learning_rate": 0.0002, "loss": 0.5404, "step": 144500 }, { "epoch": 8.06, "learning_rate": 0.0002, "loss": 0.53, "step": 144600 }, { "epoch": 8.06, "learning_rate": 0.0002, "loss": 0.5364, "step": 144700 }, { "epoch": 8.07, "learning_rate": 0.0002, "loss": 0.5441, "step": 144800 }, { "epoch": 8.07, "learning_rate": 0.0002, "loss": 0.5423, "step": 144900 }, { "epoch": 8.08, "learning_rate": 0.0002, "loss": 0.5399, "step": 145000 }, { "epoch": 8.09, "learning_rate": 0.0002, "loss": 0.5367, "step": 145100 }, { "epoch": 8.09, "learning_rate": 0.0002, "loss": 0.5313, "step": 145200 }, { "epoch": 8.1, "learning_rate": 0.0002, "loss": 0.5339, "step": 145300 }, { "epoch": 8.1, "learning_rate": 0.0002, "loss": 0.5247, "step": 145400 }, { "epoch": 8.11, "learning_rate": 0.0002, "loss": 0.545, "step": 145500 }, { "epoch": 8.11, "learning_rate": 0.0002, "loss": 0.53, "step": 145600 }, { "epoch": 8.12, "learning_rate": 0.0002, "loss": 0.5461, "step": 145700 }, { "epoch": 8.12, "learning_rate": 0.0002, "loss": 0.5278, "step": 145800 }, { "epoch": 8.13, "learning_rate": 0.0002, "loss": 0.5479, "step": 145900 }, { "epoch": 8.14, "learning_rate": 0.0002, "loss": 0.5442, "step": 146000 }, { "epoch": 8.14, "learning_rate": 0.0002, "loss": 0.5381, "step": 146100 }, { "epoch": 8.15, "learning_rate": 0.0002, "loss": 0.5421, "step": 146200 }, { "epoch": 8.15, "learning_rate": 0.0002, "loss": 0.5307, "step": 146300 }, { "epoch": 8.16, "learning_rate": 0.0002, "loss": 0.5392, "step": 146400 }, { "epoch": 8.16, "learning_rate": 0.0002, "loss": 0.5243, "step": 146500 }, { "epoch": 8.17, "learning_rate": 0.0002, "loss": 0.5124, "step": 146600 }, { "epoch": 8.17, "learning_rate": 0.0002, "loss": 0.5479, "step": 146700 }, { "epoch": 8.18, "learning_rate": 0.0002, "loss": 0.5614, "step": 146800 }, { "epoch": 8.19, "learning_rate": 0.0002, "loss": 0.5362, "step": 146900 }, { "epoch": 8.19, "learning_rate": 0.0002, "loss": 0.5495, "step": 147000 }, { "epoch": 8.2, "learning_rate": 0.0002, "loss": 0.5425, "step": 147100 }, { "epoch": 8.2, "learning_rate": 0.0002, "loss": 0.541, "step": 147200 }, { "epoch": 8.21, "learning_rate": 0.0002, "loss": 0.5362, "step": 147300 }, { "epoch": 8.21, "learning_rate": 0.0002, "loss": 0.542, "step": 147400 }, { "epoch": 8.22, "learning_rate": 0.0002, "loss": 0.5384, "step": 147500 }, { "epoch": 8.23, "learning_rate": 0.0002, "loss": 0.5356, "step": 147600 }, { "epoch": 8.23, "learning_rate": 0.0002, "loss": 0.5508, "step": 147700 }, { "epoch": 8.24, "learning_rate": 0.0002, "loss": 0.528, "step": 147800 }, { "epoch": 8.24, "learning_rate": 0.0002, "loss": 0.5416, "step": 147900 }, { "epoch": 8.25, "learning_rate": 0.0002, "loss": 0.5389, "step": 148000 }, { "epoch": 8.25, "learning_rate": 0.0002, "loss": 0.5429, "step": 148100 }, { "epoch": 8.26, "learning_rate": 0.0002, "loss": 0.5313, "step": 148200 }, { "epoch": 8.26, "learning_rate": 0.0002, "loss": 0.5464, "step": 148300 }, { "epoch": 8.27, "learning_rate": 0.0002, "loss": 0.5389, "step": 148400 }, { "epoch": 8.28, "learning_rate": 0.0002, "loss": 0.541, "step": 148500 }, { "epoch": 8.28, "learning_rate": 0.0002, "loss": 0.5405, "step": 148600 }, { "epoch": 8.29, "learning_rate": 0.0002, "loss": 0.5413, "step": 148700 }, { "epoch": 8.29, "learning_rate": 0.0002, "loss": 0.5379, "step": 148800 }, { "epoch": 8.3, "learning_rate": 0.0002, "loss": 0.5388, "step": 148900 }, { "epoch": 8.3, "learning_rate": 0.0002, "loss": 0.5401, "step": 149000 }, { "epoch": 8.31, "learning_rate": 0.0002, "loss": 0.5521, "step": 149100 }, { "epoch": 8.31, "learning_rate": 0.0002, "loss": 0.5362, "step": 149200 }, { "epoch": 8.32, "learning_rate": 0.0002, "loss": 0.5296, "step": 149300 }, { "epoch": 8.33, "learning_rate": 0.0002, "loss": 0.5261, "step": 149400 }, { "epoch": 8.33, "learning_rate": 0.0002, "loss": 0.5475, "step": 149500 }, { "epoch": 8.34, "learning_rate": 0.0002, "loss": 0.5374, "step": 149600 }, { "epoch": 8.34, "learning_rate": 0.0002, "loss": 0.5505, "step": 149700 }, { "epoch": 8.35, "learning_rate": 0.0002, "loss": 0.5434, "step": 149800 }, { "epoch": 8.35, "learning_rate": 0.0002, "loss": 0.5448, "step": 149900 }, { "epoch": 8.36, "learning_rate": 0.0002, "loss": 0.5518, "step": 150000 }, { "epoch": 8.36, "eval_gen_len": 18.999251944943147, "eval_loss": 2.71974515914917, "eval_rouge1": 25.2364, "eval_rouge2": 12.3372, "eval_rougeL": 20.8569, "eval_rougeLsum": 23.8285, "eval_runtime": 474.6128, "eval_samples_per_second": 28.166, "eval_steps_per_second": 1.761, "step": 150000 }, { "epoch": 8.36, "learning_rate": 0.0002, "loss": 0.5358, "step": 150100 }, { "epoch": 8.37, "learning_rate": 0.0002, "loss": 0.5428, "step": 150200 }, { "epoch": 8.38, "learning_rate": 0.0002, "loss": 0.5452, "step": 150300 }, { "epoch": 8.38, "learning_rate": 0.0002, "loss": 0.5507, "step": 150400 }, { "epoch": 8.39, "learning_rate": 0.0002, "loss": 0.5443, "step": 150500 }, { "epoch": 8.39, "learning_rate": 0.0002, "loss": 0.533, "step": 150600 }, { "epoch": 8.4, "learning_rate": 0.0002, "loss": 0.5419, "step": 150700 }, { "epoch": 8.4, "learning_rate": 0.0002, "loss": 0.5339, "step": 150800 }, { "epoch": 8.41, "learning_rate": 0.0002, "loss": 0.5349, "step": 150900 }, { "epoch": 8.41, "learning_rate": 0.0002, "loss": 0.5347, "step": 151000 }, { "epoch": 8.42, "learning_rate": 0.0002, "loss": 0.5357, "step": 151100 }, { "epoch": 8.43, "learning_rate": 0.0002, "loss": 0.5466, "step": 151200 }, { "epoch": 8.43, "learning_rate": 0.0002, "loss": 0.5467, "step": 151300 }, { "epoch": 8.44, "learning_rate": 0.0002, "loss": 0.5596, "step": 151400 }, { "epoch": 8.44, "learning_rate": 0.0002, "loss": 0.552, "step": 151500 }, { "epoch": 8.45, "learning_rate": 0.0002, "loss": 0.5514, "step": 151600 }, { "epoch": 8.45, "learning_rate": 0.0002, "loss": 0.5506, "step": 151700 }, { "epoch": 8.46, "learning_rate": 0.0002, "loss": 0.5414, "step": 151800 }, { "epoch": 8.46, "learning_rate": 0.0002, "loss": 0.5394, "step": 151900 }, { "epoch": 8.47, "learning_rate": 0.0002, "loss": 0.548, "step": 152000 }, { "epoch": 8.48, "learning_rate": 0.0002, "loss": 0.5334, "step": 152100 }, { "epoch": 8.48, "learning_rate": 0.0002, "loss": 0.5516, "step": 152200 }, { "epoch": 8.49, "learning_rate": 0.0002, "loss": 0.5587, "step": 152300 }, { "epoch": 8.49, "learning_rate": 0.0002, "loss": 0.5423, "step": 152400 }, { "epoch": 8.5, "learning_rate": 0.0002, "loss": 0.5519, "step": 152500 }, { "epoch": 8.5, "learning_rate": 0.0002, "loss": 0.5395, "step": 152600 }, { "epoch": 8.51, "learning_rate": 0.0002, "loss": 0.5368, "step": 152700 }, { "epoch": 8.51, "learning_rate": 0.0002, "loss": 0.554, "step": 152800 }, { "epoch": 8.52, "learning_rate": 0.0002, "loss": 0.5437, "step": 152900 }, { "epoch": 8.53, "learning_rate": 0.0002, "loss": 0.5485, "step": 153000 }, { "epoch": 8.53, "learning_rate": 0.0002, "loss": 0.5463, "step": 153100 }, { "epoch": 8.54, "learning_rate": 0.0002, "loss": 0.5429, "step": 153200 }, { "epoch": 8.54, "learning_rate": 0.0002, "loss": 0.5436, "step": 153300 }, { "epoch": 8.55, "learning_rate": 0.0002, "loss": 0.5445, "step": 153400 }, { "epoch": 8.55, "learning_rate": 0.0002, "loss": 0.5458, "step": 153500 }, { "epoch": 8.56, "learning_rate": 0.0002, "loss": 0.5417, "step": 153600 }, { "epoch": 8.57, "learning_rate": 0.0002, "loss": 0.5412, "step": 153700 }, { "epoch": 8.57, "learning_rate": 0.0002, "loss": 0.54, "step": 153800 }, { "epoch": 8.58, "learning_rate": 0.0002, "loss": 0.5561, "step": 153900 }, { "epoch": 8.58, "learning_rate": 0.0002, "loss": 0.5431, "step": 154000 }, { "epoch": 8.59, "learning_rate": 0.0002, "loss": 0.5445, "step": 154100 }, { "epoch": 8.59, "learning_rate": 0.0002, "loss": 0.5549, "step": 154200 }, { "epoch": 8.6, "learning_rate": 0.0002, "loss": 0.5478, "step": 154300 }, { "epoch": 8.6, "learning_rate": 0.0002, "loss": 0.5493, "step": 154400 }, { "epoch": 8.61, "learning_rate": 0.0002, "loss": 0.5445, "step": 154500 }, { "epoch": 8.62, "learning_rate": 0.0002, "loss": 0.55, "step": 154600 }, { "epoch": 8.62, "learning_rate": 0.0002, "loss": 0.5478, "step": 154700 }, { "epoch": 8.63, "learning_rate": 0.0002, "loss": 0.5339, "step": 154800 }, { "epoch": 8.63, "learning_rate": 0.0002, "loss": 0.5501, "step": 154900 }, { "epoch": 8.64, "learning_rate": 0.0002, "loss": 0.5357, "step": 155000 }, { "epoch": 8.64, "learning_rate": 0.0002, "loss": 0.5444, "step": 155100 }, { "epoch": 8.65, "learning_rate": 0.0002, "loss": 0.5563, "step": 155200 }, { "epoch": 8.65, "learning_rate": 0.0002, "loss": 0.5588, "step": 155300 }, { "epoch": 8.66, "learning_rate": 0.0002, "loss": 0.5416, "step": 155400 }, { "epoch": 8.67, "learning_rate": 0.0002, "loss": 0.5581, "step": 155500 }, { "epoch": 8.67, "learning_rate": 0.0002, "loss": 0.5446, "step": 155600 }, { "epoch": 8.68, "learning_rate": 0.0002, "loss": 0.5425, "step": 155700 }, { "epoch": 8.68, "learning_rate": 0.0002, "loss": 0.551, "step": 155800 }, { "epoch": 8.69, "learning_rate": 0.0002, "loss": 0.5342, "step": 155900 }, { "epoch": 8.69, "learning_rate": 0.0002, "loss": 0.5547, "step": 156000 }, { "epoch": 8.7, "learning_rate": 0.0002, "loss": 0.555, "step": 156100 }, { "epoch": 8.7, "learning_rate": 0.0002, "loss": 0.5554, "step": 156200 }, { "epoch": 8.71, "learning_rate": 0.0002, "loss": 0.5408, "step": 156300 }, { "epoch": 8.72, "learning_rate": 0.0002, "loss": 0.5596, "step": 156400 }, { "epoch": 8.72, "learning_rate": 0.0002, "loss": 0.555, "step": 156500 }, { "epoch": 8.73, "learning_rate": 0.0002, "loss": 0.555, "step": 156600 }, { "epoch": 8.73, "learning_rate": 0.0002, "loss": 0.54, "step": 156700 }, { "epoch": 8.74, "learning_rate": 0.0002, "loss": 0.5525, "step": 156800 }, { "epoch": 8.74, "learning_rate": 0.0002, "loss": 0.548, "step": 156900 }, { "epoch": 8.75, "learning_rate": 0.0002, "loss": 0.5516, "step": 157000 }, { "epoch": 8.75, "learning_rate": 0.0002, "loss": 0.5529, "step": 157100 }, { "epoch": 8.76, "learning_rate": 0.0002, "loss": 0.5571, "step": 157200 }, { "epoch": 8.77, "learning_rate": 0.0002, "loss": 0.5417, "step": 157300 }, { "epoch": 8.77, "learning_rate": 0.0002, "loss": 0.5547, "step": 157400 }, { "epoch": 8.78, "learning_rate": 0.0002, "loss": 0.5384, "step": 157500 }, { "epoch": 8.78, "learning_rate": 0.0002, "loss": 0.5475, "step": 157600 }, { "epoch": 8.79, "learning_rate": 0.0002, "loss": 0.5433, "step": 157700 }, { "epoch": 8.79, "learning_rate": 0.0002, "loss": 0.5399, "step": 157800 }, { "epoch": 8.8, "learning_rate": 0.0002, "loss": 0.5513, "step": 157900 }, { "epoch": 8.8, "learning_rate": 0.0002, "loss": 0.5509, "step": 158000 }, { "epoch": 8.81, "learning_rate": 0.0002, "loss": 0.5535, "step": 158100 }, { "epoch": 8.82, "learning_rate": 0.0002, "loss": 0.5473, "step": 158200 }, { "epoch": 8.82, "learning_rate": 0.0002, "loss": 0.5421, "step": 158300 }, { "epoch": 8.83, "learning_rate": 0.0002, "loss": 0.5507, "step": 158400 }, { "epoch": 8.83, "learning_rate": 0.0002, "loss": 0.5534, "step": 158500 }, { "epoch": 8.84, "learning_rate": 0.0002, "loss": 0.5578, "step": 158600 }, { "epoch": 8.84, "learning_rate": 0.0002, "loss": 0.5521, "step": 158700 }, { "epoch": 8.85, "learning_rate": 0.0002, "loss": 0.5543, "step": 158800 }, { "epoch": 8.85, "learning_rate": 0.0002, "loss": 0.5527, "step": 158900 }, { "epoch": 8.86, "learning_rate": 0.0002, "loss": 0.564, "step": 159000 }, { "epoch": 8.87, "learning_rate": 0.0002, "loss": 0.5587, "step": 159100 }, { "epoch": 8.87, "learning_rate": 0.0002, "loss": 0.5576, "step": 159200 }, { "epoch": 8.88, "learning_rate": 0.0002, "loss": 0.5505, "step": 159300 }, { "epoch": 8.88, "learning_rate": 0.0002, "loss": 0.5528, "step": 159400 }, { "epoch": 8.89, "learning_rate": 0.0002, "loss": 0.5384, "step": 159500 }, { "epoch": 8.89, "learning_rate": 0.0002, "loss": 0.5531, "step": 159600 }, { "epoch": 8.9, "learning_rate": 0.0002, "loss": 0.5595, "step": 159700 }, { "epoch": 8.9, "learning_rate": 0.0002, "loss": 0.5588, "step": 159800 }, { "epoch": 8.91, "learning_rate": 0.0002, "loss": 0.5555, "step": 159900 }, { "epoch": 8.92, "learning_rate": 0.0002, "loss": 0.5573, "step": 160000 }, { "epoch": 8.92, "eval_gen_len": 18.99955116696589, "eval_loss": 2.7115721702575684, "eval_rouge1": 25.334, "eval_rouge2": 12.363, "eval_rougeL": 20.8863, "eval_rougeLsum": 23.93, "eval_runtime": 474.6063, "eval_samples_per_second": 28.167, "eval_steps_per_second": 1.761, "step": 160000 }, { "epoch": 8.92, "learning_rate": 0.0002, "loss": 0.5481, "step": 160100 }, { "epoch": 8.93, "learning_rate": 0.0002, "loss": 0.5629, "step": 160200 }, { "epoch": 8.93, "learning_rate": 0.0002, "loss": 0.5519, "step": 160300 }, { "epoch": 8.94, "learning_rate": 0.0002, "loss": 0.5581, "step": 160400 }, { "epoch": 8.94, "learning_rate": 0.0002, "loss": 0.5452, "step": 160500 }, { "epoch": 8.95, "learning_rate": 0.0002, "loss": 0.55, "step": 160600 }, { "epoch": 8.96, "learning_rate": 0.0002, "loss": 0.5602, "step": 160700 }, { "epoch": 8.96, "learning_rate": 0.0002, "loss": 0.5394, "step": 160800 }, { "epoch": 8.97, "learning_rate": 0.0002, "loss": 0.5509, "step": 160900 }, { "epoch": 8.97, "learning_rate": 0.0002, "loss": 0.5535, "step": 161000 }, { "epoch": 8.98, "learning_rate": 0.0002, "loss": 0.5718, "step": 161100 }, { "epoch": 8.98, "learning_rate": 0.0002, "loss": 0.5674, "step": 161200 }, { "epoch": 8.99, "learning_rate": 0.0002, "loss": 0.547, "step": 161300 }, { "epoch": 8.99, "learning_rate": 0.0002, "loss": 0.5581, "step": 161400 }, { "epoch": 9.0, "learning_rate": 0.0002, "loss": 0.5519, "step": 161500 }, { "epoch": 9.01, "learning_rate": 0.0002, "loss": 0.5362, "step": 161600 }, { "epoch": 9.01, "learning_rate": 0.0002, "loss": 0.5365, "step": 161700 }, { "epoch": 9.02, "learning_rate": 0.0002, "loss": 0.5084, "step": 161800 }, { "epoch": 9.02, "learning_rate": 0.0002, "loss": 0.5278, "step": 161900 }, { "epoch": 9.03, "learning_rate": 0.0002, "loss": 0.5293, "step": 162000 }, { "epoch": 9.03, "learning_rate": 0.0002, "loss": 0.5331, "step": 162100 }, { "epoch": 9.04, "learning_rate": 0.0002, "loss": 0.5307, "step": 162200 }, { "epoch": 9.04, "learning_rate": 0.0002, "loss": 0.5459, "step": 162300 }, { "epoch": 9.05, "learning_rate": 0.0002, "loss": 0.5259, "step": 162400 }, { "epoch": 9.06, "learning_rate": 0.0002, "loss": 0.532, "step": 162500 }, { "epoch": 9.06, "learning_rate": 0.0002, "loss": 0.5166, "step": 162600 }, { "epoch": 9.07, "learning_rate": 0.0002, "loss": 0.5336, "step": 162700 }, { "epoch": 9.07, "learning_rate": 0.0002, "loss": 0.5253, "step": 162800 }, { "epoch": 9.08, "learning_rate": 0.0002, "loss": 0.5324, "step": 162900 }, { "epoch": 9.08, "learning_rate": 0.0002, "loss": 0.5249, "step": 163000 }, { "epoch": 9.09, "learning_rate": 0.0002, "loss": 0.5111, "step": 163100 }, { "epoch": 9.09, "learning_rate": 0.0002, "loss": 0.5426, "step": 163200 }, { "epoch": 9.1, "learning_rate": 0.0002, "loss": 0.531, "step": 163300 }, { "epoch": 9.11, "learning_rate": 0.0002, "loss": 0.5279, "step": 163400 }, { "epoch": 9.11, "learning_rate": 0.0002, "loss": 0.5135, "step": 163500 }, { "epoch": 9.12, "learning_rate": 0.0002, "loss": 0.5335, "step": 163600 }, { "epoch": 9.12, "learning_rate": 0.0002, "loss": 0.5267, "step": 163700 }, { "epoch": 9.13, "learning_rate": 0.0002, "loss": 0.5336, "step": 163800 }, { "epoch": 9.13, "learning_rate": 0.0002, "loss": 0.5383, "step": 163900 }, { "epoch": 9.14, "learning_rate": 0.0002, "loss": 0.5256, "step": 164000 }, { "epoch": 9.14, "learning_rate": 0.0002, "loss": 0.5333, "step": 164100 }, { "epoch": 9.15, "learning_rate": 0.0002, "loss": 0.5335, "step": 164200 }, { "epoch": 9.16, "learning_rate": 0.0002, "loss": 0.5282, "step": 164300 }, { "epoch": 9.16, "learning_rate": 0.0002, "loss": 0.535, "step": 164400 }, { "epoch": 9.17, "learning_rate": 0.0002, "loss": 0.5298, "step": 164500 }, { "epoch": 9.17, "learning_rate": 0.0002, "loss": 0.5353, "step": 164600 }, { "epoch": 9.18, "learning_rate": 0.0002, "loss": 0.5209, "step": 164700 }, { "epoch": 9.18, "learning_rate": 0.0002, "loss": 0.5476, "step": 164800 }, { "epoch": 9.19, "learning_rate": 0.0002, "loss": 0.5329, "step": 164900 }, { "epoch": 9.19, "learning_rate": 0.0002, "loss": 0.5145, "step": 165000 }, { "epoch": 9.2, "learning_rate": 0.0002, "loss": 0.5455, "step": 165100 }, { "epoch": 9.21, "learning_rate": 0.0002, "loss": 0.5414, "step": 165200 }, { "epoch": 9.21, "learning_rate": 0.0002, "loss": 0.5301, "step": 165300 }, { "epoch": 9.22, "learning_rate": 0.0002, "loss": 0.5325, "step": 165400 }, { "epoch": 9.22, "learning_rate": 0.0002, "loss": 0.5183, "step": 165500 }, { "epoch": 9.23, "learning_rate": 0.0002, "loss": 0.5309, "step": 165600 }, { "epoch": 9.23, "learning_rate": 0.0002, "loss": 0.5325, "step": 165700 }, { "epoch": 9.24, "learning_rate": 0.0002, "loss": 0.5233, "step": 165800 }, { "epoch": 9.24, "learning_rate": 0.0002, "loss": 0.5404, "step": 165900 }, { "epoch": 9.25, "learning_rate": 0.0002, "loss": 0.5505, "step": 166000 }, { "epoch": 9.26, "learning_rate": 0.0002, "loss": 0.5324, "step": 166100 }, { "epoch": 9.26, "learning_rate": 0.0002, "loss": 0.5407, "step": 166200 }, { "epoch": 9.27, "learning_rate": 0.0002, "loss": 0.525, "step": 166300 }, { "epoch": 9.27, "learning_rate": 0.0002, "loss": 0.5371, "step": 166400 }, { "epoch": 9.28, "learning_rate": 0.0002, "loss": 0.5302, "step": 166500 }, { "epoch": 9.28, "learning_rate": 0.0002, "loss": 0.5252, "step": 166600 }, { "epoch": 9.29, "learning_rate": 0.0002, "loss": 0.5418, "step": 166700 }, { "epoch": 9.3, "learning_rate": 0.0002, "loss": 0.5428, "step": 166800 }, { "epoch": 9.3, "learning_rate": 0.0002, "loss": 0.5319, "step": 166900 }, { "epoch": 9.31, "learning_rate": 0.0002, "loss": 0.5379, "step": 167000 }, { "epoch": 9.31, "learning_rate": 0.0002, "loss": 0.5344, "step": 167100 }, { "epoch": 9.32, "learning_rate": 0.0002, "loss": 0.5416, "step": 167200 }, { "epoch": 9.32, "learning_rate": 0.0002, "loss": 0.5404, "step": 167300 }, { "epoch": 9.33, "learning_rate": 0.0002, "loss": 0.5353, "step": 167400 }, { "epoch": 9.33, "learning_rate": 0.0002, "loss": 0.5403, "step": 167500 }, { "epoch": 9.34, "learning_rate": 0.0002, "loss": 0.5418, "step": 167600 }, { "epoch": 9.35, "learning_rate": 0.0002, "loss": 0.535, "step": 167700 }, { "epoch": 9.35, "learning_rate": 0.0002, "loss": 0.5351, "step": 167800 }, { "epoch": 9.36, "learning_rate": 0.0002, "loss": 0.5278, "step": 167900 }, { "epoch": 9.36, "learning_rate": 0.0002, "loss": 0.5318, "step": 168000 }, { "epoch": 9.37, "learning_rate": 0.0002, "loss": 0.5521, "step": 168100 }, { "epoch": 9.37, "learning_rate": 0.0002, "loss": 0.5358, "step": 168200 }, { "epoch": 9.38, "learning_rate": 0.0002, "loss": 0.5364, "step": 168300 }, { "epoch": 9.38, "learning_rate": 0.0002, "loss": 0.5399, "step": 168400 }, { "epoch": 9.39, "learning_rate": 0.0002, "loss": 0.558, "step": 168500 }, { "epoch": 9.4, "learning_rate": 0.0002, "loss": 0.5534, "step": 168600 }, { "epoch": 9.4, "learning_rate": 0.0002, "loss": 0.526, "step": 168700 }, { "epoch": 9.41, "learning_rate": 0.0002, "loss": 0.5424, "step": 168800 }, { "epoch": 9.41, "learning_rate": 0.0002, "loss": 0.5253, "step": 168900 }, { "epoch": 9.42, "learning_rate": 0.0002, "loss": 0.5365, "step": 169000 }, { "epoch": 9.42, "learning_rate": 0.0002, "loss": 0.5369, "step": 169100 }, { "epoch": 9.43, "learning_rate": 0.0002, "loss": 0.5361, "step": 169200 }, { "epoch": 9.43, "learning_rate": 0.0002, "loss": 0.5415, "step": 169300 }, { "epoch": 9.44, "learning_rate": 0.0002, "loss": 0.5368, "step": 169400 }, { "epoch": 9.45, "learning_rate": 0.0002, "loss": 0.5502, "step": 169500 }, { "epoch": 9.45, "learning_rate": 0.0002, "loss": 0.5378, "step": 169600 }, { "epoch": 9.46, "learning_rate": 0.0002, "loss": 0.524, "step": 169700 }, { "epoch": 9.46, "learning_rate": 0.0002, "loss": 0.5337, "step": 169800 }, { "epoch": 9.47, "learning_rate": 0.0002, "loss": 0.5375, "step": 169900 }, { "epoch": 9.47, "learning_rate": 0.0002, "loss": 0.5336, "step": 170000 }, { "epoch": 9.47, "eval_gen_len": 18.99970077797726, "eval_loss": 2.7171103954315186, "eval_rouge1": 25.2508, "eval_rouge2": 12.3825, "eval_rougeL": 20.8692, "eval_rougeLsum": 23.8414, "eval_runtime": 472.9738, "eval_samples_per_second": 28.264, "eval_steps_per_second": 1.768, "step": 170000 }, { "epoch": 9.48, "learning_rate": 0.0002, "loss": 0.547, "step": 170100 }, { "epoch": 9.48, "learning_rate": 0.0002, "loss": 0.5296, "step": 170200 }, { "epoch": 9.49, "learning_rate": 0.0002, "loss": 0.5416, "step": 170300 }, { "epoch": 9.5, "learning_rate": 0.0002, "loss": 0.5479, "step": 170400 }, { "epoch": 9.5, "learning_rate": 0.0002, "loss": 0.5469, "step": 170500 }, { "epoch": 9.51, "learning_rate": 0.0002, "loss": 0.534, "step": 170600 }, { "epoch": 9.51, "learning_rate": 0.0002, "loss": 0.5279, "step": 170700 }, { "epoch": 9.52, "learning_rate": 0.0002, "loss": 0.5364, "step": 170800 }, { "epoch": 9.52, "learning_rate": 0.0002, "loss": 0.5243, "step": 170900 }, { "epoch": 9.53, "learning_rate": 0.0002, "loss": 0.52, "step": 171000 }, { "epoch": 9.53, "learning_rate": 0.0002, "loss": 0.5427, "step": 171100 }, { "epoch": 9.54, "learning_rate": 0.0002, "loss": 0.5384, "step": 171200 }, { "epoch": 9.55, "learning_rate": 0.0002, "loss": 0.5301, "step": 171300 }, { "epoch": 9.55, "learning_rate": 0.0002, "loss": 0.5386, "step": 171400 }, { "epoch": 9.56, "learning_rate": 0.0002, "loss": 0.5555, "step": 171500 }, { "epoch": 9.56, "learning_rate": 0.0002, "loss": 0.5462, "step": 171600 }, { "epoch": 9.57, "learning_rate": 0.0002, "loss": 0.5394, "step": 171700 }, { "epoch": 9.57, "learning_rate": 0.0002, "loss": 0.5438, "step": 171800 }, { "epoch": 9.58, "learning_rate": 0.0002, "loss": 0.5386, "step": 171900 }, { "epoch": 9.58, "learning_rate": 0.0002, "loss": 0.5469, "step": 172000 }, { "epoch": 9.59, "learning_rate": 0.0002, "loss": 0.5511, "step": 172100 }, { "epoch": 9.6, "learning_rate": 0.0002, "loss": 0.5418, "step": 172200 }, { "epoch": 9.6, "learning_rate": 0.0002, "loss": 0.5376, "step": 172300 }, { "epoch": 9.61, "learning_rate": 0.0002, "loss": 0.5583, "step": 172400 }, { "epoch": 9.61, "learning_rate": 0.0002, "loss": 0.5459, "step": 172500 }, { "epoch": 9.62, "learning_rate": 0.0002, "loss": 0.5393, "step": 172600 }, { "epoch": 9.62, "learning_rate": 0.0002, "loss": 0.5434, "step": 172700 }, { "epoch": 9.63, "learning_rate": 0.0002, "loss": 0.5472, "step": 172800 }, { "epoch": 9.63, "learning_rate": 0.0002, "loss": 0.5535, "step": 172900 }, { "epoch": 9.64, "learning_rate": 0.0002, "loss": 0.5424, "step": 173000 }, { "epoch": 9.65, "learning_rate": 0.0002, "loss": 0.5458, "step": 173100 }, { "epoch": 9.65, "learning_rate": 0.0002, "loss": 0.5552, "step": 173200 }, { "epoch": 9.66, "learning_rate": 0.0002, "loss": 0.5353, "step": 173300 }, { "epoch": 9.66, "learning_rate": 0.0002, "loss": 0.5362, "step": 173400 }, { "epoch": 9.67, "learning_rate": 0.0002, "loss": 0.5329, "step": 173500 }, { "epoch": 9.67, "learning_rate": 0.0002, "loss": 0.544, "step": 173600 }, { "epoch": 9.68, "learning_rate": 0.0002, "loss": 0.5365, "step": 173700 }, { "epoch": 9.69, "learning_rate": 0.0002, "loss": 0.5506, "step": 173800 }, { "epoch": 9.69, "learning_rate": 0.0002, "loss": 0.5403, "step": 173900 }, { "epoch": 9.7, "learning_rate": 0.0002, "loss": 0.5453, "step": 174000 }, { "epoch": 9.7, "learning_rate": 0.0002, "loss": 0.5467, "step": 174100 }, { "epoch": 9.71, "learning_rate": 0.0002, "loss": 0.5368, "step": 174200 }, { "epoch": 9.71, "learning_rate": 0.0002, "loss": 0.5376, "step": 174300 }, { "epoch": 9.72, "learning_rate": 0.0002, "loss": 0.5534, "step": 174400 }, { "epoch": 9.72, "learning_rate": 0.0002, "loss": 0.5407, "step": 174500 }, { "epoch": 9.73, "learning_rate": 0.0002, "loss": 0.5508, "step": 174600 }, { "epoch": 9.74, "learning_rate": 0.0002, "loss": 0.5367, "step": 174700 }, { "epoch": 9.74, "learning_rate": 0.0002, "loss": 0.5506, "step": 174800 }, { "epoch": 9.75, "learning_rate": 0.0002, "loss": 0.5397, "step": 174900 }, { "epoch": 9.75, "learning_rate": 0.0002, "loss": 0.5403, "step": 175000 }, { "epoch": 9.76, "learning_rate": 0.0002, "loss": 0.5395, "step": 175100 }, { "epoch": 9.76, "learning_rate": 0.0002, "loss": 0.5429, "step": 175200 }, { "epoch": 9.77, "learning_rate": 0.0002, "loss": 0.554, "step": 175300 }, { "epoch": 9.77, "learning_rate": 0.0002, "loss": 0.5445, "step": 175400 }, { "epoch": 9.78, "learning_rate": 0.0002, "loss": 0.5426, "step": 175500 }, { "epoch": 9.79, "learning_rate": 0.0002, "loss": 0.5454, "step": 175600 }, { "epoch": 9.79, "learning_rate": 0.0002, "loss": 0.5651, "step": 175700 }, { "epoch": 9.8, "learning_rate": 0.0002, "loss": 0.5494, "step": 175800 }, { "epoch": 9.8, "learning_rate": 0.0002, "loss": 0.5408, "step": 175900 }, { "epoch": 9.81, "learning_rate": 0.0002, "loss": 0.5528, "step": 176000 }, { "epoch": 9.81, "learning_rate": 0.0002, "loss": 0.5511, "step": 176100 }, { "epoch": 9.82, "learning_rate": 0.0002, "loss": 0.5406, "step": 176200 }, { "epoch": 9.82, "learning_rate": 0.0002, "loss": 0.5588, "step": 176300 }, { "epoch": 9.83, "learning_rate": 0.0002, "loss": 0.56, "step": 176400 }, { "epoch": 9.84, "learning_rate": 0.0002, "loss": 0.5472, "step": 176500 }, { "epoch": 9.84, "learning_rate": 0.0002, "loss": 0.5445, "step": 176600 }, { "epoch": 9.85, "learning_rate": 0.0002, "loss": 0.5527, "step": 176700 }, { "epoch": 9.85, "learning_rate": 0.0002, "loss": 0.5385, "step": 176800 }, { "epoch": 9.86, "learning_rate": 0.0002, "loss": 0.5327, "step": 176900 }, { "epoch": 9.86, "learning_rate": 0.0002, "loss": 0.55, "step": 177000 }, { "epoch": 9.87, "learning_rate": 0.0002, "loss": 0.5555, "step": 177100 }, { "epoch": 9.87, "learning_rate": 0.0002, "loss": 0.5413, "step": 177200 }, { "epoch": 9.88, "learning_rate": 0.0002, "loss": 0.5533, "step": 177300 }, { "epoch": 9.89, "learning_rate": 0.0002, "loss": 0.5504, "step": 177400 }, { "epoch": 9.89, "learning_rate": 0.0002, "loss": 0.559, "step": 177500 }, { "epoch": 9.9, "learning_rate": 0.0002, "loss": 0.5433, "step": 177600 }, { "epoch": 9.9, "learning_rate": 0.0002, "loss": 0.5495, "step": 177700 }, { "epoch": 9.91, "learning_rate": 0.0002, "loss": 0.5308, "step": 177800 }, { "epoch": 9.91, "learning_rate": 0.0002, "loss": 0.5479, "step": 177900 }, { "epoch": 9.92, "learning_rate": 0.0002, "loss": 0.5562, "step": 178000 }, { "epoch": 9.92, "learning_rate": 0.0002, "loss": 0.5418, "step": 178100 }, { "epoch": 9.93, "learning_rate": 0.0002, "loss": 0.5496, "step": 178200 }, { "epoch": 9.94, "learning_rate": 0.0002, "loss": 0.5532, "step": 178300 }, { "epoch": 9.94, "learning_rate": 0.0002, "loss": 0.54, "step": 178400 }, { "epoch": 9.95, "learning_rate": 0.0002, "loss": 0.549, "step": 178500 }, { "epoch": 9.95, "learning_rate": 0.0002, "loss": 0.5341, "step": 178600 }, { "epoch": 9.96, "learning_rate": 0.0002, "loss": 0.5487, "step": 178700 }, { "epoch": 9.96, "learning_rate": 0.0002, "loss": 0.5403, "step": 178800 }, { "epoch": 9.97, "learning_rate": 0.0002, "loss": 0.5369, "step": 178900 }, { "epoch": 9.97, "learning_rate": 0.0002, "loss": 0.5506, "step": 179000 }, { "epoch": 9.98, "learning_rate": 0.0002, "loss": 0.5423, "step": 179100 }, { "epoch": 9.99, "learning_rate": 0.0002, "loss": 0.5442, "step": 179200 }, { "epoch": 9.99, "learning_rate": 0.0002, "loss": 0.5455, "step": 179300 }, { "epoch": 10.0, "learning_rate": 0.0002, "loss": 0.565, "step": 179400 }, { "epoch": 10.0, "learning_rate": 0.0002, "loss": 0.5376, "step": 179500 }, { "epoch": 10.01, "learning_rate": 0.0002, "loss": 0.5155, "step": 179600 }, { "epoch": 10.01, "learning_rate": 0.0002, "loss": 0.5199, "step": 179700 }, { "epoch": 10.02, "learning_rate": 0.0002, "loss": 0.5276, "step": 179800 }, { "epoch": 10.03, "learning_rate": 0.0002, "loss": 0.5285, "step": 179900 }, { "epoch": 10.03, "learning_rate": 0.0002, "loss": 0.513, "step": 180000 }, { "epoch": 10.03, "eval_gen_len": 18.99970077797726, "eval_loss": 2.771777391433716, "eval_rouge1": 25.3651, "eval_rouge2": 12.4707, "eval_rougeL": 20.9642, "eval_rougeLsum": 23.9479, "eval_runtime": 473.669, "eval_samples_per_second": 28.222, "eval_steps_per_second": 1.765, "step": 180000 }, { "epoch": 10.04, "learning_rate": 0.0002, "loss": 0.531, "step": 180100 }, { "epoch": 10.04, "learning_rate": 0.0002, "loss": 0.5042, "step": 180200 }, { "epoch": 10.05, "learning_rate": 0.0002, "loss": 0.5018, "step": 180300 }, { "epoch": 10.05, "learning_rate": 0.0002, "loss": 0.5239, "step": 180400 }, { "epoch": 10.06, "learning_rate": 0.0002, "loss": 0.5154, "step": 180500 }, { "epoch": 10.06, "learning_rate": 0.0002, "loss": 0.5311, "step": 180600 }, { "epoch": 10.07, "learning_rate": 0.0002, "loss": 0.5206, "step": 180700 }, { "epoch": 10.08, "learning_rate": 0.0002, "loss": 0.5234, "step": 180800 }, { "epoch": 10.08, "learning_rate": 0.0002, "loss": 0.5209, "step": 180900 }, { "epoch": 10.09, "learning_rate": 0.0002, "loss": 0.533, "step": 181000 }, { "epoch": 10.09, "learning_rate": 0.0002, "loss": 0.5323, "step": 181100 }, { "epoch": 10.1, "learning_rate": 0.0002, "loss": 0.5174, "step": 181200 }, { "epoch": 10.1, "learning_rate": 0.0002, "loss": 0.5279, "step": 181300 }, { "epoch": 10.11, "learning_rate": 0.0002, "loss": 0.5141, "step": 181400 }, { "epoch": 10.11, "learning_rate": 0.0002, "loss": 0.533, "step": 181500 }, { "epoch": 10.12, "learning_rate": 0.0002, "loss": 0.5359, "step": 181600 }, { "epoch": 10.13, "learning_rate": 0.0002, "loss": 0.5312, "step": 181700 }, { "epoch": 10.13, "learning_rate": 0.0002, "loss": 0.51, "step": 181800 }, { "epoch": 10.14, "learning_rate": 0.0002, "loss": 0.5172, "step": 181900 }, { "epoch": 10.14, "learning_rate": 0.0002, "loss": 0.5128, "step": 182000 }, { "epoch": 10.15, "learning_rate": 0.0002, "loss": 0.5275, "step": 182100 }, { "epoch": 10.15, "learning_rate": 0.0002, "loss": 0.5317, "step": 182200 }, { "epoch": 10.16, "learning_rate": 0.0002, "loss": 0.5378, "step": 182300 }, { "epoch": 10.16, "learning_rate": 0.0002, "loss": 0.5298, "step": 182400 }, { "epoch": 10.17, "learning_rate": 0.0002, "loss": 0.5181, "step": 182500 }, { "epoch": 10.18, "learning_rate": 0.0002, "loss": 0.5086, "step": 182600 }, { "epoch": 10.18, "learning_rate": 0.0002, "loss": 0.5257, "step": 182700 }, { "epoch": 10.19, "learning_rate": 0.0002, "loss": 0.5237, "step": 182800 }, { "epoch": 10.19, "learning_rate": 0.0002, "loss": 0.5226, "step": 182900 }, { "epoch": 10.2, "learning_rate": 0.0002, "loss": 0.526, "step": 183000 }, { "epoch": 10.2, "learning_rate": 0.0002, "loss": 0.5257, "step": 183100 }, { "epoch": 10.21, "learning_rate": 0.0002, "loss": 0.5188, "step": 183200 }, { "epoch": 10.21, "learning_rate": 0.0002, "loss": 0.5265, "step": 183300 }, { "epoch": 10.22, "learning_rate": 0.0002, "loss": 0.5292, "step": 183400 }, { "epoch": 10.23, "learning_rate": 0.0002, "loss": 0.5394, "step": 183500 }, { "epoch": 10.23, "learning_rate": 0.0002, "loss": 0.533, "step": 183600 }, { "epoch": 10.24, "learning_rate": 0.0002, "loss": 0.5327, "step": 183700 }, { "epoch": 10.24, "learning_rate": 0.0002, "loss": 0.5291, "step": 183800 }, { "epoch": 10.25, "learning_rate": 0.0002, "loss": 0.5398, "step": 183900 }, { "epoch": 10.25, "learning_rate": 0.0002, "loss": 0.5236, "step": 184000 }, { "epoch": 10.26, "learning_rate": 0.0002, "loss": 0.5248, "step": 184100 }, { "epoch": 10.26, "learning_rate": 0.0002, "loss": 0.5148, "step": 184200 }, { "epoch": 10.27, "learning_rate": 0.0002, "loss": 0.5402, "step": 184300 }, { "epoch": 10.28, "learning_rate": 0.0002, "loss": 0.5241, "step": 184400 }, { "epoch": 10.28, "learning_rate": 0.0002, "loss": 0.5356, "step": 184500 }, { "epoch": 10.29, "learning_rate": 0.0002, "loss": 0.5171, "step": 184600 }, { "epoch": 10.29, "learning_rate": 0.0002, "loss": 0.5339, "step": 184700 }, { "epoch": 10.3, "learning_rate": 0.0002, "loss": 0.5228, "step": 184800 }, { "epoch": 10.3, "learning_rate": 0.0002, "loss": 0.5405, "step": 184900 }, { "epoch": 10.31, "learning_rate": 0.0002, "loss": 0.5441, "step": 185000 }, { "epoch": 10.31, "learning_rate": 0.0002, "loss": 0.5335, "step": 185100 }, { "epoch": 10.32, "learning_rate": 0.0002, "loss": 0.5289, "step": 185200 }, { "epoch": 10.33, "learning_rate": 0.0002, "loss": 0.5374, "step": 185300 }, { "epoch": 10.33, "learning_rate": 0.0002, "loss": 0.5185, "step": 185400 }, { "epoch": 10.34, "learning_rate": 0.0002, "loss": 0.5413, "step": 185500 }, { "epoch": 10.34, "learning_rate": 0.0002, "loss": 0.5399, "step": 185600 }, { "epoch": 10.35, "learning_rate": 0.0002, "loss": 0.521, "step": 185700 }, { "epoch": 10.35, "learning_rate": 0.0002, "loss": 0.5314, "step": 185800 }, { "epoch": 10.36, "learning_rate": 0.0002, "loss": 0.5355, "step": 185900 }, { "epoch": 10.37, "learning_rate": 0.0002, "loss": 0.537, "step": 186000 }, { "epoch": 10.37, "learning_rate": 0.0002, "loss": 0.5304, "step": 186100 }, { "epoch": 10.38, "learning_rate": 0.0002, "loss": 0.532, "step": 186200 }, { "epoch": 10.38, "learning_rate": 0.0002, "loss": 0.5198, "step": 186300 }, { "epoch": 10.39, "learning_rate": 0.0002, "loss": 0.5347, "step": 186400 }, { "epoch": 10.39, "learning_rate": 0.0002, "loss": 0.5288, "step": 186500 }, { "epoch": 10.4, "learning_rate": 0.0002, "loss": 0.5347, "step": 186600 }, { "epoch": 10.4, "learning_rate": 0.0002, "loss": 0.5324, "step": 186700 }, { "epoch": 10.41, "learning_rate": 0.0002, "loss": 0.5292, "step": 186800 }, { "epoch": 10.42, "learning_rate": 0.0002, "loss": 0.5339, "step": 186900 }, { "epoch": 10.42, "learning_rate": 0.0002, "loss": 0.5327, "step": 187000 }, { "epoch": 10.43, "learning_rate": 0.0002, "loss": 0.5309, "step": 187100 }, { "epoch": 10.43, "learning_rate": 0.0002, "loss": 0.5216, "step": 187200 }, { "epoch": 10.44, "learning_rate": 0.0002, "loss": 0.5325, "step": 187300 }, { "epoch": 10.44, "learning_rate": 0.0002, "loss": 0.5251, "step": 187400 }, { "epoch": 10.45, "learning_rate": 0.0002, "loss": 0.5321, "step": 187500 }, { "epoch": 10.45, "learning_rate": 0.0002, "loss": 0.5425, "step": 187600 }, { "epoch": 10.46, "learning_rate": 0.0002, "loss": 0.5305, "step": 187700 }, { "epoch": 10.47, "learning_rate": 0.0002, "loss": 0.5306, "step": 187800 }, { "epoch": 10.47, "learning_rate": 0.0002, "loss": 0.5345, "step": 187900 }, { "epoch": 10.48, "learning_rate": 0.0002, "loss": 0.5374, "step": 188000 }, { "epoch": 10.48, "learning_rate": 0.0002, "loss": 0.5142, "step": 188100 }, { "epoch": 10.49, "learning_rate": 0.0002, "loss": 0.5354, "step": 188200 }, { "epoch": 10.49, "learning_rate": 0.0002, "loss": 0.5265, "step": 188300 }, { "epoch": 10.5, "learning_rate": 0.0002, "loss": 0.5264, "step": 188400 }, { "epoch": 10.5, "learning_rate": 0.0002, "loss": 0.5377, "step": 188500 }, { "epoch": 10.51, "learning_rate": 0.0002, "loss": 0.5393, "step": 188600 }, { "epoch": 10.52, "learning_rate": 0.0002, "loss": 0.532, "step": 188700 }, { "epoch": 10.52, "learning_rate": 0.0002, "loss": 0.5439, "step": 188800 }, { "epoch": 10.53, "learning_rate": 0.0002, "loss": 0.5392, "step": 188900 }, { "epoch": 10.53, "learning_rate": 0.0002, "loss": 0.5407, "step": 189000 }, { "epoch": 10.54, "learning_rate": 0.0002, "loss": 0.5352, "step": 189100 }, { "epoch": 10.54, "learning_rate": 0.0002, "loss": 0.5384, "step": 189200 }, { "epoch": 10.55, "learning_rate": 0.0002, "loss": 0.5453, "step": 189300 }, { "epoch": 10.55, "learning_rate": 0.0002, "loss": 0.5289, "step": 189400 }, { "epoch": 10.56, "learning_rate": 0.0002, "loss": 0.5363, "step": 189500 }, { "epoch": 10.57, "learning_rate": 0.0002, "loss": 0.5311, "step": 189600 }, { "epoch": 10.57, "learning_rate": 0.0002, "loss": 0.5483, "step": 189700 }, { "epoch": 10.58, "learning_rate": 0.0002, "loss": 0.5446, "step": 189800 }, { "epoch": 10.58, "learning_rate": 0.0002, "loss": 0.5417, "step": 189900 }, { "epoch": 10.59, "learning_rate": 0.0002, "loss": 0.5348, "step": 190000 }, { "epoch": 10.59, "eval_gen_len": 18.99970077797726, "eval_loss": 2.70188045501709, "eval_rouge1": 25.274, "eval_rouge2": 12.3837, "eval_rougeL": 20.8714, "eval_rougeLsum": 23.8766, "eval_runtime": 474.7941, "eval_samples_per_second": 28.155, "eval_steps_per_second": 1.761, "step": 190000 }, { "epoch": 10.59, "learning_rate": 0.0002, "loss": 0.549, "step": 190100 }, { "epoch": 10.6, "learning_rate": 0.0002, "loss": 0.5398, "step": 190200 }, { "epoch": 10.6, "learning_rate": 0.0002, "loss": 0.5492, "step": 190300 }, { "epoch": 10.61, "learning_rate": 0.0002, "loss": 0.5443, "step": 190400 }, { "epoch": 10.62, "learning_rate": 0.0002, "loss": 0.5407, "step": 190500 }, { "epoch": 10.62, "learning_rate": 0.0002, "loss": 0.5348, "step": 190600 }, { "epoch": 10.63, "learning_rate": 0.0002, "loss": 0.5335, "step": 190700 }, { "epoch": 10.63, "learning_rate": 0.0002, "loss": 0.5479, "step": 190800 }, { "epoch": 10.64, "learning_rate": 0.0002, "loss": 0.5297, "step": 190900 }, { "epoch": 10.64, "learning_rate": 0.0002, "loss": 0.5317, "step": 191000 }, { "epoch": 10.65, "learning_rate": 0.0002, "loss": 0.5418, "step": 191100 }, { "epoch": 10.65, "learning_rate": 0.0002, "loss": 0.5397, "step": 191200 }, { "epoch": 10.66, "learning_rate": 0.0002, "loss": 0.5381, "step": 191300 }, { "epoch": 10.67, "learning_rate": 0.0002, "loss": 0.5266, "step": 191400 }, { "epoch": 10.67, "learning_rate": 0.0002, "loss": 0.5321, "step": 191500 }, { "epoch": 10.68, "learning_rate": 0.0002, "loss": 0.5549, "step": 191600 }, { "epoch": 10.68, "learning_rate": 0.0002, "loss": 0.5473, "step": 191700 }, { "epoch": 10.69, "learning_rate": 0.0002, "loss": 0.5292, "step": 191800 }, { "epoch": 10.69, "learning_rate": 0.0002, "loss": 0.5326, "step": 191900 }, { "epoch": 10.7, "learning_rate": 0.0002, "loss": 0.5344, "step": 192000 }, { "epoch": 10.7, "learning_rate": 0.0002, "loss": 0.5493, "step": 192100 }, { "epoch": 10.71, "learning_rate": 0.0002, "loss": 0.5407, "step": 192200 }, { "epoch": 10.72, "learning_rate": 0.0002, "loss": 0.5373, "step": 192300 }, { "epoch": 10.72, "learning_rate": 0.0002, "loss": 0.5357, "step": 192400 }, { "epoch": 10.73, "learning_rate": 0.0002, "loss": 0.5288, "step": 192500 }, { "epoch": 10.73, "learning_rate": 0.0002, "loss": 0.5439, "step": 192600 }, { "epoch": 10.74, "learning_rate": 0.0002, "loss": 0.5521, "step": 192700 }, { "epoch": 10.74, "learning_rate": 0.0002, "loss": 0.551, "step": 192800 }, { "epoch": 10.75, "learning_rate": 0.0002, "loss": 0.5298, "step": 192900 }, { "epoch": 10.76, "learning_rate": 0.0002, "loss": 0.5484, "step": 193000 }, { "epoch": 10.76, "learning_rate": 0.0002, "loss": 0.5351, "step": 193100 }, { "epoch": 10.77, "learning_rate": 0.0002, "loss": 0.5399, "step": 193200 }, { "epoch": 10.77, "learning_rate": 0.0002, "loss": 0.5424, "step": 193300 }, { "epoch": 10.78, "learning_rate": 0.0002, "loss": 0.5401, "step": 193400 }, { "epoch": 10.78, "learning_rate": 0.0002, "loss": 0.5417, "step": 193500 }, { "epoch": 10.79, "learning_rate": 0.0002, "loss": 0.5265, "step": 193600 }, { "epoch": 10.79, "learning_rate": 0.0002, "loss": 0.5315, "step": 193700 }, { "epoch": 10.8, "learning_rate": 0.0002, "loss": 0.5348, "step": 193800 }, { "epoch": 10.81, "learning_rate": 0.0002, "loss": 0.5475, "step": 193900 }, { "epoch": 10.81, "learning_rate": 0.0002, "loss": 0.5381, "step": 194000 }, { "epoch": 10.82, "learning_rate": 0.0002, "loss": 0.5387, "step": 194100 }, { "epoch": 10.82, "learning_rate": 0.0002, "loss": 0.5442, "step": 194200 }, { "epoch": 10.83, "learning_rate": 0.0002, "loss": 0.5332, "step": 194300 }, { "epoch": 10.83, "learning_rate": 0.0002, "loss": 0.5377, "step": 194400 }, { "epoch": 10.84, "learning_rate": 0.0002, "loss": 0.5426, "step": 194500 }, { "epoch": 10.84, "learning_rate": 0.0002, "loss": 0.5512, "step": 194600 }, { "epoch": 10.85, "learning_rate": 0.0002, "loss": 0.5364, "step": 194700 }, { "epoch": 10.86, "learning_rate": 0.0002, "loss": 0.5418, "step": 194800 }, { "epoch": 10.86, "learning_rate": 0.0002, "loss": 0.5566, "step": 194900 }, { "epoch": 10.87, "learning_rate": 0.0002, "loss": 0.5395, "step": 195000 }, { "epoch": 10.87, "learning_rate": 0.0002, "loss": 0.5356, "step": 195100 }, { "epoch": 10.88, "learning_rate": 0.0002, "loss": 0.5351, "step": 195200 }, { "epoch": 10.88, "learning_rate": 0.0002, "loss": 0.5411, "step": 195300 }, { "epoch": 10.89, "learning_rate": 0.0002, "loss": 0.5446, "step": 195400 }, { "epoch": 10.89, "learning_rate": 0.0002, "loss": 0.5496, "step": 195500 }, { "epoch": 10.9, "learning_rate": 0.0002, "loss": 0.5284, "step": 195600 }, { "epoch": 10.91, "learning_rate": 0.0002, "loss": 0.5406, "step": 195700 }, { "epoch": 10.91, "learning_rate": 0.0002, "loss": 0.5461, "step": 195800 }, { "epoch": 10.92, "learning_rate": 0.0002, "loss": 0.5376, "step": 195900 }, { "epoch": 10.92, "learning_rate": 0.0002, "loss": 0.5552, "step": 196000 }, { "epoch": 10.93, "learning_rate": 0.0002, "loss": 0.5493, "step": 196100 }, { "epoch": 10.93, "learning_rate": 0.0002, "loss": 0.5328, "step": 196200 }, { "epoch": 10.94, "learning_rate": 0.0002, "loss": 0.5427, "step": 196300 }, { "epoch": 10.94, "learning_rate": 0.0002, "loss": 0.5296, "step": 196400 }, { "epoch": 10.95, "learning_rate": 0.0002, "loss": 0.5362, "step": 196500 }, { "epoch": 10.96, "learning_rate": 0.0002, "loss": 0.5336, "step": 196600 }, { "epoch": 10.96, "learning_rate": 0.0002, "loss": 0.5329, "step": 196700 }, { "epoch": 10.97, "learning_rate": 0.0002, "loss": 0.5411, "step": 196800 }, { "epoch": 10.97, "learning_rate": 0.0002, "loss": 0.5385, "step": 196900 }, { "epoch": 10.98, "learning_rate": 0.0002, "loss": 0.5469, "step": 197000 }, { "epoch": 10.98, "learning_rate": 0.0002, "loss": 0.5579, "step": 197100 }, { "epoch": 10.99, "learning_rate": 0.0002, "loss": 0.548, "step": 197200 }, { "epoch": 10.99, "learning_rate": 0.0002, "loss": 0.5447, "step": 197300 }, { "epoch": 11.0, "learning_rate": 0.0002, "loss": 0.5431, "step": 197400 }, { "epoch": 11.01, "learning_rate": 0.0002, "loss": 0.5184, "step": 197500 }, { "epoch": 11.01, "learning_rate": 0.0002, "loss": 0.5116, "step": 197600 }, { "epoch": 11.02, "learning_rate": 0.0002, "loss": 0.5287, "step": 197700 }, { "epoch": 11.02, "learning_rate": 0.0002, "loss": 0.5241, "step": 197800 }, { "epoch": 11.03, "learning_rate": 0.0002, "loss": 0.5229, "step": 197900 }, { "epoch": 11.03, "learning_rate": 0.0002, "loss": 0.5249, "step": 198000 }, { "epoch": 11.04, "learning_rate": 0.0002, "loss": 0.5093, "step": 198100 }, { "epoch": 11.04, "learning_rate": 0.0002, "loss": 0.5304, "step": 198200 }, { "epoch": 11.05, "learning_rate": 0.0002, "loss": 0.5147, "step": 198300 }, { "epoch": 11.06, "learning_rate": 0.0002, "loss": 0.5274, "step": 198400 }, { "epoch": 11.06, "learning_rate": 0.0002, "loss": 0.5266, "step": 198500 }, { "epoch": 11.07, "learning_rate": 0.0002, "loss": 0.507, "step": 198600 }, { "epoch": 11.07, "learning_rate": 0.0002, "loss": 0.5189, "step": 198700 }, { "epoch": 11.08, "learning_rate": 0.0002, "loss": 0.5388, "step": 198800 }, { "epoch": 11.08, "learning_rate": 0.0002, "loss": 0.5069, "step": 198900 }, { "epoch": 11.09, "learning_rate": 0.0002, "loss": 0.5212, "step": 199000 }, { "epoch": 11.1, "learning_rate": 0.0002, "loss": 0.5088, "step": 199100 }, { "epoch": 11.1, "learning_rate": 0.0002, "loss": 0.5189, "step": 199200 }, { "epoch": 11.11, "learning_rate": 0.0002, "loss": 0.5218, "step": 199300 }, { "epoch": 11.11, "learning_rate": 0.0002, "loss": 0.5197, "step": 199400 }, { "epoch": 11.12, "learning_rate": 0.0002, "loss": 0.5246, "step": 199500 }, { "epoch": 11.12, "learning_rate": 0.0002, "loss": 0.5255, "step": 199600 }, { "epoch": 11.13, "learning_rate": 0.0002, "loss": 0.5164, "step": 199700 }, { "epoch": 11.13, "learning_rate": 0.0002, "loss": 0.5201, "step": 199800 }, { "epoch": 11.14, "learning_rate": 0.0002, "loss": 0.5277, "step": 199900 }, { "epoch": 11.15, "learning_rate": 0.0002, "loss": 0.5277, "step": 200000 }, { "epoch": 11.15, "eval_gen_len": 18.999476361460204, "eval_loss": 2.7242300510406494, "eval_rouge1": 25.376, "eval_rouge2": 12.4944, "eval_rougeL": 20.9774, "eval_rougeLsum": 23.985, "eval_runtime": 476.9451, "eval_samples_per_second": 28.028, "eval_steps_per_second": 1.753, "step": 200000 } ], "max_steps": 897250, "num_train_epochs": 50, "total_flos": 8.661379023313306e+17, "trial_name": null, "trial_params": null }