{ "best_metric": null, "best_model_checkpoint": null, "epoch": 39.20953575909661, "global_step": 500000, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.01, "learning_rate": 0.0005, "loss": 2.4473, "step": 100 }, { "epoch": 0.02, "learning_rate": 0.0005, "loss": 2.3281, "step": 200 }, { "epoch": 0.02, "learning_rate": 0.0005, "loss": 2.3071, "step": 300 }, { "epoch": 0.03, "learning_rate": 0.0005, "loss": 2.2586, "step": 400 }, { "epoch": 0.04, "learning_rate": 0.0005, "loss": 2.2505, "step": 500 }, { "epoch": 0.05, "learning_rate": 0.0005, "loss": 2.2206, "step": 600 }, { "epoch": 0.05, "learning_rate": 0.0005, "loss": 2.2542, "step": 700 }, { "epoch": 0.06, "learning_rate": 0.0005, "loss": 2.2092, "step": 800 }, { "epoch": 0.07, "learning_rate": 0.0005, "loss": 2.1569, "step": 900 }, { "epoch": 0.08, "learning_rate": 0.0005, "loss": 2.1698, "step": 1000 }, { "epoch": 0.09, "learning_rate": 0.0005, "loss": 2.1675, "step": 1100 }, { "epoch": 0.09, "learning_rate": 0.0005, "loss": 2.1551, "step": 1200 }, { "epoch": 0.1, "learning_rate": 0.0005, "loss": 2.1548, "step": 1300 }, { "epoch": 0.11, "learning_rate": 0.0005, "loss": 2.1338, "step": 1400 }, { "epoch": 0.12, "learning_rate": 0.0005, "loss": 2.0949, "step": 1500 }, { "epoch": 0.13, "learning_rate": 0.0005, "loss": 2.1218, "step": 1600 }, { "epoch": 0.13, "learning_rate": 0.0005, "loss": 2.1147, "step": 1700 }, { "epoch": 0.14, "learning_rate": 0.0005, "loss": 2.1143, "step": 1800 }, { "epoch": 0.15, "learning_rate": 0.0005, "loss": 2.1062, "step": 1900 }, { "epoch": 0.16, "learning_rate": 0.0005, "loss": 2.0921, "step": 2000 }, { "epoch": 0.16, "learning_rate": 0.0005, "loss": 2.0893, "step": 2100 }, { "epoch": 0.17, "learning_rate": 0.0005, "loss": 2.1006, "step": 2200 }, { "epoch": 0.18, "learning_rate": 0.0005, "loss": 2.0757, "step": 2300 }, { "epoch": 0.19, "learning_rate": 0.0005, "loss": 2.0428, "step": 2400 }, { "epoch": 0.2, "learning_rate": 0.0005, "loss": 2.059, "step": 2500 }, { "epoch": 0.2, "learning_rate": 0.0005, "loss": 2.0686, "step": 2600 }, { "epoch": 0.21, "learning_rate": 0.0005, "loss": 2.057, "step": 2700 }, { "epoch": 0.22, "learning_rate": 0.0005, "loss": 2.0589, "step": 2800 }, { "epoch": 0.23, "learning_rate": 0.0005, "loss": 2.0715, "step": 2900 }, { "epoch": 0.24, "learning_rate": 0.0005, "loss": 2.0384, "step": 3000 }, { "epoch": 0.24, "learning_rate": 0.0005, "loss": 2.0547, "step": 3100 }, { "epoch": 0.25, "learning_rate": 0.0005, "loss": 2.0339, "step": 3200 }, { "epoch": 0.26, "learning_rate": 0.0005, "loss": 2.0184, "step": 3300 }, { "epoch": 0.27, "learning_rate": 0.0005, "loss": 2.0153, "step": 3400 }, { "epoch": 0.27, "learning_rate": 0.0005, "loss": 2.0178, "step": 3500 }, { "epoch": 0.28, "learning_rate": 0.0005, "loss": 2.0025, "step": 3600 }, { "epoch": 0.29, "learning_rate": 0.0005, "loss": 2.0144, "step": 3700 }, { "epoch": 0.3, "learning_rate": 0.0005, "loss": 2.0053, "step": 3800 }, { "epoch": 0.31, "learning_rate": 0.0005, "loss": 1.99, "step": 3900 }, { "epoch": 0.31, "learning_rate": 0.0005, "loss": 1.9965, "step": 4000 }, { "epoch": 0.32, "learning_rate": 0.0005, "loss": 2.0051, "step": 4100 }, { "epoch": 0.33, "learning_rate": 0.0005, "loss": 2.014, "step": 4200 }, { "epoch": 0.34, "learning_rate": 0.0005, "loss": 1.973, "step": 4300 }, { "epoch": 0.35, "learning_rate": 0.0005, "loss": 1.9985, "step": 4400 }, { "epoch": 0.35, "learning_rate": 0.0005, "loss": 2.0279, "step": 4500 }, { "epoch": 0.36, "learning_rate": 0.0005, "loss": 1.9903, "step": 4600 }, { "epoch": 0.37, "learning_rate": 0.0005, "loss": 1.9699, "step": 4700 }, { "epoch": 0.38, "learning_rate": 0.0005, "loss": 1.9834, "step": 4800 }, { "epoch": 0.38, "learning_rate": 0.0005, "loss": 1.9638, "step": 4900 }, { "epoch": 0.39, "learning_rate": 0.0005, "loss": 1.9783, "step": 5000 }, { "epoch": 0.4, "learning_rate": 0.0005, "loss": 1.9678, "step": 5100 }, { "epoch": 0.41, "learning_rate": 0.0005, "loss": 1.9462, "step": 5200 }, { "epoch": 0.42, "learning_rate": 0.0005, "loss": 1.9501, "step": 5300 }, { "epoch": 0.42, "learning_rate": 0.0005, "loss": 1.9362, "step": 5400 }, { "epoch": 0.43, "learning_rate": 0.0005, "loss": 1.976, "step": 5500 }, { "epoch": 0.44, "learning_rate": 0.0005, "loss": 1.9635, "step": 5600 }, { "epoch": 0.45, "learning_rate": 0.0005, "loss": 1.9401, "step": 5700 }, { "epoch": 0.45, "learning_rate": 0.0005, "loss": 1.9454, "step": 5800 }, { "epoch": 0.46, "learning_rate": 0.0005, "loss": 1.9544, "step": 5900 }, { "epoch": 0.47, "learning_rate": 0.0005, "loss": 1.9394, "step": 6000 }, { "epoch": 0.48, "learning_rate": 0.0005, "loss": 1.943, "step": 6100 }, { "epoch": 0.49, "learning_rate": 0.0005, "loss": 1.9451, "step": 6200 }, { "epoch": 0.49, "learning_rate": 0.0005, "loss": 1.9259, "step": 6300 }, { "epoch": 0.5, "learning_rate": 0.0005, "loss": 1.937, "step": 6400 }, { "epoch": 0.51, "learning_rate": 0.0005, "loss": 1.9343, "step": 6500 }, { "epoch": 0.52, "learning_rate": 0.0005, "loss": 1.9644, "step": 6600 }, { "epoch": 0.53, "learning_rate": 0.0005, "loss": 1.9195, "step": 6700 }, { "epoch": 0.53, "learning_rate": 0.0005, "loss": 1.9413, "step": 6800 }, { "epoch": 0.54, "learning_rate": 0.0005, "loss": 1.9294, "step": 6900 }, { "epoch": 0.55, "learning_rate": 0.0005, "loss": 1.9241, "step": 7000 }, { "epoch": 0.56, "learning_rate": 0.0005, "loss": 1.9278, "step": 7100 }, { "epoch": 0.56, "learning_rate": 0.0005, "loss": 1.9228, "step": 7200 }, { "epoch": 0.57, "learning_rate": 0.0005, "loss": 1.8982, "step": 7300 }, { "epoch": 0.58, "learning_rate": 0.0005, "loss": 1.8915, "step": 7400 }, { "epoch": 0.59, "learning_rate": 0.0005, "loss": 1.9206, "step": 7500 }, { "epoch": 0.6, "learning_rate": 0.0005, "loss": 1.9225, "step": 7600 }, { "epoch": 0.6, "learning_rate": 0.0005, "loss": 1.9032, "step": 7700 }, { "epoch": 0.61, "learning_rate": 0.0005, "loss": 1.9072, "step": 7800 }, { "epoch": 0.62, "learning_rate": 0.0005, "loss": 1.9003, "step": 7900 }, { "epoch": 0.63, "learning_rate": 0.0005, "loss": 1.9222, "step": 8000 }, { "epoch": 0.64, "learning_rate": 0.0005, "loss": 1.8978, "step": 8100 }, { "epoch": 0.64, "learning_rate": 0.0005, "loss": 1.8991, "step": 8200 }, { "epoch": 0.65, "learning_rate": 0.0005, "loss": 1.8978, "step": 8300 }, { "epoch": 0.66, "learning_rate": 0.0005, "loss": 1.878, "step": 8400 }, { "epoch": 0.67, "learning_rate": 0.0005, "loss": 1.921, "step": 8500 }, { "epoch": 0.67, "learning_rate": 0.0005, "loss": 1.878, "step": 8600 }, { "epoch": 0.68, "learning_rate": 0.0005, "loss": 1.9043, "step": 8700 }, { "epoch": 0.69, "learning_rate": 0.0005, "loss": 1.8767, "step": 8800 }, { "epoch": 0.7, "learning_rate": 0.0005, "loss": 1.879, "step": 8900 }, { "epoch": 0.71, "learning_rate": 0.0005, "loss": 1.9042, "step": 9000 }, { "epoch": 0.71, "learning_rate": 0.0005, "loss": 1.8796, "step": 9100 }, { "epoch": 0.72, "learning_rate": 0.0005, "loss": 1.8692, "step": 9200 }, { "epoch": 0.73, "learning_rate": 0.0005, "loss": 1.8757, "step": 9300 }, { "epoch": 0.74, "learning_rate": 0.0005, "loss": 1.8602, "step": 9400 }, { "epoch": 0.74, "learning_rate": 0.0005, "loss": 1.8847, "step": 9500 }, { "epoch": 0.75, "learning_rate": 0.0005, "loss": 1.8655, "step": 9600 }, { "epoch": 0.76, "learning_rate": 0.0005, "loss": 1.8683, "step": 9700 }, { "epoch": 0.77, "learning_rate": 0.0005, "loss": 1.8654, "step": 9800 }, { "epoch": 0.78, "learning_rate": 0.0005, "loss": 1.8712, "step": 9900 }, { "epoch": 0.78, "learning_rate": 0.0005, "loss": 1.8636, "step": 10000 }, { "epoch": 0.78, "eval_gen_len": 18.695153173832434, "eval_loss": 2.3997342586517334, "eval_rouge1": 31.2993, "eval_rouge2": 10.395, "eval_rougeL": 25.2713, "eval_rougeLsum": 25.2611, "eval_runtime": 363.1513, "eval_samples_per_second": 31.191, "eval_steps_per_second": 1.95, "step": 10000 }, { "epoch": 0.79, "learning_rate": 0.0005, "loss": 1.8802, "step": 10100 }, { "epoch": 0.8, "learning_rate": 0.0005, "loss": 1.8608, "step": 10200 }, { "epoch": 0.81, "learning_rate": 0.0005, "loss": 1.8651, "step": 10300 }, { "epoch": 0.82, "learning_rate": 0.0005, "loss": 1.8559, "step": 10400 }, { "epoch": 0.82, "learning_rate": 0.0005, "loss": 1.8508, "step": 10500 }, { "epoch": 0.83, "learning_rate": 0.0005, "loss": 1.872, "step": 10600 }, { "epoch": 0.84, "learning_rate": 0.0005, "loss": 1.8624, "step": 10700 }, { "epoch": 0.85, "learning_rate": 0.0005, "loss": 1.8603, "step": 10800 }, { "epoch": 0.85, "learning_rate": 0.0005, "loss": 1.85, "step": 10900 }, { "epoch": 0.86, "learning_rate": 0.0005, "loss": 1.8705, "step": 11000 }, { "epoch": 0.87, "learning_rate": 0.0005, "loss": 1.8259, "step": 11100 }, { "epoch": 0.88, "learning_rate": 0.0005, "loss": 1.8653, "step": 11200 }, { "epoch": 0.89, "learning_rate": 0.0005, "loss": 1.8511, "step": 11300 }, { "epoch": 0.89, "learning_rate": 0.0005, "loss": 1.8401, "step": 11400 }, { "epoch": 0.9, "learning_rate": 0.0005, "loss": 1.8473, "step": 11500 }, { "epoch": 0.91, "learning_rate": 0.0005, "loss": 1.8077, "step": 11600 }, { "epoch": 0.92, "learning_rate": 0.0005, "loss": 1.8412, "step": 11700 }, { "epoch": 0.93, "learning_rate": 0.0005, "loss": 1.8516, "step": 11800 }, { "epoch": 0.93, "learning_rate": 0.0005, "loss": 1.8604, "step": 11900 }, { "epoch": 0.94, "learning_rate": 0.0005, "loss": 1.8361, "step": 12000 }, { "epoch": 0.95, "learning_rate": 0.0005, "loss": 1.8098, "step": 12100 }, { "epoch": 0.96, "learning_rate": 0.0005, "loss": 1.839, "step": 12200 }, { "epoch": 0.96, "learning_rate": 0.0005, "loss": 1.8477, "step": 12300 }, { "epoch": 0.97, "learning_rate": 0.0005, "loss": 1.8398, "step": 12400 }, { "epoch": 0.98, "learning_rate": 0.0005, "loss": 1.8353, "step": 12500 }, { "epoch": 0.99, "learning_rate": 0.0005, "loss": 1.8528, "step": 12600 }, { "epoch": 1.0, "learning_rate": 0.0005, "loss": 1.8446, "step": 12700 }, { "epoch": 1.0, "learning_rate": 0.0005, "loss": 1.7852, "step": 12800 }, { "epoch": 1.01, "learning_rate": 0.0005, "loss": 1.7505, "step": 12900 }, { "epoch": 1.02, "learning_rate": 0.0005, "loss": 1.7305, "step": 13000 }, { "epoch": 1.03, "learning_rate": 0.0005, "loss": 1.7235, "step": 13100 }, { "epoch": 1.04, "learning_rate": 0.0005, "loss": 1.7329, "step": 13200 }, { "epoch": 1.04, "learning_rate": 0.0005, "loss": 1.7175, "step": 13300 }, { "epoch": 1.05, "learning_rate": 0.0005, "loss": 1.7238, "step": 13400 }, { "epoch": 1.06, "learning_rate": 0.0005, "loss": 1.7691, "step": 13500 }, { "epoch": 1.07, "learning_rate": 0.0005, "loss": 1.7501, "step": 13600 }, { "epoch": 1.07, "learning_rate": 0.0005, "loss": 1.7305, "step": 13700 }, { "epoch": 1.08, "learning_rate": 0.0005, "loss": 1.7305, "step": 13800 }, { "epoch": 1.09, "learning_rate": 0.0005, "loss": 1.7437, "step": 13900 }, { "epoch": 1.1, "learning_rate": 0.0005, "loss": 1.7608, "step": 14000 }, { "epoch": 1.11, "learning_rate": 0.0005, "loss": 1.7506, "step": 14100 }, { "epoch": 1.11, "learning_rate": 0.0005, "loss": 1.7368, "step": 14200 }, { "epoch": 1.12, "learning_rate": 0.0005, "loss": 1.7633, "step": 14300 }, { "epoch": 1.13, "learning_rate": 0.0005, "loss": 1.7264, "step": 14400 }, { "epoch": 1.14, "learning_rate": 0.0005, "loss": 1.7292, "step": 14500 }, { "epoch": 1.14, "learning_rate": 0.0005, "loss": 1.7532, "step": 14600 }, { "epoch": 1.15, "learning_rate": 0.0005, "loss": 1.741, "step": 14700 }, { "epoch": 1.16, "learning_rate": 0.0005, "loss": 1.7643, "step": 14800 }, { "epoch": 1.17, "learning_rate": 0.0005, "loss": 1.7447, "step": 14900 }, { "epoch": 1.18, "learning_rate": 0.0005, "loss": 1.7263, "step": 15000 }, { "epoch": 1.18, "learning_rate": 0.0005, "loss": 1.7093, "step": 15100 }, { "epoch": 1.19, "learning_rate": 0.0005, "loss": 1.73, "step": 15200 }, { "epoch": 1.2, "learning_rate": 0.0005, "loss": 1.7178, "step": 15300 }, { "epoch": 1.21, "learning_rate": 0.0005, "loss": 1.7379, "step": 15400 }, { "epoch": 1.22, "learning_rate": 0.0005, "loss": 1.7404, "step": 15500 }, { "epoch": 1.22, "learning_rate": 0.0005, "loss": 1.7497, "step": 15600 }, { "epoch": 1.23, "learning_rate": 0.0005, "loss": 1.7417, "step": 15700 }, { "epoch": 1.24, "learning_rate": 0.0005, "loss": 1.7136, "step": 15800 }, { "epoch": 1.25, "learning_rate": 0.0005, "loss": 1.7516, "step": 15900 }, { "epoch": 1.25, "learning_rate": 0.0005, "loss": 1.7399, "step": 16000 }, { "epoch": 1.26, "learning_rate": 0.0005, "loss": 1.7556, "step": 16100 }, { "epoch": 1.27, "learning_rate": 0.0005, "loss": 1.73, "step": 16200 }, { "epoch": 1.28, "learning_rate": 0.0005, "loss": 1.742, "step": 16300 }, { "epoch": 1.29, "learning_rate": 0.0005, "loss": 1.7195, "step": 16400 }, { "epoch": 1.29, "learning_rate": 0.0005, "loss": 1.7433, "step": 16500 }, { "epoch": 1.3, "learning_rate": 0.0005, "loss": 1.7207, "step": 16600 }, { "epoch": 1.31, "learning_rate": 0.0005, "loss": 1.7332, "step": 16700 }, { "epoch": 1.32, "learning_rate": 0.0005, "loss": 1.7288, "step": 16800 }, { "epoch": 1.33, "learning_rate": 0.0005, "loss": 1.7386, "step": 16900 }, { "epoch": 1.33, "learning_rate": 0.0005, "loss": 1.7213, "step": 17000 }, { "epoch": 1.34, "learning_rate": 0.0005, "loss": 1.7384, "step": 17100 }, { "epoch": 1.35, "learning_rate": 0.0005, "loss": 1.7298, "step": 17200 }, { "epoch": 1.36, "learning_rate": 0.0005, "loss": 1.7055, "step": 17300 }, { "epoch": 1.36, "learning_rate": 0.0005, "loss": 1.7337, "step": 17400 }, { "epoch": 1.37, "learning_rate": 0.0005, "loss": 1.7299, "step": 17500 }, { "epoch": 1.38, "learning_rate": 0.0005, "loss": 1.7272, "step": 17600 }, { "epoch": 1.39, "learning_rate": 0.0005, "loss": 1.7242, "step": 17700 }, { "epoch": 1.4, "learning_rate": 0.0005, "loss": 1.7312, "step": 17800 }, { "epoch": 1.4, "learning_rate": 0.0005, "loss": 1.7404, "step": 17900 }, { "epoch": 1.41, "learning_rate": 0.0005, "loss": 1.7056, "step": 18000 }, { "epoch": 1.42, "learning_rate": 0.0005, "loss": 1.7389, "step": 18100 }, { "epoch": 1.43, "learning_rate": 0.0005, "loss": 1.7082, "step": 18200 }, { "epoch": 1.44, "learning_rate": 0.0005, "loss": 1.6982, "step": 18300 }, { "epoch": 1.44, "learning_rate": 0.0005, "loss": 1.732, "step": 18400 }, { "epoch": 1.45, "learning_rate": 0.0005, "loss": 1.7388, "step": 18500 }, { "epoch": 1.46, "learning_rate": 0.0005, "loss": 1.709, "step": 18600 }, { "epoch": 1.47, "learning_rate": 0.0005, "loss": 1.7028, "step": 18700 }, { "epoch": 1.47, "learning_rate": 0.0005, "loss": 1.7392, "step": 18800 }, { "epoch": 1.48, "learning_rate": 0.0005, "loss": 1.748, "step": 18900 }, { "epoch": 1.49, "learning_rate": 0.0005, "loss": 1.7054, "step": 19000 }, { "epoch": 1.5, "learning_rate": 0.0005, "loss": 1.7273, "step": 19100 }, { "epoch": 1.51, "learning_rate": 0.0005, "loss": 1.7165, "step": 19200 }, { "epoch": 1.51, "learning_rate": 0.0005, "loss": 1.7423, "step": 19300 }, { "epoch": 1.52, "learning_rate": 0.0005, "loss": 1.7209, "step": 19400 }, { "epoch": 1.53, "learning_rate": 0.0005, "loss": 1.7042, "step": 19500 }, { "epoch": 1.54, "learning_rate": 0.0005, "loss": 1.7183, "step": 19600 }, { "epoch": 1.54, "learning_rate": 0.0005, "loss": 1.7267, "step": 19700 }, { "epoch": 1.55, "learning_rate": 0.0005, "loss": 1.7182, "step": 19800 }, { "epoch": 1.56, "learning_rate": 0.0005, "loss": 1.7098, "step": 19900 }, { "epoch": 1.57, "learning_rate": 0.0005, "loss": 1.7148, "step": 20000 }, { "epoch": 1.57, "eval_gen_len": 18.688443541979343, "eval_loss": 2.347717523574829, "eval_rouge1": 32.4357, "eval_rouge2": 11.3106, "eval_rougeL": 26.2749, "eval_rougeLsum": 26.2677, "eval_runtime": 345.5194, "eval_samples_per_second": 32.783, "eval_steps_per_second": 2.049, "step": 20000 }, { "epoch": 1.58, "learning_rate": 0.0005, "loss": 1.7099, "step": 20100 }, { "epoch": 1.58, "learning_rate": 0.0005, "loss": 1.7115, "step": 20200 }, { "epoch": 1.59, "learning_rate": 0.0005, "loss": 1.7282, "step": 20300 }, { "epoch": 1.6, "learning_rate": 0.0005, "loss": 1.7062, "step": 20400 }, { "epoch": 1.61, "learning_rate": 0.0005, "loss": 1.7236, "step": 20500 }, { "epoch": 1.62, "learning_rate": 0.0005, "loss": 1.7141, "step": 20600 }, { "epoch": 1.62, "learning_rate": 0.0005, "loss": 1.7273, "step": 20700 }, { "epoch": 1.63, "learning_rate": 0.0005, "loss": 1.7074, "step": 20800 }, { "epoch": 1.64, "learning_rate": 0.0005, "loss": 1.7209, "step": 20900 }, { "epoch": 1.65, "learning_rate": 0.0005, "loss": 1.7173, "step": 21000 }, { "epoch": 1.65, "learning_rate": 0.0005, "loss": 1.6918, "step": 21100 }, { "epoch": 1.66, "learning_rate": 0.0005, "loss": 1.6768, "step": 21200 }, { "epoch": 1.67, "learning_rate": 0.0005, "loss": 1.7153, "step": 21300 }, { "epoch": 1.68, "learning_rate": 0.0005, "loss": 1.738, "step": 21400 }, { "epoch": 1.69, "learning_rate": 0.0005, "loss": 1.6996, "step": 21500 }, { "epoch": 1.69, "learning_rate": 0.0005, "loss": 1.7035, "step": 21600 }, { "epoch": 1.7, "learning_rate": 0.0005, "loss": 1.6942, "step": 21700 }, { "epoch": 1.71, "learning_rate": 0.0005, "loss": 1.6875, "step": 21800 }, { "epoch": 1.72, "learning_rate": 0.0005, "loss": 1.697, "step": 21900 }, { "epoch": 1.73, "learning_rate": 0.0005, "loss": 1.6911, "step": 22000 }, { "epoch": 1.73, "learning_rate": 0.0005, "loss": 1.6806, "step": 22100 }, { "epoch": 1.74, "learning_rate": 0.0005, "loss": 1.7186, "step": 22200 }, { "epoch": 1.75, "learning_rate": 0.0005, "loss": 1.7074, "step": 22300 }, { "epoch": 1.76, "learning_rate": 0.0005, "loss": 1.7027, "step": 22400 }, { "epoch": 1.76, "learning_rate": 0.0005, "loss": 1.7251, "step": 22500 }, { "epoch": 1.77, "learning_rate": 0.0005, "loss": 1.6959, "step": 22600 }, { "epoch": 1.78, "learning_rate": 0.0005, "loss": 1.7194, "step": 22700 }, { "epoch": 1.79, "learning_rate": 0.0005, "loss": 1.6768, "step": 22800 }, { "epoch": 1.8, "learning_rate": 0.0005, "loss": 1.6864, "step": 22900 }, { "epoch": 1.8, "learning_rate": 0.0005, "loss": 1.7057, "step": 23000 }, { "epoch": 1.81, "learning_rate": 0.0005, "loss": 1.702, "step": 23100 }, { "epoch": 1.82, "learning_rate": 0.0005, "loss": 1.7105, "step": 23200 }, { "epoch": 1.83, "learning_rate": 0.0005, "loss": 1.7015, "step": 23300 }, { "epoch": 1.84, "learning_rate": 0.0005, "loss": 1.7065, "step": 23400 }, { "epoch": 1.84, "learning_rate": 0.0005, "loss": 1.6636, "step": 23500 }, { "epoch": 1.85, "learning_rate": 0.0005, "loss": 1.6822, "step": 23600 }, { "epoch": 1.86, "learning_rate": 0.0005, "loss": 1.7033, "step": 23700 }, { "epoch": 1.87, "learning_rate": 0.0005, "loss": 1.7382, "step": 23800 }, { "epoch": 1.87, "learning_rate": 0.0005, "loss": 1.6929, "step": 23900 }, { "epoch": 1.88, "learning_rate": 0.0005, "loss": 1.6973, "step": 24000 }, { "epoch": 1.89, "learning_rate": 0.0005, "loss": 1.6824, "step": 24100 }, { "epoch": 1.9, "learning_rate": 0.0005, "loss": 1.7013, "step": 24200 }, { "epoch": 1.91, "learning_rate": 0.0005, "loss": 1.7004, "step": 24300 }, { "epoch": 1.91, "learning_rate": 0.0005, "loss": 1.6694, "step": 24400 }, { "epoch": 1.92, "learning_rate": 0.0005, "loss": 1.6998, "step": 24500 }, { "epoch": 1.93, "learning_rate": 0.0005, "loss": 1.683, "step": 24600 }, { "epoch": 1.94, "learning_rate": 0.0005, "loss": 1.6925, "step": 24700 }, { "epoch": 1.94, "learning_rate": 0.0005, "loss": 1.6659, "step": 24800 }, { "epoch": 1.95, "learning_rate": 0.0005, "loss": 1.6905, "step": 24900 }, { "epoch": 1.96, "learning_rate": 0.0005, "loss": 1.6997, "step": 25000 }, { "epoch": 1.97, "learning_rate": 0.0005, "loss": 1.6695, "step": 25100 }, { "epoch": 1.98, "learning_rate": 0.0005, "loss": 1.6833, "step": 25200 }, { "epoch": 1.98, "learning_rate": 0.0005, "loss": 1.6832, "step": 25300 }, { "epoch": 1.99, "learning_rate": 0.0005, "loss": 1.7041, "step": 25400 }, { "epoch": 2.0, "learning_rate": 0.0005, "loss": 1.6848, "step": 25500 }, { "epoch": 2.01, "learning_rate": 0.0005, "loss": 1.5853, "step": 25600 }, { "epoch": 2.02, "learning_rate": 0.0005, "loss": 1.5959, "step": 25700 }, { "epoch": 2.02, "learning_rate": 0.0005, "loss": 1.6016, "step": 25800 }, { "epoch": 2.03, "learning_rate": 0.0005, "loss": 1.5721, "step": 25900 }, { "epoch": 2.04, "learning_rate": 0.0005, "loss": 1.5981, "step": 26000 }, { "epoch": 2.05, "learning_rate": 0.0005, "loss": 1.5845, "step": 26100 }, { "epoch": 2.05, "learning_rate": 0.0005, "loss": 1.6169, "step": 26200 }, { "epoch": 2.06, "learning_rate": 0.0005, "loss": 1.5921, "step": 26300 }, { "epoch": 2.07, "learning_rate": 0.0005, "loss": 1.6167, "step": 26400 }, { "epoch": 2.08, "learning_rate": 0.0005, "loss": 1.6052, "step": 26500 }, { "epoch": 2.09, "learning_rate": 0.0005, "loss": 1.6189, "step": 26600 }, { "epoch": 2.09, "learning_rate": 0.0005, "loss": 1.6091, "step": 26700 }, { "epoch": 2.1, "learning_rate": 0.0005, "loss": 1.6312, "step": 26800 }, { "epoch": 2.11, "learning_rate": 0.0005, "loss": 1.5886, "step": 26900 }, { "epoch": 2.12, "learning_rate": 0.0005, "loss": 1.6133, "step": 27000 }, { "epoch": 2.13, "learning_rate": 0.0005, "loss": 1.6036, "step": 27100 }, { "epoch": 2.13, "learning_rate": 0.0005, "loss": 1.5927, "step": 27200 }, { "epoch": 2.14, "learning_rate": 0.0005, "loss": 1.5909, "step": 27300 }, { "epoch": 2.15, "learning_rate": 0.0005, "loss": 1.6093, "step": 27400 }, { "epoch": 2.16, "learning_rate": 0.0005, "loss": 1.5991, "step": 27500 }, { "epoch": 2.16, "learning_rate": 0.0005, "loss": 1.5617, "step": 27600 }, { "epoch": 2.17, "learning_rate": 0.0005, "loss": 1.5947, "step": 27700 }, { "epoch": 2.18, "learning_rate": 0.0005, "loss": 1.6055, "step": 27800 }, { "epoch": 2.19, "learning_rate": 0.0005, "loss": 1.6014, "step": 27900 }, { "epoch": 2.2, "learning_rate": 0.0005, "loss": 1.6091, "step": 28000 }, { "epoch": 2.2, "learning_rate": 0.0005, "loss": 1.6327, "step": 28100 }, { "epoch": 2.21, "learning_rate": 0.0005, "loss": 1.6125, "step": 28200 }, { "epoch": 2.22, "learning_rate": 0.0005, "loss": 1.5928, "step": 28300 }, { "epoch": 2.23, "learning_rate": 0.0005, "loss": 1.608, "step": 28400 }, { "epoch": 2.23, "learning_rate": 0.0005, "loss": 1.5971, "step": 28500 }, { "epoch": 2.24, "learning_rate": 0.0005, "loss": 1.6227, "step": 28600 }, { "epoch": 2.25, "learning_rate": 0.0005, "loss": 1.6162, "step": 28700 }, { "epoch": 2.26, "learning_rate": 0.0005, "loss": 1.5908, "step": 28800 }, { "epoch": 2.27, "learning_rate": 0.0005, "loss": 1.6052, "step": 28900 }, { "epoch": 2.27, "learning_rate": 0.0005, "loss": 1.6251, "step": 29000 }, { "epoch": 2.28, "learning_rate": 0.0005, "loss": 1.6072, "step": 29100 }, { "epoch": 2.29, "learning_rate": 0.0005, "loss": 1.598, "step": 29200 }, { "epoch": 2.3, "learning_rate": 0.0005, "loss": 1.6139, "step": 29300 }, { "epoch": 2.31, "learning_rate": 0.0005, "loss": 1.6048, "step": 29400 }, { "epoch": 2.31, "learning_rate": 0.0005, "loss": 1.6331, "step": 29500 }, { "epoch": 2.32, "learning_rate": 0.0005, "loss": 1.6119, "step": 29600 }, { "epoch": 2.33, "learning_rate": 0.0005, "loss": 1.6086, "step": 29700 }, { "epoch": 2.34, "learning_rate": 0.0005, "loss": 1.6392, "step": 29800 }, { "epoch": 2.34, "learning_rate": 0.0005, "loss": 1.6186, "step": 29900 }, { "epoch": 2.35, "learning_rate": 0.0005, "loss": 1.5952, "step": 30000 }, { "epoch": 2.35, "eval_gen_len": 18.797475059592124, "eval_loss": 2.3347554206848145, "eval_rouge1": 32.7907, "eval_rouge2": 11.73, "eval_rougeL": 26.6762, "eval_rougeLsum": 26.6615, "eval_runtime": 345.6461, "eval_samples_per_second": 32.771, "eval_steps_per_second": 2.048, "step": 30000 }, { "epoch": 2.36, "learning_rate": 0.0005, "loss": 1.604, "step": 30100 }, { "epoch": 2.37, "learning_rate": 0.0005, "loss": 1.5966, "step": 30200 }, { "epoch": 2.38, "learning_rate": 0.0005, "loss": 1.6006, "step": 30300 }, { "epoch": 2.38, "learning_rate": 0.0005, "loss": 1.5954, "step": 30400 }, { "epoch": 2.39, "learning_rate": 0.0005, "loss": 1.5982, "step": 30500 }, { "epoch": 2.4, "learning_rate": 0.0005, "loss": 1.5859, "step": 30600 }, { "epoch": 2.41, "learning_rate": 0.0005, "loss": 1.5999, "step": 30700 }, { "epoch": 2.42, "learning_rate": 0.0005, "loss": 1.6032, "step": 30800 }, { "epoch": 2.42, "learning_rate": 0.0005, "loss": 1.5834, "step": 30900 }, { "epoch": 2.43, "learning_rate": 0.0005, "loss": 1.5798, "step": 31000 }, { "epoch": 2.44, "learning_rate": 0.0005, "loss": 1.5805, "step": 31100 }, { "epoch": 2.45, "learning_rate": 0.0005, "loss": 1.6271, "step": 31200 }, { "epoch": 2.45, "learning_rate": 0.0005, "loss": 1.5944, "step": 31300 }, { "epoch": 2.46, "learning_rate": 0.0005, "loss": 1.6272, "step": 31400 }, { "epoch": 2.47, "learning_rate": 0.0005, "loss": 1.6026, "step": 31500 }, { "epoch": 2.48, "learning_rate": 0.0005, "loss": 1.6003, "step": 31600 }, { "epoch": 2.49, "learning_rate": 0.0005, "loss": 1.5817, "step": 31700 }, { "epoch": 2.49, "learning_rate": 0.0005, "loss": 1.5677, "step": 31800 }, { "epoch": 2.5, "learning_rate": 0.0005, "loss": 1.578, "step": 31900 }, { "epoch": 2.51, "learning_rate": 0.0005, "loss": 1.6262, "step": 32000 }, { "epoch": 2.52, "learning_rate": 0.0005, "loss": 1.6017, "step": 32100 }, { "epoch": 2.53, "learning_rate": 0.0005, "loss": 1.62, "step": 32200 }, { "epoch": 2.53, "learning_rate": 0.0005, "loss": 1.623, "step": 32300 }, { "epoch": 2.54, "learning_rate": 0.0005, "loss": 1.594, "step": 32400 }, { "epoch": 2.55, "learning_rate": 0.0005, "loss": 1.589, "step": 32500 }, { "epoch": 2.56, "learning_rate": 0.0005, "loss": 1.5782, "step": 32600 }, { "epoch": 2.56, "learning_rate": 0.0005, "loss": 1.5941, "step": 32700 }, { "epoch": 2.57, "learning_rate": 0.0005, "loss": 1.6033, "step": 32800 }, { "epoch": 2.58, "learning_rate": 0.0005, "loss": 1.6129, "step": 32900 }, { "epoch": 2.59, "learning_rate": 0.0005, "loss": 1.6148, "step": 33000 }, { "epoch": 2.6, "learning_rate": 0.0005, "loss": 1.6021, "step": 33100 }, { "epoch": 2.6, "learning_rate": 0.0005, "loss": 1.6022, "step": 33200 }, { "epoch": 2.61, "learning_rate": 0.0005, "loss": 1.5881, "step": 33300 }, { "epoch": 2.62, "learning_rate": 0.0005, "loss": 1.5908, "step": 33400 }, { "epoch": 2.63, "learning_rate": 0.0005, "loss": 1.6188, "step": 33500 }, { "epoch": 2.63, "learning_rate": 0.0005, "loss": 1.583, "step": 33600 }, { "epoch": 2.64, "learning_rate": 0.0005, "loss": 1.587, "step": 33700 }, { "epoch": 2.65, "learning_rate": 0.0005, "loss": 1.5957, "step": 33800 }, { "epoch": 2.66, "learning_rate": 0.0005, "loss": 1.6009, "step": 33900 }, { "epoch": 2.67, "learning_rate": 0.0005, "loss": 1.62, "step": 34000 }, { "epoch": 2.67, "learning_rate": 0.0005, "loss": 1.6047, "step": 34100 }, { "epoch": 2.68, "learning_rate": 0.0005, "loss": 1.5877, "step": 34200 }, { "epoch": 2.69, "learning_rate": 0.0005, "loss": 1.6295, "step": 34300 }, { "epoch": 2.7, "learning_rate": 0.0005, "loss": 1.6012, "step": 34400 }, { "epoch": 2.71, "learning_rate": 0.0005, "loss": 1.6089, "step": 34500 }, { "epoch": 2.71, "learning_rate": 0.0005, "loss": 1.5831, "step": 34600 }, { "epoch": 2.72, "learning_rate": 0.0005, "loss": 1.6055, "step": 34700 }, { "epoch": 2.73, "learning_rate": 0.0005, "loss": 1.6082, "step": 34800 }, { "epoch": 2.74, "learning_rate": 0.0005, "loss": 1.5876, "step": 34900 }, { "epoch": 2.74, "learning_rate": 0.0005, "loss": 1.5792, "step": 35000 }, { "epoch": 2.75, "learning_rate": 0.0005, "loss": 1.5907, "step": 35100 }, { "epoch": 2.76, "learning_rate": 0.0005, "loss": 1.5866, "step": 35200 }, { "epoch": 2.77, "learning_rate": 0.0005, "loss": 1.5791, "step": 35300 }, { "epoch": 2.78, "learning_rate": 0.0005, "loss": 1.5899, "step": 35400 }, { "epoch": 2.78, "learning_rate": 0.0005, "loss": 1.6027, "step": 35500 }, { "epoch": 2.79, "learning_rate": 0.0005, "loss": 1.6122, "step": 35600 }, { "epoch": 2.8, "learning_rate": 0.0005, "loss": 1.5758, "step": 35700 }, { "epoch": 2.81, "learning_rate": 0.0005, "loss": 1.5936, "step": 35800 }, { "epoch": 2.82, "learning_rate": 0.0005, "loss": 1.5834, "step": 35900 }, { "epoch": 2.82, "learning_rate": 0.0005, "loss": 1.5836, "step": 36000 }, { "epoch": 2.83, "learning_rate": 0.0005, "loss": 1.6074, "step": 36100 }, { "epoch": 2.84, "learning_rate": 0.0005, "loss": 1.5874, "step": 36200 }, { "epoch": 2.85, "learning_rate": 0.0005, "loss": 1.5947, "step": 36300 }, { "epoch": 2.85, "learning_rate": 0.0005, "loss": 1.6059, "step": 36400 }, { "epoch": 2.86, "learning_rate": 0.0005, "loss": 1.6163, "step": 36500 }, { "epoch": 2.87, "learning_rate": 0.0005, "loss": 1.621, "step": 36600 }, { "epoch": 2.88, "learning_rate": 0.0005, "loss": 1.6084, "step": 36700 }, { "epoch": 2.89, "learning_rate": 0.0005, "loss": 1.577, "step": 36800 }, { "epoch": 2.89, "learning_rate": 0.0005, "loss": 1.6052, "step": 36900 }, { "epoch": 2.9, "learning_rate": 0.0005, "loss": 1.596, "step": 37000 }, { "epoch": 2.91, "learning_rate": 0.0005, "loss": 1.5978, "step": 37100 }, { "epoch": 2.92, "learning_rate": 0.0005, "loss": 1.5814, "step": 37200 }, { "epoch": 2.93, "learning_rate": 0.0005, "loss": 1.5706, "step": 37300 }, { "epoch": 2.93, "learning_rate": 0.0005, "loss": 1.5826, "step": 37400 }, { "epoch": 2.94, "learning_rate": 0.0005, "loss": 1.5973, "step": 37500 }, { "epoch": 2.95, "learning_rate": 0.0005, "loss": 1.5973, "step": 37600 }, { "epoch": 2.96, "learning_rate": 0.0005, "loss": 1.5915, "step": 37700 }, { "epoch": 2.96, "learning_rate": 0.0005, "loss": 1.566, "step": 37800 }, { "epoch": 2.97, "learning_rate": 0.0005, "loss": 1.5978, "step": 37900 }, { "epoch": 2.98, "learning_rate": 0.0005, "loss": 1.6077, "step": 38000 }, { "epoch": 2.99, "learning_rate": 0.0005, "loss": 1.5832, "step": 38100 }, { "epoch": 3.0, "learning_rate": 0.0005, "loss": 1.598, "step": 38200 }, { "epoch": 3.0, "learning_rate": 0.0005, "loss": 1.5534, "step": 38300 }, { "epoch": 3.01, "learning_rate": 0.0005, "loss": 1.5079, "step": 38400 }, { "epoch": 3.02, "learning_rate": 0.0005, "loss": 1.5098, "step": 38500 }, { "epoch": 3.03, "learning_rate": 0.0005, "loss": 1.5127, "step": 38600 }, { "epoch": 3.03, "learning_rate": 0.0005, "loss": 1.506, "step": 38700 }, { "epoch": 3.04, "learning_rate": 0.0005, "loss": 1.483, "step": 38800 }, { "epoch": 3.05, "learning_rate": 0.0005, "loss": 1.5151, "step": 38900 }, { "epoch": 3.06, "learning_rate": 0.0005, "loss": 1.4912, "step": 39000 }, { "epoch": 3.07, "learning_rate": 0.0005, "loss": 1.5238, "step": 39100 }, { "epoch": 3.07, "learning_rate": 0.0005, "loss": 1.4906, "step": 39200 }, { "epoch": 3.08, "learning_rate": 0.0005, "loss": 1.5101, "step": 39300 }, { "epoch": 3.09, "learning_rate": 0.0005, "loss": 1.4918, "step": 39400 }, { "epoch": 3.1, "learning_rate": 0.0005, "loss": 1.5124, "step": 39500 }, { "epoch": 3.11, "learning_rate": 0.0005, "loss": 1.5103, "step": 39600 }, { "epoch": 3.11, "learning_rate": 0.0005, "loss": 1.5291, "step": 39700 }, { "epoch": 3.12, "learning_rate": 0.0005, "loss": 1.5165, "step": 39800 }, { "epoch": 3.13, "learning_rate": 0.0005, "loss": 1.4857, "step": 39900 }, { "epoch": 3.14, "learning_rate": 0.0005, "loss": 1.517, "step": 40000 }, { "epoch": 3.14, "eval_gen_len": 18.765516023660282, "eval_loss": 2.329390525817871, "eval_rouge1": 33.5163, "eval_rouge2": 12.3458, "eval_rougeL": 27.2141, "eval_rougeLsum": 27.2023, "eval_runtime": 346.5778, "eval_samples_per_second": 32.682, "eval_steps_per_second": 2.043, "step": 40000 }, { "epoch": 3.14, "learning_rate": 0.0005, "loss": 1.541, "step": 40100 }, { "epoch": 3.15, "learning_rate": 0.0005, "loss": 1.4803, "step": 40200 }, { "epoch": 3.16, "learning_rate": 0.0005, "loss": 1.5181, "step": 40300 }, { "epoch": 3.17, "learning_rate": 0.0005, "loss": 1.514, "step": 40400 }, { "epoch": 3.18, "learning_rate": 0.0005, "loss": 1.5116, "step": 40500 }, { "epoch": 3.18, "learning_rate": 0.0005, "loss": 1.4864, "step": 40600 }, { "epoch": 3.19, "learning_rate": 0.0005, "loss": 1.5472, "step": 40700 }, { "epoch": 3.2, "learning_rate": 0.0005, "loss": 1.5104, "step": 40800 }, { "epoch": 3.21, "learning_rate": 0.0005, "loss": 1.4961, "step": 40900 }, { "epoch": 3.22, "learning_rate": 0.0005, "loss": 1.4963, "step": 41000 }, { "epoch": 3.22, "learning_rate": 0.0005, "loss": 1.523, "step": 41100 }, { "epoch": 3.23, "learning_rate": 0.0005, "loss": 1.5071, "step": 41200 }, { "epoch": 3.24, "learning_rate": 0.0005, "loss": 1.5086, "step": 41300 }, { "epoch": 3.25, "learning_rate": 0.0005, "loss": 1.5413, "step": 41400 }, { "epoch": 3.25, "learning_rate": 0.0005, "loss": 1.5059, "step": 41500 }, { "epoch": 3.26, "learning_rate": 0.0005, "loss": 1.4852, "step": 41600 }, { "epoch": 3.27, "learning_rate": 0.0005, "loss": 1.5075, "step": 41700 }, { "epoch": 3.28, "learning_rate": 0.0005, "loss": 1.5452, "step": 41800 }, { "epoch": 3.29, "learning_rate": 0.0005, "loss": 1.519, "step": 41900 }, { "epoch": 3.29, "learning_rate": 0.0005, "loss": 1.5199, "step": 42000 }, { "epoch": 3.3, "learning_rate": 0.0005, "loss": 1.5053, "step": 42100 }, { "epoch": 3.31, "learning_rate": 0.0005, "loss": 1.5043, "step": 42200 }, { "epoch": 3.32, "learning_rate": 0.0005, "loss": 1.5195, "step": 42300 }, { "epoch": 3.32, "learning_rate": 0.0005, "loss": 1.4911, "step": 42400 }, { "epoch": 3.33, "learning_rate": 0.0005, "loss": 1.5378, "step": 42500 }, { "epoch": 3.34, "learning_rate": 0.0005, "loss": 1.5144, "step": 42600 }, { "epoch": 3.35, "learning_rate": 0.0005, "loss": 1.5232, "step": 42700 }, { "epoch": 3.36, "learning_rate": 0.0005, "loss": 1.5226, "step": 42800 }, { "epoch": 3.36, "learning_rate": 0.0005, "loss": 1.5166, "step": 42900 }, { "epoch": 3.37, "learning_rate": 0.0005, "loss": 1.5221, "step": 43000 }, { "epoch": 3.38, "learning_rate": 0.0005, "loss": 1.5047, "step": 43100 }, { "epoch": 3.39, "learning_rate": 0.0005, "loss": 1.5198, "step": 43200 }, { "epoch": 3.4, "learning_rate": 0.0005, "loss": 1.5085, "step": 43300 }, { "epoch": 3.4, "learning_rate": 0.0005, "loss": 1.5374, "step": 43400 }, { "epoch": 3.41, "learning_rate": 0.0005, "loss": 1.5015, "step": 43500 }, { "epoch": 3.42, "learning_rate": 0.0005, "loss": 1.5371, "step": 43600 }, { "epoch": 3.43, "learning_rate": 0.0005, "loss": 1.5167, "step": 43700 }, { "epoch": 3.43, "learning_rate": 0.0005, "loss": 1.5109, "step": 43800 }, { "epoch": 3.44, "learning_rate": 0.0005, "loss": 1.5245, "step": 43900 }, { "epoch": 3.45, "learning_rate": 0.0005, "loss": 1.516, "step": 44000 }, { "epoch": 3.46, "learning_rate": 0.0005, "loss": 1.5132, "step": 44100 }, { "epoch": 3.47, "learning_rate": 0.0005, "loss": 1.524, "step": 44200 }, { "epoch": 3.47, "learning_rate": 0.0005, "loss": 1.5232, "step": 44300 }, { "epoch": 3.48, "learning_rate": 0.0005, "loss": 1.4996, "step": 44400 }, { "epoch": 3.49, "learning_rate": 0.0005, "loss": 1.5283, "step": 44500 }, { "epoch": 3.5, "learning_rate": 0.0005, "loss": 1.5137, "step": 44600 }, { "epoch": 3.51, "learning_rate": 0.0005, "loss": 1.5107, "step": 44700 }, { "epoch": 3.51, "learning_rate": 0.0005, "loss": 1.5086, "step": 44800 }, { "epoch": 3.52, "learning_rate": 0.0005, "loss": 1.5199, "step": 44900 }, { "epoch": 3.53, "learning_rate": 0.0005, "loss": 1.4955, "step": 45000 }, { "epoch": 3.54, "learning_rate": 0.0005, "loss": 1.5341, "step": 45100 }, { "epoch": 3.54, "learning_rate": 0.0005, "loss": 1.5085, "step": 45200 }, { "epoch": 3.55, "learning_rate": 0.0005, "loss": 1.5202, "step": 45300 }, { "epoch": 3.56, "learning_rate": 0.0005, "loss": 1.533, "step": 45400 }, { "epoch": 3.57, "learning_rate": 0.0005, "loss": 1.5463, "step": 45500 }, { "epoch": 3.58, "learning_rate": 0.0005, "loss": 1.5416, "step": 45600 }, { "epoch": 3.58, "learning_rate": 0.0005, "loss": 1.541, "step": 45700 }, { "epoch": 3.59, "learning_rate": 0.0005, "loss": 1.5129, "step": 45800 }, { "epoch": 3.6, "learning_rate": 0.0005, "loss": 1.5249, "step": 45900 }, { "epoch": 3.61, "learning_rate": 0.0005, "loss": 1.5039, "step": 46000 }, { "epoch": 3.62, "learning_rate": 0.0005, "loss": 1.5128, "step": 46100 }, { "epoch": 3.62, "learning_rate": 0.0005, "loss": 1.51, "step": 46200 }, { "epoch": 3.63, "learning_rate": 0.0005, "loss": 1.5107, "step": 46300 }, { "epoch": 3.64, "learning_rate": 0.0005, "loss": 1.5166, "step": 46400 }, { "epoch": 3.65, "learning_rate": 0.0005, "loss": 1.5162, "step": 46500 }, { "epoch": 3.65, "learning_rate": 0.0005, "loss": 1.5309, "step": 46600 }, { "epoch": 3.66, "learning_rate": 0.0005, "loss": 1.5266, "step": 46700 }, { "epoch": 3.67, "learning_rate": 0.0005, "loss": 1.5208, "step": 46800 }, { "epoch": 3.68, "learning_rate": 0.0005, "loss": 1.5088, "step": 46900 }, { "epoch": 3.69, "learning_rate": 0.0005, "loss": 1.5148, "step": 47000 }, { "epoch": 3.69, "learning_rate": 0.0005, "loss": 1.5374, "step": 47100 }, { "epoch": 3.7, "learning_rate": 0.0005, "loss": 1.5115, "step": 47200 }, { "epoch": 3.71, "learning_rate": 0.0005, "loss": 1.5082, "step": 47300 }, { "epoch": 3.72, "learning_rate": 0.0005, "loss": 1.516, "step": 47400 }, { "epoch": 3.72, "learning_rate": 0.0005, "loss": 1.5106, "step": 47500 }, { "epoch": 3.73, "learning_rate": 0.0005, "loss": 1.5326, "step": 47600 }, { "epoch": 3.74, "learning_rate": 0.0005, "loss": 1.5427, "step": 47700 }, { "epoch": 3.75, "learning_rate": 0.0005, "loss": 1.5213, "step": 47800 }, { "epoch": 3.76, "learning_rate": 0.0005, "loss": 1.5051, "step": 47900 }, { "epoch": 3.76, "learning_rate": 0.0005, "loss": 1.5078, "step": 48000 }, { "epoch": 3.77, "learning_rate": 0.0005, "loss": 1.5257, "step": 48100 }, { "epoch": 3.78, "learning_rate": 0.0005, "loss": 1.4985, "step": 48200 }, { "epoch": 3.79, "learning_rate": 0.0005, "loss": 1.5218, "step": 48300 }, { "epoch": 3.8, "learning_rate": 0.0005, "loss": 1.5171, "step": 48400 }, { "epoch": 3.8, "learning_rate": 0.0005, "loss": 1.5148, "step": 48500 }, { "epoch": 3.81, "learning_rate": 0.0005, "loss": 1.4931, "step": 48600 }, { "epoch": 3.82, "learning_rate": 0.0005, "loss": 1.5306, "step": 48700 }, { "epoch": 3.83, "learning_rate": 0.0005, "loss": 1.5006, "step": 48800 }, { "epoch": 3.83, "learning_rate": 0.0005, "loss": 1.5175, "step": 48900 }, { "epoch": 3.84, "learning_rate": 0.0005, "loss": 1.5317, "step": 49000 }, { "epoch": 3.85, "learning_rate": 0.0005, "loss": 1.5371, "step": 49100 }, { "epoch": 3.86, "learning_rate": 0.0005, "loss": 1.5198, "step": 49200 }, { "epoch": 3.87, "learning_rate": 0.0005, "loss": 1.5099, "step": 49300 }, { "epoch": 3.87, "learning_rate": 0.0005, "loss": 1.5223, "step": 49400 }, { "epoch": 3.88, "learning_rate": 0.0005, "loss": 1.5273, "step": 49500 }, { "epoch": 3.89, "learning_rate": 0.0005, "loss": 1.5459, "step": 49600 }, { "epoch": 3.9, "learning_rate": 0.0005, "loss": 1.5136, "step": 49700 }, { "epoch": 3.91, "learning_rate": 0.0005, "loss": 1.5381, "step": 49800 }, { "epoch": 3.91, "learning_rate": 0.0005, "loss": 1.5148, "step": 49900 }, { "epoch": 3.92, "learning_rate": 0.0005, "loss": 1.5429, "step": 50000 }, { "epoch": 3.92, "eval_gen_len": 18.583826255848855, "eval_loss": 2.3042964935302734, "eval_rouge1": 33.7669, "eval_rouge2": 12.7128, "eval_rougeL": 27.641, "eval_rougeLsum": 27.6383, "eval_runtime": 356.4473, "eval_samples_per_second": 31.777, "eval_steps_per_second": 1.986, "step": 50000 }, { "epoch": 3.93, "learning_rate": 0.0005, "loss": 1.5023, "step": 50100 }, { "epoch": 3.94, "learning_rate": 0.0005, "loss": 1.537, "step": 50200 }, { "epoch": 3.94, "learning_rate": 0.0005, "loss": 1.5346, "step": 50300 }, { "epoch": 3.95, "learning_rate": 0.0005, "loss": 1.5038, "step": 50400 }, { "epoch": 3.96, "learning_rate": 0.0005, "loss": 1.5142, "step": 50500 }, { "epoch": 3.97, "learning_rate": 0.0005, "loss": 1.5001, "step": 50600 }, { "epoch": 3.98, "learning_rate": 0.0005, "loss": 1.515, "step": 50700 }, { "epoch": 3.98, "learning_rate": 0.0005, "loss": 1.5021, "step": 50800 }, { "epoch": 3.99, "learning_rate": 0.0005, "loss": 1.5119, "step": 50900 }, { "epoch": 4.0, "learning_rate": 0.0005, "loss": 1.5179, "step": 51000 }, { "epoch": 4.01, "learning_rate": 0.0005, "loss": 1.4309, "step": 51100 }, { "epoch": 4.02, "learning_rate": 0.0005, "loss": 1.4105, "step": 51200 }, { "epoch": 4.02, "learning_rate": 0.0005, "loss": 1.4356, "step": 51300 }, { "epoch": 4.03, "learning_rate": 0.0005, "loss": 1.4311, "step": 51400 }, { "epoch": 4.04, "learning_rate": 0.0005, "loss": 1.4365, "step": 51500 }, { "epoch": 4.05, "learning_rate": 0.0005, "loss": 1.4153, "step": 51600 }, { "epoch": 4.05, "learning_rate": 0.0005, "loss": 1.4293, "step": 51700 }, { "epoch": 4.06, "learning_rate": 0.0005, "loss": 1.4669, "step": 51800 }, { "epoch": 4.07, "learning_rate": 0.0005, "loss": 1.437, "step": 51900 }, { "epoch": 4.08, "learning_rate": 0.0005, "loss": 1.4437, "step": 52000 }, { "epoch": 4.09, "learning_rate": 0.0005, "loss": 1.4431, "step": 52100 }, { "epoch": 4.09, "learning_rate": 0.0005, "loss": 1.4264, "step": 52200 }, { "epoch": 4.1, "learning_rate": 0.0005, "loss": 1.4178, "step": 52300 }, { "epoch": 4.11, "learning_rate": 0.0005, "loss": 1.4409, "step": 52400 }, { "epoch": 4.12, "learning_rate": 0.0005, "loss": 1.4422, "step": 52500 }, { "epoch": 4.12, "learning_rate": 0.0005, "loss": 1.4344, "step": 52600 }, { "epoch": 4.13, "learning_rate": 0.0005, "loss": 1.4418, "step": 52700 }, { "epoch": 4.14, "learning_rate": 0.0005, "loss": 1.4359, "step": 52800 }, { "epoch": 4.15, "learning_rate": 0.0005, "loss": 1.4548, "step": 52900 }, { "epoch": 4.16, "learning_rate": 0.0005, "loss": 1.4474, "step": 53000 }, { "epoch": 4.16, "learning_rate": 0.0005, "loss": 1.4544, "step": 53100 }, { "epoch": 4.17, "learning_rate": 0.0005, "loss": 1.4201, "step": 53200 }, { "epoch": 4.18, "learning_rate": 0.0005, "loss": 1.4328, "step": 53300 }, { "epoch": 4.19, "learning_rate": 0.0005, "loss": 1.4372, "step": 53400 }, { "epoch": 4.2, "learning_rate": 0.0005, "loss": 1.42, "step": 53500 }, { "epoch": 4.2, "learning_rate": 0.0005, "loss": 1.4338, "step": 53600 }, { "epoch": 4.21, "learning_rate": 0.0005, "loss": 1.4679, "step": 53700 }, { "epoch": 4.22, "learning_rate": 0.0005, "loss": 1.4378, "step": 53800 }, { "epoch": 4.23, "learning_rate": 0.0005, "loss": 1.4466, "step": 53900 }, { "epoch": 4.23, "learning_rate": 0.0005, "loss": 1.4482, "step": 54000 }, { "epoch": 4.24, "learning_rate": 0.0005, "loss": 1.4473, "step": 54100 }, { "epoch": 4.25, "learning_rate": 0.0005, "loss": 1.4631, "step": 54200 }, { "epoch": 4.26, "learning_rate": 0.0005, "loss": 1.4578, "step": 54300 }, { "epoch": 4.27, "learning_rate": 0.0005, "loss": 1.4458, "step": 54400 }, { "epoch": 4.27, "learning_rate": 0.0005, "loss": 1.4371, "step": 54500 }, { "epoch": 4.28, "learning_rate": 0.0005, "loss": 1.439, "step": 54600 }, { "epoch": 4.29, "learning_rate": 0.0005, "loss": 1.4404, "step": 54700 }, { "epoch": 4.3, "learning_rate": 0.0005, "loss": 1.4423, "step": 54800 }, { "epoch": 4.31, "learning_rate": 0.0005, "loss": 1.4443, "step": 54900 }, { "epoch": 4.31, "learning_rate": 0.0005, "loss": 1.4579, "step": 55000 }, { "epoch": 4.32, "learning_rate": 0.0005, "loss": 1.4229, "step": 55100 }, { "epoch": 4.33, "learning_rate": 0.0005, "loss": 1.4595, "step": 55200 }, { "epoch": 4.34, "learning_rate": 0.0005, "loss": 1.4533, "step": 55300 }, { "epoch": 4.34, "learning_rate": 0.0005, "loss": 1.4332, "step": 55400 }, { "epoch": 4.35, "learning_rate": 0.0005, "loss": 1.4315, "step": 55500 }, { "epoch": 4.36, "learning_rate": 0.0005, "loss": 1.4342, "step": 55600 }, { "epoch": 4.37, "learning_rate": 0.0005, "loss": 1.4532, "step": 55700 }, { "epoch": 4.38, "learning_rate": 0.0005, "loss": 1.4271, "step": 55800 }, { "epoch": 4.38, "learning_rate": 0.0005, "loss": 1.4403, "step": 55900 }, { "epoch": 4.39, "learning_rate": 0.0005, "loss": 1.4561, "step": 56000 }, { "epoch": 4.4, "learning_rate": 0.0005, "loss": 1.45, "step": 56100 }, { "epoch": 4.41, "learning_rate": 0.0005, "loss": 1.4653, "step": 56200 }, { "epoch": 4.41, "learning_rate": 0.0005, "loss": 1.4416, "step": 56300 }, { "epoch": 4.42, "learning_rate": 0.0005, "loss": 1.4449, "step": 56400 }, { "epoch": 4.43, "learning_rate": 0.0005, "loss": 1.4261, "step": 56500 }, { "epoch": 4.44, "learning_rate": 0.0005, "loss": 1.4527, "step": 56600 }, { "epoch": 4.45, "learning_rate": 0.0005, "loss": 1.4751, "step": 56700 }, { "epoch": 4.45, "learning_rate": 0.0005, "loss": 1.4417, "step": 56800 }, { "epoch": 4.46, "learning_rate": 0.0005, "loss": 1.4775, "step": 56900 }, { "epoch": 4.47, "learning_rate": 0.0005, "loss": 1.4505, "step": 57000 }, { "epoch": 4.48, "learning_rate": 0.0005, "loss": 1.4606, "step": 57100 }, { "epoch": 4.49, "learning_rate": 0.0005, "loss": 1.4669, "step": 57200 }, { "epoch": 4.49, "learning_rate": 0.0005, "loss": 1.4589, "step": 57300 }, { "epoch": 4.5, "learning_rate": 0.0005, "loss": 1.4662, "step": 57400 }, { "epoch": 4.51, "learning_rate": 0.0005, "loss": 1.4423, "step": 57500 }, { "epoch": 4.52, "learning_rate": 0.0005, "loss": 1.4523, "step": 57600 }, { "epoch": 4.52, "learning_rate": 0.0005, "loss": 1.462, "step": 57700 }, { "epoch": 4.53, "learning_rate": 0.0005, "loss": 1.4748, "step": 57800 }, { "epoch": 4.54, "learning_rate": 0.0005, "loss": 1.4717, "step": 57900 }, { "epoch": 4.55, "learning_rate": 0.0005, "loss": 1.4641, "step": 58000 }, { "epoch": 4.56, "learning_rate": 0.0005, "loss": 1.4514, "step": 58100 }, { "epoch": 4.56, "learning_rate": 0.0005, "loss": 1.4559, "step": 58200 }, { "epoch": 4.57, "learning_rate": 0.0005, "loss": 1.4707, "step": 58300 }, { "epoch": 4.58, "learning_rate": 0.0005, "loss": 1.4649, "step": 58400 }, { "epoch": 4.59, "learning_rate": 0.0005, "loss": 1.4394, "step": 58500 }, { "epoch": 4.6, "learning_rate": 0.0005, "loss": 1.4809, "step": 58600 }, { "epoch": 4.6, "learning_rate": 0.0005, "loss": 1.4622, "step": 58700 }, { "epoch": 4.61, "learning_rate": 0.0005, "loss": 1.479, "step": 58800 }, { "epoch": 4.62, "learning_rate": 0.0005, "loss": 1.4401, "step": 58900 }, { "epoch": 4.63, "learning_rate": 0.0005, "loss": 1.4464, "step": 59000 }, { "epoch": 4.63, "learning_rate": 0.0005, "loss": 1.4539, "step": 59100 }, { "epoch": 4.64, "learning_rate": 0.0005, "loss": 1.4582, "step": 59200 }, { "epoch": 4.65, "learning_rate": 0.0005, "loss": 1.4579, "step": 59300 }, { "epoch": 4.66, "learning_rate": 0.0005, "loss": 1.4641, "step": 59400 }, { "epoch": 4.67, "learning_rate": 0.0005, "loss": 1.4532, "step": 59500 }, { "epoch": 4.67, "learning_rate": 0.0005, "loss": 1.4389, "step": 59600 }, { "epoch": 4.68, "learning_rate": 0.0005, "loss": 1.4542, "step": 59700 }, { "epoch": 4.69, "learning_rate": 0.0005, "loss": 1.473, "step": 59800 }, { "epoch": 4.7, "learning_rate": 0.0005, "loss": 1.4598, "step": 59900 }, { "epoch": 4.71, "learning_rate": 0.0005, "loss": 1.475, "step": 60000 }, { "epoch": 4.71, "eval_gen_len": 18.74971307495365, "eval_loss": 2.3022000789642334, "eval_rouge1": 34.0539, "eval_rouge2": 12.9764, "eval_rougeL": 27.8823, "eval_rougeLsum": 27.8695, "eval_runtime": 353.7063, "eval_samples_per_second": 32.024, "eval_steps_per_second": 2.002, "step": 60000 }, { "epoch": 4.71, "learning_rate": 0.0005, "loss": 1.4611, "step": 60100 }, { "epoch": 4.72, "learning_rate": 0.0005, "loss": 1.4512, "step": 60200 }, { "epoch": 4.73, "learning_rate": 0.0005, "loss": 1.4714, "step": 60300 }, { "epoch": 4.74, "learning_rate": 0.0005, "loss": 1.4443, "step": 60400 }, { "epoch": 4.74, "learning_rate": 0.0005, "loss": 1.4727, "step": 60500 }, { "epoch": 4.75, "learning_rate": 0.0005, "loss": 1.4707, "step": 60600 }, { "epoch": 4.76, "learning_rate": 0.0005, "loss": 1.4508, "step": 60700 }, { "epoch": 4.77, "learning_rate": 0.0005, "loss": 1.4455, "step": 60800 }, { "epoch": 4.78, "learning_rate": 0.0005, "loss": 1.4334, "step": 60900 }, { "epoch": 4.78, "learning_rate": 0.0005, "loss": 1.4428, "step": 61000 }, { "epoch": 4.79, "learning_rate": 0.0005, "loss": 1.4751, "step": 61100 }, { "epoch": 4.8, "learning_rate": 0.0005, "loss": 1.4547, "step": 61200 }, { "epoch": 4.81, "learning_rate": 0.0005, "loss": 1.4461, "step": 61300 }, { "epoch": 4.81, "learning_rate": 0.0005, "loss": 1.4598, "step": 61400 }, { "epoch": 4.82, "learning_rate": 0.0005, "loss": 1.4487, "step": 61500 }, { "epoch": 4.83, "learning_rate": 0.0005, "loss": 1.4386, "step": 61600 }, { "epoch": 4.84, "learning_rate": 0.0005, "loss": 1.4465, "step": 61700 }, { "epoch": 4.85, "learning_rate": 0.0005, "loss": 1.4423, "step": 61800 }, { "epoch": 4.85, "learning_rate": 0.0005, "loss": 1.4615, "step": 61900 }, { "epoch": 4.86, "learning_rate": 0.0005, "loss": 1.4242, "step": 62000 }, { "epoch": 4.87, "learning_rate": 0.0005, "loss": 1.4547, "step": 62100 }, { "epoch": 4.88, "learning_rate": 0.0005, "loss": 1.4248, "step": 62200 }, { "epoch": 4.89, "learning_rate": 0.0005, "loss": 1.4664, "step": 62300 }, { "epoch": 4.89, "learning_rate": 0.0005, "loss": 1.4831, "step": 62400 }, { "epoch": 4.9, "learning_rate": 0.0005, "loss": 1.4716, "step": 62500 }, { "epoch": 4.91, "learning_rate": 0.0005, "loss": 1.482, "step": 62600 }, { "epoch": 4.92, "learning_rate": 0.0005, "loss": 1.4642, "step": 62700 }, { "epoch": 4.92, "learning_rate": 0.0005, "loss": 1.4536, "step": 62800 }, { "epoch": 4.93, "learning_rate": 0.0005, "loss": 1.4631, "step": 62900 }, { "epoch": 4.94, "learning_rate": 0.0005, "loss": 1.4642, "step": 63000 }, { "epoch": 4.95, "learning_rate": 0.0005, "loss": 1.4832, "step": 63100 }, { "epoch": 4.96, "learning_rate": 0.0005, "loss": 1.4393, "step": 63200 }, { "epoch": 4.96, "learning_rate": 0.0005, "loss": 1.4678, "step": 63300 }, { "epoch": 4.97, "learning_rate": 0.0005, "loss": 1.4476, "step": 63400 }, { "epoch": 4.98, "learning_rate": 0.0005, "loss": 1.4689, "step": 63500 }, { "epoch": 4.99, "learning_rate": 0.0005, "loss": 1.4466, "step": 63600 }, { "epoch": 5.0, "learning_rate": 0.0005, "loss": 1.4605, "step": 63700 }, { "epoch": 5.0, "learning_rate": 0.0005, "loss": 1.431, "step": 63800 }, { "epoch": 5.01, "learning_rate": 0.0005, "loss": 1.3646, "step": 63900 }, { "epoch": 5.02, "learning_rate": 0.0005, "loss": 1.376, "step": 64000 }, { "epoch": 5.03, "learning_rate": 0.0005, "loss": 1.3577, "step": 64100 }, { "epoch": 5.03, "learning_rate": 0.0005, "loss": 1.3447, "step": 64200 }, { "epoch": 5.04, "learning_rate": 0.0005, "loss": 1.3696, "step": 64300 }, { "epoch": 5.05, "learning_rate": 0.0005, "loss": 1.4079, "step": 64400 }, { "epoch": 5.06, "learning_rate": 0.0005, "loss": 1.367, "step": 64500 }, { "epoch": 5.07, "learning_rate": 0.0005, "loss": 1.3771, "step": 64600 }, { "epoch": 5.07, "learning_rate": 0.0005, "loss": 1.4109, "step": 64700 }, { "epoch": 5.08, "learning_rate": 0.0005, "loss": 1.38, "step": 64800 }, { "epoch": 5.09, "learning_rate": 0.0005, "loss": 1.3861, "step": 64900 }, { "epoch": 5.1, "learning_rate": 0.0005, "loss": 1.3871, "step": 65000 }, { "epoch": 5.11, "learning_rate": 0.0005, "loss": 1.3557, "step": 65100 }, { "epoch": 5.11, "learning_rate": 0.0005, "loss": 1.3918, "step": 65200 }, { "epoch": 5.12, "learning_rate": 0.0005, "loss": 1.3841, "step": 65300 }, { "epoch": 5.13, "learning_rate": 0.0005, "loss": 1.4053, "step": 65400 }, { "epoch": 5.14, "learning_rate": 0.0005, "loss": 1.3789, "step": 65500 }, { "epoch": 5.14, "learning_rate": 0.0005, "loss": 1.3924, "step": 65600 }, { "epoch": 5.15, "learning_rate": 0.0005, "loss": 1.3657, "step": 65700 }, { "epoch": 5.16, "learning_rate": 0.0005, "loss": 1.3801, "step": 65800 }, { "epoch": 5.17, "learning_rate": 0.0005, "loss": 1.3889, "step": 65900 }, { "epoch": 5.18, "learning_rate": 0.0005, "loss": 1.3745, "step": 66000 }, { "epoch": 5.18, "learning_rate": 0.0005, "loss": 1.3834, "step": 66100 }, { "epoch": 5.19, "learning_rate": 0.0005, "loss": 1.3891, "step": 66200 }, { "epoch": 5.2, "learning_rate": 0.0005, "loss": 1.3836, "step": 66300 }, { "epoch": 5.21, "learning_rate": 0.0005, "loss": 1.3732, "step": 66400 }, { "epoch": 5.21, "learning_rate": 0.0005, "loss": 1.3928, "step": 66500 }, { "epoch": 5.22, "learning_rate": 0.0005, "loss": 1.3993, "step": 66600 }, { "epoch": 5.23, "learning_rate": 0.0005, "loss": 1.3795, "step": 66700 }, { "epoch": 5.24, "learning_rate": 0.0005, "loss": 1.4197, "step": 66800 }, { "epoch": 5.25, "learning_rate": 0.0005, "loss": 1.3677, "step": 66900 }, { "epoch": 5.25, "learning_rate": 0.0005, "loss": 1.3964, "step": 67000 }, { "epoch": 5.26, "learning_rate": 0.0005, "loss": 1.3717, "step": 67100 }, { "epoch": 5.27, "learning_rate": 0.0005, "loss": 1.3891, "step": 67200 }, { "epoch": 5.28, "learning_rate": 0.0005, "loss": 1.4038, "step": 67300 }, { "epoch": 5.29, "learning_rate": 0.0005, "loss": 1.3955, "step": 67400 }, { "epoch": 5.29, "learning_rate": 0.0005, "loss": 1.3901, "step": 67500 }, { "epoch": 5.3, "learning_rate": 0.0005, "loss": 1.389, "step": 67600 }, { "epoch": 5.31, "learning_rate": 0.0005, "loss": 1.4091, "step": 67700 }, { "epoch": 5.32, "learning_rate": 0.0005, "loss": 1.3881, "step": 67800 }, { "epoch": 5.32, "learning_rate": 0.0005, "loss": 1.4026, "step": 67900 }, { "epoch": 5.33, "learning_rate": 0.0005, "loss": 1.3844, "step": 68000 }, { "epoch": 5.34, "learning_rate": 0.0005, "loss": 1.3687, "step": 68100 }, { "epoch": 5.35, "learning_rate": 0.0005, "loss": 1.379, "step": 68200 }, { "epoch": 5.36, "learning_rate": 0.0005, "loss": 1.3795, "step": 68300 }, { "epoch": 5.36, "learning_rate": 0.0005, "loss": 1.4076, "step": 68400 }, { "epoch": 5.37, "learning_rate": 0.0005, "loss": 1.3999, "step": 68500 }, { "epoch": 5.38, "learning_rate": 0.0005, "loss": 1.3924, "step": 68600 }, { "epoch": 5.39, "learning_rate": 0.0005, "loss": 1.4324, "step": 68700 }, { "epoch": 5.4, "learning_rate": 0.0005, "loss": 1.406, "step": 68800 }, { "epoch": 5.4, "learning_rate": 0.0005, "loss": 1.404, "step": 68900 }, { "epoch": 5.41, "learning_rate": 0.0005, "loss": 1.4119, "step": 69000 }, { "epoch": 5.42, "learning_rate": 0.0005, "loss": 1.4178, "step": 69100 }, { "epoch": 5.43, "learning_rate": 0.0005, "loss": 1.4095, "step": 69200 }, { "epoch": 5.43, "learning_rate": 0.0005, "loss": 1.3986, "step": 69300 }, { "epoch": 5.44, "learning_rate": 0.0005, "loss": 1.3808, "step": 69400 }, { "epoch": 5.45, "learning_rate": 0.0005, "loss": 1.3877, "step": 69500 }, { "epoch": 5.46, "learning_rate": 0.0005, "loss": 1.4135, "step": 69600 }, { "epoch": 5.47, "learning_rate": 0.0005, "loss": 1.3957, "step": 69700 }, { "epoch": 5.47, "learning_rate": 0.0005, "loss": 1.4108, "step": 69800 }, { "epoch": 5.48, "learning_rate": 0.0005, "loss": 1.4068, "step": 69900 }, { "epoch": 5.49, "learning_rate": 0.0005, "loss": 1.3804, "step": 70000 }, { "epoch": 5.49, "eval_gen_len": 18.74026661958153, "eval_loss": 2.3253302574157715, "eval_rouge1": 34.1086, "eval_rouge2": 13.1195, "eval_rougeL": 28.0176, "eval_rougeLsum": 28.0186, "eval_runtime": 348.2658, "eval_samples_per_second": 32.524, "eval_steps_per_second": 2.033, "step": 70000 }, { "epoch": 5.5, "learning_rate": 0.0005, "loss": 1.3911, "step": 70100 }, { "epoch": 5.51, "learning_rate": 0.0005, "loss": 1.4149, "step": 70200 }, { "epoch": 5.51, "learning_rate": 0.0005, "loss": 1.3961, "step": 70300 }, { "epoch": 5.52, "learning_rate": 0.0005, "loss": 1.3899, "step": 70400 }, { "epoch": 5.53, "learning_rate": 0.0005, "loss": 1.4053, "step": 70500 }, { "epoch": 5.54, "learning_rate": 0.0005, "loss": 1.4002, "step": 70600 }, { "epoch": 5.54, "learning_rate": 0.0005, "loss": 1.4142, "step": 70700 }, { "epoch": 5.55, "learning_rate": 0.0005, "loss": 1.4121, "step": 70800 }, { "epoch": 5.56, "learning_rate": 0.0005, "loss": 1.4094, "step": 70900 }, { "epoch": 5.57, "learning_rate": 0.0005, "loss": 1.3936, "step": 71000 }, { "epoch": 5.58, "learning_rate": 0.0005, "loss": 1.4091, "step": 71100 }, { "epoch": 5.58, "learning_rate": 0.0005, "loss": 1.4007, "step": 71200 }, { "epoch": 5.59, "learning_rate": 0.0005, "loss": 1.4159, "step": 71300 }, { "epoch": 5.6, "learning_rate": 0.0005, "loss": 1.3911, "step": 71400 }, { "epoch": 5.61, "learning_rate": 0.0005, "loss": 1.4275, "step": 71500 }, { "epoch": 5.61, "learning_rate": 0.0005, "loss": 1.4008, "step": 71600 }, { "epoch": 5.62, "learning_rate": 0.0005, "loss": 1.4069, "step": 71700 }, { "epoch": 5.63, "learning_rate": 0.0005, "loss": 1.3904, "step": 71800 }, { "epoch": 5.64, "learning_rate": 0.0005, "loss": 1.4138, "step": 71900 }, { "epoch": 5.65, "learning_rate": 0.0005, "loss": 1.3912, "step": 72000 }, { "epoch": 5.65, "learning_rate": 0.0005, "loss": 1.3971, "step": 72100 }, { "epoch": 5.66, "learning_rate": 0.0005, "loss": 1.4153, "step": 72200 }, { "epoch": 5.67, "learning_rate": 0.0005, "loss": 1.3954, "step": 72300 }, { "epoch": 5.68, "learning_rate": 0.0005, "loss": 1.3777, "step": 72400 }, { "epoch": 5.69, "learning_rate": 0.0005, "loss": 1.4101, "step": 72500 }, { "epoch": 5.69, "learning_rate": 0.0005, "loss": 1.4203, "step": 72600 }, { "epoch": 5.7, "learning_rate": 0.0005, "loss": 1.4302, "step": 72700 }, { "epoch": 5.71, "learning_rate": 0.0005, "loss": 1.3765, "step": 72800 }, { "epoch": 5.72, "learning_rate": 0.0005, "loss": 1.3977, "step": 72900 }, { "epoch": 5.72, "learning_rate": 0.0005, "loss": 1.3909, "step": 73000 }, { "epoch": 5.73, "learning_rate": 0.0005, "loss": 1.3792, "step": 73100 }, { "epoch": 5.74, "learning_rate": 0.0005, "loss": 1.4065, "step": 73200 }, { "epoch": 5.75, "learning_rate": 0.0005, "loss": 1.3779, "step": 73300 }, { "epoch": 5.76, "learning_rate": 0.0005, "loss": 1.4049, "step": 73400 }, { "epoch": 5.76, "learning_rate": 0.0005, "loss": 1.4121, "step": 73500 }, { "epoch": 5.77, "learning_rate": 0.0005, "loss": 1.4046, "step": 73600 }, { "epoch": 5.78, "learning_rate": 0.0005, "loss": 1.4133, "step": 73700 }, { "epoch": 5.79, "learning_rate": 0.0005, "loss": 1.4056, "step": 73800 }, { "epoch": 5.8, "learning_rate": 0.0005, "loss": 1.4131, "step": 73900 }, { "epoch": 5.8, "learning_rate": 0.0005, "loss": 1.4183, "step": 74000 }, { "epoch": 5.81, "learning_rate": 0.0005, "loss": 1.39, "step": 74100 }, { "epoch": 5.82, "learning_rate": 0.0005, "loss": 1.3884, "step": 74200 }, { "epoch": 5.83, "learning_rate": 0.0005, "loss": 1.3922, "step": 74300 }, { "epoch": 5.83, "learning_rate": 0.0005, "loss": 1.3974, "step": 74400 }, { "epoch": 5.84, "learning_rate": 0.0005, "loss": 1.3898, "step": 74500 }, { "epoch": 5.85, "learning_rate": 0.0005, "loss": 1.422, "step": 74600 }, { "epoch": 5.86, "learning_rate": 0.0005, "loss": 1.4073, "step": 74700 }, { "epoch": 5.87, "learning_rate": 0.0005, "loss": 1.4182, "step": 74800 }, { "epoch": 5.87, "learning_rate": 0.0005, "loss": 1.404, "step": 74900 }, { "epoch": 5.88, "learning_rate": 0.0005, "loss": 1.404, "step": 75000 }, { "epoch": 5.89, "learning_rate": 0.0005, "loss": 1.417, "step": 75100 }, { "epoch": 5.9, "learning_rate": 0.0005, "loss": 1.3935, "step": 75200 }, { "epoch": 5.9, "learning_rate": 0.0005, "loss": 1.4128, "step": 75300 }, { "epoch": 5.91, "learning_rate": 0.0005, "loss": 1.4266, "step": 75400 }, { "epoch": 5.92, "learning_rate": 0.0005, "loss": 1.4013, "step": 75500 }, { "epoch": 5.93, "learning_rate": 0.0005, "loss": 1.4308, "step": 75600 }, { "epoch": 5.94, "learning_rate": 0.0005, "loss": 1.3954, "step": 75700 }, { "epoch": 5.94, "learning_rate": 0.0005, "loss": 1.4102, "step": 75800 }, { "epoch": 5.95, "learning_rate": 0.0005, "loss": 1.4205, "step": 75900 }, { "epoch": 5.96, "learning_rate": 0.0005, "loss": 1.4083, "step": 76000 }, { "epoch": 5.97, "learning_rate": 0.0005, "loss": 1.4017, "step": 76100 }, { "epoch": 5.98, "learning_rate": 0.0005, "loss": 1.4201, "step": 76200 }, { "epoch": 5.98, "learning_rate": 0.0005, "loss": 1.4072, "step": 76300 }, { "epoch": 5.99, "learning_rate": 0.0005, "loss": 1.4121, "step": 76400 }, { "epoch": 6.0, "learning_rate": 0.0005, "loss": 1.3649, "step": 76500 }, { "epoch": 6.01, "learning_rate": 0.0005, "loss": 1.3496, "step": 76600 }, { "epoch": 6.01, "learning_rate": 0.0005, "loss": 1.3088, "step": 76700 }, { "epoch": 6.02, "learning_rate": 0.0005, "loss": 1.3273, "step": 76800 }, { "epoch": 6.03, "learning_rate": 0.0005, "loss": 1.3207, "step": 76900 }, { "epoch": 6.04, "learning_rate": 0.0005, "loss": 1.3231, "step": 77000 }, { "epoch": 6.05, "learning_rate": 0.0005, "loss": 1.3061, "step": 77100 }, { "epoch": 6.05, "learning_rate": 0.0005, "loss": 1.3236, "step": 77200 }, { "epoch": 6.06, "learning_rate": 0.0005, "loss": 1.3353, "step": 77300 }, { "epoch": 6.07, "learning_rate": 0.0005, "loss": 1.3088, "step": 77400 }, { "epoch": 6.08, "learning_rate": 0.0005, "loss": 1.3163, "step": 77500 }, { "epoch": 6.09, "learning_rate": 0.0005, "loss": 1.3123, "step": 77600 }, { "epoch": 6.09, "learning_rate": 0.0005, "loss": 1.3145, "step": 77700 }, { "epoch": 6.1, "learning_rate": 0.0005, "loss": 1.3152, "step": 77800 }, { "epoch": 6.11, "learning_rate": 0.0005, "loss": 1.3271, "step": 77900 }, { "epoch": 6.12, "learning_rate": 0.0005, "loss": 1.3517, "step": 78000 }, { "epoch": 6.12, "learning_rate": 0.0005, "loss": 1.3451, "step": 78100 }, { "epoch": 6.13, "learning_rate": 0.0005, "loss": 1.3471, "step": 78200 }, { "epoch": 6.14, "learning_rate": 0.0005, "loss": 1.3285, "step": 78300 }, { "epoch": 6.15, "learning_rate": 0.0005, "loss": 1.3541, "step": 78400 }, { "epoch": 6.16, "learning_rate": 0.0005, "loss": 1.3361, "step": 78500 }, { "epoch": 6.16, "learning_rate": 0.0005, "loss": 1.3424, "step": 78600 }, { "epoch": 6.17, "learning_rate": 0.0005, "loss": 1.3402, "step": 78700 }, { "epoch": 6.18, "learning_rate": 0.0005, "loss": 1.3305, "step": 78800 }, { "epoch": 6.19, "learning_rate": 0.0005, "loss": 1.3223, "step": 78900 }, { "epoch": 6.2, "learning_rate": 0.0005, "loss": 1.3367, "step": 79000 }, { "epoch": 6.2, "learning_rate": 0.0005, "loss": 1.3242, "step": 79100 }, { "epoch": 6.21, "learning_rate": 0.0005, "loss": 1.3428, "step": 79200 }, { "epoch": 6.22, "learning_rate": 0.0005, "loss": 1.3484, "step": 79300 }, { "epoch": 6.23, "learning_rate": 0.0005, "loss": 1.3515, "step": 79400 }, { "epoch": 6.23, "learning_rate": 0.0005, "loss": 1.3462, "step": 79500 }, { "epoch": 6.24, "learning_rate": 0.0005, "loss": 1.3391, "step": 79600 }, { "epoch": 6.25, "learning_rate": 0.0005, "loss": 1.3436, "step": 79700 }, { "epoch": 6.26, "learning_rate": 0.0005, "loss": 1.3496, "step": 79800 }, { "epoch": 6.27, "learning_rate": 0.0005, "loss": 1.3447, "step": 79900 }, { "epoch": 6.27, "learning_rate": 0.0005, "loss": 1.3329, "step": 80000 }, { "epoch": 6.27, "eval_gen_len": 18.70927871457579, "eval_loss": 2.332681655883789, "eval_rouge1": 34.2256, "eval_rouge2": 13.2076, "eval_rougeL": 28.0369, "eval_rougeLsum": 28.0357, "eval_runtime": 344.7634, "eval_samples_per_second": 32.854, "eval_steps_per_second": 2.054, "step": 80000 }, { "epoch": 6.28, "learning_rate": 0.0005, "loss": 1.351, "step": 80100 }, { "epoch": 6.29, "learning_rate": 0.0005, "loss": 1.3381, "step": 80200 }, { "epoch": 6.3, "learning_rate": 0.0005, "loss": 1.327, "step": 80300 }, { "epoch": 6.3, "learning_rate": 0.0005, "loss": 1.3345, "step": 80400 }, { "epoch": 6.31, "learning_rate": 0.0005, "loss": 1.3395, "step": 80500 }, { "epoch": 6.32, "learning_rate": 0.0005, "loss": 1.3643, "step": 80600 }, { "epoch": 6.33, "learning_rate": 0.0005, "loss": 1.3277, "step": 80700 }, { "epoch": 6.34, "learning_rate": 0.0005, "loss": 1.3444, "step": 80800 }, { "epoch": 6.34, "learning_rate": 0.0005, "loss": 1.3393, "step": 80900 }, { "epoch": 6.35, "learning_rate": 0.0005, "loss": 1.3726, "step": 81000 }, { "epoch": 6.36, "learning_rate": 0.0005, "loss": 1.3172, "step": 81100 }, { "epoch": 6.37, "learning_rate": 0.0005, "loss": 1.3653, "step": 81200 }, { "epoch": 6.38, "learning_rate": 0.0005, "loss": 1.3827, "step": 81300 }, { "epoch": 6.38, "learning_rate": 0.0005, "loss": 1.3105, "step": 81400 }, { "epoch": 6.39, "learning_rate": 0.0005, "loss": 1.3732, "step": 81500 }, { "epoch": 6.4, "learning_rate": 0.0005, "loss": 1.3452, "step": 81600 }, { "epoch": 6.41, "learning_rate": 0.0005, "loss": 1.3516, "step": 81700 }, { "epoch": 6.41, "learning_rate": 0.0005, "loss": 1.3753, "step": 81800 }, { "epoch": 6.42, "learning_rate": 0.0005, "loss": 1.3545, "step": 81900 }, { "epoch": 6.43, "learning_rate": 0.0005, "loss": 1.3357, "step": 82000 }, { "epoch": 6.44, "learning_rate": 0.0005, "loss": 1.3628, "step": 82100 }, { "epoch": 6.45, "learning_rate": 0.0005, "loss": 1.351, "step": 82200 }, { "epoch": 6.45, "learning_rate": 0.0005, "loss": 1.3438, "step": 82300 }, { "epoch": 6.46, "learning_rate": 0.0005, "loss": 1.3635, "step": 82400 }, { "epoch": 6.47, "learning_rate": 0.0005, "loss": 1.3506, "step": 82500 }, { "epoch": 6.48, "learning_rate": 0.0005, "loss": 1.3611, "step": 82600 }, { "epoch": 6.49, "learning_rate": 0.0005, "loss": 1.3467, "step": 82700 }, { "epoch": 6.49, "learning_rate": 0.0005, "loss": 1.3528, "step": 82800 }, { "epoch": 6.5, "learning_rate": 0.0005, "loss": 1.3513, "step": 82900 }, { "epoch": 6.51, "learning_rate": 0.0005, "loss": 1.3372, "step": 83000 }, { "epoch": 6.52, "learning_rate": 0.0005, "loss": 1.3724, "step": 83100 }, { "epoch": 6.52, "learning_rate": 0.0005, "loss": 1.3554, "step": 83200 }, { "epoch": 6.53, "learning_rate": 0.0005, "loss": 1.3711, "step": 83300 }, { "epoch": 6.54, "learning_rate": 0.0005, "loss": 1.3534, "step": 83400 }, { "epoch": 6.55, "learning_rate": 0.0005, "loss": 1.3736, "step": 83500 }, { "epoch": 6.56, "learning_rate": 0.0005, "loss": 1.3265, "step": 83600 }, { "epoch": 6.56, "learning_rate": 0.0005, "loss": 1.3467, "step": 83700 }, { "epoch": 6.57, "learning_rate": 0.0005, "loss": 1.3601, "step": 83800 }, { "epoch": 6.58, "learning_rate": 0.0005, "loss": 1.3549, "step": 83900 }, { "epoch": 6.59, "learning_rate": 0.0005, "loss": 1.3584, "step": 84000 }, { "epoch": 6.6, "learning_rate": 0.0005, "loss": 1.3577, "step": 84100 }, { "epoch": 6.6, "learning_rate": 0.0005, "loss": 1.3551, "step": 84200 }, { "epoch": 6.61, "learning_rate": 0.0005, "loss": 1.3676, "step": 84300 }, { "epoch": 6.62, "learning_rate": 0.0005, "loss": 1.3669, "step": 84400 }, { "epoch": 6.63, "learning_rate": 0.0005, "loss": 1.3832, "step": 84500 }, { "epoch": 6.63, "learning_rate": 0.0005, "loss": 1.3649, "step": 84600 }, { "epoch": 6.64, "learning_rate": 0.0005, "loss": 1.3812, "step": 84700 }, { "epoch": 6.65, "learning_rate": 0.0005, "loss": 1.3602, "step": 84800 }, { "epoch": 6.66, "learning_rate": 0.0005, "loss": 1.3428, "step": 84900 }, { "epoch": 6.67, "learning_rate": 0.0005, "loss": 1.3502, "step": 85000 }, { "epoch": 6.67, "learning_rate": 0.0005, "loss": 1.354, "step": 85100 }, { "epoch": 6.68, "learning_rate": 0.0005, "loss": 1.3682, "step": 85200 }, { "epoch": 6.69, "learning_rate": 0.0005, "loss": 1.3206, "step": 85300 }, { "epoch": 6.7, "learning_rate": 0.0005, "loss": 1.3594, "step": 85400 }, { "epoch": 6.7, "learning_rate": 0.0005, "loss": 1.3724, "step": 85500 }, { "epoch": 6.71, "learning_rate": 0.0005, "loss": 1.3418, "step": 85600 }, { "epoch": 6.72, "learning_rate": 0.0005, "loss": 1.3721, "step": 85700 }, { "epoch": 6.73, "learning_rate": 0.0005, "loss": 1.3535, "step": 85800 }, { "epoch": 6.74, "learning_rate": 0.0005, "loss": 1.3513, "step": 85900 }, { "epoch": 6.74, "learning_rate": 0.0005, "loss": 1.3545, "step": 86000 }, { "epoch": 6.75, "learning_rate": 0.0005, "loss": 1.3537, "step": 86100 }, { "epoch": 6.76, "learning_rate": 0.0005, "loss": 1.3609, "step": 86200 }, { "epoch": 6.77, "learning_rate": 0.0005, "loss": 1.3591, "step": 86300 }, { "epoch": 6.78, "learning_rate": 0.0005, "loss": 1.3687, "step": 86400 }, { "epoch": 6.78, "learning_rate": 0.0005, "loss": 1.3531, "step": 86500 }, { "epoch": 6.79, "learning_rate": 0.0005, "loss": 1.3581, "step": 86600 }, { "epoch": 6.8, "learning_rate": 0.0005, "loss": 1.3692, "step": 86700 }, { "epoch": 6.81, "learning_rate": 0.0005, "loss": 1.3517, "step": 86800 }, { "epoch": 6.81, "learning_rate": 0.0005, "loss": 1.3695, "step": 86900 }, { "epoch": 6.82, "learning_rate": 0.0005, "loss": 1.3506, "step": 87000 }, { "epoch": 6.83, "learning_rate": 0.0005, "loss": 1.3671, "step": 87100 }, { "epoch": 6.84, "learning_rate": 0.0005, "loss": 1.3768, "step": 87200 }, { "epoch": 6.85, "learning_rate": 0.0005, "loss": 1.3591, "step": 87300 }, { "epoch": 6.85, "learning_rate": 0.0005, "loss": 1.3556, "step": 87400 }, { "epoch": 6.86, "learning_rate": 0.0005, "loss": 1.3318, "step": 87500 }, { "epoch": 6.87, "learning_rate": 0.0005, "loss": 1.3809, "step": 87600 }, { "epoch": 6.88, "learning_rate": 0.0005, "loss": 1.3655, "step": 87700 }, { "epoch": 6.89, "learning_rate": 0.0005, "loss": 1.3515, "step": 87800 }, { "epoch": 6.89, "learning_rate": 0.0005, "loss": 1.3745, "step": 87900 }, { "epoch": 6.9, "learning_rate": 0.0005, "loss": 1.3488, "step": 88000 }, { "epoch": 6.91, "learning_rate": 0.0005, "loss": 1.3714, "step": 88100 }, { "epoch": 6.92, "learning_rate": 0.0005, "loss": 1.3579, "step": 88200 }, { "epoch": 6.92, "learning_rate": 0.0005, "loss": 1.3671, "step": 88300 }, { "epoch": 6.93, "learning_rate": 0.0005, "loss": 1.3617, "step": 88400 }, { "epoch": 6.94, "learning_rate": 0.0005, "loss": 1.3514, "step": 88500 }, { "epoch": 6.95, "learning_rate": 0.0005, "loss": 1.3786, "step": 88600 }, { "epoch": 6.96, "learning_rate": 0.0005, "loss": 1.3782, "step": 88700 }, { "epoch": 6.96, "learning_rate": 0.0005, "loss": 1.3563, "step": 88800 }, { "epoch": 6.97, "learning_rate": 0.0005, "loss": 1.3643, "step": 88900 }, { "epoch": 6.98, "learning_rate": 0.0005, "loss": 1.3903, "step": 89000 }, { "epoch": 6.99, "learning_rate": 0.0005, "loss": 1.3791, "step": 89100 }, { "epoch": 6.99, "learning_rate": 0.0005, "loss": 1.3554, "step": 89200 }, { "epoch": 7.0, "learning_rate": 0.0005, "loss": 1.3172, "step": 89300 }, { "epoch": 7.01, "learning_rate": 0.0005, "loss": 1.2948, "step": 89400 }, { "epoch": 7.02, "learning_rate": 0.0005, "loss": 1.2977, "step": 89500 }, { "epoch": 7.03, "learning_rate": 0.0005, "loss": 1.284, "step": 89600 }, { "epoch": 7.03, "learning_rate": 0.0005, "loss": 1.2795, "step": 89700 }, { "epoch": 7.04, "learning_rate": 0.0005, "loss": 1.268, "step": 89800 }, { "epoch": 7.05, "learning_rate": 0.0005, "loss": 1.2986, "step": 89900 }, { "epoch": 7.06, "learning_rate": 0.0005, "loss": 1.2728, "step": 90000 }, { "epoch": 7.06, "eval_gen_len": 18.74291515847091, "eval_loss": 2.339409112930298, "eval_rouge1": 34.4263, "eval_rouge2": 13.442, "eval_rougeL": 28.2935, "eval_rougeLsum": 28.2854, "eval_runtime": 343.606, "eval_samples_per_second": 32.965, "eval_steps_per_second": 2.06, "step": 90000 }, { "epoch": 7.07, "learning_rate": 0.0005, "loss": 1.2807, "step": 90100 }, { "epoch": 7.07, "learning_rate": 0.0005, "loss": 1.3081, "step": 90200 }, { "epoch": 7.08, "learning_rate": 0.0005, "loss": 1.2923, "step": 90300 }, { "epoch": 7.09, "learning_rate": 0.0005, "loss": 1.273, "step": 90400 }, { "epoch": 7.1, "learning_rate": 0.0005, "loss": 1.2567, "step": 90500 }, { "epoch": 7.1, "learning_rate": 0.0005, "loss": 1.2771, "step": 90600 }, { "epoch": 7.11, "learning_rate": 0.0005, "loss": 1.2768, "step": 90700 }, { "epoch": 7.12, "learning_rate": 0.0005, "loss": 1.2704, "step": 90800 }, { "epoch": 7.13, "learning_rate": 0.0005, "loss": 1.2755, "step": 90900 }, { "epoch": 7.14, "learning_rate": 0.0005, "loss": 1.2813, "step": 91000 }, { "epoch": 7.14, "learning_rate": 0.0005, "loss": 1.2972, "step": 91100 }, { "epoch": 7.15, "learning_rate": 0.0005, "loss": 1.2805, "step": 91200 }, { "epoch": 7.16, "learning_rate": 0.0005, "loss": 1.2757, "step": 91300 }, { "epoch": 7.17, "learning_rate": 0.0005, "loss": 1.2915, "step": 91400 }, { "epoch": 7.18, "learning_rate": 0.0005, "loss": 1.3053, "step": 91500 }, { "epoch": 7.18, "learning_rate": 0.0005, "loss": 1.3232, "step": 91600 }, { "epoch": 7.19, "learning_rate": 0.0005, "loss": 1.3037, "step": 91700 }, { "epoch": 7.2, "learning_rate": 0.0005, "loss": 1.2884, "step": 91800 }, { "epoch": 7.21, "learning_rate": 0.0005, "loss": 1.2971, "step": 91900 }, { "epoch": 7.21, "learning_rate": 0.0005, "loss": 1.3044, "step": 92000 }, { "epoch": 7.22, "learning_rate": 0.0005, "loss": 1.3029, "step": 92100 }, { "epoch": 7.23, "learning_rate": 0.0005, "loss": 1.2959, "step": 92200 }, { "epoch": 7.24, "learning_rate": 0.0005, "loss": 1.3149, "step": 92300 }, { "epoch": 7.25, "learning_rate": 0.0005, "loss": 1.2849, "step": 92400 }, { "epoch": 7.25, "learning_rate": 0.0005, "loss": 1.3093, "step": 92500 }, { "epoch": 7.26, "learning_rate": 0.0005, "loss": 1.3339, "step": 92600 }, { "epoch": 7.27, "learning_rate": 0.0005, "loss": 1.3035, "step": 92700 }, { "epoch": 7.28, "learning_rate": 0.0005, "loss": 1.3038, "step": 92800 }, { "epoch": 7.29, "learning_rate": 0.0005, "loss": 1.3045, "step": 92900 }, { "epoch": 7.29, "learning_rate": 0.0005, "loss": 1.3056, "step": 93000 }, { "epoch": 7.3, "learning_rate": 0.0005, "loss": 1.2988, "step": 93100 }, { "epoch": 7.31, "learning_rate": 0.0005, "loss": 1.3026, "step": 93200 }, { "epoch": 7.32, "learning_rate": 0.0005, "loss": 1.2925, "step": 93300 }, { "epoch": 7.32, "learning_rate": 0.0005, "loss": 1.2901, "step": 93400 }, { "epoch": 7.33, "learning_rate": 0.0005, "loss": 1.2944, "step": 93500 }, { "epoch": 7.34, "learning_rate": 0.0005, "loss": 1.3112, "step": 93600 }, { "epoch": 7.35, "learning_rate": 0.0005, "loss": 1.313, "step": 93700 }, { "epoch": 7.36, "learning_rate": 0.0005, "loss": 1.2896, "step": 93800 }, { "epoch": 7.36, "learning_rate": 0.0005, "loss": 1.2935, "step": 93900 }, { "epoch": 7.37, "learning_rate": 0.0005, "loss": 1.3067, "step": 94000 }, { "epoch": 7.38, "learning_rate": 0.0005, "loss": 1.311, "step": 94100 }, { "epoch": 7.39, "learning_rate": 0.0005, "loss": 1.2834, "step": 94200 }, { "epoch": 7.39, "learning_rate": 0.0005, "loss": 1.2961, "step": 94300 }, { "epoch": 7.4, "learning_rate": 0.0005, "loss": 1.3183, "step": 94400 }, { "epoch": 7.41, "learning_rate": 0.0005, "loss": 1.3031, "step": 94500 }, { "epoch": 7.42, "learning_rate": 0.0005, "loss": 1.3242, "step": 94600 }, { "epoch": 7.43, "learning_rate": 0.0005, "loss": 1.3186, "step": 94700 }, { "epoch": 7.43, "learning_rate": 0.0005, "loss": 1.3099, "step": 94800 }, { "epoch": 7.44, "learning_rate": 0.0005, "loss": 1.2844, "step": 94900 }, { "epoch": 7.45, "learning_rate": 0.0005, "loss": 1.3044, "step": 95000 }, { "epoch": 7.46, "learning_rate": 0.0005, "loss": 1.3048, "step": 95100 }, { "epoch": 7.47, "learning_rate": 0.0005, "loss": 1.3194, "step": 95200 }, { "epoch": 7.47, "learning_rate": 0.0005, "loss": 1.3178, "step": 95300 }, { "epoch": 7.48, "learning_rate": 0.0005, "loss": 1.3232, "step": 95400 }, { "epoch": 7.49, "learning_rate": 0.0005, "loss": 1.3172, "step": 95500 }, { "epoch": 7.5, "learning_rate": 0.0005, "loss": 1.3211, "step": 95600 }, { "epoch": 7.5, "learning_rate": 0.0005, "loss": 1.3178, "step": 95700 }, { "epoch": 7.51, "learning_rate": 0.0005, "loss": 1.3227, "step": 95800 }, { "epoch": 7.52, "learning_rate": 0.0005, "loss": 1.3077, "step": 95900 }, { "epoch": 7.53, "learning_rate": 0.0005, "loss": 1.3239, "step": 96000 }, { "epoch": 7.54, "learning_rate": 0.0005, "loss": 1.2918, "step": 96100 }, { "epoch": 7.54, "learning_rate": 0.0005, "loss": 1.3214, "step": 96200 }, { "epoch": 7.55, "learning_rate": 0.0005, "loss": 1.2858, "step": 96300 }, { "epoch": 7.56, "learning_rate": 0.0005, "loss": 1.323, "step": 96400 }, { "epoch": 7.57, "learning_rate": 0.0005, "loss": 1.3028, "step": 96500 }, { "epoch": 7.58, "learning_rate": 0.0005, "loss": 1.3091, "step": 96600 }, { "epoch": 7.58, "learning_rate": 0.0005, "loss": 1.3236, "step": 96700 }, { "epoch": 7.59, "learning_rate": 0.0005, "loss": 1.2892, "step": 96800 }, { "epoch": 7.6, "learning_rate": 0.0005, "loss": 1.2942, "step": 96900 }, { "epoch": 7.61, "learning_rate": 0.0005, "loss": 1.3217, "step": 97000 }, { "epoch": 7.61, "learning_rate": 0.0005, "loss": 1.3195, "step": 97100 }, { "epoch": 7.62, "learning_rate": 0.0005, "loss": 1.3238, "step": 97200 }, { "epoch": 7.63, "learning_rate": 0.0005, "loss": 1.3113, "step": 97300 }, { "epoch": 7.64, "learning_rate": 0.0005, "loss": 1.3075, "step": 97400 }, { "epoch": 7.65, "learning_rate": 0.0005, "loss": 1.3338, "step": 97500 }, { "epoch": 7.65, "learning_rate": 0.0005, "loss": 1.3121, "step": 97600 }, { "epoch": 7.66, "learning_rate": 0.0005, "loss": 1.3415, "step": 97700 }, { "epoch": 7.67, "learning_rate": 0.0005, "loss": 1.3144, "step": 97800 }, { "epoch": 7.68, "learning_rate": 0.0005, "loss": 1.327, "step": 97900 }, { "epoch": 7.69, "learning_rate": 0.0005, "loss": 1.3231, "step": 98000 }, { "epoch": 7.69, "learning_rate": 0.0005, "loss": 1.327, "step": 98100 }, { "epoch": 7.7, "learning_rate": 0.0005, "loss": 1.2992, "step": 98200 }, { "epoch": 7.71, "learning_rate": 0.0005, "loss": 1.3232, "step": 98300 }, { "epoch": 7.72, "learning_rate": 0.0005, "loss": 1.3222, "step": 98400 }, { "epoch": 7.72, "learning_rate": 0.0005, "loss": 1.3297, "step": 98500 }, { "epoch": 7.73, "learning_rate": 0.0005, "loss": 1.3063, "step": 98600 }, { "epoch": 7.74, "learning_rate": 0.0005, "loss": 1.3341, "step": 98700 }, { "epoch": 7.75, "learning_rate": 0.0005, "loss": 1.3312, "step": 98800 }, { "epoch": 7.76, "learning_rate": 0.0005, "loss": 1.3341, "step": 98900 }, { "epoch": 7.76, "learning_rate": 0.0005, "loss": 1.317, "step": 99000 }, { "epoch": 7.77, "learning_rate": 0.0005, "loss": 1.3329, "step": 99100 }, { "epoch": 7.78, "learning_rate": 0.0005, "loss": 1.3381, "step": 99200 }, { "epoch": 7.79, "learning_rate": 0.0005, "loss": 1.3284, "step": 99300 }, { "epoch": 7.79, "learning_rate": 0.0005, "loss": 1.3254, "step": 99400 }, { "epoch": 7.8, "learning_rate": 0.0005, "loss": 1.3338, "step": 99500 }, { "epoch": 7.81, "learning_rate": 0.0005, "loss": 1.3293, "step": 99600 }, { "epoch": 7.82, "learning_rate": 0.0005, "loss": 1.3338, "step": 99700 }, { "epoch": 7.83, "learning_rate": 0.0005, "loss": 1.3222, "step": 99800 }, { "epoch": 7.83, "learning_rate": 0.0005, "loss": 1.3132, "step": 99900 }, { "epoch": 7.84, "learning_rate": 0.0005, "loss": 1.3226, "step": 100000 }, { "epoch": 7.84, "eval_gen_len": 18.71413436920632, "eval_loss": 2.3180172443389893, "eval_rouge1": 34.7026, "eval_rouge2": 13.6524, "eval_rougeL": 28.5004, "eval_rougeLsum": 28.4899, "eval_runtime": 346.1988, "eval_samples_per_second": 32.718, "eval_steps_per_second": 2.045, "step": 100000 }, { "epoch": 7.85, "learning_rate": 0.0005, "loss": 1.3376, "step": 100100 }, { "epoch": 7.86, "learning_rate": 0.0005, "loss": 1.3383, "step": 100200 }, { "epoch": 7.87, "learning_rate": 0.0005, "loss": 1.3239, "step": 100300 }, { "epoch": 7.87, "learning_rate": 0.0005, "loss": 1.3319, "step": 100400 }, { "epoch": 7.88, "learning_rate": 0.0005, "loss": 1.3161, "step": 100500 }, { "epoch": 7.89, "learning_rate": 0.0005, "loss": 1.3419, "step": 100600 }, { "epoch": 7.9, "learning_rate": 0.0005, "loss": 1.3262, "step": 100700 }, { "epoch": 7.9, "learning_rate": 0.0005, "loss": 1.3392, "step": 100800 }, { "epoch": 7.91, "learning_rate": 0.0005, "loss": 1.307, "step": 100900 }, { "epoch": 7.92, "learning_rate": 0.0005, "loss": 1.3017, "step": 101000 }, { "epoch": 7.93, "learning_rate": 0.0005, "loss": 1.3419, "step": 101100 }, { "epoch": 7.94, "learning_rate": 0.0005, "loss": 1.3279, "step": 101200 }, { "epoch": 7.94, "learning_rate": 0.0005, "loss": 1.344, "step": 101300 }, { "epoch": 7.95, "learning_rate": 0.0005, "loss": 1.3274, "step": 101400 }, { "epoch": 7.96, "learning_rate": 0.0005, "loss": 1.331, "step": 101500 }, { "epoch": 7.97, "learning_rate": 0.0005, "loss": 1.3233, "step": 101600 }, { "epoch": 7.98, "learning_rate": 0.0005, "loss": 1.3093, "step": 101700 }, { "epoch": 7.98, "learning_rate": 0.0005, "loss": 1.3195, "step": 101800 }, { "epoch": 7.99, "learning_rate": 0.0005, "loss": 1.3373, "step": 101900 }, { "epoch": 8.0, "learning_rate": 0.0005, "loss": 1.3105, "step": 102000 }, { "epoch": 8.01, "learning_rate": 0.0005, "loss": 1.2658, "step": 102100 }, { "epoch": 8.01, "learning_rate": 0.0005, "loss": 1.2354, "step": 102200 }, { "epoch": 8.02, "learning_rate": 0.0005, "loss": 1.2282, "step": 102300 }, { "epoch": 8.03, "learning_rate": 0.0005, "loss": 1.2475, "step": 102400 }, { "epoch": 8.04, "learning_rate": 0.0005, "loss": 1.2326, "step": 102500 }, { "epoch": 8.05, "learning_rate": 0.0005, "loss": 1.2364, "step": 102600 }, { "epoch": 8.05, "learning_rate": 0.0005, "loss": 1.2297, "step": 102700 }, { "epoch": 8.06, "learning_rate": 0.0005, "loss": 1.2495, "step": 102800 }, { "epoch": 8.07, "learning_rate": 0.0005, "loss": 1.2551, "step": 102900 }, { "epoch": 8.08, "learning_rate": 0.0005, "loss": 1.2288, "step": 103000 }, { "epoch": 8.09, "learning_rate": 0.0005, "loss": 1.2316, "step": 103100 }, { "epoch": 8.09, "learning_rate": 0.0005, "loss": 1.2272, "step": 103200 }, { "epoch": 8.1, "learning_rate": 0.0005, "loss": 1.2344, "step": 103300 }, { "epoch": 8.11, "learning_rate": 0.0005, "loss": 1.2526, "step": 103400 }, { "epoch": 8.12, "learning_rate": 0.0005, "loss": 1.2614, "step": 103500 }, { "epoch": 8.12, "learning_rate": 0.0005, "loss": 1.2656, "step": 103600 }, { "epoch": 8.13, "learning_rate": 0.0005, "loss": 1.2525, "step": 103700 }, { "epoch": 8.14, "learning_rate": 0.0005, "loss": 1.2627, "step": 103800 }, { "epoch": 8.15, "learning_rate": 0.0005, "loss": 1.2767, "step": 103900 }, { "epoch": 8.16, "learning_rate": 0.0005, "loss": 1.244, "step": 104000 }, { "epoch": 8.16, "learning_rate": 0.0005, "loss": 1.2445, "step": 104100 }, { "epoch": 8.17, "learning_rate": 0.0005, "loss": 1.2767, "step": 104200 }, { "epoch": 8.18, "learning_rate": 0.0005, "loss": 1.2529, "step": 104300 }, { "epoch": 8.19, "learning_rate": 0.0005, "loss": 1.2635, "step": 104400 }, { "epoch": 8.19, "learning_rate": 0.0005, "loss": 1.2651, "step": 104500 }, { "epoch": 8.2, "learning_rate": 0.0005, "loss": 1.2541, "step": 104600 }, { "epoch": 8.21, "learning_rate": 0.0005, "loss": 1.2532, "step": 104700 }, { "epoch": 8.22, "learning_rate": 0.0005, "loss": 1.2615, "step": 104800 }, { "epoch": 8.23, "learning_rate": 0.0005, "loss": 1.2415, "step": 104900 }, { "epoch": 8.23, "learning_rate": 0.0005, "loss": 1.261, "step": 105000 }, { "epoch": 8.24, "learning_rate": 0.0005, "loss": 1.2472, "step": 105100 }, { "epoch": 8.25, "learning_rate": 0.0005, "loss": 1.2539, "step": 105200 }, { "epoch": 8.26, "learning_rate": 0.0005, "loss": 1.2555, "step": 105300 }, { "epoch": 8.27, "learning_rate": 0.0005, "loss": 1.2804, "step": 105400 }, { "epoch": 8.27, "learning_rate": 0.0005, "loss": 1.2768, "step": 105500 }, { "epoch": 8.28, "learning_rate": 0.0005, "loss": 1.266, "step": 105600 }, { "epoch": 8.29, "learning_rate": 0.0005, "loss": 1.266, "step": 105700 }, { "epoch": 8.3, "learning_rate": 0.0005, "loss": 1.2677, "step": 105800 }, { "epoch": 8.3, "learning_rate": 0.0005, "loss": 1.2573, "step": 105900 }, { "epoch": 8.31, "learning_rate": 0.0005, "loss": 1.2606, "step": 106000 }, { "epoch": 8.32, "learning_rate": 0.0005, "loss": 1.2601, "step": 106100 }, { "epoch": 8.33, "learning_rate": 0.0005, "loss": 1.2811, "step": 106200 }, { "epoch": 8.34, "learning_rate": 0.0005, "loss": 1.2713, "step": 106300 }, { "epoch": 8.34, "learning_rate": 0.0005, "loss": 1.27, "step": 106400 }, { "epoch": 8.35, "learning_rate": 0.0005, "loss": 1.2663, "step": 106500 }, { "epoch": 8.36, "learning_rate": 0.0005, "loss": 1.2709, "step": 106600 }, { "epoch": 8.37, "learning_rate": 0.0005, "loss": 1.2922, "step": 106700 }, { "epoch": 8.38, "learning_rate": 0.0005, "loss": 1.2709, "step": 106800 }, { "epoch": 8.38, "learning_rate": 0.0005, "loss": 1.2704, "step": 106900 }, { "epoch": 8.39, "learning_rate": 0.0005, "loss": 1.261, "step": 107000 }, { "epoch": 8.4, "learning_rate": 0.0005, "loss": 1.2758, "step": 107100 }, { "epoch": 8.41, "learning_rate": 0.0005, "loss": 1.2745, "step": 107200 }, { "epoch": 8.41, "learning_rate": 0.0005, "loss": 1.2802, "step": 107300 }, { "epoch": 8.42, "learning_rate": 0.0005, "loss": 1.2722, "step": 107400 }, { "epoch": 8.43, "learning_rate": 0.0005, "loss": 1.262, "step": 107500 }, { "epoch": 8.44, "learning_rate": 0.0005, "loss": 1.2662, "step": 107600 }, { "epoch": 8.45, "learning_rate": 0.0005, "loss": 1.2709, "step": 107700 }, { "epoch": 8.45, "learning_rate": 0.0005, "loss": 1.2977, "step": 107800 }, { "epoch": 8.46, "learning_rate": 0.0005, "loss": 1.296, "step": 107900 }, { "epoch": 8.47, "learning_rate": 0.0005, "loss": 1.2796, "step": 108000 }, { "epoch": 8.48, "learning_rate": 0.0005, "loss": 1.2628, "step": 108100 }, { "epoch": 8.48, "learning_rate": 0.0005, "loss": 1.2708, "step": 108200 }, { "epoch": 8.49, "learning_rate": 0.0005, "loss": 1.2661, "step": 108300 }, { "epoch": 8.5, "learning_rate": 0.0005, "loss": 1.2518, "step": 108400 }, { "epoch": 8.51, "learning_rate": 0.0005, "loss": 1.2881, "step": 108500 }, { "epoch": 8.52, "learning_rate": 0.0005, "loss": 1.2887, "step": 108600 }, { "epoch": 8.52, "learning_rate": 0.0005, "loss": 1.2821, "step": 108700 }, { "epoch": 8.53, "learning_rate": 0.0005, "loss": 1.282, "step": 108800 }, { "epoch": 8.54, "learning_rate": 0.0005, "loss": 1.2672, "step": 108900 }, { "epoch": 8.55, "learning_rate": 0.0005, "loss": 1.2563, "step": 109000 }, { "epoch": 8.56, "learning_rate": 0.0005, "loss": 1.2637, "step": 109100 }, { "epoch": 8.56, "learning_rate": 0.0005, "loss": 1.285, "step": 109200 }, { "epoch": 8.57, "learning_rate": 0.0005, "loss": 1.2787, "step": 109300 }, { "epoch": 8.58, "learning_rate": 0.0005, "loss": 1.2965, "step": 109400 }, { "epoch": 8.59, "learning_rate": 0.0005, "loss": 1.2879, "step": 109500 }, { "epoch": 8.59, "learning_rate": 0.0005, "loss": 1.2715, "step": 109600 }, { "epoch": 8.6, "learning_rate": 0.0005, "loss": 1.2717, "step": 109700 }, { "epoch": 8.61, "learning_rate": 0.0005, "loss": 1.3013, "step": 109800 }, { "epoch": 8.62, "learning_rate": 0.0005, "loss": 1.2841, "step": 109900 }, { "epoch": 8.63, "learning_rate": 0.0005, "loss": 1.2956, "step": 110000 }, { "epoch": 8.63, "eval_gen_len": 18.680056502162973, "eval_loss": 2.327761650085449, "eval_rouge1": 34.8338, "eval_rouge2": 13.7914, "eval_rougeL": 28.7114, "eval_rougeLsum": 28.7056, "eval_runtime": 344.147, "eval_samples_per_second": 32.913, "eval_steps_per_second": 2.057, "step": 110000 }, { "epoch": 8.63, "learning_rate": 0.0005, "loss": 1.2873, "step": 110100 }, { "epoch": 8.64, "learning_rate": 0.0005, "loss": 1.2802, "step": 110200 }, { "epoch": 8.65, "learning_rate": 0.0005, "loss": 1.2734, "step": 110300 }, { "epoch": 8.66, "learning_rate": 0.0005, "loss": 1.2922, "step": 110400 }, { "epoch": 8.67, "learning_rate": 0.0005, "loss": 1.2602, "step": 110500 }, { "epoch": 8.67, "learning_rate": 0.0005, "loss": 1.2733, "step": 110600 }, { "epoch": 8.68, "learning_rate": 0.0005, "loss": 1.29, "step": 110700 }, { "epoch": 8.69, "learning_rate": 0.0005, "loss": 1.2988, "step": 110800 }, { "epoch": 8.7, "learning_rate": 0.0005, "loss": 1.2828, "step": 110900 }, { "epoch": 8.7, "learning_rate": 0.0005, "loss": 1.2803, "step": 111000 }, { "epoch": 8.71, "learning_rate": 0.0005, "loss": 1.2651, "step": 111100 }, { "epoch": 8.72, "learning_rate": 0.0005, "loss": 1.2762, "step": 111200 }, { "epoch": 8.73, "learning_rate": 0.0005, "loss": 1.3007, "step": 111300 }, { "epoch": 8.74, "learning_rate": 0.0005, "loss": 1.3065, "step": 111400 }, { "epoch": 8.74, "learning_rate": 0.0005, "loss": 1.2941, "step": 111500 }, { "epoch": 8.75, "learning_rate": 0.0005, "loss": 1.2835, "step": 111600 }, { "epoch": 8.76, "learning_rate": 0.0005, "loss": 1.2879, "step": 111700 }, { "epoch": 8.77, "learning_rate": 0.0005, "loss": 1.2951, "step": 111800 }, { "epoch": 8.78, "learning_rate": 0.0005, "loss": 1.289, "step": 111900 }, { "epoch": 8.78, "learning_rate": 0.0005, "loss": 1.3021, "step": 112000 }, { "epoch": 8.79, "learning_rate": 0.0005, "loss": 1.278, "step": 112100 }, { "epoch": 8.8, "learning_rate": 0.0005, "loss": 1.3145, "step": 112200 }, { "epoch": 8.81, "learning_rate": 0.0005, "loss": 1.3147, "step": 112300 }, { "epoch": 8.81, "learning_rate": 0.0005, "loss": 1.2801, "step": 112400 }, { "epoch": 8.82, "learning_rate": 0.0005, "loss": 1.2945, "step": 112500 }, { "epoch": 8.83, "learning_rate": 0.0005, "loss": 1.2865, "step": 112600 }, { "epoch": 8.84, "learning_rate": 0.0005, "loss": 1.2919, "step": 112700 }, { "epoch": 8.85, "learning_rate": 0.0005, "loss": 1.3269, "step": 112800 }, { "epoch": 8.85, "learning_rate": 0.0005, "loss": 1.3069, "step": 112900 }, { "epoch": 8.86, "learning_rate": 0.0005, "loss": 1.3089, "step": 113000 }, { "epoch": 8.87, "learning_rate": 0.0005, "loss": 1.2979, "step": 113100 }, { "epoch": 8.88, "learning_rate": 0.0005, "loss": 1.2858, "step": 113200 }, { "epoch": 8.88, "learning_rate": 0.0005, "loss": 1.2684, "step": 113300 }, { "epoch": 8.89, "learning_rate": 0.0005, "loss": 1.2868, "step": 113400 }, { "epoch": 8.9, "learning_rate": 0.0005, "loss": 1.2744, "step": 113500 }, { "epoch": 8.91, "learning_rate": 0.0005, "loss": 1.2957, "step": 113600 }, { "epoch": 8.92, "learning_rate": 0.0005, "loss": 1.3137, "step": 113700 }, { "epoch": 8.92, "learning_rate": 0.0005, "loss": 1.3029, "step": 113800 }, { "epoch": 8.93, "learning_rate": 0.0005, "loss": 1.2701, "step": 113900 }, { "epoch": 8.94, "learning_rate": 0.0005, "loss": 1.2671, "step": 114000 }, { "epoch": 8.95, "learning_rate": 0.0005, "loss": 1.2999, "step": 114100 }, { "epoch": 8.96, "learning_rate": 0.0005, "loss": 1.2827, "step": 114200 }, { "epoch": 8.96, "learning_rate": 0.0005, "loss": 1.2958, "step": 114300 }, { "epoch": 8.97, "learning_rate": 0.0005, "loss": 1.2641, "step": 114400 }, { "epoch": 8.98, "learning_rate": 0.0005, "loss": 1.2787, "step": 114500 }, { "epoch": 8.99, "learning_rate": 0.0005, "loss": 1.2903, "step": 114600 }, { "epoch": 8.99, "learning_rate": 0.0005, "loss": 1.2938, "step": 114700 }, { "epoch": 9.0, "learning_rate": 0.0005, "loss": 1.2768, "step": 114800 }, { "epoch": 9.01, "learning_rate": 0.0005, "loss": 1.1876, "step": 114900 }, { "epoch": 9.02, "learning_rate": 0.0005, "loss": 1.2012, "step": 115000 }, { "epoch": 9.03, "learning_rate": 0.0005, "loss": 1.2075, "step": 115100 }, { "epoch": 9.03, "learning_rate": 0.0005, "loss": 1.2316, "step": 115200 }, { "epoch": 9.04, "learning_rate": 0.0005, "loss": 1.1978, "step": 115300 }, { "epoch": 9.05, "learning_rate": 0.0005, "loss": 1.2175, "step": 115400 }, { "epoch": 9.06, "learning_rate": 0.0005, "loss": 1.2097, "step": 115500 }, { "epoch": 9.07, "learning_rate": 0.0005, "loss": 1.1848, "step": 115600 }, { "epoch": 9.07, "learning_rate": 0.0005, "loss": 1.2143, "step": 115700 }, { "epoch": 9.08, "learning_rate": 0.0005, "loss": 1.2262, "step": 115800 }, { "epoch": 9.09, "learning_rate": 0.0005, "loss": 1.2187, "step": 115900 }, { "epoch": 9.1, "learning_rate": 0.0005, "loss": 1.2057, "step": 116000 }, { "epoch": 9.1, "learning_rate": 0.0005, "loss": 1.1871, "step": 116100 }, { "epoch": 9.11, "learning_rate": 0.0005, "loss": 1.2168, "step": 116200 }, { "epoch": 9.12, "learning_rate": 0.0005, "loss": 1.2171, "step": 116300 }, { "epoch": 9.13, "learning_rate": 0.0005, "loss": 1.2214, "step": 116400 }, { "epoch": 9.14, "learning_rate": 0.0005, "loss": 1.2076, "step": 116500 }, { "epoch": 9.14, "learning_rate": 0.0005, "loss": 1.2064, "step": 116600 }, { "epoch": 9.15, "learning_rate": 0.0005, "loss": 1.2093, "step": 116700 }, { "epoch": 9.16, "learning_rate": 0.0005, "loss": 1.1972, "step": 116800 }, { "epoch": 9.17, "learning_rate": 0.0005, "loss": 1.2101, "step": 116900 }, { "epoch": 9.18, "learning_rate": 0.0005, "loss": 1.2405, "step": 117000 }, { "epoch": 9.18, "learning_rate": 0.0005, "loss": 1.2157, "step": 117100 }, { "epoch": 9.19, "learning_rate": 0.0005, "loss": 1.2268, "step": 117200 }, { "epoch": 9.2, "learning_rate": 0.0005, "loss": 1.2173, "step": 117300 }, { "epoch": 9.21, "learning_rate": 0.0005, "loss": 1.2324, "step": 117400 }, { "epoch": 9.21, "learning_rate": 0.0005, "loss": 1.2277, "step": 117500 }, { "epoch": 9.22, "learning_rate": 0.0005, "loss": 1.2212, "step": 117600 }, { "epoch": 9.23, "learning_rate": 0.0005, "loss": 1.2272, "step": 117700 }, { "epoch": 9.24, "learning_rate": 0.0005, "loss": 1.2437, "step": 117800 }, { "epoch": 9.25, "learning_rate": 0.0005, "loss": 1.2423, "step": 117900 }, { "epoch": 9.25, "learning_rate": 0.0005, "loss": 1.2392, "step": 118000 }, { "epoch": 9.26, "learning_rate": 0.0005, "loss": 1.2487, "step": 118100 }, { "epoch": 9.27, "learning_rate": 0.0005, "loss": 1.2178, "step": 118200 }, { "epoch": 9.28, "learning_rate": 0.0005, "loss": 1.2299, "step": 118300 }, { "epoch": 9.28, "learning_rate": 0.0005, "loss": 1.2235, "step": 118400 }, { "epoch": 9.29, "learning_rate": 0.0005, "loss": 1.2293, "step": 118500 }, { "epoch": 9.3, "learning_rate": 0.0005, "loss": 1.2289, "step": 118600 }, { "epoch": 9.31, "learning_rate": 0.0005, "loss": 1.226, "step": 118700 }, { "epoch": 9.32, "learning_rate": 0.0005, "loss": 1.2305, "step": 118800 }, { "epoch": 9.32, "learning_rate": 0.0005, "loss": 1.2195, "step": 118900 }, { "epoch": 9.33, "learning_rate": 0.0005, "loss": 1.2574, "step": 119000 }, { "epoch": 9.34, "learning_rate": 0.0005, "loss": 1.2401, "step": 119100 }, { "epoch": 9.35, "learning_rate": 0.0005, "loss": 1.2439, "step": 119200 }, { "epoch": 9.36, "learning_rate": 0.0005, "loss": 1.2165, "step": 119300 }, { "epoch": 9.36, "learning_rate": 0.0005, "loss": 1.2318, "step": 119400 }, { "epoch": 9.37, "learning_rate": 0.0005, "loss": 1.2358, "step": 119500 }, { "epoch": 9.38, "learning_rate": 0.0005, "loss": 1.2421, "step": 119600 }, { "epoch": 9.39, "learning_rate": 0.0005, "loss": 1.2324, "step": 119700 }, { "epoch": 9.39, "learning_rate": 0.0005, "loss": 1.2332, "step": 119800 }, { "epoch": 9.4, "learning_rate": 0.0005, "loss": 1.2377, "step": 119900 }, { "epoch": 9.41, "learning_rate": 0.0005, "loss": 1.2568, "step": 120000 }, { "epoch": 9.41, "eval_gen_len": 18.66372384567847, "eval_loss": 2.3514394760131836, "eval_rouge1": 34.7411, "eval_rouge2": 13.8346, "eval_rougeL": 28.6141, "eval_rougeLsum": 28.6032, "eval_runtime": 345.2724, "eval_samples_per_second": 32.806, "eval_steps_per_second": 2.051, "step": 120000 }, { "epoch": 9.42, "learning_rate": 0.0005, "loss": 1.2152, "step": 120100 }, { "epoch": 9.43, "learning_rate": 0.0005, "loss": 1.2593, "step": 120200 }, { "epoch": 9.43, "learning_rate": 0.0005, "loss": 1.2727, "step": 120300 }, { "epoch": 9.44, "learning_rate": 0.0005, "loss": 1.2449, "step": 120400 }, { "epoch": 9.45, "learning_rate": 0.0005, "loss": 1.2418, "step": 120500 }, { "epoch": 9.46, "learning_rate": 0.0005, "loss": 1.2555, "step": 120600 }, { "epoch": 9.47, "learning_rate": 0.0005, "loss": 1.2419, "step": 120700 }, { "epoch": 9.47, "learning_rate": 0.0005, "loss": 1.222, "step": 120800 }, { "epoch": 9.48, "learning_rate": 0.0005, "loss": 1.2457, "step": 120900 }, { "epoch": 9.49, "learning_rate": 0.0005, "loss": 1.2392, "step": 121000 }, { "epoch": 9.5, "learning_rate": 0.0005, "loss": 1.243, "step": 121100 }, { "epoch": 9.5, "learning_rate": 0.0005, "loss": 1.2354, "step": 121200 }, { "epoch": 9.51, "learning_rate": 0.0005, "loss": 1.2476, "step": 121300 }, { "epoch": 9.52, "learning_rate": 0.0005, "loss": 1.2493, "step": 121400 }, { "epoch": 9.53, "learning_rate": 0.0005, "loss": 1.2469, "step": 121500 }, { "epoch": 9.54, "learning_rate": 0.0005, "loss": 1.248, "step": 121600 }, { "epoch": 9.54, "learning_rate": 0.0005, "loss": 1.2485, "step": 121700 }, { "epoch": 9.55, "learning_rate": 0.0005, "loss": 1.2614, "step": 121800 }, { "epoch": 9.56, "learning_rate": 0.0005, "loss": 1.2542, "step": 121900 }, { "epoch": 9.57, "learning_rate": 0.0005, "loss": 1.2463, "step": 122000 }, { "epoch": 9.57, "learning_rate": 0.0005, "loss": 1.2421, "step": 122100 }, { "epoch": 9.58, "learning_rate": 0.0005, "loss": 1.2599, "step": 122200 }, { "epoch": 9.59, "learning_rate": 0.0005, "loss": 1.2507, "step": 122300 }, { "epoch": 9.6, "learning_rate": 0.0005, "loss": 1.2517, "step": 122400 }, { "epoch": 9.61, "learning_rate": 0.0005, "loss": 1.2524, "step": 122500 }, { "epoch": 9.61, "learning_rate": 0.0005, "loss": 1.2492, "step": 122600 }, { "epoch": 9.62, "learning_rate": 0.0005, "loss": 1.2445, "step": 122700 }, { "epoch": 9.63, "learning_rate": 0.0005, "loss": 1.235, "step": 122800 }, { "epoch": 9.64, "learning_rate": 0.0005, "loss": 1.2788, "step": 122900 }, { "epoch": 9.65, "learning_rate": 0.0005, "loss": 1.2313, "step": 123000 }, { "epoch": 9.65, "learning_rate": 0.0005, "loss": 1.2594, "step": 123100 }, { "epoch": 9.66, "learning_rate": 0.0005, "loss": 1.237, "step": 123200 }, { "epoch": 9.67, "learning_rate": 0.0005, "loss": 1.2641, "step": 123300 }, { "epoch": 9.68, "learning_rate": 0.0005, "loss": 1.2346, "step": 123400 }, { "epoch": 9.68, "learning_rate": 0.0005, "loss": 1.2685, "step": 123500 }, { "epoch": 9.69, "learning_rate": 0.0005, "loss": 1.2756, "step": 123600 }, { "epoch": 9.7, "learning_rate": 0.0005, "loss": 1.2478, "step": 123700 }, { "epoch": 9.71, "learning_rate": 0.0005, "loss": 1.2453, "step": 123800 }, { "epoch": 9.72, "learning_rate": 0.0005, "loss": 1.2631, "step": 123900 }, { "epoch": 9.72, "learning_rate": 0.0005, "loss": 1.2612, "step": 124000 }, { "epoch": 9.73, "learning_rate": 0.0005, "loss": 1.2553, "step": 124100 }, { "epoch": 9.74, "learning_rate": 0.0005, "loss": 1.2489, "step": 124200 }, { "epoch": 9.75, "learning_rate": 0.0005, "loss": 1.2581, "step": 124300 }, { "epoch": 9.76, "learning_rate": 0.0005, "loss": 1.2501, "step": 124400 }, { "epoch": 9.76, "learning_rate": 0.0005, "loss": 1.2433, "step": 124500 }, { "epoch": 9.77, "learning_rate": 0.0005, "loss": 1.2479, "step": 124600 }, { "epoch": 9.78, "learning_rate": 0.0005, "loss": 1.2705, "step": 124700 }, { "epoch": 9.79, "learning_rate": 0.0005, "loss": 1.2819, "step": 124800 }, { "epoch": 9.79, "learning_rate": 0.0005, "loss": 1.2689, "step": 124900 }, { "epoch": 9.8, "learning_rate": 0.0005, "loss": 1.2599, "step": 125000 }, { "epoch": 9.81, "learning_rate": 0.0005, "loss": 1.2337, "step": 125100 }, { "epoch": 9.82, "learning_rate": 0.0005, "loss": 1.2309, "step": 125200 }, { "epoch": 9.83, "learning_rate": 0.0005, "loss": 1.2511, "step": 125300 }, { "epoch": 9.83, "learning_rate": 0.0005, "loss": 1.2792, "step": 125400 }, { "epoch": 9.84, "learning_rate": 0.0005, "loss": 1.2619, "step": 125500 }, { "epoch": 9.85, "learning_rate": 0.0005, "loss": 1.2643, "step": 125600 }, { "epoch": 9.86, "learning_rate": 0.0005, "loss": 1.24, "step": 125700 }, { "epoch": 9.87, "learning_rate": 0.0005, "loss": 1.2424, "step": 125800 }, { "epoch": 9.87, "learning_rate": 0.0005, "loss": 1.2288, "step": 125900 }, { "epoch": 9.88, "learning_rate": 0.0005, "loss": 1.2395, "step": 126000 }, { "epoch": 9.89, "learning_rate": 0.0005, "loss": 1.2835, "step": 126100 }, { "epoch": 9.9, "learning_rate": 0.0005, "loss": 1.2581, "step": 126200 }, { "epoch": 9.9, "learning_rate": 0.0005, "loss": 1.2563, "step": 126300 }, { "epoch": 9.91, "learning_rate": 0.0005, "loss": 1.2553, "step": 126400 }, { "epoch": 9.92, "learning_rate": 0.0005, "loss": 1.26, "step": 126500 }, { "epoch": 9.93, "learning_rate": 0.0005, "loss": 1.2634, "step": 126600 }, { "epoch": 9.94, "learning_rate": 0.0005, "loss": 1.2648, "step": 126700 }, { "epoch": 9.94, "learning_rate": 0.0005, "loss": 1.2538, "step": 126800 }, { "epoch": 9.95, "learning_rate": 0.0005, "loss": 1.2725, "step": 126900 }, { "epoch": 9.96, "learning_rate": 0.0005, "loss": 1.2494, "step": 127000 }, { "epoch": 9.97, "learning_rate": 0.0005, "loss": 1.2813, "step": 127100 }, { "epoch": 9.97, "learning_rate": 0.0005, "loss": 1.2514, "step": 127200 }, { "epoch": 9.98, "learning_rate": 0.0005, "loss": 1.2516, "step": 127300 }, { "epoch": 9.99, "learning_rate": 0.0005, "loss": 1.2691, "step": 127400 }, { "epoch": 10.0, "learning_rate": 0.0005, "loss": 1.2495, "step": 127500 }, { "epoch": 10.01, "learning_rate": 0.0005, "loss": 1.2004, "step": 127600 }, { "epoch": 10.01, "learning_rate": 0.0005, "loss": 1.1841, "step": 127700 }, { "epoch": 10.02, "learning_rate": 0.0005, "loss": 1.1587, "step": 127800 }, { "epoch": 10.03, "learning_rate": 0.0005, "loss": 1.151, "step": 127900 }, { "epoch": 10.04, "learning_rate": 0.0005, "loss": 1.1594, "step": 128000 }, { "epoch": 10.05, "learning_rate": 0.0005, "loss": 1.1961, "step": 128100 }, { "epoch": 10.05, "learning_rate": 0.0005, "loss": 1.1642, "step": 128200 }, { "epoch": 10.06, "learning_rate": 0.0005, "loss": 1.1716, "step": 128300 }, { "epoch": 10.07, "learning_rate": 0.0005, "loss": 1.1898, "step": 128400 }, { "epoch": 10.08, "learning_rate": 0.0005, "loss": 1.1968, "step": 128500 }, { "epoch": 10.08, "learning_rate": 0.0005, "loss": 1.2105, "step": 128600 }, { "epoch": 10.09, "learning_rate": 0.0005, "loss": 1.1601, "step": 128700 }, { "epoch": 10.1, "learning_rate": 0.0005, "loss": 1.1855, "step": 128800 }, { "epoch": 10.11, "learning_rate": 0.0005, "loss": 1.1889, "step": 128900 }, { "epoch": 10.12, "learning_rate": 0.0005, "loss": 1.1638, "step": 129000 }, { "epoch": 10.12, "learning_rate": 0.0005, "loss": 1.1665, "step": 129100 }, { "epoch": 10.13, "learning_rate": 0.0005, "loss": 1.191, "step": 129200 }, { "epoch": 10.14, "learning_rate": 0.0005, "loss": 1.1814, "step": 129300 }, { "epoch": 10.15, "learning_rate": 0.0005, "loss": 1.2109, "step": 129400 }, { "epoch": 10.16, "learning_rate": 0.0005, "loss": 1.1878, "step": 129500 }, { "epoch": 10.16, "learning_rate": 0.0005, "loss": 1.1812, "step": 129600 }, { "epoch": 10.17, "learning_rate": 0.0005, "loss": 1.19, "step": 129700 }, { "epoch": 10.18, "learning_rate": 0.0005, "loss": 1.2061, "step": 129800 }, { "epoch": 10.19, "learning_rate": 0.0005, "loss": 1.1939, "step": 129900 }, { "epoch": 10.19, "learning_rate": 0.0005, "loss": 1.196, "step": 130000 }, { "epoch": 10.19, "eval_gen_len": 18.68429416438598, "eval_loss": 2.362765073776245, "eval_rouge1": 35.0302, "eval_rouge2": 13.9715, "eval_rougeL": 28.7794, "eval_rougeLsum": 28.7677, "eval_runtime": 342.4044, "eval_samples_per_second": 33.081, "eval_steps_per_second": 2.068, "step": 130000 }, { "epoch": 10.2, "learning_rate": 0.0005, "loss": 1.196, "step": 130100 }, { "epoch": 10.21, "learning_rate": 0.0005, "loss": 1.1993, "step": 130200 }, { "epoch": 10.22, "learning_rate": 0.0005, "loss": 1.1875, "step": 130300 }, { "epoch": 10.23, "learning_rate": 0.0005, "loss": 1.196, "step": 130400 }, { "epoch": 10.23, "learning_rate": 0.0005, "loss": 1.2116, "step": 130500 }, { "epoch": 10.24, "learning_rate": 0.0005, "loss": 1.1748, "step": 130600 }, { "epoch": 10.25, "learning_rate": 0.0005, "loss": 1.1957, "step": 130700 }, { "epoch": 10.26, "learning_rate": 0.0005, "loss": 1.2038, "step": 130800 }, { "epoch": 10.27, "learning_rate": 0.0005, "loss": 1.1831, "step": 130900 }, { "epoch": 10.27, "learning_rate": 0.0005, "loss": 1.1999, "step": 131000 }, { "epoch": 10.28, "learning_rate": 0.0005, "loss": 1.1911, "step": 131100 }, { "epoch": 10.29, "learning_rate": 0.0005, "loss": 1.1899, "step": 131200 }, { "epoch": 10.3, "learning_rate": 0.0005, "loss": 1.2048, "step": 131300 }, { "epoch": 10.3, "learning_rate": 0.0005, "loss": 1.2188, "step": 131400 }, { "epoch": 10.31, "learning_rate": 0.0005, "loss": 1.2149, "step": 131500 }, { "epoch": 10.32, "learning_rate": 0.0005, "loss": 1.2153, "step": 131600 }, { "epoch": 10.33, "learning_rate": 0.0005, "loss": 1.2156, "step": 131700 }, { "epoch": 10.34, "learning_rate": 0.0005, "loss": 1.1967, "step": 131800 }, { "epoch": 10.34, "learning_rate": 0.0005, "loss": 1.2148, "step": 131900 }, { "epoch": 10.35, "learning_rate": 0.0005, "loss": 1.211, "step": 132000 }, { "epoch": 10.36, "learning_rate": 0.0005, "loss": 1.1971, "step": 132100 }, { "epoch": 10.37, "learning_rate": 0.0005, "loss": 1.2019, "step": 132200 }, { "epoch": 10.37, "learning_rate": 0.0005, "loss": 1.2078, "step": 132300 }, { "epoch": 10.38, "learning_rate": 0.0005, "loss": 1.1958, "step": 132400 }, { "epoch": 10.39, "learning_rate": 0.0005, "loss": 1.243, "step": 132500 }, { "epoch": 10.4, "learning_rate": 0.0005, "loss": 1.2074, "step": 132600 }, { "epoch": 10.41, "learning_rate": 0.0005, "loss": 1.2086, "step": 132700 }, { "epoch": 10.41, "learning_rate": 0.0005, "loss": 1.2075, "step": 132800 }, { "epoch": 10.42, "learning_rate": 0.0005, "loss": 1.2124, "step": 132900 }, { "epoch": 10.43, "learning_rate": 0.0005, "loss": 1.2052, "step": 133000 }, { "epoch": 10.44, "learning_rate": 0.0005, "loss": 1.1858, "step": 133100 }, { "epoch": 10.45, "learning_rate": 0.0005, "loss": 1.1925, "step": 133200 }, { "epoch": 10.45, "learning_rate": 0.0005, "loss": 1.1945, "step": 133300 }, { "epoch": 10.46, "learning_rate": 0.0005, "loss": 1.2041, "step": 133400 }, { "epoch": 10.47, "learning_rate": 0.0005, "loss": 1.22, "step": 133500 }, { "epoch": 10.48, "learning_rate": 0.0005, "loss": 1.2024, "step": 133600 }, { "epoch": 10.48, "learning_rate": 0.0005, "loss": 1.2076, "step": 133700 }, { "epoch": 10.49, "learning_rate": 0.0005, "loss": 1.2013, "step": 133800 }, { "epoch": 10.5, "learning_rate": 0.0005, "loss": 1.2254, "step": 133900 }, { "epoch": 10.51, "learning_rate": 0.0005, "loss": 1.202, "step": 134000 }, { "epoch": 10.52, "learning_rate": 0.0005, "loss": 1.2101, "step": 134100 }, { "epoch": 10.52, "learning_rate": 0.0005, "loss": 1.2049, "step": 134200 }, { "epoch": 10.53, "learning_rate": 0.0005, "loss": 1.2248, "step": 134300 }, { "epoch": 10.54, "learning_rate": 0.0005, "loss": 1.2321, "step": 134400 }, { "epoch": 10.55, "learning_rate": 0.0005, "loss": 1.2106, "step": 134500 }, { "epoch": 10.56, "learning_rate": 0.0005, "loss": 1.2166, "step": 134600 }, { "epoch": 10.56, "learning_rate": 0.0005, "loss": 1.2197, "step": 134700 }, { "epoch": 10.57, "learning_rate": 0.0005, "loss": 1.2132, "step": 134800 }, { "epoch": 10.58, "learning_rate": 0.0005, "loss": 1.2318, "step": 134900 }, { "epoch": 10.59, "learning_rate": 0.0005, "loss": 1.2189, "step": 135000 }, { "epoch": 10.59, "learning_rate": 0.0005, "loss": 1.2334, "step": 135100 }, { "epoch": 10.6, "learning_rate": 0.0005, "loss": 1.2417, "step": 135200 }, { "epoch": 10.61, "learning_rate": 0.0005, "loss": 1.2381, "step": 135300 }, { "epoch": 10.62, "learning_rate": 0.0005, "loss": 1.2264, "step": 135400 }, { "epoch": 10.63, "learning_rate": 0.0005, "loss": 1.2124, "step": 135500 }, { "epoch": 10.63, "learning_rate": 0.0005, "loss": 1.2251, "step": 135600 }, { "epoch": 10.64, "learning_rate": 0.0005, "loss": 1.2115, "step": 135700 }, { "epoch": 10.65, "learning_rate": 0.0005, "loss": 1.2197, "step": 135800 }, { "epoch": 10.66, "learning_rate": 0.0005, "loss": 1.2118, "step": 135900 }, { "epoch": 10.66, "learning_rate": 0.0005, "loss": 1.2366, "step": 136000 }, { "epoch": 10.67, "learning_rate": 0.0005, "loss": 1.2107, "step": 136100 }, { "epoch": 10.68, "learning_rate": 0.0005, "loss": 1.2308, "step": 136200 }, { "epoch": 10.69, "learning_rate": 0.0005, "loss": 1.2161, "step": 136300 }, { "epoch": 10.7, "learning_rate": 0.0005, "loss": 1.2348, "step": 136400 }, { "epoch": 10.7, "learning_rate": 0.0005, "loss": 1.207, "step": 136500 }, { "epoch": 10.71, "learning_rate": 0.0005, "loss": 1.2388, "step": 136600 }, { "epoch": 10.72, "learning_rate": 0.0005, "loss": 1.2165, "step": 136700 }, { "epoch": 10.73, "learning_rate": 0.0005, "loss": 1.2283, "step": 136800 }, { "epoch": 10.74, "learning_rate": 0.0005, "loss": 1.2366, "step": 136900 }, { "epoch": 10.74, "learning_rate": 0.0005, "loss": 1.2192, "step": 137000 }, { "epoch": 10.75, "learning_rate": 0.0005, "loss": 1.2077, "step": 137100 }, { "epoch": 10.76, "learning_rate": 0.0005, "loss": 1.225, "step": 137200 }, { "epoch": 10.77, "learning_rate": 0.0005, "loss": 1.2208, "step": 137300 }, { "epoch": 10.77, "learning_rate": 0.0005, "loss": 1.2194, "step": 137400 }, { "epoch": 10.78, "learning_rate": 0.0005, "loss": 1.2345, "step": 137500 }, { "epoch": 10.79, "learning_rate": 0.0005, "loss": 1.2508, "step": 137600 }, { "epoch": 10.8, "learning_rate": 0.0005, "loss": 1.2184, "step": 137700 }, { "epoch": 10.81, "learning_rate": 0.0005, "loss": 1.247, "step": 137800 }, { "epoch": 10.81, "learning_rate": 0.0005, "loss": 1.2342, "step": 137900 }, { "epoch": 10.82, "learning_rate": 0.0005, "loss": 1.2214, "step": 138000 }, { "epoch": 10.83, "learning_rate": 0.0005, "loss": 1.2064, "step": 138100 }, { "epoch": 10.84, "learning_rate": 0.0005, "loss": 1.2336, "step": 138200 }, { "epoch": 10.85, "learning_rate": 0.0005, "loss": 1.2518, "step": 138300 }, { "epoch": 10.85, "learning_rate": 0.0005, "loss": 1.2242, "step": 138400 }, { "epoch": 10.86, "learning_rate": 0.0005, "loss": 1.2257, "step": 138500 }, { "epoch": 10.87, "learning_rate": 0.0005, "loss": 1.2038, "step": 138600 }, { "epoch": 10.88, "learning_rate": 0.0005, "loss": 1.2265, "step": 138700 }, { "epoch": 10.88, "learning_rate": 0.0005, "loss": 1.2268, "step": 138800 }, { "epoch": 10.89, "learning_rate": 0.0005, "loss": 1.2488, "step": 138900 }, { "epoch": 10.9, "learning_rate": 0.0005, "loss": 1.1833, "step": 139000 }, { "epoch": 10.91, "learning_rate": 0.0005, "loss": 1.2291, "step": 139100 }, { "epoch": 10.92, "learning_rate": 0.0005, "loss": 1.2266, "step": 139200 }, { "epoch": 10.92, "learning_rate": 0.0005, "loss": 1.2037, "step": 139300 }, { "epoch": 10.93, "learning_rate": 0.0005, "loss": 1.2575, "step": 139400 }, { "epoch": 10.94, "learning_rate": 0.0005, "loss": 1.2288, "step": 139500 }, { "epoch": 10.95, "learning_rate": 0.0005, "loss": 1.2285, "step": 139600 }, { "epoch": 10.96, "learning_rate": 0.0005, "loss": 1.2516, "step": 139700 }, { "epoch": 10.96, "learning_rate": 0.0005, "loss": 1.2349, "step": 139800 }, { "epoch": 10.97, "learning_rate": 0.0005, "loss": 1.2487, "step": 139900 }, { "epoch": 10.98, "learning_rate": 0.0005, "loss": 1.2228, "step": 140000 }, { "epoch": 10.98, "eval_gen_len": 18.681733910126248, "eval_loss": 2.3462131023406982, "eval_rouge1": 35.2577, "eval_rouge2": 14.1723, "eval_rougeL": 28.9678, "eval_rougeLsum": 28.9582, "eval_runtime": 342.5375, "eval_samples_per_second": 33.068, "eval_steps_per_second": 2.067, "step": 140000 }, { "epoch": 10.99, "learning_rate": 0.0005, "loss": 1.2395, "step": 140100 }, { "epoch": 10.99, "learning_rate": 0.0005, "loss": 1.2426, "step": 140200 }, { "epoch": 11.0, "learning_rate": 0.0005, "loss": 1.1879, "step": 140300 }, { "epoch": 11.01, "learning_rate": 0.0005, "loss": 1.15, "step": 140400 }, { "epoch": 11.02, "learning_rate": 0.0005, "loss": 1.1689, "step": 140500 }, { "epoch": 11.03, "learning_rate": 0.0005, "loss": 1.1181, "step": 140600 }, { "epoch": 11.03, "learning_rate": 0.0005, "loss": 1.1402, "step": 140700 }, { "epoch": 11.04, "learning_rate": 0.0005, "loss": 1.1483, "step": 140800 }, { "epoch": 11.05, "learning_rate": 0.0005, "loss": 1.1526, "step": 140900 }, { "epoch": 11.06, "learning_rate": 0.0005, "loss": 1.1349, "step": 141000 }, { "epoch": 11.06, "learning_rate": 0.0005, "loss": 1.1376, "step": 141100 }, { "epoch": 11.07, "learning_rate": 0.0005, "loss": 1.1485, "step": 141200 }, { "epoch": 11.08, "learning_rate": 0.0005, "loss": 1.1632, "step": 141300 }, { "epoch": 11.09, "learning_rate": 0.0005, "loss": 1.1419, "step": 141400 }, { "epoch": 11.1, "learning_rate": 0.0005, "loss": 1.1597, "step": 141500 }, { "epoch": 11.1, "learning_rate": 0.0005, "loss": 1.1683, "step": 141600 }, { "epoch": 11.11, "learning_rate": 0.0005, "loss": 1.1875, "step": 141700 }, { "epoch": 11.12, "learning_rate": 0.0005, "loss": 1.1291, "step": 141800 }, { "epoch": 11.13, "learning_rate": 0.0005, "loss": 1.1621, "step": 141900 }, { "epoch": 11.14, "learning_rate": 0.0005, "loss": 1.1523, "step": 142000 }, { "epoch": 11.14, "learning_rate": 0.0005, "loss": 1.1555, "step": 142100 }, { "epoch": 11.15, "learning_rate": 0.0005, "loss": 1.1392, "step": 142200 }, { "epoch": 11.16, "learning_rate": 0.0005, "loss": 1.1446, "step": 142300 }, { "epoch": 11.17, "learning_rate": 0.0005, "loss": 1.1578, "step": 142400 }, { "epoch": 11.17, "learning_rate": 0.0005, "loss": 1.1403, "step": 142500 }, { "epoch": 11.18, "learning_rate": 0.0005, "loss": 1.15, "step": 142600 }, { "epoch": 11.19, "learning_rate": 0.0005, "loss": 1.1473, "step": 142700 }, { "epoch": 11.2, "learning_rate": 0.0005, "loss": 1.1652, "step": 142800 }, { "epoch": 11.21, "learning_rate": 0.0005, "loss": 1.1748, "step": 142900 }, { "epoch": 11.21, "learning_rate": 0.0005, "loss": 1.1487, "step": 143000 }, { "epoch": 11.22, "learning_rate": 0.0005, "loss": 1.177, "step": 143100 }, { "epoch": 11.23, "learning_rate": 0.0005, "loss": 1.1724, "step": 143200 }, { "epoch": 11.24, "learning_rate": 0.0005, "loss": 1.1556, "step": 143300 }, { "epoch": 11.25, "learning_rate": 0.0005, "loss": 1.1606, "step": 143400 }, { "epoch": 11.25, "learning_rate": 0.0005, "loss": 1.1826, "step": 143500 }, { "epoch": 11.26, "learning_rate": 0.0005, "loss": 1.1488, "step": 143600 }, { "epoch": 11.27, "learning_rate": 0.0005, "loss": 1.1783, "step": 143700 }, { "epoch": 11.28, "learning_rate": 0.0005, "loss": 1.1688, "step": 143800 }, { "epoch": 11.28, "learning_rate": 0.0005, "loss": 1.1695, "step": 143900 }, { "epoch": 11.29, "learning_rate": 0.0005, "loss": 1.1735, "step": 144000 }, { "epoch": 11.3, "learning_rate": 0.0005, "loss": 1.1811, "step": 144100 }, { "epoch": 11.31, "learning_rate": 0.0005, "loss": 1.1831, "step": 144200 }, { "epoch": 11.32, "learning_rate": 0.0005, "loss": 1.1766, "step": 144300 }, { "epoch": 11.32, "learning_rate": 0.0005, "loss": 1.1709, "step": 144400 }, { "epoch": 11.33, "learning_rate": 0.0005, "loss": 1.1633, "step": 144500 }, { "epoch": 11.34, "learning_rate": 0.0005, "loss": 1.1802, "step": 144600 }, { "epoch": 11.35, "learning_rate": 0.0005, "loss": 1.1757, "step": 144700 }, { "epoch": 11.36, "learning_rate": 0.0005, "loss": 1.1612, "step": 144800 }, { "epoch": 11.36, "learning_rate": 0.0005, "loss": 1.1808, "step": 144900 }, { "epoch": 11.37, "learning_rate": 0.0005, "loss": 1.1823, "step": 145000 }, { "epoch": 11.38, "learning_rate": 0.0005, "loss": 1.1876, "step": 145100 }, { "epoch": 11.39, "learning_rate": 0.0005, "loss": 1.1948, "step": 145200 }, { "epoch": 11.39, "learning_rate": 0.0005, "loss": 1.183, "step": 145300 }, { "epoch": 11.4, "learning_rate": 0.0005, "loss": 1.19, "step": 145400 }, { "epoch": 11.41, "learning_rate": 0.0005, "loss": 1.1493, "step": 145500 }, { "epoch": 11.42, "learning_rate": 0.0005, "loss": 1.1708, "step": 145600 }, { "epoch": 11.43, "learning_rate": 0.0005, "loss": 1.1942, "step": 145700 }, { "epoch": 11.43, "learning_rate": 0.0005, "loss": 1.1929, "step": 145800 }, { "epoch": 11.44, "learning_rate": 0.0005, "loss": 1.2045, "step": 145900 }, { "epoch": 11.45, "learning_rate": 0.0005, "loss": 1.1818, "step": 146000 }, { "epoch": 11.46, "learning_rate": 0.0005, "loss": 1.2014, "step": 146100 }, { "epoch": 11.46, "learning_rate": 0.0005, "loss": 1.1809, "step": 146200 }, { "epoch": 11.47, "learning_rate": 0.0005, "loss": 1.1725, "step": 146300 }, { "epoch": 11.48, "learning_rate": 0.0005, "loss": 1.1694, "step": 146400 }, { "epoch": 11.49, "learning_rate": 0.0005, "loss": 1.1675, "step": 146500 }, { "epoch": 11.5, "learning_rate": 0.0005, "loss": 1.2055, "step": 146600 }, { "epoch": 11.5, "learning_rate": 0.0005, "loss": 1.2039, "step": 146700 }, { "epoch": 11.51, "learning_rate": 0.0005, "loss": 1.1823, "step": 146800 }, { "epoch": 11.52, "learning_rate": 0.0005, "loss": 1.1643, "step": 146900 }, { "epoch": 11.53, "learning_rate": 0.0005, "loss": 1.1949, "step": 147000 }, { "epoch": 11.54, "learning_rate": 0.0005, "loss": 1.1884, "step": 147100 }, { "epoch": 11.54, "learning_rate": 0.0005, "loss": 1.1693, "step": 147200 }, { "epoch": 11.55, "learning_rate": 0.0005, "loss": 1.1765, "step": 147300 }, { "epoch": 11.56, "learning_rate": 0.0005, "loss": 1.1672, "step": 147400 }, { "epoch": 11.57, "learning_rate": 0.0005, "loss": 1.192, "step": 147500 }, { "epoch": 11.57, "learning_rate": 0.0005, "loss": 1.1945, "step": 147600 }, { "epoch": 11.58, "learning_rate": 0.0005, "loss": 1.1885, "step": 147700 }, { "epoch": 11.59, "learning_rate": 0.0005, "loss": 1.1994, "step": 147800 }, { "epoch": 11.6, "learning_rate": 0.0005, "loss": 1.1773, "step": 147900 }, { "epoch": 11.61, "learning_rate": 0.0005, "loss": 1.209, "step": 148000 }, { "epoch": 11.61, "learning_rate": 0.0005, "loss": 1.1904, "step": 148100 }, { "epoch": 11.62, "learning_rate": 0.0005, "loss": 1.204, "step": 148200 }, { "epoch": 11.63, "learning_rate": 0.0005, "loss": 1.2284, "step": 148300 }, { "epoch": 11.64, "learning_rate": 0.0005, "loss": 1.1981, "step": 148400 }, { "epoch": 11.65, "learning_rate": 0.0005, "loss": 1.1934, "step": 148500 }, { "epoch": 11.65, "learning_rate": 0.0005, "loss": 1.1706, "step": 148600 }, { "epoch": 11.66, "learning_rate": 0.0005, "loss": 1.1944, "step": 148700 }, { "epoch": 11.67, "learning_rate": 0.0005, "loss": 1.1932, "step": 148800 }, { "epoch": 11.68, "learning_rate": 0.0005, "loss": 1.1723, "step": 148900 }, { "epoch": 11.68, "learning_rate": 0.0005, "loss": 1.1924, "step": 149000 }, { "epoch": 11.69, "learning_rate": 0.0005, "loss": 1.198, "step": 149100 }, { "epoch": 11.7, "learning_rate": 0.0005, "loss": 1.1977, "step": 149200 }, { "epoch": 11.71, "learning_rate": 0.0005, "loss": 1.2074, "step": 149300 }, { "epoch": 11.72, "learning_rate": 0.0005, "loss": 1.1834, "step": 149400 }, { "epoch": 11.72, "learning_rate": 0.0005, "loss": 1.1882, "step": 149500 }, { "epoch": 11.73, "learning_rate": 0.0005, "loss": 1.1864, "step": 149600 }, { "epoch": 11.74, "learning_rate": 0.0005, "loss": 1.1997, "step": 149700 }, { "epoch": 11.75, "learning_rate": 0.0005, "loss": 1.198, "step": 149800 }, { "epoch": 11.76, "learning_rate": 0.0005, "loss": 1.1838, "step": 149900 }, { "epoch": 11.76, "learning_rate": 0.0005, "loss": 1.2029, "step": 150000 }, { "epoch": 11.76, "eval_gen_len": 18.718636885318265, "eval_loss": 2.359110116958618, "eval_rouge1": 34.8446, "eval_rouge2": 14.049, "eval_rougeL": 28.7144, "eval_rougeLsum": 28.7106, "eval_runtime": 344.0253, "eval_samples_per_second": 32.925, "eval_steps_per_second": 2.058, "step": 150000 }, { "epoch": 11.77, "learning_rate": 0.0005, "loss": 1.2122, "step": 150100 }, { "epoch": 11.78, "learning_rate": 0.0005, "loss": 1.1915, "step": 150200 }, { "epoch": 11.79, "learning_rate": 0.0005, "loss": 1.204, "step": 150300 }, { "epoch": 11.79, "learning_rate": 0.0005, "loss": 1.2037, "step": 150400 }, { "epoch": 11.8, "learning_rate": 0.0005, "loss": 1.1972, "step": 150500 }, { "epoch": 11.81, "learning_rate": 0.0005, "loss": 1.2176, "step": 150600 }, { "epoch": 11.82, "learning_rate": 0.0005, "loss": 1.2285, "step": 150700 }, { "epoch": 11.83, "learning_rate": 0.0005, "loss": 1.2129, "step": 150800 }, { "epoch": 11.83, "learning_rate": 0.0005, "loss": 1.2006, "step": 150900 }, { "epoch": 11.84, "learning_rate": 0.0005, "loss": 1.2294, "step": 151000 }, { "epoch": 11.85, "learning_rate": 0.0005, "loss": 1.2171, "step": 151100 }, { "epoch": 11.86, "learning_rate": 0.0005, "loss": 1.2049, "step": 151200 }, { "epoch": 11.86, "learning_rate": 0.0005, "loss": 1.1918, "step": 151300 }, { "epoch": 11.87, "learning_rate": 0.0005, "loss": 1.1853, "step": 151400 }, { "epoch": 11.88, "learning_rate": 0.0005, "loss": 1.1898, "step": 151500 }, { "epoch": 11.89, "learning_rate": 0.0005, "loss": 1.2226, "step": 151600 }, { "epoch": 11.9, "learning_rate": 0.0005, "loss": 1.2084, "step": 151700 }, { "epoch": 11.9, "learning_rate": 0.0005, "loss": 1.1979, "step": 151800 }, { "epoch": 11.91, "learning_rate": 0.0005, "loss": 1.2002, "step": 151900 }, { "epoch": 11.92, "learning_rate": 0.0005, "loss": 1.2009, "step": 152000 }, { "epoch": 11.93, "learning_rate": 0.0005, "loss": 1.2152, "step": 152100 }, { "epoch": 11.94, "learning_rate": 0.0005, "loss": 1.2111, "step": 152200 }, { "epoch": 11.94, "learning_rate": 0.0005, "loss": 1.2051, "step": 152300 }, { "epoch": 11.95, "learning_rate": 0.0005, "loss": 1.2065, "step": 152400 }, { "epoch": 11.96, "learning_rate": 0.0005, "loss": 1.2008, "step": 152500 }, { "epoch": 11.97, "learning_rate": 0.0005, "loss": 1.2221, "step": 152600 }, { "epoch": 11.97, "learning_rate": 0.0005, "loss": 1.2175, "step": 152700 }, { "epoch": 11.98, "learning_rate": 0.0005, "loss": 1.2027, "step": 152800 }, { "epoch": 11.99, "learning_rate": 0.0005, "loss": 1.1946, "step": 152900 }, { "epoch": 12.0, "learning_rate": 0.0005, "loss": 1.2154, "step": 153000 }, { "epoch": 12.01, "learning_rate": 0.0005, "loss": 1.1322, "step": 153100 }, { "epoch": 12.01, "learning_rate": 0.0005, "loss": 1.1326, "step": 153200 }, { "epoch": 12.02, "learning_rate": 0.0005, "loss": 1.129, "step": 153300 }, { "epoch": 12.03, "learning_rate": 0.0005, "loss": 1.1352, "step": 153400 }, { "epoch": 12.04, "learning_rate": 0.0005, "loss": 1.1344, "step": 153500 }, { "epoch": 12.05, "learning_rate": 0.0005, "loss": 1.0996, "step": 153600 }, { "epoch": 12.05, "learning_rate": 0.0005, "loss": 1.1338, "step": 153700 }, { "epoch": 12.06, "learning_rate": 0.0005, "loss": 1.122, "step": 153800 }, { "epoch": 12.07, "learning_rate": 0.0005, "loss": 1.1314, "step": 153900 }, { "epoch": 12.08, "learning_rate": 0.0005, "loss": 1.1346, "step": 154000 }, { "epoch": 12.08, "learning_rate": 0.0005, "loss": 1.1408, "step": 154100 }, { "epoch": 12.09, "learning_rate": 0.0005, "loss": 1.127, "step": 154200 }, { "epoch": 12.1, "learning_rate": 0.0005, "loss": 1.1311, "step": 154300 }, { "epoch": 12.11, "learning_rate": 0.0005, "loss": 1.1393, "step": 154400 }, { "epoch": 12.12, "learning_rate": 0.0005, "loss": 1.1483, "step": 154500 }, { "epoch": 12.12, "learning_rate": 0.0005, "loss": 1.1587, "step": 154600 }, { "epoch": 12.13, "learning_rate": 0.0005, "loss": 1.1518, "step": 154700 }, { "epoch": 12.14, "learning_rate": 0.0005, "loss": 1.1536, "step": 154800 }, { "epoch": 12.15, "learning_rate": 0.0005, "loss": 1.1415, "step": 154900 }, { "epoch": 12.15, "learning_rate": 0.0005, "loss": 1.1333, "step": 155000 }, { "epoch": 12.16, "learning_rate": 0.0005, "loss": 1.1504, "step": 155100 }, { "epoch": 12.17, "learning_rate": 0.0005, "loss": 1.1564, "step": 155200 }, { "epoch": 12.18, "learning_rate": 0.0005, "loss": 1.1661, "step": 155300 }, { "epoch": 12.19, "learning_rate": 0.0005, "loss": 1.1426, "step": 155400 }, { "epoch": 12.19, "learning_rate": 0.0005, "loss": 1.1569, "step": 155500 }, { "epoch": 12.2, "learning_rate": 0.0005, "loss": 1.1492, "step": 155600 }, { "epoch": 12.21, "learning_rate": 0.0005, "loss": 1.1497, "step": 155700 }, { "epoch": 12.22, "learning_rate": 0.0005, "loss": 1.1648, "step": 155800 }, { "epoch": 12.23, "learning_rate": 0.0005, "loss": 1.1646, "step": 155900 }, { "epoch": 12.23, "learning_rate": 0.0005, "loss": 1.152, "step": 156000 }, { "epoch": 12.24, "learning_rate": 0.0005, "loss": 1.1823, "step": 156100 }, { "epoch": 12.25, "learning_rate": 0.0005, "loss": 1.1476, "step": 156200 }, { "epoch": 12.26, "learning_rate": 0.0005, "loss": 1.164, "step": 156300 }, { "epoch": 12.26, "learning_rate": 0.0005, "loss": 1.1714, "step": 156400 }, { "epoch": 12.27, "learning_rate": 0.0005, "loss": 1.1536, "step": 156500 }, { "epoch": 12.28, "learning_rate": 0.0005, "loss": 1.1508, "step": 156600 }, { "epoch": 12.29, "learning_rate": 0.0005, "loss": 1.1664, "step": 156700 }, { "epoch": 12.3, "learning_rate": 0.0005, "loss": 1.1602, "step": 156800 }, { "epoch": 12.3, "learning_rate": 0.0005, "loss": 1.1709, "step": 156900 }, { "epoch": 12.31, "learning_rate": 0.0005, "loss": 1.1634, "step": 157000 }, { "epoch": 12.32, "learning_rate": 0.0005, "loss": 1.1512, "step": 157100 }, { "epoch": 12.33, "learning_rate": 0.0005, "loss": 1.1742, "step": 157200 }, { "epoch": 12.34, "learning_rate": 0.0005, "loss": 1.175, "step": 157300 }, { "epoch": 12.34, "learning_rate": 0.0005, "loss": 1.1651, "step": 157400 }, { "epoch": 12.35, "learning_rate": 0.0005, "loss": 1.1466, "step": 157500 }, { "epoch": 12.36, "learning_rate": 0.0005, "loss": 1.1837, "step": 157600 }, { "epoch": 12.37, "learning_rate": 0.0005, "loss": 1.1443, "step": 157700 }, { "epoch": 12.37, "learning_rate": 0.0005, "loss": 1.1536, "step": 157800 }, { "epoch": 12.38, "learning_rate": 0.0005, "loss": 1.1573, "step": 157900 }, { "epoch": 12.39, "learning_rate": 0.0005, "loss": 1.1771, "step": 158000 }, { "epoch": 12.4, "learning_rate": 0.0005, "loss": 1.1854, "step": 158100 }, { "epoch": 12.41, "learning_rate": 0.0005, "loss": 1.1495, "step": 158200 }, { "epoch": 12.41, "learning_rate": 0.0005, "loss": 1.1569, "step": 158300 }, { "epoch": 12.42, "learning_rate": 0.0005, "loss": 1.1653, "step": 158400 }, { "epoch": 12.43, "learning_rate": 0.0005, "loss": 1.1752, "step": 158500 }, { "epoch": 12.44, "learning_rate": 0.0005, "loss": 1.1757, "step": 158600 }, { "epoch": 12.45, "learning_rate": 0.0005, "loss": 1.1979, "step": 158700 }, { "epoch": 12.45, "learning_rate": 0.0005, "loss": 1.1784, "step": 158800 }, { "epoch": 12.46, "learning_rate": 0.0005, "loss": 1.1607, "step": 158900 }, { "epoch": 12.47, "learning_rate": 0.0005, "loss": 1.1937, "step": 159000 }, { "epoch": 12.48, "learning_rate": 0.0005, "loss": 1.1799, "step": 159100 }, { "epoch": 12.48, "learning_rate": 0.0005, "loss": 1.1793, "step": 159200 }, { "epoch": 12.49, "learning_rate": 0.0005, "loss": 1.1743, "step": 159300 }, { "epoch": 12.5, "learning_rate": 0.0005, "loss": 1.1639, "step": 159400 }, { "epoch": 12.51, "learning_rate": 0.0005, "loss": 1.1779, "step": 159500 }, { "epoch": 12.52, "learning_rate": 0.0005, "loss": 1.1658, "step": 159600 }, { "epoch": 12.52, "learning_rate": 0.0005, "loss": 1.1748, "step": 159700 }, { "epoch": 12.53, "learning_rate": 0.0005, "loss": 1.1649, "step": 159800 }, { "epoch": 12.54, "learning_rate": 0.0005, "loss": 1.1988, "step": 159900 }, { "epoch": 12.55, "learning_rate": 0.0005, "loss": 1.1803, "step": 160000 }, { "epoch": 12.55, "eval_gen_len": 18.743091727730203, "eval_loss": 2.3673198223114014, "eval_rouge1": 35.1989, "eval_rouge2": 14.1819, "eval_rougeL": 29.066, "eval_rougeLsum": 29.054, "eval_runtime": 343.7666, "eval_samples_per_second": 32.95, "eval_steps_per_second": 2.06, "step": 160000 }, { "epoch": 12.55, "learning_rate": 0.0005, "loss": 1.1638, "step": 160100 }, { "epoch": 12.56, "learning_rate": 0.0005, "loss": 1.149, "step": 160200 }, { "epoch": 12.57, "learning_rate": 0.0005, "loss": 1.1321, "step": 160300 }, { "epoch": 12.58, "learning_rate": 0.0005, "loss": 1.1495, "step": 160400 }, { "epoch": 12.59, "learning_rate": 0.0005, "loss": 1.1303, "step": 160500 }, { "epoch": 12.59, "learning_rate": 0.0005, "loss": 1.1605, "step": 160600 }, { "epoch": 12.6, "learning_rate": 0.0005, "loss": 1.1455, "step": 160700 }, { "epoch": 12.61, "learning_rate": 0.0005, "loss": 1.1459, "step": 160800 }, { "epoch": 12.62, "learning_rate": 0.0005, "loss": 1.1393, "step": 160900 }, { "epoch": 12.63, "learning_rate": 0.0005, "loss": 1.1386, "step": 161000 }, { "epoch": 12.63, "learning_rate": 0.0005, "loss": 1.1447, "step": 161100 }, { "epoch": 12.64, "learning_rate": 0.0005, "loss": 1.145, "step": 161200 }, { "epoch": 12.65, "learning_rate": 0.0005, "loss": 1.152, "step": 161300 }, { "epoch": 12.66, "learning_rate": 0.0005, "loss": 1.1342, "step": 161400 }, { "epoch": 12.66, "learning_rate": 0.0005, "loss": 1.1623, "step": 161500 }, { "epoch": 12.67, "learning_rate": 0.0005, "loss": 1.1526, "step": 161600 }, { "epoch": 12.68, "learning_rate": 0.0005, "loss": 1.1477, "step": 161700 }, { "epoch": 12.69, "learning_rate": 0.0005, "loss": 1.1437, "step": 161800 }, { "epoch": 12.7, "learning_rate": 0.0005, "loss": 1.1639, "step": 161900 }, { "epoch": 12.7, "learning_rate": 0.0005, "loss": 1.1569, "step": 162000 }, { "epoch": 12.71, "learning_rate": 0.0005, "loss": 1.1576, "step": 162100 }, { "epoch": 12.72, "learning_rate": 0.0005, "loss": 1.1515, "step": 162200 }, { "epoch": 12.73, "learning_rate": 0.0005, "loss": 1.1578, "step": 162300 }, { "epoch": 12.74, "learning_rate": 0.0005, "loss": 1.1403, "step": 162400 }, { "epoch": 12.74, "learning_rate": 0.0005, "loss": 1.1511, "step": 162500 }, { "epoch": 12.75, "learning_rate": 0.0005, "loss": 1.1581, "step": 162600 }, { "epoch": 12.76, "learning_rate": 0.0005, "loss": 1.1572, "step": 162700 }, { "epoch": 12.77, "learning_rate": 0.0005, "loss": 1.1475, "step": 162800 }, { "epoch": 12.77, "learning_rate": 0.0005, "loss": 1.159, "step": 162900 }, { "epoch": 12.78, "learning_rate": 0.0005, "loss": 1.1562, "step": 163000 }, { "epoch": 12.79, "learning_rate": 0.0005, "loss": 1.1721, "step": 163100 }, { "epoch": 12.8, "learning_rate": 0.0005, "loss": 1.164, "step": 163200 }, { "epoch": 12.81, "learning_rate": 0.0005, "loss": 1.1522, "step": 163300 }, { "epoch": 12.81, "learning_rate": 0.0005, "loss": 1.1563, "step": 163400 }, { "epoch": 12.82, "learning_rate": 0.0005, "loss": 1.1595, "step": 163500 }, { "epoch": 12.83, "learning_rate": 0.0005, "loss": 1.1642, "step": 163600 }, { "epoch": 12.84, "learning_rate": 0.0005, "loss": 1.1569, "step": 163700 }, { "epoch": 12.85, "learning_rate": 0.0005, "loss": 1.1536, "step": 163800 }, { "epoch": 12.85, "learning_rate": 0.0005, "loss": 1.1575, "step": 163900 }, { "epoch": 12.86, "learning_rate": 0.0005, "loss": 1.1597, "step": 164000 }, { "epoch": 12.87, "learning_rate": 0.0005, "loss": 1.1452, "step": 164100 }, { "epoch": 12.88, "learning_rate": 0.0005, "loss": 1.1616, "step": 164200 }, { "epoch": 12.88, "learning_rate": 0.0005, "loss": 1.1667, "step": 164300 }, { "epoch": 12.89, "learning_rate": 0.0005, "loss": 1.1595, "step": 164400 }, { "epoch": 12.9, "learning_rate": 0.0005, "loss": 1.1741, "step": 164500 }, { "epoch": 12.91, "learning_rate": 0.0005, "loss": 1.1855, "step": 164600 }, { "epoch": 12.92, "learning_rate": 0.0005, "loss": 1.1701, "step": 164700 }, { "epoch": 12.92, "learning_rate": 0.0005, "loss": 1.1808, "step": 164800 }, { "epoch": 12.93, "learning_rate": 0.0005, "loss": 1.159, "step": 164900 }, { "epoch": 12.94, "learning_rate": 0.0005, "loss": 1.1646, "step": 165000 }, { "epoch": 12.95, "learning_rate": 0.0005, "loss": 1.1641, "step": 165100 }, { "epoch": 12.95, "learning_rate": 0.0005, "loss": 1.1478, "step": 165200 }, { "epoch": 12.96, "learning_rate": 0.0005, "loss": 1.1586, "step": 165300 }, { "epoch": 12.97, "learning_rate": 0.0005, "loss": 1.1634, "step": 165400 }, { "epoch": 12.98, "learning_rate": 0.0005, "loss": 1.1682, "step": 165500 }, { "epoch": 12.99, "learning_rate": 0.0005, "loss": 1.1613, "step": 165600 }, { "epoch": 12.99, "learning_rate": 0.0005, "loss": 1.1577, "step": 165700 }, { "epoch": 13.0, "learning_rate": 0.0005, "loss": 1.1236, "step": 165800 }, { "epoch": 13.01, "learning_rate": 0.0005, "loss": 1.1019, "step": 165900 }, { "epoch": 13.02, "learning_rate": 0.0005, "loss": 1.1098, "step": 166000 }, { "epoch": 13.03, "learning_rate": 0.0005, "loss": 1.094, "step": 166100 }, { "epoch": 13.03, "learning_rate": 0.0005, "loss": 1.1069, "step": 166200 }, { "epoch": 13.04, "learning_rate": 0.0005, "loss": 1.0924, "step": 166300 }, { "epoch": 13.05, "learning_rate": 0.0005, "loss": 1.1061, "step": 166400 }, { "epoch": 13.06, "learning_rate": 0.0005, "loss": 1.1109, "step": 166500 }, { "epoch": 13.06, "learning_rate": 0.0005, "loss": 1.1294, "step": 166600 }, { "epoch": 13.07, "learning_rate": 0.0005, "loss": 1.1029, "step": 166700 }, { "epoch": 13.08, "learning_rate": 0.0005, "loss": 1.1201, "step": 166800 }, { "epoch": 13.09, "learning_rate": 0.0005, "loss": 1.0957, "step": 166900 }, { "epoch": 13.1, "learning_rate": 0.0005, "loss": 1.1034, "step": 167000 }, { "epoch": 13.1, "learning_rate": 0.0005, "loss": 1.1175, "step": 167100 }, { "epoch": 13.11, "learning_rate": 0.0005, "loss": 1.1181, "step": 167200 }, { "epoch": 13.12, "learning_rate": 0.0005, "loss": 1.1114, "step": 167300 }, { "epoch": 13.13, "learning_rate": 0.0005, "loss": 1.1111, "step": 167400 }, { "epoch": 13.14, "learning_rate": 0.0005, "loss": 1.0998, "step": 167500 }, { "epoch": 13.14, "learning_rate": 0.0005, "loss": 1.1189, "step": 167600 }, { "epoch": 13.15, "learning_rate": 0.0005, "loss": 1.092, "step": 167700 }, { "epoch": 13.16, "learning_rate": 0.0005, "loss": 1.1112, "step": 167800 }, { "epoch": 13.17, "learning_rate": 0.0005, "loss": 1.1004, "step": 167900 }, { "epoch": 13.17, "learning_rate": 0.0005, "loss": 1.1054, "step": 168000 }, { "epoch": 13.18, "learning_rate": 0.0005, "loss": 1.1193, "step": 168100 }, { "epoch": 13.19, "learning_rate": 0.0005, "loss": 1.1312, "step": 168200 }, { "epoch": 13.2, "learning_rate": 0.0005, "loss": 1.128, "step": 168300 }, { "epoch": 13.21, "learning_rate": 0.0005, "loss": 1.1258, "step": 168400 }, { "epoch": 13.21, "learning_rate": 0.0005, "loss": 1.1162, "step": 168500 }, { "epoch": 13.22, "learning_rate": 0.0005, "loss": 1.1088, "step": 168600 }, { "epoch": 13.23, "learning_rate": 0.0005, "loss": 1.1106, "step": 168700 }, { "epoch": 13.24, "learning_rate": 0.0005, "loss": 1.0878, "step": 168800 }, { "epoch": 13.24, "learning_rate": 0.0005, "loss": 1.1264, "step": 168900 }, { "epoch": 13.25, "learning_rate": 0.0005, "loss": 1.114, "step": 169000 }, { "epoch": 13.26, "learning_rate": 0.0005, "loss": 1.1263, "step": 169100 }, { "epoch": 13.27, "learning_rate": 0.0005, "loss": 1.1365, "step": 169200 }, { "epoch": 13.28, "learning_rate": 0.0005, "loss": 1.1339, "step": 169300 }, { "epoch": 13.28, "learning_rate": 0.0005, "loss": 1.1212, "step": 169400 }, { "epoch": 13.29, "learning_rate": 0.0005, "loss": 1.129, "step": 169500 }, { "epoch": 13.3, "learning_rate": 0.0005, "loss": 1.1214, "step": 169600 }, { "epoch": 13.31, "learning_rate": 0.0005, "loss": 1.1283, "step": 169700 }, { "epoch": 13.32, "learning_rate": 0.0005, "loss": 1.0951, "step": 169800 }, { "epoch": 13.32, "learning_rate": 0.0005, "loss": 1.1302, "step": 169900 }, { "epoch": 13.33, "learning_rate": 0.0005, "loss": 1.1123, "step": 170000 }, { "epoch": 13.33, "eval_gen_len": 18.691621788646597, "eval_loss": 2.402947187423706, "eval_rouge1": 35.1849, "eval_rouge2": 14.2186, "eval_rougeL": 29.0353, "eval_rougeLsum": 29.0203, "eval_runtime": 343.1956, "eval_samples_per_second": 33.005, "eval_steps_per_second": 2.063, "step": 170000 }, { "epoch": 13.34, "learning_rate": 0.0005, "loss": 1.1359, "step": 170100 }, { "epoch": 13.35, "learning_rate": 0.0005, "loss": 1.1214, "step": 170200 }, { "epoch": 13.35, "learning_rate": 0.0005, "loss": 1.1228, "step": 170300 }, { "epoch": 13.36, "learning_rate": 0.0005, "loss": 1.1058, "step": 170400 }, { "epoch": 13.37, "learning_rate": 0.0005, "loss": 1.1077, "step": 170500 }, { "epoch": 13.38, "learning_rate": 0.0005, "loss": 1.1253, "step": 170600 }, { "epoch": 13.39, "learning_rate": 0.0005, "loss": 1.1375, "step": 170700 }, { "epoch": 13.39, "learning_rate": 0.0005, "loss": 1.1263, "step": 170800 }, { "epoch": 13.4, "learning_rate": 0.0005, "loss": 1.1423, "step": 170900 }, { "epoch": 13.41, "learning_rate": 0.0005, "loss": 1.141, "step": 171000 }, { "epoch": 13.42, "learning_rate": 0.0005, "loss": 1.1288, "step": 171100 }, { "epoch": 13.43, "learning_rate": 0.0005, "loss": 1.1449, "step": 171200 }, { "epoch": 13.43, "learning_rate": 0.0005, "loss": 1.1116, "step": 171300 }, { "epoch": 13.44, "learning_rate": 0.0005, "loss": 1.1132, "step": 171400 }, { "epoch": 13.45, "learning_rate": 0.0005, "loss": 1.1256, "step": 171500 }, { "epoch": 13.46, "learning_rate": 0.0005, "loss": 1.1448, "step": 171600 }, { "epoch": 13.46, "learning_rate": 0.0005, "loss": 1.1284, "step": 171700 }, { "epoch": 13.47, "learning_rate": 0.0005, "loss": 1.1418, "step": 171800 }, { "epoch": 13.48, "learning_rate": 0.0005, "loss": 1.1365, "step": 171900 }, { "epoch": 13.49, "learning_rate": 0.0005, "loss": 1.1445, "step": 172000 }, { "epoch": 13.5, "learning_rate": 0.0005, "loss": 1.1358, "step": 172100 }, { "epoch": 13.5, "learning_rate": 0.0005, "loss": 1.1433, "step": 172200 }, { "epoch": 13.51, "learning_rate": 0.0005, "loss": 1.149, "step": 172300 }, { "epoch": 13.52, "learning_rate": 0.0005, "loss": 1.136, "step": 172400 }, { "epoch": 13.53, "learning_rate": 0.0005, "loss": 1.1626, "step": 172500 }, { "epoch": 13.54, "learning_rate": 0.0005, "loss": 1.1296, "step": 172600 }, { "epoch": 13.54, "learning_rate": 0.0005, "loss": 1.1324, "step": 172700 }, { "epoch": 13.55, "learning_rate": 0.0005, "loss": 1.1233, "step": 172800 }, { "epoch": 13.56, "learning_rate": 0.0005, "loss": 1.1472, "step": 172900 }, { "epoch": 13.57, "learning_rate": 0.0005, "loss": 1.1348, "step": 173000 }, { "epoch": 13.57, "learning_rate": 0.0005, "loss": 1.1338, "step": 173100 }, { "epoch": 13.58, "learning_rate": 0.0005, "loss": 1.1341, "step": 173200 }, { "epoch": 13.59, "learning_rate": 0.0005, "loss": 1.1679, "step": 173300 }, { "epoch": 13.6, "learning_rate": 0.0005, "loss": 1.1462, "step": 173400 }, { "epoch": 13.61, "learning_rate": 0.0005, "loss": 1.1452, "step": 173500 }, { "epoch": 13.61, "learning_rate": 0.0005, "loss": 1.122, "step": 173600 }, { "epoch": 13.62, "learning_rate": 0.0005, "loss": 1.1425, "step": 173700 }, { "epoch": 13.63, "learning_rate": 0.0005, "loss": 1.1436, "step": 173800 }, { "epoch": 13.64, "learning_rate": 0.0005, "loss": 1.1484, "step": 173900 }, { "epoch": 13.64, "learning_rate": 0.0005, "loss": 1.1434, "step": 174000 }, { "epoch": 13.65, "learning_rate": 0.0005, "loss": 1.137, "step": 174100 }, { "epoch": 13.66, "learning_rate": 0.0005, "loss": 1.1412, "step": 174200 }, { "epoch": 13.67, "learning_rate": 0.0005, "loss": 1.1312, "step": 174300 }, { "epoch": 13.68, "learning_rate": 0.0005, "loss": 1.1483, "step": 174400 }, { "epoch": 13.68, "learning_rate": 0.0005, "loss": 1.1517, "step": 174500 }, { "epoch": 13.69, "learning_rate": 0.0005, "loss": 1.1333, "step": 174600 }, { "epoch": 13.7, "learning_rate": 0.0005, "loss": 1.1375, "step": 174700 }, { "epoch": 13.71, "learning_rate": 0.0005, "loss": 1.1216, "step": 174800 }, { "epoch": 13.72, "learning_rate": 0.0005, "loss": 1.132, "step": 174900 }, { "epoch": 13.72, "learning_rate": 0.0005, "loss": 1.1635, "step": 175000 }, { "epoch": 13.73, "learning_rate": 0.0005, "loss": 1.1496, "step": 175100 }, { "epoch": 13.74, "learning_rate": 0.0005, "loss": 1.1656, "step": 175200 }, { "epoch": 13.75, "learning_rate": 0.0005, "loss": 1.1612, "step": 175300 }, { "epoch": 13.75, "learning_rate": 0.0005, "loss": 1.1553, "step": 175400 }, { "epoch": 13.76, "learning_rate": 0.0005, "loss": 1.1427, "step": 175500 }, { "epoch": 13.77, "learning_rate": 0.0005, "loss": 1.1426, "step": 175600 }, { "epoch": 13.78, "learning_rate": 0.0005, "loss": 1.123, "step": 175700 }, { "epoch": 13.79, "learning_rate": 0.0005, "loss": 1.1331, "step": 175800 }, { "epoch": 13.79, "learning_rate": 0.0005, "loss": 1.1252, "step": 175900 }, { "epoch": 13.8, "learning_rate": 0.0005, "loss": 1.1554, "step": 176000 }, { "epoch": 13.81, "learning_rate": 0.0005, "loss": 1.1659, "step": 176100 }, { "epoch": 13.82, "learning_rate": 0.0005, "loss": 1.1598, "step": 176200 }, { "epoch": 13.83, "learning_rate": 0.0005, "loss": 1.1493, "step": 176300 }, { "epoch": 13.83, "learning_rate": 0.0005, "loss": 1.1486, "step": 176400 }, { "epoch": 13.84, "learning_rate": 0.0005, "loss": 1.165, "step": 176500 }, { "epoch": 13.85, "learning_rate": 0.0005, "loss": 1.162, "step": 176600 }, { "epoch": 13.86, "learning_rate": 0.0005, "loss": 1.1494, "step": 176700 }, { "epoch": 13.86, "learning_rate": 0.0005, "loss": 1.1545, "step": 176800 }, { "epoch": 13.87, "learning_rate": 0.0005, "loss": 1.1265, "step": 176900 }, { "epoch": 13.88, "learning_rate": 0.0005, "loss": 1.1291, "step": 177000 }, { "epoch": 13.89, "learning_rate": 0.0005, "loss": 1.156, "step": 177100 }, { "epoch": 13.9, "learning_rate": 0.0005, "loss": 1.1521, "step": 177200 }, { "epoch": 13.9, "learning_rate": 0.0005, "loss": 1.1479, "step": 177300 }, { "epoch": 13.91, "learning_rate": 0.0005, "loss": 1.1744, "step": 177400 }, { "epoch": 13.92, "learning_rate": 0.0005, "loss": 1.1429, "step": 177500 }, { "epoch": 13.93, "learning_rate": 0.0005, "loss": 1.1531, "step": 177600 }, { "epoch": 13.94, "learning_rate": 0.0005, "loss": 1.1391, "step": 177700 }, { "epoch": 13.94, "learning_rate": 0.0005, "loss": 1.184, "step": 177800 }, { "epoch": 13.95, "learning_rate": 0.0005, "loss": 1.1511, "step": 177900 }, { "epoch": 13.96, "learning_rate": 0.0005, "loss": 1.169, "step": 178000 }, { "epoch": 13.97, "learning_rate": 0.0005, "loss": 1.1608, "step": 178100 }, { "epoch": 13.97, "learning_rate": 0.0005, "loss": 1.157, "step": 178200 }, { "epoch": 13.98, "learning_rate": 0.0005, "loss": 1.1458, "step": 178300 }, { "epoch": 13.99, "learning_rate": 0.0005, "loss": 1.1811, "step": 178400 }, { "epoch": 14.0, "learning_rate": 0.0005, "loss": 1.1609, "step": 178500 }, { "epoch": 14.01, "learning_rate": 0.0005, "loss": 1.0836, "step": 178600 }, { "epoch": 14.01, "learning_rate": 0.0005, "loss": 1.0551, "step": 178700 }, { "epoch": 14.02, "learning_rate": 0.0005, "loss": 1.0806, "step": 178800 }, { "epoch": 14.03, "learning_rate": 0.0005, "loss": 1.0879, "step": 178900 }, { "epoch": 14.04, "learning_rate": 0.0005, "loss": 1.0807, "step": 179000 }, { "epoch": 14.04, "learning_rate": 0.0005, "loss": 1.0772, "step": 179100 }, { "epoch": 14.05, "learning_rate": 0.0005, "loss": 1.081, "step": 179200 }, { "epoch": 14.06, "learning_rate": 0.0005, "loss": 1.0607, "step": 179300 }, { "epoch": 14.07, "learning_rate": 0.0005, "loss": 1.0847, "step": 179400 }, { "epoch": 14.08, "learning_rate": 0.0005, "loss": 1.0823, "step": 179500 }, { "epoch": 14.08, "learning_rate": 0.0005, "loss": 1.0844, "step": 179600 }, { "epoch": 14.09, "learning_rate": 0.0005, "loss": 1.0688, "step": 179700 }, { "epoch": 14.1, "learning_rate": 0.0005, "loss": 1.0674, "step": 179800 }, { "epoch": 14.11, "learning_rate": 0.0005, "loss": 1.0893, "step": 179900 }, { "epoch": 14.12, "learning_rate": 0.0005, "loss": 1.0958, "step": 180000 }, { "epoch": 14.12, "eval_gen_len": 18.657632206232893, "eval_loss": 2.432025671005249, "eval_rouge1": 34.8892, "eval_rouge2": 14.0297, "eval_rougeL": 28.7636, "eval_rougeLsum": 28.7631, "eval_runtime": 344.3087, "eval_samples_per_second": 32.898, "eval_steps_per_second": 2.056, "step": 180000 }, { "epoch": 14.12, "learning_rate": 0.0005, "loss": 1.0977, "step": 180100 }, { "epoch": 14.13, "learning_rate": 0.0005, "loss": 1.0785, "step": 180200 }, { "epoch": 14.14, "learning_rate": 0.0005, "loss": 1.0967, "step": 180300 }, { "epoch": 14.15, "learning_rate": 0.0005, "loss": 1.0603, "step": 180400 }, { "epoch": 14.15, "learning_rate": 0.0005, "loss": 1.0986, "step": 180500 }, { "epoch": 14.16, "learning_rate": 0.0005, "loss": 1.0738, "step": 180600 }, { "epoch": 14.17, "learning_rate": 0.0005, "loss": 1.1258, "step": 180700 }, { "epoch": 14.18, "learning_rate": 0.0005, "loss": 1.1022, "step": 180800 }, { "epoch": 14.19, "learning_rate": 0.0005, "loss": 1.0924, "step": 180900 }, { "epoch": 14.19, "learning_rate": 0.0005, "loss": 1.0958, "step": 181000 }, { "epoch": 14.2, "learning_rate": 0.0005, "loss": 1.0834, "step": 181100 }, { "epoch": 14.21, "learning_rate": 0.0005, "loss": 1.0942, "step": 181200 }, { "epoch": 14.22, "learning_rate": 0.0005, "loss": 1.118, "step": 181300 }, { "epoch": 14.23, "learning_rate": 0.0005, "loss": 1.0997, "step": 181400 }, { "epoch": 14.23, "learning_rate": 0.0005, "loss": 1.0987, "step": 181500 }, { "epoch": 14.24, "learning_rate": 0.0005, "loss": 1.1071, "step": 181600 }, { "epoch": 14.25, "learning_rate": 0.0005, "loss": 1.0964, "step": 181700 }, { "epoch": 14.26, "learning_rate": 0.0005, "loss": 1.1189, "step": 181800 }, { "epoch": 14.26, "learning_rate": 0.0005, "loss": 1.0887, "step": 181900 }, { "epoch": 14.27, "learning_rate": 0.0005, "loss": 1.1021, "step": 182000 }, { "epoch": 14.28, "learning_rate": 0.0005, "loss": 1.0815, "step": 182100 }, { "epoch": 14.29, "learning_rate": 0.0005, "loss": 1.1012, "step": 182200 }, { "epoch": 14.3, "learning_rate": 0.0005, "loss": 1.129, "step": 182300 }, { "epoch": 14.3, "learning_rate": 0.0005, "loss": 1.0999, "step": 182400 }, { "epoch": 14.31, "learning_rate": 0.0005, "loss": 1.0838, "step": 182500 }, { "epoch": 14.32, "learning_rate": 0.0005, "loss": 1.1064, "step": 182600 }, { "epoch": 14.33, "learning_rate": 0.0005, "loss": 1.1099, "step": 182700 }, { "epoch": 14.34, "learning_rate": 0.0005, "loss": 1.1062, "step": 182800 }, { "epoch": 14.34, "learning_rate": 0.0005, "loss": 1.0973, "step": 182900 }, { "epoch": 14.35, "learning_rate": 0.0005, "loss": 1.1286, "step": 183000 }, { "epoch": 14.36, "learning_rate": 0.0005, "loss": 1.1003, "step": 183100 }, { "epoch": 14.37, "learning_rate": 0.0005, "loss": 1.1231, "step": 183200 }, { "epoch": 14.37, "learning_rate": 0.0005, "loss": 1.099, "step": 183300 }, { "epoch": 14.38, "learning_rate": 0.0005, "loss": 1.0944, "step": 183400 }, { "epoch": 14.39, "learning_rate": 0.0005, "loss": 1.0811, "step": 183500 }, { "epoch": 14.4, "learning_rate": 0.0005, "loss": 1.0824, "step": 183600 }, { "epoch": 14.41, "learning_rate": 0.0005, "loss": 1.1227, "step": 183700 }, { "epoch": 14.41, "learning_rate": 0.0005, "loss": 1.1075, "step": 183800 }, { "epoch": 14.42, "learning_rate": 0.0005, "loss": 1.1308, "step": 183900 }, { "epoch": 14.43, "learning_rate": 0.0005, "loss": 1.0981, "step": 184000 }, { "epoch": 14.44, "learning_rate": 0.0005, "loss": 1.0908, "step": 184100 }, { "epoch": 14.44, "learning_rate": 0.0005, "loss": 1.1119, "step": 184200 }, { "epoch": 14.45, "learning_rate": 0.0005, "loss": 1.1105, "step": 184300 }, { "epoch": 14.46, "learning_rate": 0.0005, "loss": 1.0906, "step": 184400 }, { "epoch": 14.47, "learning_rate": 0.0005, "loss": 1.1159, "step": 184500 }, { "epoch": 14.48, "learning_rate": 0.0005, "loss": 1.1075, "step": 184600 }, { "epoch": 14.48, "learning_rate": 0.0005, "loss": 1.1018, "step": 184700 }, { "epoch": 14.49, "learning_rate": 0.0005, "loss": 1.1072, "step": 184800 }, { "epoch": 14.5, "learning_rate": 0.0005, "loss": 1.1099, "step": 184900 }, { "epoch": 14.51, "learning_rate": 0.0005, "loss": 1.1203, "step": 185000 }, { "epoch": 14.52, "learning_rate": 0.0005, "loss": 1.1082, "step": 185100 }, { "epoch": 14.52, "learning_rate": 0.0005, "loss": 1.1119, "step": 185200 }, { "epoch": 14.53, "learning_rate": 0.0005, "loss": 1.125, "step": 185300 }, { "epoch": 14.54, "learning_rate": 0.0005, "loss": 1.1, "step": 185400 }, { "epoch": 14.55, "learning_rate": 0.0005, "loss": 1.117, "step": 185500 }, { "epoch": 14.55, "learning_rate": 0.0005, "loss": 1.1313, "step": 185600 }, { "epoch": 14.56, "learning_rate": 0.0005, "loss": 1.1077, "step": 185700 }, { "epoch": 14.57, "learning_rate": 0.0005, "loss": 1.0911, "step": 185800 }, { "epoch": 14.58, "learning_rate": 0.0005, "loss": 1.1025, "step": 185900 }, { "epoch": 14.59, "learning_rate": 0.0005, "loss": 1.1294, "step": 186000 }, { "epoch": 14.59, "learning_rate": 0.0005, "loss": 1.1237, "step": 186100 }, { "epoch": 14.6, "learning_rate": 0.0005, "loss": 1.129, "step": 186200 }, { "epoch": 14.61, "learning_rate": 0.0005, "loss": 1.1181, "step": 186300 }, { "epoch": 14.62, "learning_rate": 0.0005, "loss": 1.1075, "step": 186400 }, { "epoch": 14.63, "learning_rate": 0.0005, "loss": 1.1288, "step": 186500 }, { "epoch": 14.63, "learning_rate": 0.0005, "loss": 1.1283, "step": 186600 }, { "epoch": 14.64, "learning_rate": 0.0005, "loss": 1.1295, "step": 186700 }, { "epoch": 14.65, "learning_rate": 0.0005, "loss": 1.0999, "step": 186800 }, { "epoch": 14.66, "learning_rate": 0.0005, "loss": 1.1222, "step": 186900 }, { "epoch": 14.66, "learning_rate": 0.0005, "loss": 1.1256, "step": 187000 }, { "epoch": 14.67, "learning_rate": 0.0005, "loss": 1.1186, "step": 187100 }, { "epoch": 14.68, "learning_rate": 0.0005, "loss": 1.1217, "step": 187200 }, { "epoch": 14.69, "learning_rate": 0.0005, "loss": 1.1336, "step": 187300 }, { "epoch": 14.7, "learning_rate": 0.0005, "loss": 1.1138, "step": 187400 }, { "epoch": 14.7, "learning_rate": 0.0005, "loss": 1.1287, "step": 187500 }, { "epoch": 14.71, "learning_rate": 0.0005, "loss": 1.1219, "step": 187600 }, { "epoch": 14.72, "learning_rate": 0.0005, "loss": 1.1354, "step": 187700 }, { "epoch": 14.73, "learning_rate": 0.0005, "loss": 1.1256, "step": 187800 }, { "epoch": 14.73, "learning_rate": 0.0005, "loss": 1.113, "step": 187900 }, { "epoch": 14.74, "learning_rate": 0.0005, "loss": 1.1323, "step": 188000 }, { "epoch": 14.75, "learning_rate": 0.0005, "loss": 1.1357, "step": 188100 }, { "epoch": 14.76, "learning_rate": 0.0005, "loss": 1.138, "step": 188200 }, { "epoch": 14.77, "learning_rate": 0.0005, "loss": 1.1336, "step": 188300 }, { "epoch": 14.77, "learning_rate": 0.0005, "loss": 1.1109, "step": 188400 }, { "epoch": 14.78, "learning_rate": 0.0005, "loss": 1.1071, "step": 188500 }, { "epoch": 14.79, "learning_rate": 0.0005, "loss": 1.1515, "step": 188600 }, { "epoch": 14.8, "learning_rate": 0.0005, "loss": 1.1109, "step": 188700 }, { "epoch": 14.81, "learning_rate": 0.0005, "loss": 1.1463, "step": 188800 }, { "epoch": 14.81, "learning_rate": 0.0005, "loss": 1.1401, "step": 188900 }, { "epoch": 14.82, "learning_rate": 0.0005, "loss": 1.1327, "step": 189000 }, { "epoch": 14.83, "learning_rate": 0.0005, "loss": 1.1423, "step": 189100 }, { "epoch": 14.84, "learning_rate": 0.0005, "loss": 1.125, "step": 189200 }, { "epoch": 14.84, "learning_rate": 0.0005, "loss": 1.1287, "step": 189300 }, { "epoch": 14.85, "learning_rate": 0.0005, "loss": 1.1205, "step": 189400 }, { "epoch": 14.86, "learning_rate": 0.0005, "loss": 1.1175, "step": 189500 }, { "epoch": 14.87, "learning_rate": 0.0005, "loss": 1.1428, "step": 189600 }, { "epoch": 14.88, "learning_rate": 0.0005, "loss": 1.1161, "step": 189700 }, { "epoch": 14.88, "learning_rate": 0.0005, "loss": 1.1274, "step": 189800 }, { "epoch": 14.89, "learning_rate": 0.0005, "loss": 1.1579, "step": 189900 }, { "epoch": 14.9, "learning_rate": 0.0005, "loss": 1.1264, "step": 190000 }, { "epoch": 14.9, "eval_gen_len": 18.73020217180189, "eval_loss": 2.38362717628479, "eval_rouge1": 35.2446, "eval_rouge2": 14.196, "eval_rougeL": 28.9968, "eval_rougeLsum": 28.9914, "eval_runtime": 342.3996, "eval_samples_per_second": 33.081, "eval_steps_per_second": 2.068, "step": 190000 }, { "epoch": 14.91, "learning_rate": 0.0005, "loss": 1.1146, "step": 190100 }, { "epoch": 14.92, "learning_rate": 0.0005, "loss": 1.1429, "step": 190200 }, { "epoch": 14.92, "learning_rate": 0.0005, "loss": 1.1314, "step": 190300 }, { "epoch": 14.93, "learning_rate": 0.0005, "loss": 1.1577, "step": 190400 }, { "epoch": 14.94, "learning_rate": 0.0005, "loss": 1.1414, "step": 190500 }, { "epoch": 14.95, "learning_rate": 0.0005, "loss": 1.109, "step": 190600 }, { "epoch": 14.95, "learning_rate": 0.0005, "loss": 1.1154, "step": 190700 }, { "epoch": 14.96, "learning_rate": 0.0005, "loss": 1.129, "step": 190800 }, { "epoch": 14.97, "learning_rate": 0.0005, "loss": 1.144, "step": 190900 }, { "epoch": 14.98, "learning_rate": 0.0005, "loss": 1.1323, "step": 191000 }, { "epoch": 14.99, "learning_rate": 0.0005, "loss": 1.155, "step": 191100 }, { "epoch": 14.99, "learning_rate": 0.0005, "loss": 1.1309, "step": 191200 }, { "epoch": 15.0, "learning_rate": 0.0005, "loss": 1.0932, "step": 191300 }, { "epoch": 15.01, "learning_rate": 0.0005, "loss": 1.0808, "step": 191400 }, { "epoch": 15.02, "learning_rate": 0.0005, "loss": 1.046, "step": 191500 }, { "epoch": 15.03, "learning_rate": 0.0005, "loss": 1.0491, "step": 191600 }, { "epoch": 15.03, "learning_rate": 0.0005, "loss": 1.0523, "step": 191700 }, { "epoch": 15.04, "learning_rate": 0.0005, "loss": 1.072, "step": 191800 }, { "epoch": 15.05, "learning_rate": 0.0005, "loss": 1.0484, "step": 191900 }, { "epoch": 15.06, "learning_rate": 0.0005, "loss": 1.046, "step": 192000 }, { "epoch": 15.06, "learning_rate": 0.0005, "loss": 1.0693, "step": 192100 }, { "epoch": 15.07, "learning_rate": 0.0005, "loss": 1.0533, "step": 192200 }, { "epoch": 15.08, "learning_rate": 0.0005, "loss": 1.0542, "step": 192300 }, { "epoch": 15.09, "learning_rate": 0.0005, "loss": 1.0432, "step": 192400 }, { "epoch": 15.1, "learning_rate": 0.0005, "loss": 1.0598, "step": 192500 }, { "epoch": 15.1, "learning_rate": 0.0005, "loss": 1.0533, "step": 192600 }, { "epoch": 15.11, "learning_rate": 0.0005, "loss": 1.0682, "step": 192700 }, { "epoch": 15.12, "learning_rate": 0.0005, "loss": 1.0734, "step": 192800 }, { "epoch": 15.13, "learning_rate": 0.0005, "loss": 1.0609, "step": 192900 }, { "epoch": 15.13, "learning_rate": 0.0005, "loss": 1.0822, "step": 193000 }, { "epoch": 15.14, "learning_rate": 0.0005, "loss": 1.0573, "step": 193100 }, { "epoch": 15.15, "learning_rate": 0.0005, "loss": 1.0729, "step": 193200 }, { "epoch": 15.16, "learning_rate": 0.0005, "loss": 1.0827, "step": 193300 }, { "epoch": 15.17, "learning_rate": 0.0005, "loss": 1.083, "step": 193400 }, { "epoch": 15.17, "learning_rate": 0.0005, "loss": 1.0814, "step": 193500 }, { "epoch": 15.18, "learning_rate": 0.0005, "loss": 1.0638, "step": 193600 }, { "epoch": 15.19, "learning_rate": 0.0005, "loss": 1.0634, "step": 193700 }, { "epoch": 15.2, "learning_rate": 0.0005, "loss": 1.0851, "step": 193800 }, { "epoch": 15.21, "learning_rate": 0.0005, "loss": 1.0631, "step": 193900 }, { "epoch": 15.21, "learning_rate": 0.0005, "loss": 1.0872, "step": 194000 }, { "epoch": 15.22, "learning_rate": 0.0005, "loss": 1.087, "step": 194100 }, { "epoch": 15.23, "learning_rate": 0.0005, "loss": 1.0731, "step": 194200 }, { "epoch": 15.24, "learning_rate": 0.0005, "loss": 1.0594, "step": 194300 }, { "epoch": 15.24, "learning_rate": 0.0005, "loss": 1.041, "step": 194400 }, { "epoch": 15.25, "learning_rate": 0.0005, "loss": 1.0599, "step": 194500 }, { "epoch": 15.26, "learning_rate": 0.0005, "loss": 1.0616, "step": 194600 }, { "epoch": 15.27, "learning_rate": 0.0005, "loss": 1.0786, "step": 194700 }, { "epoch": 15.28, "learning_rate": 0.0005, "loss": 1.0772, "step": 194800 }, { "epoch": 15.28, "learning_rate": 0.0005, "loss": 1.0876, "step": 194900 }, { "epoch": 15.29, "learning_rate": 0.0005, "loss": 1.0908, "step": 195000 }, { "epoch": 15.3, "learning_rate": 0.0005, "loss": 1.0711, "step": 195100 }, { "epoch": 15.31, "learning_rate": 0.0005, "loss": 1.0557, "step": 195200 }, { "epoch": 15.32, "learning_rate": 0.0005, "loss": 1.0476, "step": 195300 }, { "epoch": 15.32, "learning_rate": 0.0005, "loss": 1.0742, "step": 195400 }, { "epoch": 15.33, "learning_rate": 0.0005, "loss": 1.0806, "step": 195500 }, { "epoch": 15.34, "learning_rate": 0.0005, "loss": 1.0555, "step": 195600 }, { "epoch": 15.35, "learning_rate": 0.0005, "loss": 1.0759, "step": 195700 }, { "epoch": 15.35, "learning_rate": 0.0005, "loss": 1.0776, "step": 195800 }, { "epoch": 15.36, "learning_rate": 0.0005, "loss": 1.0914, "step": 195900 }, { "epoch": 15.37, "learning_rate": 0.0005, "loss": 1.0939, "step": 196000 }, { "epoch": 15.38, "learning_rate": 0.0005, "loss": 1.0655, "step": 196100 }, { "epoch": 15.39, "learning_rate": 0.0005, "loss": 1.0871, "step": 196200 }, { "epoch": 15.39, "learning_rate": 0.0005, "loss": 1.0839, "step": 196300 }, { "epoch": 15.4, "learning_rate": 0.0005, "loss": 1.0851, "step": 196400 }, { "epoch": 15.41, "learning_rate": 0.0005, "loss": 1.0742, "step": 196500 }, { "epoch": 15.42, "learning_rate": 0.0005, "loss": 1.0946, "step": 196600 }, { "epoch": 15.43, "learning_rate": 0.0005, "loss": 1.0919, "step": 196700 }, { "epoch": 15.43, "learning_rate": 0.0005, "loss": 1.0594, "step": 196800 }, { "epoch": 15.44, "learning_rate": 0.0005, "loss": 1.0874, "step": 196900 }, { "epoch": 15.45, "learning_rate": 0.0005, "loss": 1.0856, "step": 197000 }, { "epoch": 15.46, "learning_rate": 0.0005, "loss": 1.0925, "step": 197100 }, { "epoch": 15.46, "learning_rate": 0.0005, "loss": 1.0887, "step": 197200 }, { "epoch": 15.47, "learning_rate": 0.0005, "loss": 1.0745, "step": 197300 }, { "epoch": 15.48, "learning_rate": 0.0005, "loss": 1.0864, "step": 197400 }, { "epoch": 15.49, "learning_rate": 0.0005, "loss": 1.0766, "step": 197500 }, { "epoch": 15.5, "learning_rate": 0.0005, "loss": 1.0996, "step": 197600 }, { "epoch": 15.5, "learning_rate": 0.0005, "loss": 1.0983, "step": 197700 }, { "epoch": 15.51, "learning_rate": 0.0005, "loss": 1.1033, "step": 197800 }, { "epoch": 15.52, "learning_rate": 0.0005, "loss": 1.1132, "step": 197900 }, { "epoch": 15.53, "learning_rate": 0.0005, "loss": 1.1001, "step": 198000 }, { "epoch": 15.53, "learning_rate": 0.0005, "loss": 1.0912, "step": 198100 }, { "epoch": 15.54, "learning_rate": 0.0005, "loss": 1.0939, "step": 198200 }, { "epoch": 15.55, "learning_rate": 0.0005, "loss": 1.1, "step": 198300 }, { "epoch": 15.56, "learning_rate": 0.0005, "loss": 1.1019, "step": 198400 }, { "epoch": 15.57, "learning_rate": 0.0005, "loss": 1.1057, "step": 198500 }, { "epoch": 15.57, "learning_rate": 0.0005, "loss": 1.1087, "step": 198600 }, { "epoch": 15.58, "learning_rate": 0.0005, "loss": 1.0962, "step": 198700 }, { "epoch": 15.59, "learning_rate": 0.0005, "loss": 1.0962, "step": 198800 }, { "epoch": 15.6, "learning_rate": 0.0005, "loss": 1.0943, "step": 198900 }, { "epoch": 15.61, "learning_rate": 0.0005, "loss": 1.0968, "step": 199000 }, { "epoch": 15.61, "learning_rate": 0.0005, "loss": 1.1118, "step": 199100 }, { "epoch": 15.62, "learning_rate": 0.0005, "loss": 1.0882, "step": 199200 }, { "epoch": 15.63, "learning_rate": 0.0005, "loss": 1.088, "step": 199300 }, { "epoch": 15.64, "learning_rate": 0.0005, "loss": 1.0895, "step": 199400 }, { "epoch": 15.64, "learning_rate": 0.0005, "loss": 1.1213, "step": 199500 }, { "epoch": 15.65, "learning_rate": 0.0005, "loss": 1.1064, "step": 199600 }, { "epoch": 15.66, "learning_rate": 0.0005, "loss": 1.1179, "step": 199700 }, { "epoch": 15.67, "learning_rate": 0.0005, "loss": 1.1243, "step": 199800 }, { "epoch": 15.68, "learning_rate": 0.0005, "loss": 1.1109, "step": 199900 }, { "epoch": 15.68, "learning_rate": 0.0005, "loss": 1.1161, "step": 200000 }, { "epoch": 15.68, "eval_gen_len": 18.732762426061623, "eval_loss": 2.4067530632019043, "eval_rouge1": 35.3601, "eval_rouge2": 14.3396, "eval_rougeL": 29.1605, "eval_rougeLsum": 29.1595, "eval_runtime": 343.0369, "eval_samples_per_second": 33.02, "eval_steps_per_second": 2.064, "step": 200000 }, { "epoch": 15.69, "learning_rate": 0.0005, "loss": 1.1047, "step": 200100 }, { "epoch": 15.7, "learning_rate": 0.0005, "loss": 1.0996, "step": 200200 }, { "epoch": 15.71, "learning_rate": 0.0005, "loss": 1.0988, "step": 200300 }, { "epoch": 15.72, "learning_rate": 0.0005, "loss": 1.1002, "step": 200400 }, { "epoch": 15.72, "learning_rate": 0.0005, "loss": 1.1066, "step": 200500 }, { "epoch": 15.73, "learning_rate": 0.0005, "loss": 1.0973, "step": 200600 }, { "epoch": 15.74, "learning_rate": 0.0005, "loss": 1.0974, "step": 200700 }, { "epoch": 15.75, "learning_rate": 0.0005, "loss": 1.0902, "step": 200800 }, { "epoch": 15.75, "learning_rate": 0.0005, "loss": 1.1074, "step": 200900 }, { "epoch": 15.76, "learning_rate": 0.0005, "loss": 1.1035, "step": 201000 }, { "epoch": 15.77, "learning_rate": 0.0005, "loss": 1.114, "step": 201100 }, { "epoch": 15.78, "learning_rate": 0.0005, "loss": 1.1059, "step": 201200 }, { "epoch": 15.79, "learning_rate": 0.0005, "loss": 1.1142, "step": 201300 }, { "epoch": 15.79, "learning_rate": 0.0005, "loss": 1.0982, "step": 201400 }, { "epoch": 15.8, "learning_rate": 0.0005, "loss": 1.0976, "step": 201500 }, { "epoch": 15.81, "learning_rate": 0.0005, "loss": 1.0827, "step": 201600 }, { "epoch": 15.82, "learning_rate": 0.0005, "loss": 1.118, "step": 201700 }, { "epoch": 15.82, "learning_rate": 0.0005, "loss": 1.0929, "step": 201800 }, { "epoch": 15.83, "learning_rate": 0.0005, "loss": 1.1065, "step": 201900 }, { "epoch": 15.84, "learning_rate": 0.0005, "loss": 1.0972, "step": 202000 }, { "epoch": 15.85, "learning_rate": 0.0005, "loss": 1.1277, "step": 202100 }, { "epoch": 15.86, "learning_rate": 0.0005, "loss": 1.0867, "step": 202200 }, { "epoch": 15.86, "learning_rate": 0.0005, "loss": 1.1086, "step": 202300 }, { "epoch": 15.87, "learning_rate": 0.0005, "loss": 1.1308, "step": 202400 }, { "epoch": 15.88, "learning_rate": 0.0005, "loss": 1.115, "step": 202500 }, { "epoch": 15.89, "learning_rate": 0.0005, "loss": 1.1275, "step": 202600 }, { "epoch": 15.9, "learning_rate": 0.0005, "loss": 1.1183, "step": 202700 }, { "epoch": 15.9, "learning_rate": 0.0005, "loss": 1.1048, "step": 202800 }, { "epoch": 15.91, "learning_rate": 0.0005, "loss": 1.1195, "step": 202900 }, { "epoch": 15.92, "learning_rate": 0.0005, "loss": 1.1114, "step": 203000 }, { "epoch": 15.93, "learning_rate": 0.0005, "loss": 1.1119, "step": 203100 }, { "epoch": 15.93, "learning_rate": 0.0005, "loss": 1.0991, "step": 203200 }, { "epoch": 15.94, "learning_rate": 0.0005, "loss": 1.1211, "step": 203300 }, { "epoch": 15.95, "learning_rate": 0.0005, "loss": 1.0928, "step": 203400 }, { "epoch": 15.96, "learning_rate": 0.0005, "loss": 1.1166, "step": 203500 }, { "epoch": 15.97, "learning_rate": 0.0005, "loss": 1.1247, "step": 203600 }, { "epoch": 15.97, "learning_rate": 0.0005, "loss": 1.1156, "step": 203700 }, { "epoch": 15.98, "learning_rate": 0.0005, "loss": 1.1297, "step": 203800 }, { "epoch": 15.99, "learning_rate": 0.0005, "loss": 1.0901, "step": 203900 }, { "epoch": 16.0, "learning_rate": 0.0005, "loss": 1.1182, "step": 204000 }, { "epoch": 16.01, "learning_rate": 0.0005, "loss": 1.0607, "step": 204100 }, { "epoch": 16.01, "learning_rate": 0.0005, "loss": 1.027, "step": 204200 }, { "epoch": 16.02, "learning_rate": 0.0005, "loss": 1.0254, "step": 204300 }, { "epoch": 16.03, "learning_rate": 0.0005, "loss": 1.0366, "step": 204400 }, { "epoch": 16.04, "learning_rate": 0.0005, "loss": 1.0353, "step": 204500 }, { "epoch": 16.04, "learning_rate": 0.0005, "loss": 1.03, "step": 204600 }, { "epoch": 16.05, "learning_rate": 0.0005, "loss": 1.0267, "step": 204700 }, { "epoch": 16.06, "learning_rate": 0.0005, "loss": 1.0589, "step": 204800 }, { "epoch": 16.07, "learning_rate": 0.0005, "loss": 1.044, "step": 204900 }, { "epoch": 16.08, "learning_rate": 0.0005, "loss": 1.0634, "step": 205000 }, { "epoch": 16.08, "learning_rate": 0.0005, "loss": 1.0341, "step": 205100 }, { "epoch": 16.09, "learning_rate": 0.0005, "loss": 1.0392, "step": 205200 }, { "epoch": 16.1, "learning_rate": 0.0005, "loss": 1.0415, "step": 205300 }, { "epoch": 16.11, "learning_rate": 0.0005, "loss": 1.0326, "step": 205400 }, { "epoch": 16.12, "learning_rate": 0.0005, "loss": 1.0315, "step": 205500 }, { "epoch": 16.12, "learning_rate": 0.0005, "loss": 1.05, "step": 205600 }, { "epoch": 16.13, "learning_rate": 0.0005, "loss": 1.0281, "step": 205700 }, { "epoch": 16.14, "learning_rate": 0.0005, "loss": 1.0348, "step": 205800 }, { "epoch": 16.15, "learning_rate": 0.0005, "loss": 1.0406, "step": 205900 }, { "epoch": 16.15, "learning_rate": 0.0005, "loss": 1.0691, "step": 206000 }, { "epoch": 16.16, "learning_rate": 0.0005, "loss": 1.0536, "step": 206100 }, { "epoch": 16.17, "learning_rate": 0.0005, "loss": 1.0411, "step": 206200 }, { "epoch": 16.18, "learning_rate": 0.0005, "loss": 1.0639, "step": 206300 }, { "epoch": 16.19, "learning_rate": 0.0005, "loss": 1.0356, "step": 206400 }, { "epoch": 16.19, "learning_rate": 0.0005, "loss": 1.07, "step": 206500 }, { "epoch": 16.2, "learning_rate": 0.0005, "loss": 1.0525, "step": 206600 }, { "epoch": 16.21, "learning_rate": 0.0005, "loss": 1.0434, "step": 206700 }, { "epoch": 16.22, "learning_rate": 0.0005, "loss": 1.0605, "step": 206800 }, { "epoch": 16.22, "learning_rate": 0.0005, "loss": 1.0554, "step": 206900 }, { "epoch": 16.23, "learning_rate": 0.0005, "loss": 1.0467, "step": 207000 }, { "epoch": 16.24, "learning_rate": 0.0005, "loss": 1.0378, "step": 207100 }, { "epoch": 16.25, "learning_rate": 0.0005, "loss": 1.0699, "step": 207200 }, { "epoch": 16.26, "learning_rate": 0.0005, "loss": 1.0577, "step": 207300 }, { "epoch": 16.26, "learning_rate": 0.0005, "loss": 1.0271, "step": 207400 }, { "epoch": 16.27, "learning_rate": 0.0005, "loss": 1.0292, "step": 207500 }, { "epoch": 16.28, "learning_rate": 0.0005, "loss": 1.0541, "step": 207600 }, { "epoch": 16.29, "learning_rate": 0.0005, "loss": 1.0584, "step": 207700 }, { "epoch": 16.3, "learning_rate": 0.0005, "loss": 1.056, "step": 207800 }, { "epoch": 16.3, "learning_rate": 0.0005, "loss": 1.0629, "step": 207900 }, { "epoch": 16.31, "learning_rate": 0.0005, "loss": 1.0629, "step": 208000 }, { "epoch": 16.32, "learning_rate": 0.0005, "loss": 1.0636, "step": 208100 }, { "epoch": 16.33, "learning_rate": 0.0005, "loss": 1.0589, "step": 208200 }, { "epoch": 16.33, "learning_rate": 0.0005, "loss": 1.066, "step": 208300 }, { "epoch": 16.34, "learning_rate": 0.0005, "loss": 1.0703, "step": 208400 }, { "epoch": 16.35, "learning_rate": 0.0005, "loss": 1.0861, "step": 208500 }, { "epoch": 16.36, "learning_rate": 0.0005, "loss": 1.0717, "step": 208600 }, { "epoch": 16.37, "learning_rate": 0.0005, "loss": 1.0534, "step": 208700 }, { "epoch": 16.37, "learning_rate": 0.0005, "loss": 1.0496, "step": 208800 }, { "epoch": 16.38, "learning_rate": 0.0005, "loss": 1.0954, "step": 208900 }, { "epoch": 16.39, "learning_rate": 0.0005, "loss": 1.0616, "step": 209000 }, { "epoch": 16.4, "learning_rate": 0.0005, "loss": 1.0744, "step": 209100 }, { "epoch": 16.41, "learning_rate": 0.0005, "loss": 1.0587, "step": 209200 }, { "epoch": 16.41, "learning_rate": 0.0005, "loss": 1.0927, "step": 209300 }, { "epoch": 16.42, "learning_rate": 0.0005, "loss": 1.0739, "step": 209400 }, { "epoch": 16.43, "learning_rate": 0.0005, "loss": 1.0573, "step": 209500 }, { "epoch": 16.44, "learning_rate": 0.0005, "loss": 1.0688, "step": 209600 }, { "epoch": 16.44, "learning_rate": 0.0005, "loss": 1.0621, "step": 209700 }, { "epoch": 16.45, "learning_rate": 0.0005, "loss": 1.0803, "step": 209800 }, { "epoch": 16.46, "learning_rate": 0.0005, "loss": 1.0593, "step": 209900 }, { "epoch": 16.47, "learning_rate": 0.0005, "loss": 1.0635, "step": 210000 }, { "epoch": 16.47, "eval_gen_len": 18.72428710161561, "eval_loss": 2.420046806335449, "eval_rouge1": 35.2643, "eval_rouge2": 14.3476, "eval_rougeL": 29.0944, "eval_rougeLsum": 29.0995, "eval_runtime": 342.8229, "eval_samples_per_second": 33.04, "eval_steps_per_second": 2.065, "step": 210000 }, { "epoch": 16.48, "learning_rate": 0.0005, "loss": 1.0756, "step": 210100 }, { "epoch": 16.48, "learning_rate": 0.0005, "loss": 1.0664, "step": 210200 }, { "epoch": 16.49, "learning_rate": 0.0005, "loss": 1.0633, "step": 210300 }, { "epoch": 16.5, "learning_rate": 0.0005, "loss": 1.0576, "step": 210400 }, { "epoch": 16.51, "learning_rate": 0.0005, "loss": 1.0769, "step": 210500 }, { "epoch": 16.52, "learning_rate": 0.0005, "loss": 1.075, "step": 210600 }, { "epoch": 16.52, "learning_rate": 0.0005, "loss": 1.0745, "step": 210700 }, { "epoch": 16.53, "learning_rate": 0.0005, "loss": 1.0794, "step": 210800 }, { "epoch": 16.54, "learning_rate": 0.0005, "loss": 1.0714, "step": 210900 }, { "epoch": 16.55, "learning_rate": 0.0005, "loss": 1.0805, "step": 211000 }, { "epoch": 16.55, "learning_rate": 0.0005, "loss": 1.0741, "step": 211100 }, { "epoch": 16.56, "learning_rate": 0.0005, "loss": 1.0728, "step": 211200 }, { "epoch": 16.57, "learning_rate": 0.0005, "loss": 1.068, "step": 211300 }, { "epoch": 16.58, "learning_rate": 0.0005, "loss": 1.0635, "step": 211400 }, { "epoch": 16.59, "learning_rate": 0.0005, "loss": 1.065, "step": 211500 }, { "epoch": 16.59, "learning_rate": 0.0005, "loss": 1.0566, "step": 211600 }, { "epoch": 16.6, "learning_rate": 0.0005, "loss": 1.0866, "step": 211700 }, { "epoch": 16.61, "learning_rate": 0.0005, "loss": 1.0751, "step": 211800 }, { "epoch": 16.62, "learning_rate": 0.0005, "loss": 1.0654, "step": 211900 }, { "epoch": 16.62, "learning_rate": 0.0005, "loss": 1.0855, "step": 212000 }, { "epoch": 16.63, "learning_rate": 0.0005, "loss": 1.0628, "step": 212100 }, { "epoch": 16.64, "learning_rate": 0.0005, "loss": 1.0968, "step": 212200 }, { "epoch": 16.65, "learning_rate": 0.0005, "loss": 1.063, "step": 212300 }, { "epoch": 16.66, "learning_rate": 0.0005, "loss": 1.0699, "step": 212400 }, { "epoch": 16.66, "learning_rate": 0.0005, "loss": 1.0878, "step": 212500 }, { "epoch": 16.67, "learning_rate": 0.0005, "loss": 1.0845, "step": 212600 }, { "epoch": 16.68, "learning_rate": 0.0005, "loss": 1.071, "step": 212700 }, { "epoch": 16.69, "learning_rate": 0.0005, "loss": 1.0747, "step": 212800 }, { "epoch": 16.7, "learning_rate": 0.0005, "loss": 1.0946, "step": 212900 }, { "epoch": 16.7, "learning_rate": 0.0005, "loss": 1.0802, "step": 213000 }, { "epoch": 16.71, "learning_rate": 0.0005, "loss": 1.0749, "step": 213100 }, { "epoch": 16.72, "learning_rate": 0.0005, "loss": 1.0836, "step": 213200 }, { "epoch": 16.73, "learning_rate": 0.0005, "loss": 1.077, "step": 213300 }, { "epoch": 16.73, "learning_rate": 0.0005, "loss": 1.1056, "step": 213400 }, { "epoch": 16.74, "learning_rate": 0.0005, "loss": 1.1031, "step": 213500 }, { "epoch": 16.75, "learning_rate": 0.0005, "loss": 1.106, "step": 213600 }, { "epoch": 16.76, "learning_rate": 0.0005, "loss": 1.0814, "step": 213700 }, { "epoch": 16.77, "learning_rate": 0.0005, "loss": 1.0765, "step": 213800 }, { "epoch": 16.77, "learning_rate": 0.0005, "loss": 1.0846, "step": 213900 }, { "epoch": 16.78, "learning_rate": 0.0005, "loss": 1.0821, "step": 214000 }, { "epoch": 16.79, "learning_rate": 0.0005, "loss": 1.0836, "step": 214100 }, { "epoch": 16.8, "learning_rate": 0.0005, "loss": 1.0871, "step": 214200 }, { "epoch": 16.81, "learning_rate": 0.0005, "loss": 1.0805, "step": 214300 }, { "epoch": 16.81, "learning_rate": 0.0005, "loss": 1.08, "step": 214400 }, { "epoch": 16.82, "learning_rate": 0.0005, "loss": 1.0934, "step": 214500 }, { "epoch": 16.83, "learning_rate": 0.0005, "loss": 1.0884, "step": 214600 }, { "epoch": 16.84, "learning_rate": 0.0005, "loss": 1.0547, "step": 214700 }, { "epoch": 16.84, "learning_rate": 0.0005, "loss": 1.0746, "step": 214800 }, { "epoch": 16.85, "learning_rate": 0.0005, "loss": 1.079, "step": 214900 }, { "epoch": 16.86, "learning_rate": 0.0005, "loss": 1.103, "step": 215000 }, { "epoch": 16.87, "learning_rate": 0.0005, "loss": 1.0682, "step": 215100 }, { "epoch": 16.88, "learning_rate": 0.0005, "loss": 1.1059, "step": 215200 }, { "epoch": 16.88, "learning_rate": 0.0005, "loss": 1.1022, "step": 215300 }, { "epoch": 16.89, "learning_rate": 0.0005, "loss": 1.0808, "step": 215400 }, { "epoch": 16.9, "learning_rate": 0.0005, "loss": 1.1052, "step": 215500 }, { "epoch": 16.91, "learning_rate": 0.0005, "loss": 1.0847, "step": 215600 }, { "epoch": 16.91, "learning_rate": 0.0005, "loss": 1.0745, "step": 215700 }, { "epoch": 16.92, "learning_rate": 0.0005, "loss": 1.13, "step": 215800 }, { "epoch": 16.93, "learning_rate": 0.0005, "loss": 1.0783, "step": 215900 }, { "epoch": 16.94, "learning_rate": 0.0005, "loss": 1.0946, "step": 216000 }, { "epoch": 16.95, "learning_rate": 0.0005, "loss": 1.086, "step": 216100 }, { "epoch": 16.95, "learning_rate": 0.0005, "loss": 1.0999, "step": 216200 }, { "epoch": 16.96, "learning_rate": 0.0005, "loss": 1.0837, "step": 216300 }, { "epoch": 16.97, "learning_rate": 0.0005, "loss": 1.0905, "step": 216400 }, { "epoch": 16.98, "learning_rate": 0.0005, "loss": 1.0939, "step": 216500 }, { "epoch": 16.99, "learning_rate": 0.0005, "loss": 1.0817, "step": 216600 }, { "epoch": 16.99, "learning_rate": 0.0005, "loss": 1.1159, "step": 216700 }, { "epoch": 17.0, "learning_rate": 0.0005, "loss": 1.0563, "step": 216800 }, { "epoch": 17.01, "learning_rate": 0.0005, "loss": 1.0278, "step": 216900 }, { "epoch": 17.02, "learning_rate": 0.0005, "loss": 1.0096, "step": 217000 }, { "epoch": 17.02, "learning_rate": 0.0005, "loss": 1.0302, "step": 217100 }, { "epoch": 17.03, "learning_rate": 0.0005, "loss": 1.0054, "step": 217200 }, { "epoch": 17.04, "learning_rate": 0.0005, "loss": 1.0115, "step": 217300 }, { "epoch": 17.05, "learning_rate": 0.0005, "loss": 1.013, "step": 217400 }, { "epoch": 17.06, "learning_rate": 0.0005, "loss": 1.0184, "step": 217500 }, { "epoch": 17.06, "learning_rate": 0.0005, "loss": 1.0257, "step": 217600 }, { "epoch": 17.07, "learning_rate": 0.0005, "loss": 0.9953, "step": 217700 }, { "epoch": 17.08, "learning_rate": 0.0005, "loss": 1.0177, "step": 217800 }, { "epoch": 17.09, "learning_rate": 0.0005, "loss": 1.026, "step": 217900 }, { "epoch": 17.1, "learning_rate": 0.0005, "loss": 1.0229, "step": 218000 }, { "epoch": 17.1, "learning_rate": 0.0005, "loss": 1.0296, "step": 218100 }, { "epoch": 17.11, "learning_rate": 0.0005, "loss": 1.0076, "step": 218200 }, { "epoch": 17.12, "learning_rate": 0.0005, "loss": 1.0329, "step": 218300 }, { "epoch": 17.13, "learning_rate": 0.0005, "loss": 1.0276, "step": 218400 }, { "epoch": 17.13, "learning_rate": 0.0005, "loss": 1.0306, "step": 218500 }, { "epoch": 17.14, "learning_rate": 0.0005, "loss": 1.0196, "step": 218600 }, { "epoch": 17.15, "learning_rate": 0.0005, "loss": 1.0203, "step": 218700 }, { "epoch": 17.16, "learning_rate": 0.0005, "loss": 1.021, "step": 218800 }, { "epoch": 17.17, "learning_rate": 0.0005, "loss": 1.0275, "step": 218900 }, { "epoch": 17.17, "learning_rate": 0.0005, "loss": 1.0293, "step": 219000 }, { "epoch": 17.18, "learning_rate": 0.0005, "loss": 1.0421, "step": 219100 }, { "epoch": 17.19, "learning_rate": 0.0005, "loss": 1.027, "step": 219200 }, { "epoch": 17.2, "learning_rate": 0.0005, "loss": 1.0328, "step": 219300 }, { "epoch": 17.21, "learning_rate": 0.0005, "loss": 1.0245, "step": 219400 }, { "epoch": 17.21, "learning_rate": 0.0005, "loss": 1.0433, "step": 219500 }, { "epoch": 17.22, "learning_rate": 0.0005, "loss": 1.0251, "step": 219600 }, { "epoch": 17.23, "learning_rate": 0.0005, "loss": 1.0364, "step": 219700 }, { "epoch": 17.24, "learning_rate": 0.0005, "loss": 1.0404, "step": 219800 }, { "epoch": 17.24, "learning_rate": 0.0005, "loss": 1.0352, "step": 219900 }, { "epoch": 17.25, "learning_rate": 0.0005, "loss": 1.0312, "step": 220000 }, { "epoch": 17.25, "eval_gen_len": 18.717047761984638, "eval_loss": 2.43268084526062, "eval_rouge1": 35.5056, "eval_rouge2": 14.5785, "eval_rougeL": 29.2484, "eval_rougeLsum": 29.2521, "eval_runtime": 343.9116, "eval_samples_per_second": 32.936, "eval_steps_per_second": 2.059, "step": 220000 }, { "epoch": 17.26, "learning_rate": 0.0005, "loss": 1.0377, "step": 220100 }, { "epoch": 17.27, "learning_rate": 0.0005, "loss": 1.0271, "step": 220200 }, { "epoch": 17.28, "learning_rate": 0.0005, "loss": 1.0351, "step": 220300 }, { "epoch": 17.28, "learning_rate": 0.0005, "loss": 1.0499, "step": 220400 }, { "epoch": 17.29, "learning_rate": 0.0005, "loss": 1.0376, "step": 220500 }, { "epoch": 17.3, "learning_rate": 0.0005, "loss": 1.0376, "step": 220600 }, { "epoch": 17.31, "learning_rate": 0.0005, "loss": 1.0462, "step": 220700 }, { "epoch": 17.31, "learning_rate": 0.0005, "loss": 1.024, "step": 220800 }, { "epoch": 17.32, "learning_rate": 0.0005, "loss": 1.0334, "step": 220900 }, { "epoch": 17.33, "learning_rate": 0.0005, "loss": 1.0291, "step": 221000 }, { "epoch": 17.34, "learning_rate": 0.0005, "loss": 1.037, "step": 221100 }, { "epoch": 17.35, "learning_rate": 0.0005, "loss": 1.0465, "step": 221200 }, { "epoch": 17.35, "learning_rate": 0.0005, "loss": 1.0459, "step": 221300 }, { "epoch": 17.36, "learning_rate": 0.0005, "loss": 1.0302, "step": 221400 }, { "epoch": 17.37, "learning_rate": 0.0005, "loss": 1.0508, "step": 221500 }, { "epoch": 17.38, "learning_rate": 0.0005, "loss": 1.0357, "step": 221600 }, { "epoch": 17.39, "learning_rate": 0.0005, "loss": 1.0265, "step": 221700 }, { "epoch": 17.39, "learning_rate": 0.0005, "loss": 1.062, "step": 221800 }, { "epoch": 17.4, "learning_rate": 0.0005, "loss": 1.0392, "step": 221900 }, { "epoch": 17.41, "learning_rate": 0.0005, "loss": 1.055, "step": 222000 }, { "epoch": 17.42, "learning_rate": 0.0005, "loss": 1.0305, "step": 222100 }, { "epoch": 17.42, "learning_rate": 0.0005, "loss": 1.0596, "step": 222200 }, { "epoch": 17.43, "learning_rate": 0.0005, "loss": 1.0147, "step": 222300 }, { "epoch": 17.44, "learning_rate": 0.0005, "loss": 1.0573, "step": 222400 }, { "epoch": 17.45, "learning_rate": 0.0005, "loss": 1.0502, "step": 222500 }, { "epoch": 17.46, "learning_rate": 0.0005, "loss": 1.0507, "step": 222600 }, { "epoch": 17.46, "learning_rate": 0.0005, "loss": 1.0257, "step": 222700 }, { "epoch": 17.47, "learning_rate": 0.0005, "loss": 1.0551, "step": 222800 }, { "epoch": 17.48, "learning_rate": 0.0005, "loss": 1.0579, "step": 222900 }, { "epoch": 17.49, "learning_rate": 0.0005, "loss": 1.0459, "step": 223000 }, { "epoch": 17.5, "learning_rate": 0.0005, "loss": 1.041, "step": 223100 }, { "epoch": 17.5, "learning_rate": 0.0005, "loss": 1.0705, "step": 223200 }, { "epoch": 17.51, "learning_rate": 0.0005, "loss": 1.0524, "step": 223300 }, { "epoch": 17.52, "learning_rate": 0.0005, "loss": 1.0552, "step": 223400 }, { "epoch": 17.53, "learning_rate": 0.0005, "loss": 1.0558, "step": 223500 }, { "epoch": 17.53, "learning_rate": 0.0005, "loss": 1.0466, "step": 223600 }, { "epoch": 17.54, "learning_rate": 0.0005, "loss": 1.0459, "step": 223700 }, { "epoch": 17.55, "learning_rate": 0.0005, "loss": 1.0556, "step": 223800 }, { "epoch": 17.56, "learning_rate": 0.0005, "loss": 1.0623, "step": 223900 }, { "epoch": 17.57, "learning_rate": 0.0005, "loss": 1.0697, "step": 224000 }, { "epoch": 17.57, "learning_rate": 0.0005, "loss": 1.0428, "step": 224100 }, { "epoch": 17.58, "learning_rate": 0.0005, "loss": 1.0537, "step": 224200 }, { "epoch": 17.59, "learning_rate": 0.0005, "loss": 1.0653, "step": 224300 }, { "epoch": 17.6, "learning_rate": 0.0005, "loss": 1.0703, "step": 224400 }, { "epoch": 17.61, "learning_rate": 0.0005, "loss": 1.0649, "step": 224500 }, { "epoch": 17.61, "learning_rate": 0.0005, "loss": 1.062, "step": 224600 }, { "epoch": 17.62, "learning_rate": 0.0005, "loss": 1.0796, "step": 224700 }, { "epoch": 17.63, "learning_rate": 0.0005, "loss": 1.0423, "step": 224800 }, { "epoch": 17.64, "learning_rate": 0.0005, "loss": 1.039, "step": 224900 }, { "epoch": 17.64, "learning_rate": 0.0005, "loss": 1.0674, "step": 225000 }, { "epoch": 17.65, "learning_rate": 0.0005, "loss": 1.0594, "step": 225100 }, { "epoch": 17.66, "learning_rate": 0.0005, "loss": 1.0662, "step": 225200 }, { "epoch": 17.67, "learning_rate": 0.0005, "loss": 1.0737, "step": 225300 }, { "epoch": 17.68, "learning_rate": 0.0005, "loss": 1.064, "step": 225400 }, { "epoch": 17.68, "learning_rate": 0.0005, "loss": 1.0769, "step": 225500 }, { "epoch": 17.69, "learning_rate": 0.0005, "loss": 1.0544, "step": 225600 }, { "epoch": 17.7, "learning_rate": 0.0005, "loss": 1.0469, "step": 225700 }, { "epoch": 17.71, "learning_rate": 0.0005, "loss": 1.0653, "step": 225800 }, { "epoch": 17.71, "learning_rate": 0.0005, "loss": 1.0664, "step": 225900 }, { "epoch": 17.72, "learning_rate": 0.0005, "loss": 1.0693, "step": 226000 }, { "epoch": 17.73, "learning_rate": 0.0005, "loss": 1.0555, "step": 226100 }, { "epoch": 17.74, "learning_rate": 0.0005, "loss": 1.0667, "step": 226200 }, { "epoch": 17.75, "learning_rate": 0.0005, "loss": 1.0664, "step": 226300 }, { "epoch": 17.75, "learning_rate": 0.0005, "loss": 1.0606, "step": 226400 }, { "epoch": 17.76, "learning_rate": 0.0005, "loss": 1.0692, "step": 226500 }, { "epoch": 17.77, "learning_rate": 0.0005, "loss": 1.0626, "step": 226600 }, { "epoch": 17.78, "learning_rate": 0.0005, "loss": 1.059, "step": 226700 }, { "epoch": 17.79, "learning_rate": 0.0005, "loss": 1.0598, "step": 226800 }, { "epoch": 17.79, "learning_rate": 0.0005, "loss": 1.0858, "step": 226900 }, { "epoch": 17.8, "learning_rate": 0.0005, "loss": 1.0791, "step": 227000 }, { "epoch": 17.81, "learning_rate": 0.0005, "loss": 1.0832, "step": 227100 }, { "epoch": 17.82, "learning_rate": 0.0005, "loss": 1.064, "step": 227200 }, { "epoch": 17.82, "learning_rate": 0.0005, "loss": 1.0677, "step": 227300 }, { "epoch": 17.83, "learning_rate": 0.0005, "loss": 1.0612, "step": 227400 }, { "epoch": 17.84, "learning_rate": 0.0005, "loss": 1.08, "step": 227500 }, { "epoch": 17.85, "learning_rate": 0.0005, "loss": 1.0648, "step": 227600 }, { "epoch": 17.86, "learning_rate": 0.0005, "loss": 1.0848, "step": 227700 }, { "epoch": 17.86, "learning_rate": 0.0005, "loss": 1.0622, "step": 227800 }, { "epoch": 17.87, "learning_rate": 0.0005, "loss": 1.0794, "step": 227900 }, { "epoch": 17.88, "learning_rate": 0.0005, "loss": 1.0737, "step": 228000 }, { "epoch": 17.89, "learning_rate": 0.0005, "loss": 1.0633, "step": 228100 }, { "epoch": 17.9, "learning_rate": 0.0005, "loss": 1.0909, "step": 228200 }, { "epoch": 17.9, "learning_rate": 0.0005, "loss": 1.0621, "step": 228300 }, { "epoch": 17.91, "learning_rate": 0.0005, "loss": 1.0706, "step": 228400 }, { "epoch": 17.92, "learning_rate": 0.0005, "loss": 1.0838, "step": 228500 }, { "epoch": 17.93, "learning_rate": 0.0005, "loss": 1.0868, "step": 228600 }, { "epoch": 17.93, "learning_rate": 0.0005, "loss": 1.0682, "step": 228700 }, { "epoch": 17.94, "learning_rate": 0.0005, "loss": 1.0739, "step": 228800 }, { "epoch": 17.95, "learning_rate": 0.0005, "loss": 1.0755, "step": 228900 }, { "epoch": 17.96, "learning_rate": 0.0005, "loss": 1.0702, "step": 229000 }, { "epoch": 17.97, "learning_rate": 0.0005, "loss": 1.0946, "step": 229100 }, { "epoch": 17.97, "learning_rate": 0.0005, "loss": 1.0562, "step": 229200 }, { "epoch": 17.98, "learning_rate": 0.0005, "loss": 1.0592, "step": 229300 }, { "epoch": 17.99, "learning_rate": 0.0005, "loss": 1.0799, "step": 229400 }, { "epoch": 18.0, "learning_rate": 0.0005, "loss": 1.0777, "step": 229500 }, { "epoch": 18.01, "learning_rate": 0.0005, "loss": 1.0075, "step": 229600 }, { "epoch": 18.01, "learning_rate": 0.0005, "loss": 0.9877, "step": 229700 }, { "epoch": 18.02, "learning_rate": 0.0005, "loss": 0.9947, "step": 229800 }, { "epoch": 18.03, "learning_rate": 0.0005, "loss": 1.0062, "step": 229900 }, { "epoch": 18.04, "learning_rate": 0.0005, "loss": 0.9993, "step": 230000 }, { "epoch": 18.04, "eval_gen_len": 18.69480003531385, "eval_loss": 2.4594662189483643, "eval_rouge1": 35.4197, "eval_rouge2": 14.3682, "eval_rougeL": 29.0924, "eval_rougeLsum": 29.0945, "eval_runtime": 342.3949, "eval_samples_per_second": 33.082, "eval_steps_per_second": 2.068, "step": 230000 }, { "epoch": 18.04, "learning_rate": 0.0005, "loss": 0.9855, "step": 230100 }, { "epoch": 18.05, "learning_rate": 0.0005, "loss": 1.0019, "step": 230200 }, { "epoch": 18.06, "learning_rate": 0.0005, "loss": 1.008, "step": 230300 }, { "epoch": 18.07, "learning_rate": 0.0005, "loss": 0.9993, "step": 230400 }, { "epoch": 18.08, "learning_rate": 0.0005, "loss": 1.0155, "step": 230500 }, { "epoch": 18.08, "learning_rate": 0.0005, "loss": 1.0059, "step": 230600 }, { "epoch": 18.09, "learning_rate": 0.0005, "loss": 1.0119, "step": 230700 }, { "epoch": 18.1, "learning_rate": 0.0005, "loss": 1.0197, "step": 230800 }, { "epoch": 18.11, "learning_rate": 0.0005, "loss": 0.9936, "step": 230900 }, { "epoch": 18.11, "learning_rate": 0.0005, "loss": 1.0079, "step": 231000 }, { "epoch": 18.12, "learning_rate": 0.0005, "loss": 1.0032, "step": 231100 }, { "epoch": 18.13, "learning_rate": 0.0005, "loss": 1.0169, "step": 231200 }, { "epoch": 18.14, "learning_rate": 0.0005, "loss": 1.0071, "step": 231300 }, { "epoch": 18.15, "learning_rate": 0.0005, "loss": 0.9963, "step": 231400 }, { "epoch": 18.15, "learning_rate": 0.0005, "loss": 1.0187, "step": 231500 }, { "epoch": 18.16, "learning_rate": 0.0005, "loss": 0.999, "step": 231600 }, { "epoch": 18.17, "learning_rate": 0.0005, "loss": 1.0289, "step": 231700 }, { "epoch": 18.18, "learning_rate": 0.0005, "loss": 1.0194, "step": 231800 }, { "epoch": 18.19, "learning_rate": 0.0005, "loss": 1.0131, "step": 231900 }, { "epoch": 18.19, "learning_rate": 0.0005, "loss": 1.0165, "step": 232000 }, { "epoch": 18.2, "learning_rate": 0.0005, "loss": 0.99, "step": 232100 }, { "epoch": 18.21, "learning_rate": 0.0005, "loss": 1.027, "step": 232200 }, { "epoch": 18.22, "learning_rate": 0.0005, "loss": 1.0241, "step": 232300 }, { "epoch": 18.22, "learning_rate": 0.0005, "loss": 0.9904, "step": 232400 }, { "epoch": 18.23, "learning_rate": 0.0005, "loss": 1.0165, "step": 232500 }, { "epoch": 18.24, "learning_rate": 0.0005, "loss": 0.9947, "step": 232600 }, { "epoch": 18.25, "learning_rate": 0.0005, "loss": 1.0217, "step": 232700 }, { "epoch": 18.26, "learning_rate": 0.0005, "loss": 1.0232, "step": 232800 }, { "epoch": 18.26, "learning_rate": 0.0005, "loss": 1.017, "step": 232900 }, { "epoch": 18.27, "learning_rate": 0.0005, "loss": 1.0289, "step": 233000 }, { "epoch": 18.28, "learning_rate": 0.0005, "loss": 1.011, "step": 233100 }, { "epoch": 18.29, "learning_rate": 0.0005, "loss": 1.0222, "step": 233200 }, { "epoch": 18.3, "learning_rate": 0.0005, "loss": 1.0305, "step": 233300 }, { "epoch": 18.3, "learning_rate": 0.0005, "loss": 1.0203, "step": 233400 }, { "epoch": 18.31, "learning_rate": 0.0005, "loss": 1.0057, "step": 233500 }, { "epoch": 18.32, "learning_rate": 0.0005, "loss": 1.0363, "step": 233600 }, { "epoch": 18.33, "learning_rate": 0.0005, "loss": 1.0223, "step": 233700 }, { "epoch": 18.33, "learning_rate": 0.0005, "loss": 1.0201, "step": 233800 }, { "epoch": 18.34, "learning_rate": 0.0005, "loss": 1.0183, "step": 233900 }, { "epoch": 18.35, "learning_rate": 0.0005, "loss": 1.0363, "step": 234000 }, { "epoch": 18.36, "learning_rate": 0.0005, "loss": 1.0114, "step": 234100 }, { "epoch": 18.37, "learning_rate": 0.0005, "loss": 1.0234, "step": 234200 }, { "epoch": 18.37, "learning_rate": 0.0005, "loss": 1.0166, "step": 234300 }, { "epoch": 18.38, "learning_rate": 0.0005, "loss": 1.053, "step": 234400 }, { "epoch": 18.39, "learning_rate": 0.0005, "loss": 1.0298, "step": 234500 }, { "epoch": 18.4, "learning_rate": 0.0005, "loss": 1.0401, "step": 234600 }, { "epoch": 18.4, "learning_rate": 0.0005, "loss": 1.0265, "step": 234700 }, { "epoch": 18.41, "learning_rate": 0.0005, "loss": 1.0171, "step": 234800 }, { "epoch": 18.42, "learning_rate": 0.0005, "loss": 1.0059, "step": 234900 }, { "epoch": 18.43, "learning_rate": 0.0005, "loss": 1.043, "step": 235000 }, { "epoch": 18.44, "learning_rate": 0.0005, "loss": 1.0012, "step": 235100 }, { "epoch": 18.44, "learning_rate": 0.0005, "loss": 1.0426, "step": 235200 }, { "epoch": 18.45, "learning_rate": 0.0005, "loss": 1.0278, "step": 235300 }, { "epoch": 18.46, "learning_rate": 0.0005, "loss": 1.0228, "step": 235400 }, { "epoch": 18.47, "learning_rate": 0.0005, "loss": 1.0167, "step": 235500 }, { "epoch": 18.48, "learning_rate": 0.0005, "loss": 1.0007, "step": 235600 }, { "epoch": 18.48, "learning_rate": 0.0005, "loss": 1.0293, "step": 235700 }, { "epoch": 18.49, "learning_rate": 0.0005, "loss": 1.0294, "step": 235800 }, { "epoch": 18.5, "learning_rate": 0.0005, "loss": 1.0245, "step": 235900 }, { "epoch": 18.51, "learning_rate": 0.0005, "loss": 1.0381, "step": 236000 }, { "epoch": 18.51, "learning_rate": 0.0005, "loss": 1.0439, "step": 236100 }, { "epoch": 18.52, "learning_rate": 0.0005, "loss": 1.0417, "step": 236200 }, { "epoch": 18.53, "learning_rate": 0.0005, "loss": 1.0362, "step": 236300 }, { "epoch": 18.54, "learning_rate": 0.0005, "loss": 1.035, "step": 236400 }, { "epoch": 18.55, "learning_rate": 0.0005, "loss": 1.0478, "step": 236500 }, { "epoch": 18.55, "learning_rate": 0.0005, "loss": 1.023, "step": 236600 }, { "epoch": 18.56, "learning_rate": 0.0005, "loss": 1.0543, "step": 236700 }, { "epoch": 18.57, "learning_rate": 0.0005, "loss": 1.0427, "step": 236800 }, { "epoch": 18.58, "learning_rate": 0.0005, "loss": 1.0494, "step": 236900 }, { "epoch": 18.59, "learning_rate": 0.0005, "loss": 1.0253, "step": 237000 }, { "epoch": 18.59, "learning_rate": 0.0005, "loss": 1.074, "step": 237100 }, { "epoch": 18.6, "learning_rate": 0.0005, "loss": 1.043, "step": 237200 }, { "epoch": 18.61, "learning_rate": 0.0005, "loss": 1.0405, "step": 237300 }, { "epoch": 18.62, "learning_rate": 0.0005, "loss": 1.0566, "step": 237400 }, { "epoch": 18.62, "learning_rate": 0.0005, "loss": 1.037, "step": 237500 }, { "epoch": 18.63, "learning_rate": 0.0005, "loss": 1.0321, "step": 237600 }, { "epoch": 18.64, "learning_rate": 0.0005, "loss": 1.024, "step": 237700 }, { "epoch": 18.65, "learning_rate": 0.0005, "loss": 1.0432, "step": 237800 }, { "epoch": 18.66, "learning_rate": 0.0005, "loss": 1.0468, "step": 237900 }, { "epoch": 18.66, "learning_rate": 0.0005, "loss": 1.0686, "step": 238000 }, { "epoch": 18.67, "learning_rate": 0.0005, "loss": 1.0365, "step": 238100 }, { "epoch": 18.68, "learning_rate": 0.0005, "loss": 1.0464, "step": 238200 }, { "epoch": 18.69, "learning_rate": 0.0005, "loss": 1.0473, "step": 238300 }, { "epoch": 18.7, "learning_rate": 0.0005, "loss": 1.0364, "step": 238400 }, { "epoch": 18.7, "learning_rate": 0.0005, "loss": 1.0441, "step": 238500 }, { "epoch": 18.71, "learning_rate": 0.0005, "loss": 1.0376, "step": 238600 }, { "epoch": 18.72, "learning_rate": 0.0005, "loss": 1.0514, "step": 238700 }, { "epoch": 18.73, "learning_rate": 0.0005, "loss": 1.076, "step": 238800 }, { "epoch": 18.73, "learning_rate": 0.0005, "loss": 1.0455, "step": 238900 }, { "epoch": 18.74, "learning_rate": 0.0005, "loss": 1.0466, "step": 239000 }, { "epoch": 18.75, "learning_rate": 0.0005, "loss": 1.0354, "step": 239100 }, { "epoch": 18.76, "learning_rate": 0.0005, "loss": 1.0522, "step": 239200 }, { "epoch": 18.77, "learning_rate": 0.0005, "loss": 1.0421, "step": 239300 }, { "epoch": 18.77, "learning_rate": 0.0005, "loss": 1.0795, "step": 239400 }, { "epoch": 18.78, "learning_rate": 0.0005, "loss": 1.064, "step": 239500 }, { "epoch": 18.79, "learning_rate": 0.0005, "loss": 1.0534, "step": 239600 }, { "epoch": 18.8, "learning_rate": 0.0005, "loss": 1.0589, "step": 239700 }, { "epoch": 18.8, "learning_rate": 0.0005, "loss": 1.0534, "step": 239800 }, { "epoch": 18.81, "learning_rate": 0.0005, "loss": 1.0576, "step": 239900 }, { "epoch": 18.82, "learning_rate": 0.0005, "loss": 1.0384, "step": 240000 }, { "epoch": 18.82, "eval_gen_len": 18.7365586651364, "eval_loss": 2.430091142654419, "eval_rouge1": 35.7684, "eval_rouge2": 14.6816, "eval_rougeL": 29.457, "eval_rougeLsum": 29.4604, "eval_runtime": 342.3499, "eval_samples_per_second": 33.086, "eval_steps_per_second": 2.068, "step": 240000 }, { "epoch": 18.83, "learning_rate": 0.0005, "loss": 1.035, "step": 240100 }, { "epoch": 18.84, "learning_rate": 0.0005, "loss": 1.0457, "step": 240200 }, { "epoch": 18.84, "learning_rate": 0.0005, "loss": 1.0503, "step": 240300 }, { "epoch": 18.85, "learning_rate": 0.0005, "loss": 1.0483, "step": 240400 }, { "epoch": 18.86, "learning_rate": 0.0005, "loss": 1.0436, "step": 240500 }, { "epoch": 18.87, "learning_rate": 0.0005, "loss": 1.0271, "step": 240600 }, { "epoch": 18.88, "learning_rate": 0.0005, "loss": 1.0503, "step": 240700 }, { "epoch": 18.88, "learning_rate": 0.0005, "loss": 1.0274, "step": 240800 }, { "epoch": 18.89, "learning_rate": 0.0005, "loss": 1.0678, "step": 240900 }, { "epoch": 18.9, "learning_rate": 0.0005, "loss": 1.0538, "step": 241000 }, { "epoch": 18.91, "learning_rate": 0.0005, "loss": 1.0525, "step": 241100 }, { "epoch": 18.91, "learning_rate": 0.0005, "loss": 1.0766, "step": 241200 }, { "epoch": 18.92, "learning_rate": 0.0005, "loss": 1.0735, "step": 241300 }, { "epoch": 18.93, "learning_rate": 0.0005, "loss": 1.0399, "step": 241400 }, { "epoch": 18.94, "learning_rate": 0.0005, "loss": 1.058, "step": 241500 }, { "epoch": 18.95, "learning_rate": 0.0005, "loss": 1.0276, "step": 241600 }, { "epoch": 18.95, "learning_rate": 0.0005, "loss": 1.0593, "step": 241700 }, { "epoch": 18.96, "learning_rate": 0.0005, "loss": 1.0349, "step": 241800 }, { "epoch": 18.97, "learning_rate": 0.0005, "loss": 1.0605, "step": 241900 }, { "epoch": 18.98, "learning_rate": 0.0005, "loss": 1.0536, "step": 242000 }, { "epoch": 18.99, "learning_rate": 0.0005, "loss": 1.075, "step": 242100 }, { "epoch": 18.99, "learning_rate": 0.0005, "loss": 1.0602, "step": 242200 }, { "epoch": 19.0, "learning_rate": 0.0005, "loss": 1.0765, "step": 242300 }, { "epoch": 19.01, "learning_rate": 0.0005, "loss": 0.9739, "step": 242400 }, { "epoch": 19.02, "learning_rate": 0.0005, "loss": 0.9734, "step": 242500 }, { "epoch": 19.02, "learning_rate": 0.0005, "loss": 0.9762, "step": 242600 }, { "epoch": 19.03, "learning_rate": 0.0005, "loss": 0.941, "step": 242700 }, { "epoch": 19.04, "learning_rate": 0.0005, "loss": 1.0016, "step": 242800 }, { "epoch": 19.05, "learning_rate": 0.0005, "loss": 1.0044, "step": 242900 }, { "epoch": 19.06, "learning_rate": 0.0005, "loss": 0.9789, "step": 243000 }, { "epoch": 19.06, "learning_rate": 0.0005, "loss": 0.9885, "step": 243100 }, { "epoch": 19.07, "learning_rate": 0.0005, "loss": 0.9731, "step": 243200 }, { "epoch": 19.08, "learning_rate": 0.0005, "loss": 0.9897, "step": 243300 }, { "epoch": 19.09, "learning_rate": 0.0005, "loss": 0.9879, "step": 243400 }, { "epoch": 19.1, "learning_rate": 0.0005, "loss": 0.9734, "step": 243500 }, { "epoch": 19.1, "learning_rate": 0.0005, "loss": 0.9783, "step": 243600 }, { "epoch": 19.11, "learning_rate": 0.0005, "loss": 0.9865, "step": 243700 }, { "epoch": 19.12, "learning_rate": 0.0005, "loss": 0.9979, "step": 243800 }, { "epoch": 19.13, "learning_rate": 0.0005, "loss": 1.0051, "step": 243900 }, { "epoch": 19.13, "learning_rate": 0.0005, "loss": 1.0012, "step": 244000 }, { "epoch": 19.14, "learning_rate": 0.0005, "loss": 0.9992, "step": 244100 }, { "epoch": 19.15, "learning_rate": 0.0005, "loss": 1.0128, "step": 244200 }, { "epoch": 19.16, "learning_rate": 0.0005, "loss": 0.9674, "step": 244300 }, { "epoch": 19.17, "learning_rate": 0.0005, "loss": 0.9806, "step": 244400 }, { "epoch": 19.17, "learning_rate": 0.0005, "loss": 1.008, "step": 244500 }, { "epoch": 19.18, "learning_rate": 0.0005, "loss": 0.9852, "step": 244600 }, { "epoch": 19.19, "learning_rate": 0.0005, "loss": 1.0044, "step": 244700 }, { "epoch": 19.2, "learning_rate": 0.0005, "loss": 0.9852, "step": 244800 }, { "epoch": 19.2, "learning_rate": 0.0005, "loss": 1.0055, "step": 244900 }, { "epoch": 19.21, "learning_rate": 0.0005, "loss": 1.0117, "step": 245000 }, { "epoch": 19.22, "learning_rate": 0.0005, "loss": 1.005, "step": 245100 }, { "epoch": 19.23, "learning_rate": 0.0005, "loss": 0.9853, "step": 245200 }, { "epoch": 19.24, "learning_rate": 0.0005, "loss": 1.0087, "step": 245300 }, { "epoch": 19.24, "learning_rate": 0.0005, "loss": 0.992, "step": 245400 }, { "epoch": 19.25, "learning_rate": 0.0005, "loss": 1.013, "step": 245500 }, { "epoch": 19.26, "learning_rate": 0.0005, "loss": 0.9937, "step": 245600 }, { "epoch": 19.27, "learning_rate": 0.0005, "loss": 1.0154, "step": 245700 }, { "epoch": 19.28, "learning_rate": 0.0005, "loss": 0.9788, "step": 245800 }, { "epoch": 19.28, "learning_rate": 0.0005, "loss": 1.0055, "step": 245900 }, { "epoch": 19.29, "learning_rate": 0.0005, "loss": 1.0161, "step": 246000 }, { "epoch": 19.3, "learning_rate": 0.0005, "loss": 1.0108, "step": 246100 }, { "epoch": 19.31, "learning_rate": 0.0005, "loss": 0.9975, "step": 246200 }, { "epoch": 19.31, "learning_rate": 0.0005, "loss": 0.9924, "step": 246300 }, { "epoch": 19.32, "learning_rate": 0.0005, "loss": 0.9959, "step": 246400 }, { "epoch": 19.33, "learning_rate": 0.0005, "loss": 0.9903, "step": 246500 }, { "epoch": 19.34, "learning_rate": 0.0005, "loss": 1.0071, "step": 246600 }, { "epoch": 19.35, "learning_rate": 0.0005, "loss": 1.011, "step": 246700 }, { "epoch": 19.35, "learning_rate": 0.0005, "loss": 1.0049, "step": 246800 }, { "epoch": 19.36, "learning_rate": 0.0005, "loss": 1.013, "step": 246900 }, { "epoch": 19.37, "learning_rate": 0.0005, "loss": 1.0137, "step": 247000 }, { "epoch": 19.38, "learning_rate": 0.0005, "loss": 1.0292, "step": 247100 }, { "epoch": 19.39, "learning_rate": 0.0005, "loss": 1.0282, "step": 247200 }, { "epoch": 19.39, "learning_rate": 0.0005, "loss": 1.0087, "step": 247300 }, { "epoch": 19.4, "learning_rate": 0.0005, "loss": 1.0116, "step": 247400 }, { "epoch": 19.41, "learning_rate": 0.0005, "loss": 1.0191, "step": 247500 }, { "epoch": 19.42, "learning_rate": 0.0005, "loss": 1.0034, "step": 247600 }, { "epoch": 19.42, "learning_rate": 0.0005, "loss": 0.9946, "step": 247700 }, { "epoch": 19.43, "learning_rate": 0.0005, "loss": 1.0317, "step": 247800 }, { "epoch": 19.44, "learning_rate": 0.0005, "loss": 1.0143, "step": 247900 }, { "epoch": 19.45, "learning_rate": 0.0005, "loss": 1.0393, "step": 248000 }, { "epoch": 19.46, "learning_rate": 0.0005, "loss": 1.0203, "step": 248100 }, { "epoch": 19.46, "learning_rate": 0.0005, "loss": 1.0123, "step": 248200 }, { "epoch": 19.47, "learning_rate": 0.0005, "loss": 1.0127, "step": 248300 }, { "epoch": 19.48, "learning_rate": 0.0005, "loss": 1.017, "step": 248400 }, { "epoch": 19.49, "learning_rate": 0.0005, "loss": 1.0104, "step": 248500 }, { "epoch": 19.49, "learning_rate": 0.0005, "loss": 1.0166, "step": 248600 }, { "epoch": 19.5, "learning_rate": 0.0005, "loss": 1.0118, "step": 248700 }, { "epoch": 19.51, "learning_rate": 0.0005, "loss": 0.9998, "step": 248800 }, { "epoch": 19.52, "learning_rate": 0.0005, "loss": 1.0286, "step": 248900 }, { "epoch": 19.53, "learning_rate": 0.0005, "loss": 1.0186, "step": 249000 }, { "epoch": 19.53, "learning_rate": 0.0005, "loss": 1.0343, "step": 249100 }, { "epoch": 19.54, "learning_rate": 0.0005, "loss": 1.0138, "step": 249200 }, { "epoch": 19.55, "learning_rate": 0.0005, "loss": 1.0173, "step": 249300 }, { "epoch": 19.56, "learning_rate": 0.0005, "loss": 1.0029, "step": 249400 }, { "epoch": 19.57, "learning_rate": 0.0005, "loss": 0.9903, "step": 249500 }, { "epoch": 19.57, "learning_rate": 0.0005, "loss": 1.0233, "step": 249600 }, { "epoch": 19.58, "learning_rate": 0.0005, "loss": 1.0258, "step": 249700 }, { "epoch": 19.59, "learning_rate": 0.0005, "loss": 1.0016, "step": 249800 }, { "epoch": 19.6, "learning_rate": 0.0005, "loss": 1.0227, "step": 249900 }, { "epoch": 19.6, "learning_rate": 0.0005, "loss": 1.0359, "step": 250000 }, { "epoch": 19.6, "eval_gen_len": 18.751302198287277, "eval_loss": 2.4243288040161133, "eval_rouge1": 35.3752, "eval_rouge2": 14.4405, "eval_rougeL": 29.2303, "eval_rougeLsum": 29.2273, "eval_runtime": 342.217, "eval_samples_per_second": 33.099, "eval_steps_per_second": 2.069, "step": 250000 }, { "epoch": 19.61, "learning_rate": 0.0005, "loss": 1.0146, "step": 250100 }, { "epoch": 19.62, "learning_rate": 0.0005, "loss": 1.0322, "step": 250200 }, { "epoch": 19.63, "learning_rate": 0.0005, "loss": 1.0361, "step": 250300 }, { "epoch": 19.64, "learning_rate": 0.0005, "loss": 1.0092, "step": 250400 }, { "epoch": 19.64, "learning_rate": 0.0005, "loss": 1.0137, "step": 250500 }, { "epoch": 19.65, "learning_rate": 0.0005, "loss": 1.0234, "step": 250600 }, { "epoch": 19.66, "learning_rate": 0.0005, "loss": 1.0318, "step": 250700 }, { "epoch": 19.67, "learning_rate": 0.0005, "loss": 1.0336, "step": 250800 }, { "epoch": 19.68, "learning_rate": 0.0005, "loss": 1.0132, "step": 250900 }, { "epoch": 19.68, "learning_rate": 0.0005, "loss": 1.0267, "step": 251000 }, { "epoch": 19.69, "learning_rate": 0.0005, "loss": 1.018, "step": 251100 }, { "epoch": 19.7, "learning_rate": 0.0005, "loss": 1.0231, "step": 251200 }, { "epoch": 19.71, "learning_rate": 0.0005, "loss": 1.0028, "step": 251300 }, { "epoch": 19.71, "learning_rate": 0.0005, "loss": 1.0123, "step": 251400 }, { "epoch": 19.72, "learning_rate": 0.0005, "loss": 1.0257, "step": 251500 }, { "epoch": 19.73, "learning_rate": 0.0005, "loss": 1.013, "step": 251600 }, { "epoch": 19.74, "learning_rate": 0.0005, "loss": 1.0356, "step": 251700 }, { "epoch": 19.75, "learning_rate": 0.0005, "loss": 1.0293, "step": 251800 }, { "epoch": 19.75, "learning_rate": 0.0005, "loss": 1.0391, "step": 251900 }, { "epoch": 19.76, "learning_rate": 0.0005, "loss": 1.0325, "step": 252000 }, { "epoch": 19.77, "learning_rate": 0.0005, "loss": 1.0476, "step": 252100 }, { "epoch": 19.78, "learning_rate": 0.0005, "loss": 1.0539, "step": 252200 }, { "epoch": 19.79, "learning_rate": 0.0005, "loss": 1.0288, "step": 252300 }, { "epoch": 19.79, "learning_rate": 0.0005, "loss": 1.0317, "step": 252400 }, { "epoch": 19.8, "learning_rate": 0.0005, "loss": 1.0352, "step": 252500 }, { "epoch": 19.81, "learning_rate": 0.0005, "loss": 1.0252, "step": 252600 }, { "epoch": 19.82, "learning_rate": 0.0005, "loss": 1.0328, "step": 252700 }, { "epoch": 19.82, "learning_rate": 0.0005, "loss": 1.0312, "step": 252800 }, { "epoch": 19.83, "learning_rate": 0.0005, "loss": 1.0343, "step": 252900 }, { "epoch": 19.84, "learning_rate": 0.0005, "loss": 1.0557, "step": 253000 }, { "epoch": 19.85, "learning_rate": 0.0005, "loss": 1.019, "step": 253100 }, { "epoch": 19.86, "learning_rate": 0.0005, "loss": 1.0414, "step": 253200 }, { "epoch": 19.86, "learning_rate": 0.0005, "loss": 1.0328, "step": 253300 }, { "epoch": 19.87, "learning_rate": 0.0005, "loss": 1.0358, "step": 253400 }, { "epoch": 19.88, "learning_rate": 0.0005, "loss": 1.0557, "step": 253500 }, { "epoch": 19.89, "learning_rate": 0.0005, "loss": 1.0232, "step": 253600 }, { "epoch": 19.89, "learning_rate": 0.0005, "loss": 1.0468, "step": 253700 }, { "epoch": 19.9, "learning_rate": 0.0005, "loss": 1.0587, "step": 253800 }, { "epoch": 19.91, "learning_rate": 0.0005, "loss": 1.0396, "step": 253900 }, { "epoch": 19.92, "learning_rate": 0.0005, "loss": 1.0713, "step": 254000 }, { "epoch": 19.93, "learning_rate": 0.0005, "loss": 1.0377, "step": 254100 }, { "epoch": 19.93, "learning_rate": 0.0005, "loss": 1.0257, "step": 254200 }, { "epoch": 19.94, "learning_rate": 0.0005, "loss": 1.0425, "step": 254300 }, { "epoch": 19.95, "learning_rate": 0.0005, "loss": 1.0259, "step": 254400 }, { "epoch": 19.96, "learning_rate": 0.0005, "loss": 1.0423, "step": 254500 }, { "epoch": 19.97, "learning_rate": 0.0005, "loss": 1.0305, "step": 254600 }, { "epoch": 19.97, "learning_rate": 0.0005, "loss": 1.0248, "step": 254700 }, { "epoch": 19.98, "learning_rate": 0.0005, "loss": 1.0344, "step": 254800 }, { "epoch": 19.99, "learning_rate": 0.0005, "loss": 1.0227, "step": 254900 }, { "epoch": 20.0, "learning_rate": 0.0005, "loss": 1.0442, "step": 255000 }, { "epoch": 20.0, "learning_rate": 0.0005, "loss": 0.9903, "step": 255100 }, { "epoch": 20.01, "learning_rate": 0.0005, "loss": 0.9586, "step": 255200 }, { "epoch": 20.02, "learning_rate": 0.0005, "loss": 0.9437, "step": 255300 }, { "epoch": 20.03, "learning_rate": 0.0005, "loss": 0.9741, "step": 255400 }, { "epoch": 20.04, "learning_rate": 0.0005, "loss": 0.9624, "step": 255500 }, { "epoch": 20.04, "learning_rate": 0.0005, "loss": 0.9602, "step": 255600 }, { "epoch": 20.05, "learning_rate": 0.0005, "loss": 0.9789, "step": 255700 }, { "epoch": 20.06, "learning_rate": 0.0005, "loss": 0.9414, "step": 255800 }, { "epoch": 20.07, "learning_rate": 0.0005, "loss": 0.9911, "step": 255900 }, { "epoch": 20.08, "learning_rate": 0.0005, "loss": 0.9909, "step": 256000 }, { "epoch": 20.08, "learning_rate": 0.0005, "loss": 0.9742, "step": 256100 }, { "epoch": 20.09, "learning_rate": 0.0005, "loss": 0.9853, "step": 256200 }, { "epoch": 20.1, "learning_rate": 0.0005, "loss": 0.9748, "step": 256300 }, { "epoch": 20.11, "learning_rate": 0.0005, "loss": 0.9825, "step": 256400 }, { "epoch": 20.11, "learning_rate": 0.0005, "loss": 0.9797, "step": 256500 }, { "epoch": 20.12, "learning_rate": 0.0005, "loss": 0.9804, "step": 256600 }, { "epoch": 20.13, "learning_rate": 0.0005, "loss": 0.9567, "step": 256700 }, { "epoch": 20.14, "learning_rate": 0.0005, "loss": 0.9746, "step": 256800 }, { "epoch": 20.15, "learning_rate": 0.0005, "loss": 0.9547, "step": 256900 }, { "epoch": 20.15, "learning_rate": 0.0005, "loss": 0.9787, "step": 257000 }, { "epoch": 20.16, "learning_rate": 0.0005, "loss": 0.9768, "step": 257100 }, { "epoch": 20.17, "learning_rate": 0.0005, "loss": 0.9795, "step": 257200 }, { "epoch": 20.18, "learning_rate": 0.0005, "loss": 0.9594, "step": 257300 }, { "epoch": 20.19, "learning_rate": 0.0005, "loss": 0.9685, "step": 257400 }, { "epoch": 20.19, "learning_rate": 0.0005, "loss": 0.9883, "step": 257500 }, { "epoch": 20.2, "learning_rate": 0.0005, "loss": 0.9843, "step": 257600 }, { "epoch": 20.21, "learning_rate": 0.0005, "loss": 0.9766, "step": 257700 }, { "epoch": 20.22, "learning_rate": 0.0005, "loss": 0.9899, "step": 257800 }, { "epoch": 20.22, "learning_rate": 0.0005, "loss": 0.9939, "step": 257900 }, { "epoch": 20.23, "learning_rate": 0.0005, "loss": 0.9716, "step": 258000 }, { "epoch": 20.24, "learning_rate": 0.0005, "loss": 0.9831, "step": 258100 }, { "epoch": 20.25, "learning_rate": 0.0005, "loss": 0.9773, "step": 258200 }, { "epoch": 20.26, "learning_rate": 0.0005, "loss": 0.9931, "step": 258300 }, { "epoch": 20.26, "learning_rate": 0.0005, "loss": 0.9704, "step": 258400 }, { "epoch": 20.27, "learning_rate": 0.0005, "loss": 0.9973, "step": 258500 }, { "epoch": 20.28, "learning_rate": 0.0005, "loss": 0.972, "step": 258600 }, { "epoch": 20.29, "learning_rate": 0.0005, "loss": 0.9754, "step": 258700 }, { "epoch": 20.29, "learning_rate": 0.0005, "loss": 0.9839, "step": 258800 }, { "epoch": 20.3, "learning_rate": 0.0005, "loss": 0.9971, "step": 258900 }, { "epoch": 20.31, "learning_rate": 0.0005, "loss": 0.9784, "step": 259000 }, { "epoch": 20.32, "learning_rate": 0.0005, "loss": 0.9969, "step": 259100 }, { "epoch": 20.33, "learning_rate": 0.0005, "loss": 0.974, "step": 259200 }, { "epoch": 20.33, "learning_rate": 0.0005, "loss": 0.9913, "step": 259300 }, { "epoch": 20.34, "learning_rate": 0.0005, "loss": 1.0049, "step": 259400 }, { "epoch": 20.35, "learning_rate": 0.0005, "loss": 0.9758, "step": 259500 }, { "epoch": 20.36, "learning_rate": 0.0005, "loss": 0.9923, "step": 259600 }, { "epoch": 20.37, "learning_rate": 0.0005, "loss": 0.9768, "step": 259700 }, { "epoch": 20.37, "learning_rate": 0.0005, "loss": 0.9961, "step": 259800 }, { "epoch": 20.38, "learning_rate": 0.0005, "loss": 0.9883, "step": 259900 }, { "epoch": 20.39, "learning_rate": 0.0005, "loss": 0.9967, "step": 260000 }, { "epoch": 20.39, "eval_gen_len": 18.706630175686414, "eval_loss": 2.452528476715088, "eval_rouge1": 35.327, "eval_rouge2": 14.3972, "eval_rougeL": 29.2321, "eval_rougeLsum": 29.2316, "eval_runtime": 343.2241, "eval_samples_per_second": 33.002, "eval_steps_per_second": 2.063, "step": 260000 }, { "epoch": 20.4, "learning_rate": 0.0005, "loss": 0.9936, "step": 260100 }, { "epoch": 20.4, "learning_rate": 0.0005, "loss": 0.9856, "step": 260200 }, { "epoch": 20.41, "learning_rate": 0.0005, "loss": 0.9917, "step": 260300 }, { "epoch": 20.42, "learning_rate": 0.0005, "loss": 0.9936, "step": 260400 }, { "epoch": 20.43, "learning_rate": 0.0005, "loss": 0.9985, "step": 260500 }, { "epoch": 20.44, "learning_rate": 0.0005, "loss": 1.0116, "step": 260600 }, { "epoch": 20.44, "learning_rate": 0.0005, "loss": 1.0058, "step": 260700 }, { "epoch": 20.45, "learning_rate": 0.0005, "loss": 0.998, "step": 260800 }, { "epoch": 20.46, "learning_rate": 0.0005, "loss": 1.0119, "step": 260900 }, { "epoch": 20.47, "learning_rate": 0.0005, "loss": 1.0024, "step": 261000 }, { "epoch": 20.48, "learning_rate": 0.0005, "loss": 0.9831, "step": 261100 }, { "epoch": 20.48, "learning_rate": 0.0005, "loss": 0.998, "step": 261200 }, { "epoch": 20.49, "learning_rate": 0.0005, "loss": 1.0103, "step": 261300 }, { "epoch": 20.5, "learning_rate": 0.0005, "loss": 0.9915, "step": 261400 }, { "epoch": 20.51, "learning_rate": 0.0005, "loss": 0.9759, "step": 261500 }, { "epoch": 20.51, "learning_rate": 0.0005, "loss": 0.9999, "step": 261600 }, { "epoch": 20.52, "learning_rate": 0.0005, "loss": 0.9837, "step": 261700 }, { "epoch": 20.53, "learning_rate": 0.0005, "loss": 1.0255, "step": 261800 }, { "epoch": 20.54, "learning_rate": 0.0005, "loss": 0.9982, "step": 261900 }, { "epoch": 20.55, "learning_rate": 0.0005, "loss": 1.0194, "step": 262000 }, { "epoch": 20.55, "learning_rate": 0.0005, "loss": 0.9831, "step": 262100 }, { "epoch": 20.56, "learning_rate": 0.0005, "loss": 1.0041, "step": 262200 }, { "epoch": 20.57, "learning_rate": 0.0005, "loss": 1.0116, "step": 262300 }, { "epoch": 20.58, "learning_rate": 0.0005, "loss": 0.9942, "step": 262400 }, { "epoch": 20.59, "learning_rate": 0.0005, "loss": 1.0259, "step": 262500 }, { "epoch": 20.59, "learning_rate": 0.0005, "loss": 1.0009, "step": 262600 }, { "epoch": 20.6, "learning_rate": 0.0005, "loss": 0.9986, "step": 262700 }, { "epoch": 20.61, "learning_rate": 0.0005, "loss": 0.9902, "step": 262800 }, { "epoch": 20.62, "learning_rate": 0.0005, "loss": 1.0143, "step": 262900 }, { "epoch": 20.62, "learning_rate": 0.0005, "loss": 1.0043, "step": 263000 }, { "epoch": 20.63, "learning_rate": 0.0005, "loss": 0.9882, "step": 263100 }, { "epoch": 20.64, "learning_rate": 0.0005, "loss": 1.0132, "step": 263200 }, { "epoch": 20.65, "learning_rate": 0.0005, "loss": 0.9972, "step": 263300 }, { "epoch": 20.66, "learning_rate": 0.0005, "loss": 1.0014, "step": 263400 }, { "epoch": 20.66, "learning_rate": 0.0005, "loss": 1.0263, "step": 263500 }, { "epoch": 20.67, "learning_rate": 0.0005, "loss": 0.9992, "step": 263600 }, { "epoch": 20.68, "learning_rate": 0.0005, "loss": 1.0181, "step": 263700 }, { "epoch": 20.69, "learning_rate": 0.0005, "loss": 1.0116, "step": 263800 }, { "epoch": 20.69, "learning_rate": 0.0005, "loss": 1.0202, "step": 263900 }, { "epoch": 20.7, "learning_rate": 0.0005, "loss": 1.0037, "step": 264000 }, { "epoch": 20.71, "learning_rate": 0.0005, "loss": 1.0129, "step": 264100 }, { "epoch": 20.72, "learning_rate": 0.0005, "loss": 0.988, "step": 264200 }, { "epoch": 20.73, "learning_rate": 0.0005, "loss": 1.0108, "step": 264300 }, { "epoch": 20.73, "learning_rate": 0.0005, "loss": 1.007, "step": 264400 }, { "epoch": 20.74, "learning_rate": 0.0005, "loss": 1.0244, "step": 264500 }, { "epoch": 20.75, "learning_rate": 0.0005, "loss": 1.006, "step": 264600 }, { "epoch": 20.76, "learning_rate": 0.0005, "loss": 1.0086, "step": 264700 }, { "epoch": 20.77, "learning_rate": 0.0005, "loss": 1.0056, "step": 264800 }, { "epoch": 20.77, "learning_rate": 0.0005, "loss": 0.9948, "step": 264900 }, { "epoch": 20.78, "learning_rate": 0.0005, "loss": 0.9973, "step": 265000 }, { "epoch": 20.79, "learning_rate": 0.0005, "loss": 1.0202, "step": 265100 }, { "epoch": 20.8, "learning_rate": 0.0005, "loss": 1.0289, "step": 265200 }, { "epoch": 20.8, "learning_rate": 0.0005, "loss": 1.004, "step": 265300 }, { "epoch": 20.81, "learning_rate": 0.0005, "loss": 1.0016, "step": 265400 }, { "epoch": 20.82, "learning_rate": 0.0005, "loss": 1.0172, "step": 265500 }, { "epoch": 20.83, "learning_rate": 0.0005, "loss": 1.0211, "step": 265600 }, { "epoch": 20.84, "learning_rate": 0.0005, "loss": 1.022, "step": 265700 }, { "epoch": 20.84, "learning_rate": 0.0005, "loss": 1.0195, "step": 265800 }, { "epoch": 20.85, "learning_rate": 0.0005, "loss": 1.0249, "step": 265900 }, { "epoch": 20.86, "learning_rate": 0.0005, "loss": 1.0184, "step": 266000 }, { "epoch": 20.87, "learning_rate": 0.0005, "loss": 1.0103, "step": 266100 }, { "epoch": 20.88, "learning_rate": 0.0005, "loss": 1.0382, "step": 266200 }, { "epoch": 20.88, "learning_rate": 0.0005, "loss": 1.01, "step": 266300 }, { "epoch": 20.89, "learning_rate": 0.0005, "loss": 1.0226, "step": 266400 }, { "epoch": 20.9, "learning_rate": 0.0005, "loss": 1.0214, "step": 266500 }, { "epoch": 20.91, "learning_rate": 0.0005, "loss": 1.0136, "step": 266600 }, { "epoch": 20.91, "learning_rate": 0.0005, "loss": 1.0151, "step": 266700 }, { "epoch": 20.92, "learning_rate": 0.0005, "loss": 1.037, "step": 266800 }, { "epoch": 20.93, "learning_rate": 0.0005, "loss": 1.0292, "step": 266900 }, { "epoch": 20.94, "learning_rate": 0.0005, "loss": 1.0106, "step": 267000 }, { "epoch": 20.95, "learning_rate": 0.0005, "loss": 1.0214, "step": 267100 }, { "epoch": 20.95, "learning_rate": 0.0005, "loss": 1.0186, "step": 267200 }, { "epoch": 20.96, "learning_rate": 0.0005, "loss": 1.0375, "step": 267300 }, { "epoch": 20.97, "learning_rate": 0.0005, "loss": 1.0222, "step": 267400 }, { "epoch": 20.98, "learning_rate": 0.0005, "loss": 1.0314, "step": 267500 }, { "epoch": 20.98, "learning_rate": 0.0005, "loss": 1.0544, "step": 267600 }, { "epoch": 20.99, "learning_rate": 0.0005, "loss": 1.0301, "step": 267700 }, { "epoch": 21.0, "learning_rate": 0.0005, "loss": 1.0324, "step": 267800 }, { "epoch": 21.01, "learning_rate": 0.0005, "loss": 0.9279, "step": 267900 }, { "epoch": 21.02, "learning_rate": 0.0005, "loss": 0.9454, "step": 268000 }, { "epoch": 21.02, "learning_rate": 0.0005, "loss": 0.9487, "step": 268100 }, { "epoch": 21.03, "learning_rate": 0.0005, "loss": 0.9441, "step": 268200 }, { "epoch": 21.04, "learning_rate": 0.0005, "loss": 0.9371, "step": 268300 }, { "epoch": 21.05, "learning_rate": 0.0005, "loss": 0.9406, "step": 268400 }, { "epoch": 21.06, "learning_rate": 0.0005, "loss": 0.9463, "step": 268500 }, { "epoch": 21.06, "learning_rate": 0.0005, "loss": 0.9527, "step": 268600 }, { "epoch": 21.07, "learning_rate": 0.0005, "loss": 0.9422, "step": 268700 }, { "epoch": 21.08, "learning_rate": 0.0005, "loss": 0.9628, "step": 268800 }, { "epoch": 21.09, "learning_rate": 0.0005, "loss": 0.9451, "step": 268900 }, { "epoch": 21.09, "learning_rate": 0.0005, "loss": 0.9577, "step": 269000 }, { "epoch": 21.1, "learning_rate": 0.0005, "loss": 0.9565, "step": 269100 }, { "epoch": 21.11, "learning_rate": 0.0005, "loss": 0.9538, "step": 269200 }, { "epoch": 21.12, "learning_rate": 0.0005, "loss": 0.9585, "step": 269300 }, { "epoch": 21.13, "learning_rate": 0.0005, "loss": 0.9591, "step": 269400 }, { "epoch": 21.13, "learning_rate": 0.0005, "loss": 0.9521, "step": 269500 }, { "epoch": 21.14, "learning_rate": 0.0005, "loss": 0.9673, "step": 269600 }, { "epoch": 21.15, "learning_rate": 0.0005, "loss": 0.9683, "step": 269700 }, { "epoch": 21.16, "learning_rate": 0.0005, "loss": 0.9678, "step": 269800 }, { "epoch": 21.17, "learning_rate": 0.0005, "loss": 0.9586, "step": 269900 }, { "epoch": 21.17, "learning_rate": 0.0005, "loss": 0.9795, "step": 270000 }, { "epoch": 21.17, "eval_gen_len": 18.693564050498807, "eval_loss": 2.467229127883911, "eval_rouge1": 35.4965, "eval_rouge2": 14.6108, "eval_rougeL": 29.35, "eval_rougeLsum": 29.3506, "eval_runtime": 343.7678, "eval_samples_per_second": 32.95, "eval_steps_per_second": 2.06, "step": 270000 }, { "epoch": 21.18, "learning_rate": 0.0005, "loss": 0.9695, "step": 270100 }, { "epoch": 21.19, "learning_rate": 0.0005, "loss": 0.9805, "step": 270200 }, { "epoch": 21.2, "learning_rate": 0.0005, "loss": 0.9727, "step": 270300 }, { "epoch": 21.2, "learning_rate": 0.0005, "loss": 0.9616, "step": 270400 }, { "epoch": 21.21, "learning_rate": 0.0005, "loss": 0.9516, "step": 270500 }, { "epoch": 21.22, "learning_rate": 0.0005, "loss": 0.9587, "step": 270600 }, { "epoch": 21.23, "learning_rate": 0.0005, "loss": 0.9734, "step": 270700 }, { "epoch": 21.24, "learning_rate": 0.0005, "loss": 0.986, "step": 270800 }, { "epoch": 21.24, "learning_rate": 0.0005, "loss": 0.9704, "step": 270900 }, { "epoch": 21.25, "learning_rate": 0.0005, "loss": 0.9682, "step": 271000 }, { "epoch": 21.26, "learning_rate": 0.0005, "loss": 0.9564, "step": 271100 }, { "epoch": 21.27, "learning_rate": 0.0005, "loss": 0.9675, "step": 271200 }, { "epoch": 21.28, "learning_rate": 0.0005, "loss": 0.9716, "step": 271300 }, { "epoch": 21.28, "learning_rate": 0.0005, "loss": 0.98, "step": 271400 }, { "epoch": 21.29, "learning_rate": 0.0005, "loss": 0.9725, "step": 271500 }, { "epoch": 21.3, "learning_rate": 0.0005, "loss": 0.9896, "step": 271600 }, { "epoch": 21.31, "learning_rate": 0.0005, "loss": 0.9696, "step": 271700 }, { "epoch": 21.31, "learning_rate": 0.0005, "loss": 0.9725, "step": 271800 }, { "epoch": 21.32, "learning_rate": 0.0005, "loss": 0.9607, "step": 271900 }, { "epoch": 21.33, "learning_rate": 0.0005, "loss": 0.9783, "step": 272000 }, { "epoch": 21.34, "learning_rate": 0.0005, "loss": 0.9735, "step": 272100 }, { "epoch": 21.35, "learning_rate": 0.0005, "loss": 0.9913, "step": 272200 }, { "epoch": 21.35, "learning_rate": 0.0005, "loss": 0.97, "step": 272300 }, { "epoch": 21.36, "learning_rate": 0.0005, "loss": 0.9813, "step": 272400 }, { "epoch": 21.37, "learning_rate": 0.0005, "loss": 0.9614, "step": 272500 }, { "epoch": 21.38, "learning_rate": 0.0005, "loss": 0.9659, "step": 272600 }, { "epoch": 21.38, "learning_rate": 0.0005, "loss": 0.9846, "step": 272700 }, { "epoch": 21.39, "learning_rate": 0.0005, "loss": 0.9823, "step": 272800 }, { "epoch": 21.4, "learning_rate": 0.0005, "loss": 0.9821, "step": 272900 }, { "epoch": 21.41, "learning_rate": 0.0005, "loss": 0.9821, "step": 273000 }, { "epoch": 21.42, "learning_rate": 0.0005, "loss": 0.9652, "step": 273100 }, { "epoch": 21.42, "learning_rate": 0.0005, "loss": 0.998, "step": 273200 }, { "epoch": 21.43, "learning_rate": 0.0005, "loss": 0.9841, "step": 273300 }, { "epoch": 21.44, "learning_rate": 0.0005, "loss": 0.9719, "step": 273400 }, { "epoch": 21.45, "learning_rate": 0.0005, "loss": 0.9823, "step": 273500 }, { "epoch": 21.46, "learning_rate": 0.0005, "loss": 0.9653, "step": 273600 }, { "epoch": 21.46, "learning_rate": 0.0005, "loss": 0.9888, "step": 273700 }, { "epoch": 21.47, "learning_rate": 0.0005, "loss": 0.985, "step": 273800 }, { "epoch": 21.48, "learning_rate": 0.0005, "loss": 0.9825, "step": 273900 }, { "epoch": 21.49, "learning_rate": 0.0005, "loss": 0.9731, "step": 274000 }, { "epoch": 21.49, "learning_rate": 0.0005, "loss": 0.973, "step": 274100 }, { "epoch": 21.5, "learning_rate": 0.0005, "loss": 0.9733, "step": 274200 }, { "epoch": 21.51, "learning_rate": 0.0005, "loss": 0.9774, "step": 274300 }, { "epoch": 21.52, "learning_rate": 0.0005, "loss": 0.9793, "step": 274400 }, { "epoch": 21.53, "learning_rate": 0.0005, "loss": 0.9911, "step": 274500 }, { "epoch": 21.53, "learning_rate": 0.0005, "loss": 0.9896, "step": 274600 }, { "epoch": 21.54, "learning_rate": 0.0005, "loss": 0.9881, "step": 274700 }, { "epoch": 21.55, "learning_rate": 0.0005, "loss": 0.9874, "step": 274800 }, { "epoch": 21.56, "learning_rate": 0.0005, "loss": 0.9969, "step": 274900 }, { "epoch": 21.57, "learning_rate": 0.0005, "loss": 0.9889, "step": 275000 }, { "epoch": 21.57, "learning_rate": 0.0005, "loss": 0.9803, "step": 275100 }, { "epoch": 21.58, "learning_rate": 0.0005, "loss": 0.9862, "step": 275200 }, { "epoch": 21.59, "learning_rate": 0.0005, "loss": 1.0038, "step": 275300 }, { "epoch": 21.6, "learning_rate": 0.0005, "loss": 0.9891, "step": 275400 }, { "epoch": 21.6, "learning_rate": 0.0005, "loss": 0.9918, "step": 275500 }, { "epoch": 21.61, "learning_rate": 0.0005, "loss": 1.0091, "step": 275600 }, { "epoch": 21.62, "learning_rate": 0.0005, "loss": 0.9937, "step": 275700 }, { "epoch": 21.63, "learning_rate": 0.0005, "loss": 0.9942, "step": 275800 }, { "epoch": 21.64, "learning_rate": 0.0005, "loss": 0.9806, "step": 275900 }, { "epoch": 21.64, "learning_rate": 0.0005, "loss": 0.9961, "step": 276000 }, { "epoch": 21.65, "learning_rate": 0.0005, "loss": 0.9939, "step": 276100 }, { "epoch": 21.66, "learning_rate": 0.0005, "loss": 1.003, "step": 276200 }, { "epoch": 21.67, "learning_rate": 0.0005, "loss": 0.9841, "step": 276300 }, { "epoch": 21.68, "learning_rate": 0.0005, "loss": 0.9813, "step": 276400 }, { "epoch": 21.68, "learning_rate": 0.0005, "loss": 0.9821, "step": 276500 }, { "epoch": 21.69, "learning_rate": 0.0005, "loss": 0.9906, "step": 276600 }, { "epoch": 21.7, "learning_rate": 0.0005, "loss": 0.999, "step": 276700 }, { "epoch": 21.71, "learning_rate": 0.0005, "loss": 1.0202, "step": 276800 }, { "epoch": 21.71, "learning_rate": 0.0005, "loss": 0.9839, "step": 276900 }, { "epoch": 21.72, "learning_rate": 0.0005, "loss": 0.9964, "step": 277000 }, { "epoch": 21.73, "learning_rate": 0.0005, "loss": 1.0038, "step": 277100 }, { "epoch": 21.74, "learning_rate": 0.0005, "loss": 1.0373, "step": 277200 }, { "epoch": 21.75, "learning_rate": 0.0005, "loss": 0.9909, "step": 277300 }, { "epoch": 21.75, "learning_rate": 0.0005, "loss": 1.002, "step": 277400 }, { "epoch": 21.76, "learning_rate": 0.0005, "loss": 0.9934, "step": 277500 }, { "epoch": 21.77, "learning_rate": 0.0005, "loss": 0.9949, "step": 277600 }, { "epoch": 21.78, "learning_rate": 0.0005, "loss": 0.9884, "step": 277700 }, { "epoch": 21.78, "learning_rate": 0.0005, "loss": 1.0014, "step": 277800 }, { "epoch": 21.79, "learning_rate": 0.0005, "loss": 1.005, "step": 277900 }, { "epoch": 21.8, "learning_rate": 0.0005, "loss": 1.0149, "step": 278000 }, { "epoch": 21.81, "learning_rate": 0.0005, "loss": 0.9846, "step": 278100 }, { "epoch": 21.82, "learning_rate": 0.0005, "loss": 1.0009, "step": 278200 }, { "epoch": 21.82, "learning_rate": 0.0005, "loss": 1.0093, "step": 278300 }, { "epoch": 21.83, "learning_rate": 0.0005, "loss": 0.9975, "step": 278400 }, { "epoch": 21.84, "learning_rate": 0.0005, "loss": 1.0008, "step": 278500 }, { "epoch": 21.85, "learning_rate": 0.0005, "loss": 0.9921, "step": 278600 }, { "epoch": 21.86, "learning_rate": 0.0005, "loss": 1.0073, "step": 278700 }, { "epoch": 21.86, "learning_rate": 0.0005, "loss": 0.9945, "step": 278800 }, { "epoch": 21.87, "learning_rate": 0.0005, "loss": 0.9944, "step": 278900 }, { "epoch": 21.88, "learning_rate": 0.0005, "loss": 1.0061, "step": 279000 }, { "epoch": 21.89, "learning_rate": 0.0005, "loss": 1.0033, "step": 279100 }, { "epoch": 21.89, "learning_rate": 0.0005, "loss": 1.0135, "step": 279200 }, { "epoch": 21.9, "learning_rate": 0.0005, "loss": 1.0141, "step": 279300 }, { "epoch": 21.91, "learning_rate": 0.0005, "loss": 0.9956, "step": 279400 }, { "epoch": 21.92, "learning_rate": 0.0005, "loss": 0.9999, "step": 279500 }, { "epoch": 21.93, "learning_rate": 0.0005, "loss": 1.0112, "step": 279600 }, { "epoch": 21.93, "learning_rate": 0.0005, "loss": 0.987, "step": 279700 }, { "epoch": 21.94, "learning_rate": 0.0005, "loss": 0.9968, "step": 279800 }, { "epoch": 21.95, "learning_rate": 0.0005, "loss": 1.0061, "step": 279900 }, { "epoch": 21.96, "learning_rate": 0.0005, "loss": 1.0227, "step": 280000 }, { "epoch": 21.96, "eval_gen_len": 18.737794649951443, "eval_loss": 2.455573081970215, "eval_rouge1": 35.5589, "eval_rouge2": 14.6062, "eval_rougeL": 29.3551, "eval_rougeLsum": 29.3506, "eval_runtime": 342.0853, "eval_samples_per_second": 33.112, "eval_steps_per_second": 2.07, "step": 280000 }, { "epoch": 21.97, "learning_rate": 0.0005, "loss": 1.0039, "step": 280100 }, { "epoch": 21.97, "learning_rate": 0.0005, "loss": 0.9851, "step": 280200 }, { "epoch": 21.98, "learning_rate": 0.0005, "loss": 1.0024, "step": 280300 }, { "epoch": 21.99, "learning_rate": 0.0005, "loss": 1.028, "step": 280400 }, { "epoch": 22.0, "learning_rate": 0.0005, "loss": 1.0062, "step": 280500 }, { "epoch": 22.0, "learning_rate": 0.0005, "loss": 0.9581, "step": 280600 }, { "epoch": 22.01, "learning_rate": 0.0005, "loss": 0.9343, "step": 280700 }, { "epoch": 22.02, "learning_rate": 0.0005, "loss": 0.9261, "step": 280800 }, { "epoch": 22.03, "learning_rate": 0.0005, "loss": 0.9291, "step": 280900 }, { "epoch": 22.04, "learning_rate": 0.0005, "loss": 0.9247, "step": 281000 }, { "epoch": 22.04, "learning_rate": 0.0005, "loss": 0.9282, "step": 281100 }, { "epoch": 22.05, "learning_rate": 0.0005, "loss": 0.9436, "step": 281200 }, { "epoch": 22.06, "learning_rate": 0.0005, "loss": 0.9216, "step": 281300 }, { "epoch": 22.07, "learning_rate": 0.0005, "loss": 0.9422, "step": 281400 }, { "epoch": 22.07, "learning_rate": 0.0005, "loss": 0.9206, "step": 281500 }, { "epoch": 22.08, "learning_rate": 0.0005, "loss": 0.9291, "step": 281600 }, { "epoch": 22.09, "learning_rate": 0.0005, "loss": 0.942, "step": 281700 }, { "epoch": 22.1, "learning_rate": 0.0005, "loss": 0.9529, "step": 281800 }, { "epoch": 22.11, "learning_rate": 0.0005, "loss": 0.9295, "step": 281900 }, { "epoch": 22.11, "learning_rate": 0.0005, "loss": 0.9375, "step": 282000 }, { "epoch": 22.12, "learning_rate": 0.0005, "loss": 0.9517, "step": 282100 }, { "epoch": 22.13, "learning_rate": 0.0005, "loss": 0.9386, "step": 282200 }, { "epoch": 22.14, "learning_rate": 0.0005, "loss": 0.9447, "step": 282300 }, { "epoch": 22.15, "learning_rate": 0.0005, "loss": 0.9393, "step": 282400 }, { "epoch": 22.15, "learning_rate": 0.0005, "loss": 0.9287, "step": 282500 }, { "epoch": 22.16, "learning_rate": 0.0005, "loss": 0.9471, "step": 282600 }, { "epoch": 22.17, "learning_rate": 0.0005, "loss": 0.9314, "step": 282700 }, { "epoch": 22.18, "learning_rate": 0.0005, "loss": 0.9519, "step": 282800 }, { "epoch": 22.18, "learning_rate": 0.0005, "loss": 0.9508, "step": 282900 }, { "epoch": 22.19, "learning_rate": 0.0005, "loss": 0.9471, "step": 283000 }, { "epoch": 22.2, "learning_rate": 0.0005, "loss": 0.9551, "step": 283100 }, { "epoch": 22.21, "learning_rate": 0.0005, "loss": 0.9452, "step": 283200 }, { "epoch": 22.22, "learning_rate": 0.0005, "loss": 0.939, "step": 283300 }, { "epoch": 22.22, "learning_rate": 0.0005, "loss": 0.9668, "step": 283400 }, { "epoch": 22.23, "learning_rate": 0.0005, "loss": 0.9455, "step": 283500 }, { "epoch": 22.24, "learning_rate": 0.0005, "loss": 0.9514, "step": 283600 }, { "epoch": 22.25, "learning_rate": 0.0005, "loss": 0.9538, "step": 283700 }, { "epoch": 22.26, "learning_rate": 0.0005, "loss": 0.957, "step": 283800 }, { "epoch": 22.26, "learning_rate": 0.0005, "loss": 0.9721, "step": 283900 }, { "epoch": 22.27, "learning_rate": 0.0005, "loss": 0.95, "step": 284000 }, { "epoch": 22.28, "learning_rate": 0.0005, "loss": 0.9582, "step": 284100 }, { "epoch": 22.29, "learning_rate": 0.0005, "loss": 0.9633, "step": 284200 }, { "epoch": 22.29, "learning_rate": 0.0005, "loss": 0.9668, "step": 284300 }, { "epoch": 22.3, "learning_rate": 0.0005, "loss": 0.9461, "step": 284400 }, { "epoch": 22.31, "learning_rate": 0.0005, "loss": 0.9372, "step": 284500 }, { "epoch": 22.32, "learning_rate": 0.0005, "loss": 0.9617, "step": 284600 }, { "epoch": 22.33, "learning_rate": 0.0005, "loss": 0.9605, "step": 284700 }, { "epoch": 22.33, "learning_rate": 0.0005, "loss": 0.969, "step": 284800 }, { "epoch": 22.34, "learning_rate": 0.0005, "loss": 0.968, "step": 284900 }, { "epoch": 22.35, "learning_rate": 0.0005, "loss": 0.9552, "step": 285000 }, { "epoch": 22.36, "learning_rate": 0.0005, "loss": 0.9512, "step": 285100 }, { "epoch": 22.37, "learning_rate": 0.0005, "loss": 0.9615, "step": 285200 }, { "epoch": 22.37, "learning_rate": 0.0005, "loss": 0.9674, "step": 285300 }, { "epoch": 22.38, "learning_rate": 0.0005, "loss": 0.967, "step": 285400 }, { "epoch": 22.39, "learning_rate": 0.0005, "loss": 0.9632, "step": 285500 }, { "epoch": 22.4, "learning_rate": 0.0005, "loss": 0.9451, "step": 285600 }, { "epoch": 22.4, "learning_rate": 0.0005, "loss": 0.9425, "step": 285700 }, { "epoch": 22.41, "learning_rate": 0.0005, "loss": 0.9785, "step": 285800 }, { "epoch": 22.42, "learning_rate": 0.0005, "loss": 0.9427, "step": 285900 }, { "epoch": 22.43, "learning_rate": 0.0005, "loss": 0.9843, "step": 286000 }, { "epoch": 22.44, "learning_rate": 0.0005, "loss": 0.9629, "step": 286100 }, { "epoch": 22.44, "learning_rate": 0.0005, "loss": 0.9794, "step": 286200 }, { "epoch": 22.45, "learning_rate": 0.0005, "loss": 0.9716, "step": 286300 }, { "epoch": 22.46, "learning_rate": 0.0005, "loss": 0.97, "step": 286400 }, { "epoch": 22.47, "learning_rate": 0.0005, "loss": 0.9727, "step": 286500 }, { "epoch": 22.47, "learning_rate": 0.0005, "loss": 0.9604, "step": 286600 }, { "epoch": 22.48, "learning_rate": 0.0005, "loss": 0.9535, "step": 286700 }, { "epoch": 22.49, "learning_rate": 0.0005, "loss": 0.985, "step": 286800 }, { "epoch": 22.5, "learning_rate": 0.0005, "loss": 0.975, "step": 286900 }, { "epoch": 22.51, "learning_rate": 0.0005, "loss": 0.9729, "step": 287000 }, { "epoch": 22.51, "learning_rate": 0.0005, "loss": 0.9601, "step": 287100 }, { "epoch": 22.52, "learning_rate": 0.0005, "loss": 0.9857, "step": 287200 }, { "epoch": 22.53, "learning_rate": 0.0005, "loss": 0.9483, "step": 287300 }, { "epoch": 22.54, "learning_rate": 0.0005, "loss": 0.9885, "step": 287400 }, { "epoch": 22.55, "learning_rate": 0.0005, "loss": 0.9737, "step": 287500 }, { "epoch": 22.55, "learning_rate": 0.0005, "loss": 0.9759, "step": 287600 }, { "epoch": 22.56, "learning_rate": 0.0005, "loss": 0.9752, "step": 287700 }, { "epoch": 22.57, "learning_rate": 0.0005, "loss": 0.9863, "step": 287800 }, { "epoch": 22.58, "learning_rate": 0.0005, "loss": 0.9626, "step": 287900 }, { "epoch": 22.58, "learning_rate": 0.0005, "loss": 0.9761, "step": 288000 }, { "epoch": 22.59, "learning_rate": 0.0005, "loss": 0.9795, "step": 288100 }, { "epoch": 22.6, "learning_rate": 0.0005, "loss": 0.9566, "step": 288200 }, { "epoch": 22.61, "learning_rate": 0.0005, "loss": 0.9886, "step": 288300 }, { "epoch": 22.62, "learning_rate": 0.0005, "loss": 0.9908, "step": 288400 }, { "epoch": 22.62, "learning_rate": 0.0005, "loss": 0.9809, "step": 288500 }, { "epoch": 22.63, "learning_rate": 0.0005, "loss": 0.9773, "step": 288600 }, { "epoch": 22.64, "learning_rate": 0.0005, "loss": 0.9723, "step": 288700 }, { "epoch": 22.65, "learning_rate": 0.0005, "loss": 0.9736, "step": 288800 }, { "epoch": 22.66, "learning_rate": 0.0005, "loss": 0.9768, "step": 288900 }, { "epoch": 22.66, "learning_rate": 0.0005, "loss": 0.9986, "step": 289000 }, { "epoch": 22.67, "learning_rate": 0.0005, "loss": 0.9831, "step": 289100 }, { "epoch": 22.68, "learning_rate": 0.0005, "loss": 0.976, "step": 289200 }, { "epoch": 22.69, "learning_rate": 0.0005, "loss": 0.9756, "step": 289300 }, { "epoch": 22.69, "learning_rate": 0.0005, "loss": 0.9809, "step": 289400 }, { "epoch": 22.7, "learning_rate": 0.0005, "loss": 0.9535, "step": 289500 }, { "epoch": 22.71, "learning_rate": 0.0005, "loss": 1.0008, "step": 289600 }, { "epoch": 22.72, "learning_rate": 0.0005, "loss": 0.9864, "step": 289700 }, { "epoch": 22.73, "learning_rate": 0.0005, "loss": 0.9981, "step": 289800 }, { "epoch": 22.73, "learning_rate": 0.0005, "loss": 1.0064, "step": 289900 }, { "epoch": 22.74, "learning_rate": 0.0005, "loss": 0.9895, "step": 290000 }, { "epoch": 22.74, "eval_gen_len": 18.690474088461198, "eval_loss": 2.4708948135375977, "eval_rouge1": 35.6683, "eval_rouge2": 14.696, "eval_rougeL": 29.5244, "eval_rougeLsum": 29.516, "eval_runtime": 342.2396, "eval_samples_per_second": 33.097, "eval_steps_per_second": 2.069, "step": 290000 }, { "epoch": 22.75, "learning_rate": 0.0005, "loss": 0.9688, "step": 290100 }, { "epoch": 22.76, "learning_rate": 0.0005, "loss": 0.9683, "step": 290200 }, { "epoch": 22.77, "learning_rate": 0.0005, "loss": 0.9704, "step": 290300 }, { "epoch": 22.77, "learning_rate": 0.0005, "loss": 0.963, "step": 290400 }, { "epoch": 22.78, "learning_rate": 0.0005, "loss": 0.9841, "step": 290500 }, { "epoch": 22.79, "learning_rate": 0.0005, "loss": 0.9654, "step": 290600 }, { "epoch": 22.8, "learning_rate": 0.0005, "loss": 0.9909, "step": 290700 }, { "epoch": 22.8, "learning_rate": 0.0005, "loss": 1.0001, "step": 290800 }, { "epoch": 22.81, "learning_rate": 0.0005, "loss": 0.956, "step": 290900 }, { "epoch": 22.82, "learning_rate": 0.0005, "loss": 0.9809, "step": 291000 }, { "epoch": 22.83, "learning_rate": 0.0005, "loss": 0.9805, "step": 291100 }, { "epoch": 22.84, "learning_rate": 0.0005, "loss": 0.9826, "step": 291200 }, { "epoch": 22.84, "learning_rate": 0.0005, "loss": 0.9794, "step": 291300 }, { "epoch": 22.85, "learning_rate": 0.0005, "loss": 0.9889, "step": 291400 }, { "epoch": 22.86, "learning_rate": 0.0005, "loss": 0.9799, "step": 291500 }, { "epoch": 22.87, "learning_rate": 0.0005, "loss": 1.0019, "step": 291600 }, { "epoch": 22.87, "learning_rate": 0.0005, "loss": 0.9861, "step": 291700 }, { "epoch": 22.88, "learning_rate": 0.0005, "loss": 0.9767, "step": 291800 }, { "epoch": 22.89, "learning_rate": 0.0005, "loss": 0.9774, "step": 291900 }, { "epoch": 22.9, "learning_rate": 0.0005, "loss": 0.9976, "step": 292000 }, { "epoch": 22.91, "learning_rate": 0.0005, "loss": 0.9868, "step": 292100 }, { "epoch": 22.91, "learning_rate": 0.0005, "loss": 0.9814, "step": 292200 }, { "epoch": 22.92, "learning_rate": 0.0005, "loss": 1.0007, "step": 292300 }, { "epoch": 22.93, "learning_rate": 0.0005, "loss": 0.986, "step": 292400 }, { "epoch": 22.94, "learning_rate": 0.0005, "loss": 1.0014, "step": 292500 }, { "epoch": 22.95, "learning_rate": 0.0005, "loss": 0.9739, "step": 292600 }, { "epoch": 22.95, "learning_rate": 0.0005, "loss": 0.9743, "step": 292700 }, { "epoch": 22.96, "learning_rate": 0.0005, "loss": 0.9843, "step": 292800 }, { "epoch": 22.97, "learning_rate": 0.0005, "loss": 0.9942, "step": 292900 }, { "epoch": 22.98, "learning_rate": 0.0005, "loss": 1.0039, "step": 293000 }, { "epoch": 22.98, "learning_rate": 0.0005, "loss": 1.0217, "step": 293100 }, { "epoch": 22.99, "learning_rate": 0.0005, "loss": 1.0022, "step": 293200 }, { "epoch": 23.0, "learning_rate": 0.0005, "loss": 0.9824, "step": 293300 }, { "epoch": 23.01, "learning_rate": 0.0005, "loss": 0.9044, "step": 293400 }, { "epoch": 23.02, "learning_rate": 0.0005, "loss": 0.9098, "step": 293500 }, { "epoch": 23.02, "learning_rate": 0.0005, "loss": 0.9264, "step": 293600 }, { "epoch": 23.03, "learning_rate": 0.0005, "loss": 0.9192, "step": 293700 }, { "epoch": 23.04, "learning_rate": 0.0005, "loss": 0.9024, "step": 293800 }, { "epoch": 23.05, "learning_rate": 0.0005, "loss": 0.9148, "step": 293900 }, { "epoch": 23.06, "learning_rate": 0.0005, "loss": 0.9014, "step": 294000 }, { "epoch": 23.06, "learning_rate": 0.0005, "loss": 0.9089, "step": 294100 }, { "epoch": 23.07, "learning_rate": 0.0005, "loss": 0.9291, "step": 294200 }, { "epoch": 23.08, "learning_rate": 0.0005, "loss": 0.9165, "step": 294300 }, { "epoch": 23.09, "learning_rate": 0.0005, "loss": 0.9362, "step": 294400 }, { "epoch": 23.09, "learning_rate": 0.0005, "loss": 0.9204, "step": 294500 }, { "epoch": 23.1, "learning_rate": 0.0005, "loss": 0.9362, "step": 294600 }, { "epoch": 23.11, "learning_rate": 0.0005, "loss": 0.9093, "step": 294700 }, { "epoch": 23.12, "learning_rate": 0.0005, "loss": 0.9178, "step": 294800 }, { "epoch": 23.13, "learning_rate": 0.0005, "loss": 0.9327, "step": 294900 }, { "epoch": 23.13, "learning_rate": 0.0005, "loss": 0.931, "step": 295000 }, { "epoch": 23.14, "learning_rate": 0.0005, "loss": 0.934, "step": 295100 }, { "epoch": 23.15, "learning_rate": 0.0005, "loss": 0.9421, "step": 295200 }, { "epoch": 23.16, "learning_rate": 0.0005, "loss": 0.9302, "step": 295300 }, { "epoch": 23.16, "learning_rate": 0.0005, "loss": 0.9291, "step": 295400 }, { "epoch": 23.17, "learning_rate": 0.0005, "loss": 0.9308, "step": 295500 }, { "epoch": 23.18, "learning_rate": 0.0005, "loss": 0.9408, "step": 295600 }, { "epoch": 23.19, "learning_rate": 0.0005, "loss": 0.9283, "step": 295700 }, { "epoch": 23.2, "learning_rate": 0.0005, "loss": 0.9234, "step": 295800 }, { "epoch": 23.2, "learning_rate": 0.0005, "loss": 0.9234, "step": 295900 }, { "epoch": 23.21, "learning_rate": 0.0005, "loss": 0.9442, "step": 296000 }, { "epoch": 23.22, "learning_rate": 0.0005, "loss": 0.9326, "step": 296100 }, { "epoch": 23.23, "learning_rate": 0.0005, "loss": 0.9359, "step": 296200 }, { "epoch": 23.24, "learning_rate": 0.0005, "loss": 0.9191, "step": 296300 }, { "epoch": 23.24, "learning_rate": 0.0005, "loss": 0.9371, "step": 296400 }, { "epoch": 23.25, "learning_rate": 0.0005, "loss": 0.9316, "step": 296500 }, { "epoch": 23.26, "learning_rate": 0.0005, "loss": 0.9369, "step": 296600 }, { "epoch": 23.27, "learning_rate": 0.0005, "loss": 0.9274, "step": 296700 }, { "epoch": 23.27, "learning_rate": 0.0005, "loss": 0.9196, "step": 296800 }, { "epoch": 23.28, "learning_rate": 0.0005, "loss": 0.9472, "step": 296900 }, { "epoch": 23.29, "learning_rate": 0.0005, "loss": 0.9214, "step": 297000 }, { "epoch": 23.3, "learning_rate": 0.0005, "loss": 0.9558, "step": 297100 }, { "epoch": 23.31, "learning_rate": 0.0005, "loss": 0.96, "step": 297200 }, { "epoch": 23.31, "learning_rate": 0.0005, "loss": 0.9244, "step": 297300 }, { "epoch": 23.32, "learning_rate": 0.0005, "loss": 0.9418, "step": 297400 }, { "epoch": 23.33, "learning_rate": 0.0005, "loss": 0.9385, "step": 297500 }, { "epoch": 23.34, "learning_rate": 0.0005, "loss": 0.9549, "step": 297600 }, { "epoch": 23.35, "learning_rate": 0.0005, "loss": 0.9449, "step": 297700 }, { "epoch": 23.35, "learning_rate": 0.0005, "loss": 0.965, "step": 297800 }, { "epoch": 23.36, "learning_rate": 0.0005, "loss": 0.9288, "step": 297900 }, { "epoch": 23.37, "learning_rate": 0.0005, "loss": 0.9475, "step": 298000 }, { "epoch": 23.38, "learning_rate": 0.0005, "loss": 0.9444, "step": 298100 }, { "epoch": 23.38, "learning_rate": 0.0005, "loss": 0.95, "step": 298200 }, { "epoch": 23.39, "learning_rate": 0.0005, "loss": 0.9717, "step": 298300 }, { "epoch": 23.4, "learning_rate": 0.0005, "loss": 0.9358, "step": 298400 }, { "epoch": 23.41, "learning_rate": 0.0005, "loss": 0.9449, "step": 298500 }, { "epoch": 23.42, "learning_rate": 0.0005, "loss": 0.9601, "step": 298600 }, { "epoch": 23.42, "learning_rate": 0.0005, "loss": 0.954, "step": 298700 }, { "epoch": 23.43, "learning_rate": 0.0005, "loss": 0.9564, "step": 298800 }, { "epoch": 23.44, "learning_rate": 0.0005, "loss": 0.9391, "step": 298900 }, { "epoch": 23.45, "learning_rate": 0.0005, "loss": 0.9517, "step": 299000 }, { "epoch": 23.46, "learning_rate": 0.0005, "loss": 0.9769, "step": 299100 }, { "epoch": 23.46, "learning_rate": 0.0005, "loss": 0.9563, "step": 299200 }, { "epoch": 23.47, "learning_rate": 0.0005, "loss": 0.9446, "step": 299300 }, { "epoch": 23.48, "learning_rate": 0.0005, "loss": 0.9597, "step": 299400 }, { "epoch": 23.49, "learning_rate": 0.0005, "loss": 0.9432, "step": 299500 }, { "epoch": 23.49, "learning_rate": 0.0005, "loss": 0.9546, "step": 299600 }, { "epoch": 23.5, "learning_rate": 0.0005, "loss": 0.9512, "step": 299700 }, { "epoch": 23.51, "learning_rate": 0.0005, "loss": 0.9726, "step": 299800 }, { "epoch": 23.52, "learning_rate": 0.0005, "loss": 0.9428, "step": 299900 }, { "epoch": 23.53, "learning_rate": 0.0005, "loss": 0.9506, "step": 300000 }, { "epoch": 23.53, "eval_gen_len": 18.740001765692593, "eval_loss": 2.510540246963501, "eval_rouge1": 35.6195, "eval_rouge2": 14.6871, "eval_rougeL": 29.4076, "eval_rougeLsum": 29.4076, "eval_runtime": 342.9376, "eval_samples_per_second": 33.029, "eval_steps_per_second": 2.065, "step": 300000 }, { "epoch": 23.53, "learning_rate": 0.0005, "loss": 0.9789, "step": 300100 }, { "epoch": 23.54, "learning_rate": 0.0005, "loss": 0.9619, "step": 300200 }, { "epoch": 23.55, "learning_rate": 0.0005, "loss": 0.961, "step": 300300 }, { "epoch": 23.56, "learning_rate": 0.0005, "loss": 0.9542, "step": 300400 }, { "epoch": 23.56, "learning_rate": 0.0005, "loss": 0.9636, "step": 300500 }, { "epoch": 23.57, "learning_rate": 0.0005, "loss": 0.9378, "step": 300600 }, { "epoch": 23.58, "learning_rate": 0.0005, "loss": 0.9712, "step": 300700 }, { "epoch": 23.59, "learning_rate": 0.0005, "loss": 0.9617, "step": 300800 }, { "epoch": 23.6, "learning_rate": 0.0005, "loss": 0.9551, "step": 300900 }, { "epoch": 23.6, "learning_rate": 0.0005, "loss": 0.9565, "step": 301000 }, { "epoch": 23.61, "learning_rate": 0.0005, "loss": 0.9667, "step": 301100 }, { "epoch": 23.62, "learning_rate": 0.0005, "loss": 0.9635, "step": 301200 }, { "epoch": 23.63, "learning_rate": 0.0005, "loss": 0.9572, "step": 301300 }, { "epoch": 23.64, "learning_rate": 0.0005, "loss": 0.949, "step": 301400 }, { "epoch": 23.64, "learning_rate": 0.0005, "loss": 0.964, "step": 301500 }, { "epoch": 23.65, "learning_rate": 0.0005, "loss": 0.9588, "step": 301600 }, { "epoch": 23.66, "learning_rate": 0.0005, "loss": 0.9772, "step": 301700 }, { "epoch": 23.67, "learning_rate": 0.0005, "loss": 0.9718, "step": 301800 }, { "epoch": 23.67, "learning_rate": 0.0005, "loss": 0.9754, "step": 301900 }, { "epoch": 23.68, "learning_rate": 0.0005, "loss": 0.9525, "step": 302000 }, { "epoch": 23.69, "learning_rate": 0.0005, "loss": 0.9535, "step": 302100 }, { "epoch": 23.7, "learning_rate": 0.0005, "loss": 0.9448, "step": 302200 }, { "epoch": 23.71, "learning_rate": 0.0005, "loss": 0.9672, "step": 302300 }, { "epoch": 23.71, "learning_rate": 0.0005, "loss": 0.9591, "step": 302400 }, { "epoch": 23.72, "learning_rate": 0.0005, "loss": 0.9598, "step": 302500 }, { "epoch": 23.73, "learning_rate": 0.0005, "loss": 0.9723, "step": 302600 }, { "epoch": 23.74, "learning_rate": 0.0005, "loss": 0.9555, "step": 302700 }, { "epoch": 23.75, "learning_rate": 0.0005, "loss": 0.9657, "step": 302800 }, { "epoch": 23.75, "learning_rate": 0.0005, "loss": 0.9562, "step": 302900 }, { "epoch": 23.76, "learning_rate": 0.0005, "loss": 0.9602, "step": 303000 }, { "epoch": 23.77, "learning_rate": 0.0005, "loss": 0.9468, "step": 303100 }, { "epoch": 23.78, "learning_rate": 0.0005, "loss": 0.9736, "step": 303200 }, { "epoch": 23.78, "learning_rate": 0.0005, "loss": 0.967, "step": 303300 }, { "epoch": 23.79, "learning_rate": 0.0005, "loss": 0.9763, "step": 303400 }, { "epoch": 23.8, "learning_rate": 0.0005, "loss": 0.969, "step": 303500 }, { "epoch": 23.81, "learning_rate": 0.0005, "loss": 0.9665, "step": 303600 }, { "epoch": 23.82, "learning_rate": 0.0005, "loss": 0.9804, "step": 303700 }, { "epoch": 23.82, "learning_rate": 0.0005, "loss": 0.9462, "step": 303800 }, { "epoch": 23.83, "learning_rate": 0.0005, "loss": 0.9625, "step": 303900 }, { "epoch": 23.84, "learning_rate": 0.0005, "loss": 0.9743, "step": 304000 }, { "epoch": 23.85, "learning_rate": 0.0005, "loss": 0.9707, "step": 304100 }, { "epoch": 23.86, "learning_rate": 0.0005, "loss": 0.9713, "step": 304200 }, { "epoch": 23.86, "learning_rate": 0.0005, "loss": 0.9709, "step": 304300 }, { "epoch": 23.87, "learning_rate": 0.0005, "loss": 0.9839, "step": 304400 }, { "epoch": 23.88, "learning_rate": 0.0005, "loss": 0.983, "step": 304500 }, { "epoch": 23.89, "learning_rate": 0.0005, "loss": 0.9687, "step": 304600 }, { "epoch": 23.89, "learning_rate": 0.0005, "loss": 0.9677, "step": 304700 }, { "epoch": 23.9, "learning_rate": 0.0005, "loss": 0.9705, "step": 304800 }, { "epoch": 23.91, "learning_rate": 0.0005, "loss": 0.9711, "step": 304900 }, { "epoch": 23.92, "learning_rate": 0.0005, "loss": 0.9754, "step": 305000 }, { "epoch": 23.93, "learning_rate": 0.0005, "loss": 0.9583, "step": 305100 }, { "epoch": 23.93, "learning_rate": 0.0005, "loss": 0.9885, "step": 305200 }, { "epoch": 23.94, "learning_rate": 0.0005, "loss": 0.9866, "step": 305300 }, { "epoch": 23.95, "learning_rate": 0.0005, "loss": 0.976, "step": 305400 }, { "epoch": 23.96, "learning_rate": 0.0005, "loss": 0.9722, "step": 305500 }, { "epoch": 23.96, "learning_rate": 0.0005, "loss": 0.9872, "step": 305600 }, { "epoch": 23.97, "learning_rate": 0.0005, "loss": 0.9762, "step": 305700 }, { "epoch": 23.98, "learning_rate": 0.0005, "loss": 0.9781, "step": 305800 }, { "epoch": 23.99, "learning_rate": 0.0005, "loss": 0.9819, "step": 305900 }, { "epoch": 24.0, "learning_rate": 0.0005, "loss": 0.9992, "step": 306000 }, { "epoch": 24.0, "learning_rate": 0.0005, "loss": 0.9518, "step": 306100 }, { "epoch": 24.01, "learning_rate": 0.0005, "loss": 0.8936, "step": 306200 }, { "epoch": 24.02, "learning_rate": 0.0005, "loss": 0.8861, "step": 306300 }, { "epoch": 24.03, "learning_rate": 0.0005, "loss": 0.8966, "step": 306400 }, { "epoch": 24.04, "learning_rate": 0.0005, "loss": 0.8952, "step": 306500 }, { "epoch": 24.04, "learning_rate": 0.0005, "loss": 0.9118, "step": 306600 }, { "epoch": 24.05, "learning_rate": 0.0005, "loss": 0.9187, "step": 306700 }, { "epoch": 24.06, "learning_rate": 0.0005, "loss": 0.92, "step": 306800 }, { "epoch": 24.07, "learning_rate": 0.0005, "loss": 0.889, "step": 306900 }, { "epoch": 24.07, "learning_rate": 0.0005, "loss": 0.9044, "step": 307000 }, { "epoch": 24.08, "learning_rate": 0.0005, "loss": 0.8956, "step": 307100 }, { "epoch": 24.09, "learning_rate": 0.0005, "loss": 0.8973, "step": 307200 }, { "epoch": 24.1, "learning_rate": 0.0005, "loss": 0.9062, "step": 307300 }, { "epoch": 24.11, "learning_rate": 0.0005, "loss": 0.9122, "step": 307400 }, { "epoch": 24.11, "learning_rate": 0.0005, "loss": 0.8848, "step": 307500 }, { "epoch": 24.12, "learning_rate": 0.0005, "loss": 0.917, "step": 307600 }, { "epoch": 24.13, "learning_rate": 0.0005, "loss": 0.9064, "step": 307700 }, { "epoch": 24.14, "learning_rate": 0.0005, "loss": 0.9, "step": 307800 }, { "epoch": 24.15, "learning_rate": 0.0005, "loss": 0.9087, "step": 307900 }, { "epoch": 24.15, "learning_rate": 0.0005, "loss": 0.9075, "step": 308000 }, { "epoch": 24.16, "learning_rate": 0.0005, "loss": 0.9061, "step": 308100 }, { "epoch": 24.17, "learning_rate": 0.0005, "loss": 0.9002, "step": 308200 }, { "epoch": 24.18, "learning_rate": 0.0005, "loss": 0.9232, "step": 308300 }, { "epoch": 24.18, "learning_rate": 0.0005, "loss": 0.9232, "step": 308400 }, { "epoch": 24.19, "learning_rate": 0.0005, "loss": 0.9097, "step": 308500 }, { "epoch": 24.2, "learning_rate": 0.0005, "loss": 0.923, "step": 308600 }, { "epoch": 24.21, "learning_rate": 0.0005, "loss": 0.9297, "step": 308700 }, { "epoch": 24.22, "learning_rate": 0.0005, "loss": 0.9235, "step": 308800 }, { "epoch": 24.22, "learning_rate": 0.0005, "loss": 0.9112, "step": 308900 }, { "epoch": 24.23, "learning_rate": 0.0005, "loss": 0.9345, "step": 309000 }, { "epoch": 24.24, "learning_rate": 0.0005, "loss": 0.9348, "step": 309100 }, { "epoch": 24.25, "learning_rate": 0.0005, "loss": 0.921, "step": 309200 }, { "epoch": 24.26, "learning_rate": 0.0005, "loss": 0.9245, "step": 309300 }, { "epoch": 24.26, "learning_rate": 0.0005, "loss": 0.9134, "step": 309400 }, { "epoch": 24.27, "learning_rate": 0.0005, "loss": 0.9355, "step": 309500 }, { "epoch": 24.28, "learning_rate": 0.0005, "loss": 0.9352, "step": 309600 }, { "epoch": 24.29, "learning_rate": 0.0005, "loss": 0.9359, "step": 309700 }, { "epoch": 24.29, "learning_rate": 0.0005, "loss": 0.935, "step": 309800 }, { "epoch": 24.3, "learning_rate": 0.0005, "loss": 0.9262, "step": 309900 }, { "epoch": 24.31, "learning_rate": 0.0005, "loss": 0.9499, "step": 310000 }, { "epoch": 24.31, "eval_gen_len": 18.726317648097467, "eval_loss": 2.5155093669891357, "eval_rouge1": 35.6119, "eval_rouge2": 14.7476, "eval_rougeL": 29.3748, "eval_rougeLsum": 29.3711, "eval_runtime": 343.5455, "eval_samples_per_second": 32.971, "eval_steps_per_second": 2.061, "step": 310000 }, { "epoch": 24.32, "learning_rate": 0.0005, "loss": 0.9157, "step": 310100 }, { "epoch": 24.33, "learning_rate": 0.0005, "loss": 0.9398, "step": 310200 }, { "epoch": 24.33, "learning_rate": 0.0005, "loss": 0.9503, "step": 310300 }, { "epoch": 24.34, "learning_rate": 0.0005, "loss": 0.9284, "step": 310400 }, { "epoch": 24.35, "learning_rate": 0.0005, "loss": 0.9337, "step": 310500 }, { "epoch": 24.36, "learning_rate": 0.0005, "loss": 0.9406, "step": 310600 }, { "epoch": 24.36, "learning_rate": 0.0005, "loss": 0.9304, "step": 310700 }, { "epoch": 24.37, "learning_rate": 0.0005, "loss": 0.9302, "step": 310800 }, { "epoch": 24.38, "learning_rate": 0.0005, "loss": 0.9162, "step": 310900 }, { "epoch": 24.39, "learning_rate": 0.0005, "loss": 0.9313, "step": 311000 }, { "epoch": 24.4, "learning_rate": 0.0005, "loss": 0.9174, "step": 311100 }, { "epoch": 24.4, "learning_rate": 0.0005, "loss": 0.9431, "step": 311200 }, { "epoch": 24.41, "learning_rate": 0.0005, "loss": 0.93, "step": 311300 }, { "epoch": 24.42, "learning_rate": 0.0005, "loss": 0.943, "step": 311400 }, { "epoch": 24.43, "learning_rate": 0.0005, "loss": 0.9217, "step": 311500 }, { "epoch": 24.44, "learning_rate": 0.0005, "loss": 0.9348, "step": 311600 }, { "epoch": 24.44, "learning_rate": 0.0005, "loss": 0.9392, "step": 311700 }, { "epoch": 24.45, "learning_rate": 0.0005, "loss": 0.9436, "step": 311800 }, { "epoch": 24.46, "learning_rate": 0.0005, "loss": 0.9374, "step": 311900 }, { "epoch": 24.47, "learning_rate": 0.0005, "loss": 0.9466, "step": 312000 }, { "epoch": 24.47, "learning_rate": 0.0005, "loss": 0.95, "step": 312100 }, { "epoch": 24.48, "learning_rate": 0.0005, "loss": 0.9483, "step": 312200 }, { "epoch": 24.49, "learning_rate": 0.0005, "loss": 0.9408, "step": 312300 }, { "epoch": 24.5, "learning_rate": 0.0005, "loss": 0.9281, "step": 312400 }, { "epoch": 24.51, "learning_rate": 0.0005, "loss": 0.94, "step": 312500 }, { "epoch": 24.51, "learning_rate": 0.0005, "loss": 0.9492, "step": 312600 }, { "epoch": 24.52, "learning_rate": 0.0005, "loss": 0.9343, "step": 312700 }, { "epoch": 24.53, "learning_rate": 0.0005, "loss": 0.9294, "step": 312800 }, { "epoch": 24.54, "learning_rate": 0.0005, "loss": 0.9483, "step": 312900 }, { "epoch": 24.55, "learning_rate": 0.0005, "loss": 0.9525, "step": 313000 }, { "epoch": 24.55, "learning_rate": 0.0005, "loss": 0.9486, "step": 313100 }, { "epoch": 24.56, "learning_rate": 0.0005, "loss": 0.9371, "step": 313200 }, { "epoch": 24.57, "learning_rate": 0.0005, "loss": 0.9395, "step": 313300 }, { "epoch": 24.58, "learning_rate": 0.0005, "loss": 0.9323, "step": 313400 }, { "epoch": 24.58, "learning_rate": 0.0005, "loss": 0.9456, "step": 313500 }, { "epoch": 24.59, "learning_rate": 0.0005, "loss": 0.9501, "step": 313600 }, { "epoch": 24.6, "learning_rate": 0.0005, "loss": 0.948, "step": 313700 }, { "epoch": 24.61, "learning_rate": 0.0005, "loss": 0.9544, "step": 313800 }, { "epoch": 24.62, "learning_rate": 0.0005, "loss": 0.9495, "step": 313900 }, { "epoch": 24.62, "learning_rate": 0.0005, "loss": 0.9236, "step": 314000 }, { "epoch": 24.63, "learning_rate": 0.0005, "loss": 0.9539, "step": 314100 }, { "epoch": 24.64, "learning_rate": 0.0005, "loss": 0.9539, "step": 314200 }, { "epoch": 24.65, "learning_rate": 0.0005, "loss": 0.9588, "step": 314300 }, { "epoch": 24.65, "learning_rate": 0.0005, "loss": 0.9363, "step": 314400 }, { "epoch": 24.66, "learning_rate": 0.0005, "loss": 0.973, "step": 314500 }, { "epoch": 24.67, "learning_rate": 0.0005, "loss": 0.9378, "step": 314600 }, { "epoch": 24.68, "learning_rate": 0.0005, "loss": 0.9579, "step": 314700 }, { "epoch": 24.69, "learning_rate": 0.0005, "loss": 0.9457, "step": 314800 }, { "epoch": 24.69, "learning_rate": 0.0005, "loss": 0.9462, "step": 314900 }, { "epoch": 24.7, "learning_rate": 0.0005, "loss": 0.9528, "step": 315000 }, { "epoch": 24.71, "learning_rate": 0.0005, "loss": 0.9593, "step": 315100 }, { "epoch": 24.72, "learning_rate": 0.0005, "loss": 0.9437, "step": 315200 }, { "epoch": 24.73, "learning_rate": 0.0005, "loss": 0.9418, "step": 315300 }, { "epoch": 24.73, "learning_rate": 0.0005, "loss": 0.9584, "step": 315400 }, { "epoch": 24.74, "learning_rate": 0.0005, "loss": 0.9642, "step": 315500 }, { "epoch": 24.75, "learning_rate": 0.0005, "loss": 0.9411, "step": 315600 }, { "epoch": 24.76, "learning_rate": 0.0005, "loss": 0.9644, "step": 315700 }, { "epoch": 24.76, "learning_rate": 0.0005, "loss": 0.9629, "step": 315800 }, { "epoch": 24.77, "learning_rate": 0.0005, "loss": 0.9649, "step": 315900 }, { "epoch": 24.78, "learning_rate": 0.0005, "loss": 0.959, "step": 316000 }, { "epoch": 24.79, "learning_rate": 0.0005, "loss": 0.9554, "step": 316100 }, { "epoch": 24.8, "learning_rate": 0.0005, "loss": 0.9371, "step": 316200 }, { "epoch": 24.8, "learning_rate": 0.0005, "loss": 0.9448, "step": 316300 }, { "epoch": 24.81, "learning_rate": 0.0005, "loss": 0.9506, "step": 316400 }, { "epoch": 24.82, "learning_rate": 0.0005, "loss": 0.9453, "step": 316500 }, { "epoch": 24.83, "learning_rate": 0.0005, "loss": 0.9538, "step": 316600 }, { "epoch": 24.84, "learning_rate": 0.0005, "loss": 0.9692, "step": 316700 }, { "epoch": 24.84, "learning_rate": 0.0005, "loss": 0.9621, "step": 316800 }, { "epoch": 24.85, "learning_rate": 0.0005, "loss": 0.9688, "step": 316900 }, { "epoch": 24.86, "learning_rate": 0.0005, "loss": 0.9607, "step": 317000 }, { "epoch": 24.87, "learning_rate": 0.0005, "loss": 0.965, "step": 317100 }, { "epoch": 24.87, "learning_rate": 0.0005, "loss": 0.9317, "step": 317200 }, { "epoch": 24.88, "learning_rate": 0.0005, "loss": 0.964, "step": 317300 }, { "epoch": 24.89, "learning_rate": 0.0005, "loss": 0.9664, "step": 317400 }, { "epoch": 24.9, "learning_rate": 0.0005, "loss": 0.9636, "step": 317500 }, { "epoch": 24.91, "learning_rate": 0.0005, "loss": 0.9622, "step": 317600 }, { "epoch": 24.91, "learning_rate": 0.0005, "loss": 0.9588, "step": 317700 }, { "epoch": 24.92, "learning_rate": 0.0005, "loss": 0.9647, "step": 317800 }, { "epoch": 24.93, "learning_rate": 0.0005, "loss": 0.9625, "step": 317900 }, { "epoch": 24.94, "learning_rate": 0.0005, "loss": 0.9411, "step": 318000 }, { "epoch": 24.95, "learning_rate": 0.0005, "loss": 0.9632, "step": 318100 }, { "epoch": 24.95, "learning_rate": 0.0005, "loss": 0.9674, "step": 318200 }, { "epoch": 24.96, "learning_rate": 0.0005, "loss": 0.9758, "step": 318300 }, { "epoch": 24.97, "learning_rate": 0.0005, "loss": 0.9692, "step": 318400 }, { "epoch": 24.98, "learning_rate": 0.0005, "loss": 0.99, "step": 318500 }, { "epoch": 24.98, "learning_rate": 0.0005, "loss": 0.9506, "step": 318600 }, { "epoch": 24.99, "learning_rate": 0.0005, "loss": 0.9591, "step": 318700 }, { "epoch": 25.0, "learning_rate": 0.0005, "loss": 0.9722, "step": 318800 }, { "epoch": 25.01, "learning_rate": 0.0005, "loss": 0.8809, "step": 318900 }, { "epoch": 25.02, "learning_rate": 0.0005, "loss": 0.9027, "step": 319000 }, { "epoch": 25.02, "learning_rate": 0.0005, "loss": 0.8877, "step": 319100 }, { "epoch": 25.03, "learning_rate": 0.0005, "loss": 0.8942, "step": 319200 }, { "epoch": 25.04, "learning_rate": 0.0005, "loss": 0.8973, "step": 319300 }, { "epoch": 25.05, "learning_rate": 0.0005, "loss": 0.8892, "step": 319400 }, { "epoch": 25.05, "learning_rate": 0.0005, "loss": 0.8824, "step": 319500 }, { "epoch": 25.06, "learning_rate": 0.0005, "loss": 0.907, "step": 319600 }, { "epoch": 25.07, "learning_rate": 0.0005, "loss": 0.8954, "step": 319700 }, { "epoch": 25.08, "learning_rate": 0.0005, "loss": 0.8879, "step": 319800 }, { "epoch": 25.09, "learning_rate": 0.0005, "loss": 0.8861, "step": 319900 }, { "epoch": 25.09, "learning_rate": 0.0005, "loss": 0.893, "step": 320000 }, { "epoch": 25.09, "eval_gen_len": 18.719696300874016, "eval_loss": 2.545152187347412, "eval_rouge1": 35.5957, "eval_rouge2": 14.6319, "eval_rougeL": 29.3381, "eval_rougeLsum": 29.3398, "eval_runtime": 342.8631, "eval_samples_per_second": 33.037, "eval_steps_per_second": 2.065, "step": 320000 }, { "epoch": 25.1, "learning_rate": 0.0005, "loss": 0.8925, "step": 320100 }, { "epoch": 25.11, "learning_rate": 0.0005, "loss": 0.904, "step": 320200 }, { "epoch": 25.12, "learning_rate": 0.0005, "loss": 0.8881, "step": 320300 }, { "epoch": 25.13, "learning_rate": 0.0005, "loss": 0.8957, "step": 320400 }, { "epoch": 25.13, "learning_rate": 0.0005, "loss": 0.8987, "step": 320500 }, { "epoch": 25.14, "learning_rate": 0.0005, "loss": 0.8994, "step": 320600 }, { "epoch": 25.15, "learning_rate": 0.0005, "loss": 0.9095, "step": 320700 }, { "epoch": 25.16, "learning_rate": 0.0005, "loss": 0.9188, "step": 320800 }, { "epoch": 25.16, "learning_rate": 0.0005, "loss": 0.9201, "step": 320900 }, { "epoch": 25.17, "learning_rate": 0.0005, "loss": 0.8953, "step": 321000 }, { "epoch": 25.18, "learning_rate": 0.0005, "loss": 0.8988, "step": 321100 }, { "epoch": 25.19, "learning_rate": 0.0005, "loss": 0.9004, "step": 321200 }, { "epoch": 25.2, "learning_rate": 0.0005, "loss": 0.8827, "step": 321300 }, { "epoch": 25.2, "learning_rate": 0.0005, "loss": 0.9016, "step": 321400 }, { "epoch": 25.21, "learning_rate": 0.0005, "loss": 0.8842, "step": 321500 }, { "epoch": 25.22, "learning_rate": 0.0005, "loss": 0.9222, "step": 321600 }, { "epoch": 25.23, "learning_rate": 0.0005, "loss": 0.9102, "step": 321700 }, { "epoch": 25.24, "learning_rate": 0.0005, "loss": 0.9217, "step": 321800 }, { "epoch": 25.24, "learning_rate": 0.0005, "loss": 0.9109, "step": 321900 }, { "epoch": 25.25, "learning_rate": 0.0005, "loss": 0.9193, "step": 322000 }, { "epoch": 25.26, "learning_rate": 0.0005, "loss": 0.9195, "step": 322100 }, { "epoch": 25.27, "learning_rate": 0.0005, "loss": 0.9034, "step": 322200 }, { "epoch": 25.27, "learning_rate": 0.0005, "loss": 0.9053, "step": 322300 }, { "epoch": 25.28, "learning_rate": 0.0005, "loss": 0.9163, "step": 322400 }, { "epoch": 25.29, "learning_rate": 0.0005, "loss": 0.9113, "step": 322500 }, { "epoch": 25.3, "learning_rate": 0.0005, "loss": 0.9067, "step": 322600 }, { "epoch": 25.31, "learning_rate": 0.0005, "loss": 0.9168, "step": 322700 }, { "epoch": 25.31, "learning_rate": 0.0005, "loss": 0.9271, "step": 322800 }, { "epoch": 25.32, "learning_rate": 0.0005, "loss": 0.9024, "step": 322900 }, { "epoch": 25.33, "learning_rate": 0.0005, "loss": 0.9126, "step": 323000 }, { "epoch": 25.34, "learning_rate": 0.0005, "loss": 0.9137, "step": 323100 }, { "epoch": 25.35, "learning_rate": 0.0005, "loss": 0.922, "step": 323200 }, { "epoch": 25.35, "learning_rate": 0.0005, "loss": 0.9191, "step": 323300 }, { "epoch": 25.36, "learning_rate": 0.0005, "loss": 0.9099, "step": 323400 }, { "epoch": 25.37, "learning_rate": 0.0005, "loss": 0.9366, "step": 323500 }, { "epoch": 25.38, "learning_rate": 0.0005, "loss": 0.9236, "step": 323600 }, { "epoch": 25.38, "learning_rate": 0.0005, "loss": 0.9264, "step": 323700 }, { "epoch": 25.39, "learning_rate": 0.0005, "loss": 0.9089, "step": 323800 }, { "epoch": 25.4, "learning_rate": 0.0005, "loss": 0.9362, "step": 323900 }, { "epoch": 25.41, "learning_rate": 0.0005, "loss": 0.9119, "step": 324000 }, { "epoch": 25.42, "learning_rate": 0.0005, "loss": 0.9206, "step": 324100 }, { "epoch": 25.42, "learning_rate": 0.0005, "loss": 0.9215, "step": 324200 }, { "epoch": 25.43, "learning_rate": 0.0005, "loss": 0.9125, "step": 324300 }, { "epoch": 25.44, "learning_rate": 0.0005, "loss": 0.9413, "step": 324400 }, { "epoch": 25.45, "learning_rate": 0.0005, "loss": 0.906, "step": 324500 }, { "epoch": 25.45, "learning_rate": 0.0005, "loss": 0.9407, "step": 324600 }, { "epoch": 25.46, "learning_rate": 0.0005, "loss": 0.9256, "step": 324700 }, { "epoch": 25.47, "learning_rate": 0.0005, "loss": 0.9124, "step": 324800 }, { "epoch": 25.48, "learning_rate": 0.0005, "loss": 0.9072, "step": 324900 }, { "epoch": 25.49, "learning_rate": 0.0005, "loss": 0.9083, "step": 325000 }, { "epoch": 25.49, "learning_rate": 0.0005, "loss": 0.9406, "step": 325100 }, { "epoch": 25.5, "learning_rate": 0.0005, "loss": 0.9231, "step": 325200 }, { "epoch": 25.51, "learning_rate": 0.0005, "loss": 0.9372, "step": 325300 }, { "epoch": 25.52, "learning_rate": 0.0005, "loss": 0.932, "step": 325400 }, { "epoch": 25.53, "learning_rate": 0.0005, "loss": 0.9148, "step": 325500 }, { "epoch": 25.53, "learning_rate": 0.0005, "loss": 0.9375, "step": 325600 }, { "epoch": 25.54, "learning_rate": 0.0005, "loss": 0.9312, "step": 325700 }, { "epoch": 25.55, "learning_rate": 0.0005, "loss": 0.9183, "step": 325800 }, { "epoch": 25.56, "learning_rate": 0.0005, "loss": 0.9026, "step": 325900 }, { "epoch": 25.56, "learning_rate": 0.0005, "loss": 0.9128, "step": 326000 }, { "epoch": 25.57, "learning_rate": 0.0005, "loss": 0.9456, "step": 326100 }, { "epoch": 25.58, "learning_rate": 0.0005, "loss": 0.9392, "step": 326200 }, { "epoch": 25.59, "learning_rate": 0.0005, "loss": 0.9533, "step": 326300 }, { "epoch": 25.6, "learning_rate": 0.0005, "loss": 0.9302, "step": 326400 }, { "epoch": 25.6, "learning_rate": 0.0005, "loss": 0.9388, "step": 326500 }, { "epoch": 25.61, "learning_rate": 0.0005, "loss": 0.9214, "step": 326600 }, { "epoch": 25.62, "learning_rate": 0.0005, "loss": 0.9304, "step": 326700 }, { "epoch": 25.63, "learning_rate": 0.0005, "loss": 0.9424, "step": 326800 }, { "epoch": 25.64, "learning_rate": 0.0005, "loss": 0.9408, "step": 326900 }, { "epoch": 25.64, "learning_rate": 0.0005, "loss": 0.9365, "step": 327000 }, { "epoch": 25.65, "learning_rate": 0.0005, "loss": 0.9276, "step": 327100 }, { "epoch": 25.66, "learning_rate": 0.0005, "loss": 0.9325, "step": 327200 }, { "epoch": 25.67, "learning_rate": 0.0005, "loss": 0.9497, "step": 327300 }, { "epoch": 25.67, "learning_rate": 0.0005, "loss": 0.9383, "step": 327400 }, { "epoch": 25.68, "learning_rate": 0.0005, "loss": 0.9388, "step": 327500 }, { "epoch": 25.69, "learning_rate": 0.0005, "loss": 0.9443, "step": 327600 }, { "epoch": 25.7, "learning_rate": 0.0005, "loss": 0.926, "step": 327700 }, { "epoch": 25.71, "learning_rate": 0.0005, "loss": 0.9586, "step": 327800 }, { "epoch": 25.71, "learning_rate": 0.0005, "loss": 0.9442, "step": 327900 }, { "epoch": 25.72, "learning_rate": 0.0005, "loss": 0.9529, "step": 328000 }, { "epoch": 25.73, "learning_rate": 0.0005, "loss": 0.9246, "step": 328100 }, { "epoch": 25.74, "learning_rate": 0.0005, "loss": 0.9344, "step": 328200 }, { "epoch": 25.74, "learning_rate": 0.0005, "loss": 0.9399, "step": 328300 }, { "epoch": 25.75, "learning_rate": 0.0005, "loss": 0.9506, "step": 328400 }, { "epoch": 25.76, "learning_rate": 0.0005, "loss": 0.9207, "step": 328500 }, { "epoch": 25.77, "learning_rate": 0.0005, "loss": 0.9567, "step": 328600 }, { "epoch": 25.78, "learning_rate": 0.0005, "loss": 0.9376, "step": 328700 }, { "epoch": 25.78, "learning_rate": 0.0005, "loss": 0.9391, "step": 328800 }, { "epoch": 25.79, "learning_rate": 0.0005, "loss": 0.9409, "step": 328900 }, { "epoch": 25.8, "learning_rate": 0.0005, "loss": 0.9482, "step": 329000 }, { "epoch": 25.81, "learning_rate": 0.0005, "loss": 0.9488, "step": 329100 }, { "epoch": 25.82, "learning_rate": 0.0005, "loss": 0.9563, "step": 329200 }, { "epoch": 25.82, "learning_rate": 0.0005, "loss": 0.939, "step": 329300 }, { "epoch": 25.83, "learning_rate": 0.0005, "loss": 0.9355, "step": 329400 }, { "epoch": 25.84, "learning_rate": 0.0005, "loss": 0.9403, "step": 329500 }, { "epoch": 25.85, "learning_rate": 0.0005, "loss": 0.9377, "step": 329600 }, { "epoch": 25.85, "learning_rate": 0.0005, "loss": 0.9481, "step": 329700 }, { "epoch": 25.86, "learning_rate": 0.0005, "loss": 0.9491, "step": 329800 }, { "epoch": 25.87, "learning_rate": 0.0005, "loss": 0.9412, "step": 329900 }, { "epoch": 25.88, "learning_rate": 0.0005, "loss": 0.9335, "step": 330000 }, { "epoch": 25.88, "eval_gen_len": 18.692769488831996, "eval_loss": 2.503575086593628, "eval_rouge1": 35.8974, "eval_rouge2": 14.958, "eval_rougeL": 29.7435, "eval_rougeLsum": 29.7515, "eval_runtime": 343.7355, "eval_samples_per_second": 32.953, "eval_steps_per_second": 2.06, "step": 330000 }, { "epoch": 25.89, "learning_rate": 0.0005, "loss": 0.9622, "step": 330100 }, { "epoch": 25.89, "learning_rate": 0.0005, "loss": 0.9253, "step": 330200 }, { "epoch": 25.9, "learning_rate": 0.0005, "loss": 0.9583, "step": 330300 }, { "epoch": 25.91, "learning_rate": 0.0005, "loss": 0.949, "step": 330400 }, { "epoch": 25.92, "learning_rate": 0.0005, "loss": 0.9608, "step": 330500 }, { "epoch": 25.93, "learning_rate": 0.0005, "loss": 0.9474, "step": 330600 }, { "epoch": 25.93, "learning_rate": 0.0005, "loss": 0.9446, "step": 330700 }, { "epoch": 25.94, "learning_rate": 0.0005, "loss": 0.9424, "step": 330800 }, { "epoch": 25.95, "learning_rate": 0.0005, "loss": 0.9293, "step": 330900 }, { "epoch": 25.96, "learning_rate": 0.0005, "loss": 0.9392, "step": 331000 }, { "epoch": 25.96, "learning_rate": 0.0005, "loss": 0.9519, "step": 331100 }, { "epoch": 25.97, "learning_rate": 0.0005, "loss": 0.9674, "step": 331200 }, { "epoch": 25.98, "learning_rate": 0.0005, "loss": 0.947, "step": 331300 }, { "epoch": 25.99, "learning_rate": 0.0005, "loss": 0.9739, "step": 331400 }, { "epoch": 26.0, "learning_rate": 0.0005, "loss": 0.9587, "step": 331500 }, { "epoch": 26.0, "learning_rate": 0.0005, "loss": 0.9201, "step": 331600 }, { "epoch": 26.01, "learning_rate": 0.0005, "loss": 0.8644, "step": 331700 }, { "epoch": 26.02, "learning_rate": 0.0005, "loss": 0.8581, "step": 331800 }, { "epoch": 26.03, "learning_rate": 0.0005, "loss": 0.8868, "step": 331900 }, { "epoch": 26.04, "learning_rate": 0.0005, "loss": 0.8868, "step": 332000 }, { "epoch": 26.04, "learning_rate": 0.0005, "loss": 0.8634, "step": 332100 }, { "epoch": 26.05, "learning_rate": 0.0005, "loss": 0.8793, "step": 332200 }, { "epoch": 26.06, "learning_rate": 0.0005, "loss": 0.8779, "step": 332300 }, { "epoch": 26.07, "learning_rate": 0.0005, "loss": 0.8793, "step": 332400 }, { "epoch": 26.07, "learning_rate": 0.0005, "loss": 0.8923, "step": 332500 }, { "epoch": 26.08, "learning_rate": 0.0005, "loss": 0.8774, "step": 332600 }, { "epoch": 26.09, "learning_rate": 0.0005, "loss": 0.8699, "step": 332700 }, { "epoch": 26.1, "learning_rate": 0.0005, "loss": 0.8708, "step": 332800 }, { "epoch": 26.11, "learning_rate": 0.0005, "loss": 0.8899, "step": 332900 }, { "epoch": 26.11, "learning_rate": 0.0005, "loss": 0.8779, "step": 333000 }, { "epoch": 26.12, "learning_rate": 0.0005, "loss": 0.8926, "step": 333100 }, { "epoch": 26.13, "learning_rate": 0.0005, "loss": 0.8917, "step": 333200 }, { "epoch": 26.14, "learning_rate": 0.0005, "loss": 0.9002, "step": 333300 }, { "epoch": 26.14, "learning_rate": 0.0005, "loss": 0.8864, "step": 333400 }, { "epoch": 26.15, "learning_rate": 0.0005, "loss": 0.8914, "step": 333500 }, { "epoch": 26.16, "learning_rate": 0.0005, "loss": 0.8801, "step": 333600 }, { "epoch": 26.17, "learning_rate": 0.0005, "loss": 0.8886, "step": 333700 }, { "epoch": 26.18, "learning_rate": 0.0005, "loss": 0.8956, "step": 333800 }, { "epoch": 26.18, "learning_rate": 0.0005, "loss": 0.8889, "step": 333900 }, { "epoch": 26.19, "learning_rate": 0.0005, "loss": 0.8937, "step": 334000 }, { "epoch": 26.2, "learning_rate": 0.0005, "loss": 0.8842, "step": 334100 }, { "epoch": 26.21, "learning_rate": 0.0005, "loss": 0.9001, "step": 334200 }, { "epoch": 26.22, "learning_rate": 0.0005, "loss": 0.8991, "step": 334300 }, { "epoch": 26.22, "learning_rate": 0.0005, "loss": 0.899, "step": 334400 }, { "epoch": 26.23, "learning_rate": 0.0005, "loss": 0.9057, "step": 334500 }, { "epoch": 26.24, "learning_rate": 0.0005, "loss": 0.8923, "step": 334600 }, { "epoch": 26.25, "learning_rate": 0.0005, "loss": 0.9065, "step": 334700 }, { "epoch": 26.25, "learning_rate": 0.0005, "loss": 0.889, "step": 334800 }, { "epoch": 26.26, "learning_rate": 0.0005, "loss": 0.8918, "step": 334900 }, { "epoch": 26.27, "learning_rate": 0.0005, "loss": 0.9046, "step": 335000 }, { "epoch": 26.28, "learning_rate": 0.0005, "loss": 0.8922, "step": 335100 }, { "epoch": 26.29, "learning_rate": 0.0005, "loss": 0.9002, "step": 335200 }, { "epoch": 26.29, "learning_rate": 0.0005, "loss": 0.8995, "step": 335300 }, { "epoch": 26.3, "learning_rate": 0.0005, "loss": 0.8953, "step": 335400 }, { "epoch": 26.31, "learning_rate": 0.0005, "loss": 0.9064, "step": 335500 }, { "epoch": 26.32, "learning_rate": 0.0005, "loss": 0.9207, "step": 335600 }, { "epoch": 26.33, "learning_rate": 0.0005, "loss": 0.8872, "step": 335700 }, { "epoch": 26.33, "learning_rate": 0.0005, "loss": 0.8966, "step": 335800 }, { "epoch": 26.34, "learning_rate": 0.0005, "loss": 0.9181, "step": 335900 }, { "epoch": 26.35, "learning_rate": 0.0005, "loss": 0.9238, "step": 336000 }, { "epoch": 26.36, "learning_rate": 0.0005, "loss": 0.9176, "step": 336100 }, { "epoch": 26.36, "learning_rate": 0.0005, "loss": 0.8847, "step": 336200 }, { "epoch": 26.37, "learning_rate": 0.0005, "loss": 0.9041, "step": 336300 }, { "epoch": 26.38, "learning_rate": 0.0005, "loss": 0.901, "step": 336400 }, { "epoch": 26.39, "learning_rate": 0.0005, "loss": 0.8913, "step": 336500 }, { "epoch": 26.4, "learning_rate": 0.0005, "loss": 0.916, "step": 336600 }, { "epoch": 26.4, "learning_rate": 0.0005, "loss": 0.9105, "step": 336700 }, { "epoch": 26.41, "learning_rate": 0.0005, "loss": 0.911, "step": 336800 }, { "epoch": 26.42, "learning_rate": 0.0005, "loss": 0.919, "step": 336900 }, { "epoch": 26.43, "learning_rate": 0.0005, "loss": 0.8979, "step": 337000 }, { "epoch": 26.44, "learning_rate": 0.0005, "loss": 0.9193, "step": 337100 }, { "epoch": 26.44, "learning_rate": 0.0005, "loss": 0.8913, "step": 337200 }, { "epoch": 26.45, "learning_rate": 0.0005, "loss": 0.9286, "step": 337300 }, { "epoch": 26.46, "learning_rate": 0.0005, "loss": 0.9046, "step": 337400 }, { "epoch": 26.47, "learning_rate": 0.0005, "loss": 0.9207, "step": 337500 }, { "epoch": 26.47, "learning_rate": 0.0005, "loss": 0.9098, "step": 337600 }, { "epoch": 26.48, "learning_rate": 0.0005, "loss": 0.9177, "step": 337700 }, { "epoch": 26.49, "learning_rate": 0.0005, "loss": 0.9051, "step": 337800 }, { "epoch": 26.5, "learning_rate": 0.0005, "loss": 0.9074, "step": 337900 }, { "epoch": 26.51, "learning_rate": 0.0005, "loss": 0.9157, "step": 338000 }, { "epoch": 26.51, "learning_rate": 0.0005, "loss": 0.9032, "step": 338100 }, { "epoch": 26.52, "learning_rate": 0.0005, "loss": 0.9159, "step": 338200 }, { "epoch": 26.53, "learning_rate": 0.0005, "loss": 0.9227, "step": 338300 }, { "epoch": 26.54, "learning_rate": 0.0005, "loss": 0.935, "step": 338400 }, { "epoch": 26.54, "learning_rate": 0.0005, "loss": 0.9078, "step": 338500 }, { "epoch": 26.55, "learning_rate": 0.0005, "loss": 0.9201, "step": 338600 }, { "epoch": 26.56, "learning_rate": 0.0005, "loss": 0.9367, "step": 338700 }, { "epoch": 26.57, "learning_rate": 0.0005, "loss": 0.9325, "step": 338800 }, { "epoch": 26.58, "learning_rate": 0.0005, "loss": 0.8986, "step": 338900 }, { "epoch": 26.58, "learning_rate": 0.0005, "loss": 0.9171, "step": 339000 }, { "epoch": 26.59, "learning_rate": 0.0005, "loss": 0.9199, "step": 339100 }, { "epoch": 26.6, "learning_rate": 0.0005, "loss": 0.8902, "step": 339200 }, { "epoch": 26.61, "learning_rate": 0.0005, "loss": 0.9426, "step": 339300 }, { "epoch": 26.62, "learning_rate": 0.0005, "loss": 0.9074, "step": 339400 }, { "epoch": 26.62, "learning_rate": 0.0005, "loss": 0.9046, "step": 339500 }, { "epoch": 26.63, "learning_rate": 0.0005, "loss": 0.9325, "step": 339600 }, { "epoch": 26.64, "learning_rate": 0.0005, "loss": 0.934, "step": 339700 }, { "epoch": 26.65, "learning_rate": 0.0005, "loss": 0.929, "step": 339800 }, { "epoch": 26.65, "learning_rate": 0.0005, "loss": 0.9233, "step": 339900 }, { "epoch": 26.66, "learning_rate": 0.0005, "loss": 0.9324, "step": 340000 }, { "epoch": 26.66, "eval_gen_len": 18.74794738236073, "eval_loss": 2.5247809886932373, "eval_rouge1": 35.8077, "eval_rouge2": 14.9114, "eval_rougeL": 29.627, "eval_rougeLsum": 29.6285, "eval_runtime": 344.6219, "eval_samples_per_second": 32.868, "eval_steps_per_second": 2.054, "step": 340000 }, { "epoch": 26.67, "learning_rate": 0.0005, "loss": 0.923, "step": 340100 }, { "epoch": 26.68, "learning_rate": 0.0005, "loss": 0.9162, "step": 340200 }, { "epoch": 26.69, "learning_rate": 0.0005, "loss": 0.9267, "step": 340300 }, { "epoch": 26.69, "learning_rate": 0.0005, "loss": 0.9172, "step": 340400 }, { "epoch": 26.7, "learning_rate": 0.0005, "loss": 0.9393, "step": 340500 }, { "epoch": 26.71, "learning_rate": 0.0005, "loss": 0.9338, "step": 340600 }, { "epoch": 26.72, "learning_rate": 0.0005, "loss": 0.944, "step": 340700 }, { "epoch": 26.73, "learning_rate": 0.0005, "loss": 0.9187, "step": 340800 }, { "epoch": 26.73, "learning_rate": 0.0005, "loss": 0.9293, "step": 340900 }, { "epoch": 26.74, "learning_rate": 0.0005, "loss": 0.9361, "step": 341000 }, { "epoch": 26.75, "learning_rate": 0.0005, "loss": 0.9298, "step": 341100 }, { "epoch": 26.76, "learning_rate": 0.0005, "loss": 0.9349, "step": 341200 }, { "epoch": 26.76, "learning_rate": 0.0005, "loss": 0.9216, "step": 341300 }, { "epoch": 26.77, "learning_rate": 0.0005, "loss": 0.9445, "step": 341400 }, { "epoch": 26.78, "learning_rate": 0.0005, "loss": 0.9376, "step": 341500 }, { "epoch": 26.79, "learning_rate": 0.0005, "loss": 0.9266, "step": 341600 }, { "epoch": 26.8, "learning_rate": 0.0005, "loss": 0.9311, "step": 341700 }, { "epoch": 26.8, "learning_rate": 0.0005, "loss": 0.9399, "step": 341800 }, { "epoch": 26.81, "learning_rate": 0.0005, "loss": 0.9059, "step": 341900 }, { "epoch": 26.82, "learning_rate": 0.0005, "loss": 0.9241, "step": 342000 }, { "epoch": 26.83, "learning_rate": 0.0005, "loss": 0.9276, "step": 342100 }, { "epoch": 26.84, "learning_rate": 0.0005, "loss": 0.9259, "step": 342200 }, { "epoch": 26.84, "learning_rate": 0.0005, "loss": 0.9262, "step": 342300 }, { "epoch": 26.85, "learning_rate": 0.0005, "loss": 0.9346, "step": 342400 }, { "epoch": 26.86, "learning_rate": 0.0005, "loss": 0.9406, "step": 342500 }, { "epoch": 26.87, "learning_rate": 0.0005, "loss": 0.9522, "step": 342600 }, { "epoch": 26.87, "learning_rate": 0.0005, "loss": 0.9023, "step": 342700 }, { "epoch": 26.88, "learning_rate": 0.0005, "loss": 0.9389, "step": 342800 }, { "epoch": 26.89, "learning_rate": 0.0005, "loss": 0.9364, "step": 342900 }, { "epoch": 26.9, "learning_rate": 0.0005, "loss": 0.9338, "step": 343000 }, { "epoch": 26.91, "learning_rate": 0.0005, "loss": 0.9258, "step": 343100 }, { "epoch": 26.91, "learning_rate": 0.0005, "loss": 0.9315, "step": 343200 }, { "epoch": 26.92, "learning_rate": 0.0005, "loss": 0.9448, "step": 343300 }, { "epoch": 26.93, "learning_rate": 0.0005, "loss": 0.9283, "step": 343400 }, { "epoch": 26.94, "learning_rate": 0.0005, "loss": 0.919, "step": 343500 }, { "epoch": 26.94, "learning_rate": 0.0005, "loss": 0.9423, "step": 343600 }, { "epoch": 26.95, "learning_rate": 0.0005, "loss": 0.9215, "step": 343700 }, { "epoch": 26.96, "learning_rate": 0.0005, "loss": 0.9399, "step": 343800 }, { "epoch": 26.97, "learning_rate": 0.0005, "loss": 0.9367, "step": 343900 }, { "epoch": 26.98, "learning_rate": 0.0005, "loss": 0.956, "step": 344000 }, { "epoch": 26.98, "learning_rate": 0.0005, "loss": 0.9347, "step": 344100 }, { "epoch": 26.99, "learning_rate": 0.0005, "loss": 0.9514, "step": 344200 }, { "epoch": 27.0, "learning_rate": 0.0005, "loss": 0.9499, "step": 344300 }, { "epoch": 27.01, "learning_rate": 0.0005, "loss": 0.8721, "step": 344400 }, { "epoch": 27.02, "learning_rate": 0.0005, "loss": 0.8669, "step": 344500 }, { "epoch": 27.02, "learning_rate": 0.0005, "loss": 0.8606, "step": 344600 }, { "epoch": 27.03, "learning_rate": 0.0005, "loss": 0.8757, "step": 344700 }, { "epoch": 27.04, "learning_rate": 0.0005, "loss": 0.8588, "step": 344800 }, { "epoch": 27.05, "learning_rate": 0.0005, "loss": 0.8642, "step": 344900 }, { "epoch": 27.05, "learning_rate": 0.0005, "loss": 0.8582, "step": 345000 }, { "epoch": 27.06, "learning_rate": 0.0005, "loss": 0.8578, "step": 345100 }, { "epoch": 27.07, "learning_rate": 0.0005, "loss": 0.871, "step": 345200 }, { "epoch": 27.08, "learning_rate": 0.0005, "loss": 0.8786, "step": 345300 }, { "epoch": 27.09, "learning_rate": 0.0005, "loss": 0.8583, "step": 345400 }, { "epoch": 27.09, "learning_rate": 0.0005, "loss": 0.8721, "step": 345500 }, { "epoch": 27.1, "learning_rate": 0.0005, "loss": 0.8601, "step": 345600 }, { "epoch": 27.11, "learning_rate": 0.0005, "loss": 0.8734, "step": 345700 }, { "epoch": 27.12, "learning_rate": 0.0005, "loss": 0.8737, "step": 345800 }, { "epoch": 27.13, "learning_rate": 0.0005, "loss": 0.8626, "step": 345900 }, { "epoch": 27.13, "learning_rate": 0.0005, "loss": 0.8671, "step": 346000 }, { "epoch": 27.14, "learning_rate": 0.0005, "loss": 0.8733, "step": 346100 }, { "epoch": 27.15, "learning_rate": 0.0005, "loss": 0.8733, "step": 346200 }, { "epoch": 27.16, "learning_rate": 0.0005, "loss": 0.8874, "step": 346300 }, { "epoch": 27.16, "learning_rate": 0.0005, "loss": 0.8659, "step": 346400 }, { "epoch": 27.17, "learning_rate": 0.0005, "loss": 0.8888, "step": 346500 }, { "epoch": 27.18, "learning_rate": 0.0005, "loss": 0.8842, "step": 346600 }, { "epoch": 27.19, "learning_rate": 0.0005, "loss": 0.8901, "step": 346700 }, { "epoch": 27.2, "learning_rate": 0.0005, "loss": 0.8829, "step": 346800 }, { "epoch": 27.2, "learning_rate": 0.0005, "loss": 0.8869, "step": 346900 }, { "epoch": 27.21, "learning_rate": 0.0005, "loss": 0.8792, "step": 347000 }, { "epoch": 27.22, "learning_rate": 0.0005, "loss": 0.8895, "step": 347100 }, { "epoch": 27.23, "learning_rate": 0.0005, "loss": 0.8756, "step": 347200 }, { "epoch": 27.23, "learning_rate": 0.0005, "loss": 0.8804, "step": 347300 }, { "epoch": 27.24, "learning_rate": 0.0005, "loss": 0.8754, "step": 347400 }, { "epoch": 27.25, "learning_rate": 0.0005, "loss": 0.8823, "step": 347500 }, { "epoch": 27.26, "learning_rate": 0.0005, "loss": 0.8957, "step": 347600 }, { "epoch": 27.27, "learning_rate": 0.0005, "loss": 0.8939, "step": 347700 }, { "epoch": 27.27, "learning_rate": 0.0005, "loss": 0.8829, "step": 347800 }, { "epoch": 27.28, "learning_rate": 0.0005, "loss": 0.8767, "step": 347900 }, { "epoch": 27.29, "learning_rate": 0.0005, "loss": 0.8877, "step": 348000 }, { "epoch": 27.3, "learning_rate": 0.0005, "loss": 0.8929, "step": 348100 }, { "epoch": 27.31, "learning_rate": 0.0005, "loss": 0.884, "step": 348200 }, { "epoch": 27.31, "learning_rate": 0.0005, "loss": 0.8813, "step": 348300 }, { "epoch": 27.32, "learning_rate": 0.0005, "loss": 0.8865, "step": 348400 }, { "epoch": 27.33, "learning_rate": 0.0005, "loss": 0.8928, "step": 348500 }, { "epoch": 27.34, "learning_rate": 0.0005, "loss": 0.8821, "step": 348600 }, { "epoch": 27.34, "learning_rate": 0.0005, "loss": 0.8857, "step": 348700 }, { "epoch": 27.35, "learning_rate": 0.0005, "loss": 0.8991, "step": 348800 }, { "epoch": 27.36, "learning_rate": 0.0005, "loss": 0.8767, "step": 348900 }, { "epoch": 27.37, "learning_rate": 0.0005, "loss": 0.892, "step": 349000 }, { "epoch": 27.38, "learning_rate": 0.0005, "loss": 0.9086, "step": 349100 }, { "epoch": 27.38, "learning_rate": 0.0005, "loss": 0.8922, "step": 349200 }, { "epoch": 27.39, "learning_rate": 0.0005, "loss": 0.8966, "step": 349300 }, { "epoch": 27.4, "learning_rate": 0.0005, "loss": 0.9145, "step": 349400 }, { "epoch": 27.41, "learning_rate": 0.0005, "loss": 0.8957, "step": 349500 }, { "epoch": 27.42, "learning_rate": 0.0005, "loss": 0.9082, "step": 349600 }, { "epoch": 27.42, "learning_rate": 0.0005, "loss": 0.8994, "step": 349700 }, { "epoch": 27.43, "learning_rate": 0.0005, "loss": 0.8818, "step": 349800 }, { "epoch": 27.44, "learning_rate": 0.0005, "loss": 0.8782, "step": 349900 }, { "epoch": 27.45, "learning_rate": 0.0005, "loss": 0.9131, "step": 350000 }, { "epoch": 27.45, "eval_gen_len": 18.75324446013949, "eval_loss": 2.5338451862335205, "eval_rouge1": 35.4764, "eval_rouge2": 14.6233, "eval_rougeL": 29.3295, "eval_rougeLsum": 29.3271, "eval_runtime": 342.5159, "eval_samples_per_second": 33.07, "eval_steps_per_second": 2.067, "step": 350000 }, { "epoch": 27.45, "learning_rate": 0.0005, "loss": 0.8981, "step": 350100 }, { "epoch": 27.46, "learning_rate": 0.0005, "loss": 0.8919, "step": 350200 }, { "epoch": 27.47, "learning_rate": 0.0005, "loss": 0.8962, "step": 350300 }, { "epoch": 27.48, "learning_rate": 0.0005, "loss": 0.9065, "step": 350400 }, { "epoch": 27.49, "learning_rate": 0.0005, "loss": 0.8839, "step": 350500 }, { "epoch": 27.49, "learning_rate": 0.0005, "loss": 0.9226, "step": 350600 }, { "epoch": 27.5, "learning_rate": 0.0005, "loss": 0.8949, "step": 350700 }, { "epoch": 27.51, "learning_rate": 0.0005, "loss": 0.9005, "step": 350800 }, { "epoch": 27.52, "learning_rate": 0.0005, "loss": 0.8977, "step": 350900 }, { "epoch": 27.53, "learning_rate": 0.0005, "loss": 0.908, "step": 351000 }, { "epoch": 27.53, "learning_rate": 0.0005, "loss": 0.8956, "step": 351100 }, { "epoch": 27.54, "learning_rate": 0.0005, "loss": 0.8906, "step": 351200 }, { "epoch": 27.55, "learning_rate": 0.0005, "loss": 0.8817, "step": 351300 }, { "epoch": 27.56, "learning_rate": 0.0005, "loss": 0.9132, "step": 351400 }, { "epoch": 27.56, "learning_rate": 0.0005, "loss": 0.9114, "step": 351500 }, { "epoch": 27.57, "learning_rate": 0.0005, "loss": 0.9161, "step": 351600 }, { "epoch": 27.58, "learning_rate": 0.0005, "loss": 0.9128, "step": 351700 }, { "epoch": 27.59, "learning_rate": 0.0005, "loss": 0.895, "step": 351800 }, { "epoch": 27.6, "learning_rate": 0.0005, "loss": 0.914, "step": 351900 }, { "epoch": 27.6, "learning_rate": 0.0005, "loss": 0.9082, "step": 352000 }, { "epoch": 27.61, "learning_rate": 0.0005, "loss": 0.9267, "step": 352100 }, { "epoch": 27.62, "learning_rate": 0.0005, "loss": 0.8968, "step": 352200 }, { "epoch": 27.63, "learning_rate": 0.0005, "loss": 0.9162, "step": 352300 }, { "epoch": 27.63, "learning_rate": 0.0005, "loss": 0.9174, "step": 352400 }, { "epoch": 27.64, "learning_rate": 0.0005, "loss": 0.9075, "step": 352500 }, { "epoch": 27.65, "learning_rate": 0.0005, "loss": 0.9275, "step": 352600 }, { "epoch": 27.66, "learning_rate": 0.0005, "loss": 0.9007, "step": 352700 }, { "epoch": 27.67, "learning_rate": 0.0005, "loss": 0.9094, "step": 352800 }, { "epoch": 27.67, "learning_rate": 0.0005, "loss": 0.8958, "step": 352900 }, { "epoch": 27.68, "learning_rate": 0.0005, "loss": 0.9158, "step": 353000 }, { "epoch": 27.69, "learning_rate": 0.0005, "loss": 0.9148, "step": 353100 }, { "epoch": 27.7, "learning_rate": 0.0005, "loss": 0.9048, "step": 353200 }, { "epoch": 27.71, "learning_rate": 0.0005, "loss": 0.9168, "step": 353300 }, { "epoch": 27.71, "learning_rate": 0.0005, "loss": 0.9027, "step": 353400 }, { "epoch": 27.72, "learning_rate": 0.0005, "loss": 0.8975, "step": 353500 }, { "epoch": 27.73, "learning_rate": 0.0005, "loss": 0.907, "step": 353600 }, { "epoch": 27.74, "learning_rate": 0.0005, "loss": 0.929, "step": 353700 }, { "epoch": 27.74, "learning_rate": 0.0005, "loss": 0.9171, "step": 353800 }, { "epoch": 27.75, "learning_rate": 0.0005, "loss": 0.9269, "step": 353900 }, { "epoch": 27.76, "learning_rate": 0.0005, "loss": 0.9198, "step": 354000 }, { "epoch": 27.77, "learning_rate": 0.0005, "loss": 0.9323, "step": 354100 }, { "epoch": 27.78, "learning_rate": 0.0005, "loss": 0.9231, "step": 354200 }, { "epoch": 27.78, "learning_rate": 0.0005, "loss": 0.9159, "step": 354300 }, { "epoch": 27.79, "learning_rate": 0.0005, "loss": 0.9269, "step": 354400 }, { "epoch": 27.8, "learning_rate": 0.0005, "loss": 0.906, "step": 354500 }, { "epoch": 27.81, "learning_rate": 0.0005, "loss": 0.9268, "step": 354600 }, { "epoch": 27.82, "learning_rate": 0.0005, "loss": 0.9293, "step": 354700 }, { "epoch": 27.82, "learning_rate": 0.0005, "loss": 0.9302, "step": 354800 }, { "epoch": 27.83, "learning_rate": 0.0005, "loss": 0.9257, "step": 354900 }, { "epoch": 27.84, "learning_rate": 0.0005, "loss": 0.926, "step": 355000 }, { "epoch": 27.85, "learning_rate": 0.0005, "loss": 0.9234, "step": 355100 }, { "epoch": 27.85, "learning_rate": 0.0005, "loss": 0.9299, "step": 355200 }, { "epoch": 27.86, "learning_rate": 0.0005, "loss": 0.9336, "step": 355300 }, { "epoch": 27.87, "learning_rate": 0.0005, "loss": 0.9407, "step": 355400 }, { "epoch": 27.88, "learning_rate": 0.0005, "loss": 0.9227, "step": 355500 }, { "epoch": 27.89, "learning_rate": 0.0005, "loss": 0.9281, "step": 355600 }, { "epoch": 27.89, "learning_rate": 0.0005, "loss": 0.902, "step": 355700 }, { "epoch": 27.9, "learning_rate": 0.0005, "loss": 0.909, "step": 355800 }, { "epoch": 27.91, "learning_rate": 0.0005, "loss": 0.9284, "step": 355900 }, { "epoch": 27.92, "learning_rate": 0.0005, "loss": 0.9104, "step": 356000 }, { "epoch": 27.93, "learning_rate": 0.0005, "loss": 0.9239, "step": 356100 }, { "epoch": 27.93, "learning_rate": 0.0005, "loss": 0.9364, "step": 356200 }, { "epoch": 27.94, "learning_rate": 0.0005, "loss": 0.9207, "step": 356300 }, { "epoch": 27.95, "learning_rate": 0.0005, "loss": 0.9208, "step": 356400 }, { "epoch": 27.96, "learning_rate": 0.0005, "loss": 0.9331, "step": 356500 }, { "epoch": 27.96, "learning_rate": 0.0005, "loss": 0.9394, "step": 356600 }, { "epoch": 27.97, "learning_rate": 0.0005, "loss": 0.9175, "step": 356700 }, { "epoch": 27.98, "learning_rate": 0.0005, "loss": 0.9379, "step": 356800 }, { "epoch": 27.99, "learning_rate": 0.0005, "loss": 0.9293, "step": 356900 }, { "epoch": 28.0, "learning_rate": 0.0005, "loss": 0.9164, "step": 357000 }, { "epoch": 28.0, "learning_rate": 0.0005, "loss": 0.8842, "step": 357100 }, { "epoch": 28.01, "learning_rate": 0.0005, "loss": 0.8249, "step": 357200 }, { "epoch": 28.02, "learning_rate": 0.0005, "loss": 0.8539, "step": 357300 }, { "epoch": 28.03, "learning_rate": 0.0005, "loss": 0.8682, "step": 357400 }, { "epoch": 28.03, "learning_rate": 0.0005, "loss": 0.8495, "step": 357500 }, { "epoch": 28.04, "learning_rate": 0.0005, "loss": 0.8562, "step": 357600 }, { "epoch": 28.05, "learning_rate": 0.0005, "loss": 0.8425, "step": 357700 }, { "epoch": 28.06, "learning_rate": 0.0005, "loss": 0.8658, "step": 357800 }, { "epoch": 28.07, "learning_rate": 0.0005, "loss": 0.8537, "step": 357900 }, { "epoch": 28.07, "learning_rate": 0.0005, "loss": 0.8539, "step": 358000 }, { "epoch": 28.08, "learning_rate": 0.0005, "loss": 0.8655, "step": 358100 }, { "epoch": 28.09, "learning_rate": 0.0005, "loss": 0.8737, "step": 358200 }, { "epoch": 28.1, "learning_rate": 0.0005, "loss": 0.8596, "step": 358300 }, { "epoch": 28.11, "learning_rate": 0.0005, "loss": 0.8669, "step": 358400 }, { "epoch": 28.11, "learning_rate": 0.0005, "loss": 0.8513, "step": 358500 }, { "epoch": 28.12, "learning_rate": 0.0005, "loss": 0.8544, "step": 358600 }, { "epoch": 28.13, "learning_rate": 0.0005, "loss": 0.8656, "step": 358700 }, { "epoch": 28.14, "learning_rate": 0.0005, "loss": 0.8528, "step": 358800 }, { "epoch": 28.14, "learning_rate": 0.0005, "loss": 0.856, "step": 358900 }, { "epoch": 28.15, "learning_rate": 0.0005, "loss": 0.8561, "step": 359000 }, { "epoch": 28.16, "learning_rate": 0.0005, "loss": 0.8437, "step": 359100 }, { "epoch": 28.17, "learning_rate": 0.0005, "loss": 0.871, "step": 359200 }, { "epoch": 28.18, "learning_rate": 0.0005, "loss": 0.8716, "step": 359300 }, { "epoch": 28.18, "learning_rate": 0.0005, "loss": 0.8601, "step": 359400 }, { "epoch": 28.19, "learning_rate": 0.0005, "loss": 0.8735, "step": 359500 }, { "epoch": 28.2, "learning_rate": 0.0005, "loss": 0.8689, "step": 359600 }, { "epoch": 28.21, "learning_rate": 0.0005, "loss": 0.8721, "step": 359700 }, { "epoch": 28.22, "learning_rate": 0.0005, "loss": 0.8717, "step": 359800 }, { "epoch": 28.22, "learning_rate": 0.0005, "loss": 0.8497, "step": 359900 }, { "epoch": 28.23, "learning_rate": 0.0005, "loss": 0.8805, "step": 360000 }, { "epoch": 28.23, "eval_gen_len": 18.76789970866072, "eval_loss": 2.5372776985168457, "eval_rouge1": 35.7063, "eval_rouge2": 14.7166, "eval_rougeL": 29.4219, "eval_rougeLsum": 29.4206, "eval_runtime": 342.7529, "eval_samples_per_second": 33.047, "eval_steps_per_second": 2.066, "step": 360000 }, { "epoch": 28.24, "learning_rate": 0.0005, "loss": 0.8709, "step": 360100 }, { "epoch": 28.25, "learning_rate": 0.0005, "loss": 0.8777, "step": 360200 }, { "epoch": 28.25, "learning_rate": 0.0005, "loss": 0.8792, "step": 360300 }, { "epoch": 28.26, "learning_rate": 0.0005, "loss": 0.8701, "step": 360400 }, { "epoch": 28.27, "learning_rate": 0.0005, "loss": 0.8977, "step": 360500 }, { "epoch": 28.28, "learning_rate": 0.0005, "loss": 0.8732, "step": 360600 }, { "epoch": 28.29, "learning_rate": 0.0005, "loss": 0.8741, "step": 360700 }, { "epoch": 28.29, "learning_rate": 0.0005, "loss": 0.8822, "step": 360800 }, { "epoch": 28.3, "learning_rate": 0.0005, "loss": 0.8917, "step": 360900 }, { "epoch": 28.31, "learning_rate": 0.0005, "loss": 0.8797, "step": 361000 }, { "epoch": 28.32, "learning_rate": 0.0005, "loss": 0.8814, "step": 361100 }, { "epoch": 28.32, "learning_rate": 0.0005, "loss": 0.8797, "step": 361200 }, { "epoch": 28.33, "learning_rate": 0.0005, "loss": 0.8891, "step": 361300 }, { "epoch": 28.34, "learning_rate": 0.0005, "loss": 0.8845, "step": 361400 }, { "epoch": 28.35, "learning_rate": 0.0005, "loss": 0.8806, "step": 361500 }, { "epoch": 28.36, "learning_rate": 0.0005, "loss": 0.88, "step": 361600 }, { "epoch": 28.36, "learning_rate": 0.0005, "loss": 0.8906, "step": 361700 }, { "epoch": 28.37, "learning_rate": 0.0005, "loss": 0.8814, "step": 361800 }, { "epoch": 28.38, "learning_rate": 0.0005, "loss": 0.8719, "step": 361900 }, { "epoch": 28.39, "learning_rate": 0.0005, "loss": 0.8853, "step": 362000 }, { "epoch": 28.4, "learning_rate": 0.0005, "loss": 0.8691, "step": 362100 }, { "epoch": 28.4, "learning_rate": 0.0005, "loss": 0.8678, "step": 362200 }, { "epoch": 28.41, "learning_rate": 0.0005, "loss": 0.8867, "step": 362300 }, { "epoch": 28.42, "learning_rate": 0.0005, "loss": 0.898, "step": 362400 }, { "epoch": 28.43, "learning_rate": 0.0005, "loss": 0.9137, "step": 362500 }, { "epoch": 28.43, "learning_rate": 0.0005, "loss": 0.8931, "step": 362600 }, { "epoch": 28.44, "learning_rate": 0.0005, "loss": 0.8859, "step": 362700 }, { "epoch": 28.45, "learning_rate": 0.0005, "loss": 0.9005, "step": 362800 }, { "epoch": 28.46, "learning_rate": 0.0005, "loss": 0.8763, "step": 362900 }, { "epoch": 28.47, "learning_rate": 0.0005, "loss": 0.8683, "step": 363000 }, { "epoch": 28.47, "learning_rate": 0.0005, "loss": 0.8866, "step": 363100 }, { "epoch": 28.48, "learning_rate": 0.0005, "loss": 0.8963, "step": 363200 }, { "epoch": 28.49, "learning_rate": 0.0005, "loss": 0.8846, "step": 363300 }, { "epoch": 28.5, "learning_rate": 0.0005, "loss": 0.886, "step": 363400 }, { "epoch": 28.51, "learning_rate": 0.0005, "loss": 0.8921, "step": 363500 }, { "epoch": 28.51, "learning_rate": 0.0005, "loss": 0.898, "step": 363600 }, { "epoch": 28.52, "learning_rate": 0.0005, "loss": 0.8784, "step": 363700 }, { "epoch": 28.53, "learning_rate": 0.0005, "loss": 0.8979, "step": 363800 }, { "epoch": 28.54, "learning_rate": 0.0005, "loss": 0.8878, "step": 363900 }, { "epoch": 28.54, "learning_rate": 0.0005, "loss": 0.8999, "step": 364000 }, { "epoch": 28.55, "learning_rate": 0.0005, "loss": 0.8778, "step": 364100 }, { "epoch": 28.56, "learning_rate": 0.0005, "loss": 0.8856, "step": 364200 }, { "epoch": 28.57, "learning_rate": 0.0005, "loss": 0.8938, "step": 364300 }, { "epoch": 28.58, "learning_rate": 0.0005, "loss": 0.9011, "step": 364400 }, { "epoch": 28.58, "learning_rate": 0.0005, "loss": 0.8987, "step": 364500 }, { "epoch": 28.59, "learning_rate": 0.0005, "loss": 0.9002, "step": 364600 }, { "epoch": 28.6, "learning_rate": 0.0005, "loss": 0.9035, "step": 364700 }, { "epoch": 28.61, "learning_rate": 0.0005, "loss": 0.8931, "step": 364800 }, { "epoch": 28.62, "learning_rate": 0.0005, "loss": 0.8884, "step": 364900 }, { "epoch": 28.62, "learning_rate": 0.0005, "loss": 0.8985, "step": 365000 }, { "epoch": 28.63, "learning_rate": 0.0005, "loss": 0.8982, "step": 365100 }, { "epoch": 28.64, "learning_rate": 0.0005, "loss": 0.8899, "step": 365200 }, { "epoch": 28.65, "learning_rate": 0.0005, "loss": 0.8943, "step": 365300 }, { "epoch": 28.65, "learning_rate": 0.0005, "loss": 0.9133, "step": 365400 }, { "epoch": 28.66, "learning_rate": 0.0005, "loss": 0.8853, "step": 365500 }, { "epoch": 28.67, "learning_rate": 0.0005, "loss": 0.9038, "step": 365600 }, { "epoch": 28.68, "learning_rate": 0.0005, "loss": 0.8863, "step": 365700 }, { "epoch": 28.69, "learning_rate": 0.0005, "loss": 0.9016, "step": 365800 }, { "epoch": 28.69, "learning_rate": 0.0005, "loss": 0.8923, "step": 365900 }, { "epoch": 28.7, "learning_rate": 0.0005, "loss": 0.9003, "step": 366000 }, { "epoch": 28.71, "learning_rate": 0.0005, "loss": 0.886, "step": 366100 }, { "epoch": 28.72, "learning_rate": 0.0005, "loss": 0.9096, "step": 366200 }, { "epoch": 28.72, "learning_rate": 0.0005, "loss": 0.9223, "step": 366300 }, { "epoch": 28.73, "learning_rate": 0.0005, "loss": 0.9097, "step": 366400 }, { "epoch": 28.74, "learning_rate": 0.0005, "loss": 0.8826, "step": 366500 }, { "epoch": 28.75, "learning_rate": 0.0005, "loss": 0.9088, "step": 366600 }, { "epoch": 28.76, "learning_rate": 0.0005, "loss": 0.9049, "step": 366700 }, { "epoch": 28.76, "learning_rate": 0.0005, "loss": 0.9141, "step": 366800 }, { "epoch": 28.77, "learning_rate": 0.0005, "loss": 0.9033, "step": 366900 }, { "epoch": 28.78, "learning_rate": 0.0005, "loss": 0.9207, "step": 367000 }, { "epoch": 28.79, "learning_rate": 0.0005, "loss": 0.9334, "step": 367100 }, { "epoch": 28.8, "learning_rate": 0.0005, "loss": 0.9036, "step": 367200 }, { "epoch": 28.8, "learning_rate": 0.0005, "loss": 0.9075, "step": 367300 }, { "epoch": 28.81, "learning_rate": 0.0005, "loss": 0.9105, "step": 367400 }, { "epoch": 28.82, "learning_rate": 0.0005, "loss": 0.8969, "step": 367500 }, { "epoch": 28.83, "learning_rate": 0.0005, "loss": 0.883, "step": 367600 }, { "epoch": 28.83, "learning_rate": 0.0005, "loss": 0.9088, "step": 367700 }, { "epoch": 28.84, "learning_rate": 0.0005, "loss": 0.9039, "step": 367800 }, { "epoch": 28.85, "learning_rate": 0.0005, "loss": 0.8916, "step": 367900 }, { "epoch": 28.86, "learning_rate": 0.0005, "loss": 0.9001, "step": 368000 }, { "epoch": 28.87, "learning_rate": 0.0005, "loss": 0.921, "step": 368100 }, { "epoch": 28.87, "learning_rate": 0.0005, "loss": 0.9076, "step": 368200 }, { "epoch": 28.88, "learning_rate": 0.0005, "loss": 0.8996, "step": 368300 }, { "epoch": 28.89, "learning_rate": 0.0005, "loss": 0.8978, "step": 368400 }, { "epoch": 28.9, "learning_rate": 0.0005, "loss": 0.9024, "step": 368500 }, { "epoch": 28.91, "learning_rate": 0.0005, "loss": 0.9298, "step": 368600 }, { "epoch": 28.91, "learning_rate": 0.0005, "loss": 0.9093, "step": 368700 }, { "epoch": 28.92, "learning_rate": 0.0005, "loss": 0.9188, "step": 368800 }, { "epoch": 28.93, "learning_rate": 0.0005, "loss": 0.8863, "step": 368900 }, { "epoch": 28.94, "learning_rate": 0.0005, "loss": 0.9089, "step": 369000 }, { "epoch": 28.94, "learning_rate": 0.0005, "loss": 0.9015, "step": 369100 }, { "epoch": 28.95, "learning_rate": 0.0005, "loss": 0.9233, "step": 369200 }, { "epoch": 28.96, "learning_rate": 0.0005, "loss": 0.8923, "step": 369300 }, { "epoch": 28.97, "learning_rate": 0.0005, "loss": 0.9033, "step": 369400 }, { "epoch": 28.98, "learning_rate": 0.0005, "loss": 0.9084, "step": 369500 }, { "epoch": 28.98, "learning_rate": 0.0005, "loss": 0.9086, "step": 369600 }, { "epoch": 28.99, "learning_rate": 0.0005, "loss": 0.9156, "step": 369700 }, { "epoch": 29.0, "learning_rate": 0.0005, "loss": 0.9118, "step": 369800 }, { "epoch": 29.01, "learning_rate": 0.0005, "loss": 0.8237, "step": 369900 }, { "epoch": 29.02, "learning_rate": 0.0005, "loss": 0.8535, "step": 370000 }, { "epoch": 29.02, "eval_gen_len": 18.747417674582856, "eval_loss": 2.6049208641052246, "eval_rouge1": 35.5556, "eval_rouge2": 14.7079, "eval_rougeL": 29.4102, "eval_rougeLsum": 29.4123, "eval_runtime": 342.2048, "eval_samples_per_second": 33.1, "eval_steps_per_second": 2.069, "step": 370000 }, { "epoch": 29.02, "learning_rate": 0.0005, "loss": 0.8266, "step": 370100 }, { "epoch": 29.03, "learning_rate": 0.0005, "loss": 0.8482, "step": 370200 }, { "epoch": 29.04, "learning_rate": 0.0005, "loss": 0.8397, "step": 370300 }, { "epoch": 29.05, "learning_rate": 0.0005, "loss": 0.8545, "step": 370400 }, { "epoch": 29.05, "learning_rate": 0.0005, "loss": 0.8445, "step": 370500 }, { "epoch": 29.06, "learning_rate": 0.0005, "loss": 0.8375, "step": 370600 }, { "epoch": 29.07, "learning_rate": 0.0005, "loss": 0.8407, "step": 370700 }, { "epoch": 29.08, "learning_rate": 0.0005, "loss": 0.8441, "step": 370800 }, { "epoch": 29.09, "learning_rate": 0.0005, "loss": 0.8493, "step": 370900 }, { "epoch": 29.09, "learning_rate": 0.0005, "loss": 0.8368, "step": 371000 }, { "epoch": 29.1, "learning_rate": 0.0005, "loss": 0.8442, "step": 371100 }, { "epoch": 29.11, "learning_rate": 0.0005, "loss": 0.8446, "step": 371200 }, { "epoch": 29.12, "learning_rate": 0.0005, "loss": 0.8462, "step": 371300 }, { "epoch": 29.12, "learning_rate": 0.0005, "loss": 0.8461, "step": 371400 }, { "epoch": 29.13, "learning_rate": 0.0005, "loss": 0.8486, "step": 371500 }, { "epoch": 29.14, "learning_rate": 0.0005, "loss": 0.8603, "step": 371600 }, { "epoch": 29.15, "learning_rate": 0.0005, "loss": 0.8696, "step": 371700 }, { "epoch": 29.16, "learning_rate": 0.0005, "loss": 0.8638, "step": 371800 }, { "epoch": 29.16, "learning_rate": 0.0005, "loss": 0.8647, "step": 371900 }, { "epoch": 29.17, "learning_rate": 0.0005, "loss": 0.8609, "step": 372000 }, { "epoch": 29.18, "learning_rate": 0.0005, "loss": 0.852, "step": 372100 }, { "epoch": 29.19, "learning_rate": 0.0005, "loss": 0.8582, "step": 372200 }, { "epoch": 29.2, "learning_rate": 0.0005, "loss": 0.8579, "step": 372300 }, { "epoch": 29.2, "learning_rate": 0.0005, "loss": 0.834, "step": 372400 }, { "epoch": 29.21, "learning_rate": 0.0005, "loss": 0.8731, "step": 372500 }, { "epoch": 29.22, "learning_rate": 0.0005, "loss": 0.8474, "step": 372600 }, { "epoch": 29.23, "learning_rate": 0.0005, "loss": 0.8539, "step": 372700 }, { "epoch": 29.23, "learning_rate": 0.0005, "loss": 0.8584, "step": 372800 }, { "epoch": 29.24, "learning_rate": 0.0005, "loss": 0.8567, "step": 372900 }, { "epoch": 29.25, "learning_rate": 0.0005, "loss": 0.8579, "step": 373000 }, { "epoch": 29.26, "learning_rate": 0.0005, "loss": 0.8576, "step": 373100 }, { "epoch": 29.27, "learning_rate": 0.0005, "loss": 0.8628, "step": 373200 }, { "epoch": 29.27, "learning_rate": 0.0005, "loss": 0.8639, "step": 373300 }, { "epoch": 29.28, "learning_rate": 0.0005, "loss": 0.8677, "step": 373400 }, { "epoch": 29.29, "learning_rate": 0.0005, "loss": 0.8661, "step": 373500 }, { "epoch": 29.3, "learning_rate": 0.0005, "loss": 0.8609, "step": 373600 }, { "epoch": 29.31, "learning_rate": 0.0005, "loss": 0.8804, "step": 373700 }, { "epoch": 29.31, "learning_rate": 0.0005, "loss": 0.8659, "step": 373800 }, { "epoch": 29.32, "learning_rate": 0.0005, "loss": 0.8627, "step": 373900 }, { "epoch": 29.33, "learning_rate": 0.0005, "loss": 0.8622, "step": 374000 }, { "epoch": 29.34, "learning_rate": 0.0005, "loss": 0.873, "step": 374100 }, { "epoch": 29.34, "learning_rate": 0.0005, "loss": 0.8749, "step": 374200 }, { "epoch": 29.35, "learning_rate": 0.0005, "loss": 0.8654, "step": 374300 }, { "epoch": 29.36, "learning_rate": 0.0005, "loss": 0.8854, "step": 374400 }, { "epoch": 29.37, "learning_rate": 0.0005, "loss": 0.861, "step": 374500 }, { "epoch": 29.38, "learning_rate": 0.0005, "loss": 0.8615, "step": 374600 }, { "epoch": 29.38, "learning_rate": 0.0005, "loss": 0.8702, "step": 374700 }, { "epoch": 29.39, "learning_rate": 0.0005, "loss": 0.8605, "step": 374800 }, { "epoch": 29.4, "learning_rate": 0.0005, "loss": 0.8555, "step": 374900 }, { "epoch": 29.41, "learning_rate": 0.0005, "loss": 0.8733, "step": 375000 }, { "epoch": 29.41, "learning_rate": 0.0005, "loss": 0.8929, "step": 375100 }, { "epoch": 29.42, "learning_rate": 0.0005, "loss": 0.8697, "step": 375200 }, { "epoch": 29.43, "learning_rate": 0.0005, "loss": 0.8687, "step": 375300 }, { "epoch": 29.44, "learning_rate": 0.0005, "loss": 0.8573, "step": 375400 }, { "epoch": 29.45, "learning_rate": 0.0005, "loss": 0.8935, "step": 375500 }, { "epoch": 29.45, "learning_rate": 0.0005, "loss": 0.861, "step": 375600 }, { "epoch": 29.46, "learning_rate": 0.0005, "loss": 0.8742, "step": 375700 }, { "epoch": 29.47, "learning_rate": 0.0005, "loss": 0.8649, "step": 375800 }, { "epoch": 29.48, "learning_rate": 0.0005, "loss": 0.8776, "step": 375900 }, { "epoch": 29.49, "learning_rate": 0.0005, "loss": 0.8897, "step": 376000 }, { "epoch": 29.49, "learning_rate": 0.0005, "loss": 0.8972, "step": 376100 }, { "epoch": 29.5, "learning_rate": 0.0005, "loss": 0.8943, "step": 376200 }, { "epoch": 29.51, "learning_rate": 0.0005, "loss": 0.8796, "step": 376300 }, { "epoch": 29.52, "learning_rate": 0.0005, "loss": 0.8884, "step": 376400 }, { "epoch": 29.52, "learning_rate": 0.0005, "loss": 0.8724, "step": 376500 }, { "epoch": 29.53, "learning_rate": 0.0005, "loss": 0.8796, "step": 376600 }, { "epoch": 29.54, "learning_rate": 0.0005, "loss": 0.873, "step": 376700 }, { "epoch": 29.55, "learning_rate": 0.0005, "loss": 0.885, "step": 376800 }, { "epoch": 29.56, "learning_rate": 0.0005, "loss": 0.864, "step": 376900 }, { "epoch": 29.56, "learning_rate": 0.0005, "loss": 0.8937, "step": 377000 }, { "epoch": 29.57, "learning_rate": 0.0005, "loss": 0.8861, "step": 377100 }, { "epoch": 29.58, "learning_rate": 0.0005, "loss": 0.882, "step": 377200 }, { "epoch": 29.59, "learning_rate": 0.0005, "loss": 0.8777, "step": 377300 }, { "epoch": 29.6, "learning_rate": 0.0005, "loss": 0.8981, "step": 377400 }, { "epoch": 29.6, "learning_rate": 0.0005, "loss": 0.8643, "step": 377500 }, { "epoch": 29.61, "learning_rate": 0.0005, "loss": 0.8898, "step": 377600 }, { "epoch": 29.62, "learning_rate": 0.0005, "loss": 0.8839, "step": 377700 }, { "epoch": 29.63, "learning_rate": 0.0005, "loss": 0.9064, "step": 377800 }, { "epoch": 29.63, "learning_rate": 0.0005, "loss": 0.8864, "step": 377900 }, { "epoch": 29.64, "learning_rate": 0.0005, "loss": 0.8682, "step": 378000 }, { "epoch": 29.65, "learning_rate": 0.0005, "loss": 0.9041, "step": 378100 }, { "epoch": 29.66, "learning_rate": 0.0005, "loss": 0.8807, "step": 378200 }, { "epoch": 29.67, "learning_rate": 0.0005, "loss": 0.8757, "step": 378300 }, { "epoch": 29.67, "learning_rate": 0.0005, "loss": 0.8837, "step": 378400 }, { "epoch": 29.68, "learning_rate": 0.0005, "loss": 0.8701, "step": 378500 }, { "epoch": 29.69, "learning_rate": 0.0005, "loss": 0.8868, "step": 378600 }, { "epoch": 29.7, "learning_rate": 0.0005, "loss": 0.8875, "step": 378700 }, { "epoch": 29.71, "learning_rate": 0.0005, "loss": 0.8934, "step": 378800 }, { "epoch": 29.71, "learning_rate": 0.0005, "loss": 0.8924, "step": 378900 }, { "epoch": 29.72, "learning_rate": 0.0005, "loss": 0.8787, "step": 379000 }, { "epoch": 29.73, "learning_rate": 0.0005, "loss": 0.878, "step": 379100 }, { "epoch": 29.74, "learning_rate": 0.0005, "loss": 0.9067, "step": 379200 }, { "epoch": 29.74, "learning_rate": 0.0005, "loss": 0.8977, "step": 379300 }, { "epoch": 29.75, "learning_rate": 0.0005, "loss": 0.8806, "step": 379400 }, { "epoch": 29.76, "learning_rate": 0.0005, "loss": 0.8896, "step": 379500 }, { "epoch": 29.77, "learning_rate": 0.0005, "loss": 0.8944, "step": 379600 }, { "epoch": 29.78, "learning_rate": 0.0005, "loss": 0.9, "step": 379700 }, { "epoch": 29.78, "learning_rate": 0.0005, "loss": 0.8928, "step": 379800 }, { "epoch": 29.79, "learning_rate": 0.0005, "loss": 0.8904, "step": 379900 }, { "epoch": 29.8, "learning_rate": 0.0005, "loss": 0.8929, "step": 380000 }, { "epoch": 29.8, "eval_gen_len": 18.703451929019156, "eval_loss": 2.553347110748291, "eval_rouge1": 35.8775, "eval_rouge2": 14.8452, "eval_rougeL": 29.6381, "eval_rougeLsum": 29.6405, "eval_runtime": 344.0885, "eval_samples_per_second": 32.919, "eval_steps_per_second": 2.058, "step": 380000 }, { "epoch": 29.81, "learning_rate": 0.0005, "loss": 0.9025, "step": 380100 }, { "epoch": 29.81, "learning_rate": 0.0005, "loss": 0.8897, "step": 380200 }, { "epoch": 29.82, "learning_rate": 0.0005, "loss": 0.8934, "step": 380300 }, { "epoch": 29.83, "learning_rate": 0.0005, "loss": 0.8792, "step": 380400 }, { "epoch": 29.84, "learning_rate": 0.0005, "loss": 0.9071, "step": 380500 }, { "epoch": 29.85, "learning_rate": 0.0005, "loss": 0.8875, "step": 380600 }, { "epoch": 29.85, "learning_rate": 0.0005, "loss": 0.8598, "step": 380700 }, { "epoch": 29.86, "learning_rate": 0.0005, "loss": 0.9036, "step": 380800 }, { "epoch": 29.87, "learning_rate": 0.0005, "loss": 0.8997, "step": 380900 }, { "epoch": 29.88, "learning_rate": 0.0005, "loss": 0.9014, "step": 381000 }, { "epoch": 29.89, "learning_rate": 0.0005, "loss": 0.8798, "step": 381100 }, { "epoch": 29.89, "learning_rate": 0.0005, "loss": 0.8978, "step": 381200 }, { "epoch": 29.9, "learning_rate": 0.0005, "loss": 0.8987, "step": 381300 }, { "epoch": 29.91, "learning_rate": 0.0005, "loss": 0.877, "step": 381400 }, { "epoch": 29.92, "learning_rate": 0.0005, "loss": 0.8892, "step": 381500 }, { "epoch": 29.92, "learning_rate": 0.0005, "loss": 0.9068, "step": 381600 }, { "epoch": 29.93, "learning_rate": 0.0005, "loss": 0.8889, "step": 381700 }, { "epoch": 29.94, "learning_rate": 0.0005, "loss": 0.8967, "step": 381800 }, { "epoch": 29.95, "learning_rate": 0.0005, "loss": 0.8883, "step": 381900 }, { "epoch": 29.96, "learning_rate": 0.0005, "loss": 0.9073, "step": 382000 }, { "epoch": 29.96, "learning_rate": 0.0005, "loss": 0.9061, "step": 382100 }, { "epoch": 29.97, "learning_rate": 0.0005, "loss": 0.8932, "step": 382200 }, { "epoch": 29.98, "learning_rate": 0.0005, "loss": 0.9205, "step": 382300 }, { "epoch": 29.99, "learning_rate": 0.0005, "loss": 0.9188, "step": 382400 }, { "epoch": 30.0, "learning_rate": 0.0005, "loss": 0.9018, "step": 382500 }, { "epoch": 30.0, "learning_rate": 0.0005, "loss": 0.8755, "step": 382600 }, { "epoch": 30.01, "learning_rate": 0.0005, "loss": 0.8238, "step": 382700 }, { "epoch": 30.02, "learning_rate": 0.0005, "loss": 0.8308, "step": 382800 }, { "epoch": 30.03, "learning_rate": 0.0005, "loss": 0.8384, "step": 382900 }, { "epoch": 30.03, "learning_rate": 0.0005, "loss": 0.8284, "step": 383000 }, { "epoch": 30.04, "learning_rate": 0.0005, "loss": 0.8354, "step": 383100 }, { "epoch": 30.05, "learning_rate": 0.0005, "loss": 0.8276, "step": 383200 }, { "epoch": 30.06, "learning_rate": 0.0005, "loss": 0.8413, "step": 383300 }, { "epoch": 30.07, "learning_rate": 0.0005, "loss": 0.8349, "step": 383400 }, { "epoch": 30.07, "learning_rate": 0.0005, "loss": 0.8465, "step": 383500 }, { "epoch": 30.08, "learning_rate": 0.0005, "loss": 0.8471, "step": 383600 }, { "epoch": 30.09, "learning_rate": 0.0005, "loss": 0.851, "step": 383700 }, { "epoch": 30.1, "learning_rate": 0.0005, "loss": 0.8236, "step": 383800 }, { "epoch": 30.11, "learning_rate": 0.0005, "loss": 0.8438, "step": 383900 }, { "epoch": 30.11, "learning_rate": 0.0005, "loss": 0.8308, "step": 384000 }, { "epoch": 30.12, "learning_rate": 0.0005, "loss": 0.8391, "step": 384100 }, { "epoch": 30.13, "learning_rate": 0.0005, "loss": 0.8476, "step": 384200 }, { "epoch": 30.14, "learning_rate": 0.0005, "loss": 0.8446, "step": 384300 }, { "epoch": 30.14, "learning_rate": 0.0005, "loss": 0.8403, "step": 384400 }, { "epoch": 30.15, "learning_rate": 0.0005, "loss": 0.8481, "step": 384500 }, { "epoch": 30.16, "learning_rate": 0.0005, "loss": 0.8366, "step": 384600 }, { "epoch": 30.17, "learning_rate": 0.0005, "loss": 0.8312, "step": 384700 }, { "epoch": 30.18, "learning_rate": 0.0005, "loss": 0.8391, "step": 384800 }, { "epoch": 30.18, "learning_rate": 0.0005, "loss": 0.825, "step": 384900 }, { "epoch": 30.19, "learning_rate": 0.0005, "loss": 0.8344, "step": 385000 }, { "epoch": 30.2, "learning_rate": 0.0005, "loss": 0.8362, "step": 385100 }, { "epoch": 30.21, "learning_rate": 0.0005, "loss": 0.8422, "step": 385200 }, { "epoch": 30.21, "learning_rate": 0.0005, "loss": 0.8838, "step": 385300 }, { "epoch": 30.22, "learning_rate": 0.0005, "loss": 0.838, "step": 385400 }, { "epoch": 30.23, "learning_rate": 0.0005, "loss": 0.8577, "step": 385500 }, { "epoch": 30.24, "learning_rate": 0.0005, "loss": 0.8481, "step": 385600 }, { "epoch": 30.25, "learning_rate": 0.0005, "loss": 0.8423, "step": 385700 }, { "epoch": 30.25, "learning_rate": 0.0005, "loss": 0.8441, "step": 385800 }, { "epoch": 30.26, "learning_rate": 0.0005, "loss": 0.8661, "step": 385900 }, { "epoch": 30.27, "learning_rate": 0.0005, "loss": 0.8555, "step": 386000 }, { "epoch": 30.28, "learning_rate": 0.0005, "loss": 0.8512, "step": 386100 }, { "epoch": 30.29, "learning_rate": 0.0005, "loss": 0.853, "step": 386200 }, { "epoch": 30.29, "learning_rate": 0.0005, "loss": 0.8591, "step": 386300 }, { "epoch": 30.3, "learning_rate": 0.0005, "loss": 0.8608, "step": 386400 }, { "epoch": 30.31, "learning_rate": 0.0005, "loss": 0.8438, "step": 386500 }, { "epoch": 30.32, "learning_rate": 0.0005, "loss": 0.8514, "step": 386600 }, { "epoch": 30.32, "learning_rate": 0.0005, "loss": 0.8619, "step": 386700 }, { "epoch": 30.33, "learning_rate": 0.0005, "loss": 0.8399, "step": 386800 }, { "epoch": 30.34, "learning_rate": 0.0005, "loss": 0.8481, "step": 386900 }, { "epoch": 30.35, "learning_rate": 0.0005, "loss": 0.8487, "step": 387000 }, { "epoch": 30.36, "learning_rate": 0.0005, "loss": 0.8656, "step": 387100 }, { "epoch": 30.36, "learning_rate": 0.0005, "loss": 0.8567, "step": 387200 }, { "epoch": 30.37, "learning_rate": 0.0005, "loss": 0.8613, "step": 387300 }, { "epoch": 30.38, "learning_rate": 0.0005, "loss": 0.8593, "step": 387400 }, { "epoch": 30.39, "learning_rate": 0.0005, "loss": 0.8511, "step": 387500 }, { "epoch": 30.4, "learning_rate": 0.0005, "loss": 0.8573, "step": 387600 }, { "epoch": 30.4, "learning_rate": 0.0005, "loss": 0.8588, "step": 387700 }, { "epoch": 30.41, "learning_rate": 0.0005, "loss": 0.8709, "step": 387800 }, { "epoch": 30.42, "learning_rate": 0.0005, "loss": 0.874, "step": 387900 }, { "epoch": 30.43, "learning_rate": 0.0005, "loss": 0.8742, "step": 388000 }, { "epoch": 30.43, "learning_rate": 0.0005, "loss": 0.8474, "step": 388100 }, { "epoch": 30.44, "learning_rate": 0.0005, "loss": 0.8629, "step": 388200 }, { "epoch": 30.45, "learning_rate": 0.0005, "loss": 0.8897, "step": 388300 }, { "epoch": 30.46, "learning_rate": 0.0005, "loss": 0.8466, "step": 388400 }, { "epoch": 30.47, "learning_rate": 0.0005, "loss": 0.8659, "step": 388500 }, { "epoch": 30.47, "learning_rate": 0.0005, "loss": 0.871, "step": 388600 }, { "epoch": 30.48, "learning_rate": 0.0005, "loss": 0.871, "step": 388700 }, { "epoch": 30.49, "learning_rate": 0.0005, "loss": 0.8802, "step": 388800 }, { "epoch": 30.5, "learning_rate": 0.0005, "loss": 0.8554, "step": 388900 }, { "epoch": 30.51, "learning_rate": 0.0005, "loss": 0.8594, "step": 389000 }, { "epoch": 30.51, "learning_rate": 0.0005, "loss": 0.8702, "step": 389100 }, { "epoch": 30.52, "learning_rate": 0.0005, "loss": 0.8748, "step": 389200 }, { "epoch": 30.53, "learning_rate": 0.0005, "loss": 0.8588, "step": 389300 }, { "epoch": 30.54, "learning_rate": 0.0005, "loss": 0.8666, "step": 389400 }, { "epoch": 30.54, "learning_rate": 0.0005, "loss": 0.8618, "step": 389500 }, { "epoch": 30.55, "learning_rate": 0.0005, "loss": 0.8643, "step": 389600 }, { "epoch": 30.56, "learning_rate": 0.0005, "loss": 0.8677, "step": 389700 }, { "epoch": 30.57, "learning_rate": 0.0005, "loss": 0.8715, "step": 389800 }, { "epoch": 30.58, "learning_rate": 0.0005, "loss": 0.866, "step": 389900 }, { "epoch": 30.58, "learning_rate": 0.0005, "loss": 0.857, "step": 390000 }, { "epoch": 30.58, "eval_gen_len": 18.76269091551161, "eval_loss": 2.564743757247925, "eval_rouge1": 35.752, "eval_rouge2": 14.8207, "eval_rougeL": 29.5313, "eval_rougeLsum": 29.5352, "eval_runtime": 343.9829, "eval_samples_per_second": 32.929, "eval_steps_per_second": 2.058, "step": 390000 }, { "epoch": 30.59, "learning_rate": 0.0005, "loss": 0.8713, "step": 390100 }, { "epoch": 30.6, "learning_rate": 0.0005, "loss": 0.8539, "step": 390200 }, { "epoch": 30.61, "learning_rate": 0.0005, "loss": 0.8625, "step": 390300 }, { "epoch": 30.61, "learning_rate": 0.0005, "loss": 0.8884, "step": 390400 }, { "epoch": 30.62, "learning_rate": 0.0005, "loss": 0.8713, "step": 390500 }, { "epoch": 30.63, "learning_rate": 0.0005, "loss": 0.8708, "step": 390600 }, { "epoch": 30.64, "learning_rate": 0.0005, "loss": 0.8752, "step": 390700 }, { "epoch": 30.65, "learning_rate": 0.0005, "loss": 0.8657, "step": 390800 }, { "epoch": 30.65, "learning_rate": 0.0005, "loss": 0.8835, "step": 390900 }, { "epoch": 30.66, "learning_rate": 0.0005, "loss": 0.8784, "step": 391000 }, { "epoch": 30.67, "learning_rate": 0.0005, "loss": 0.8621, "step": 391100 }, { "epoch": 30.68, "learning_rate": 0.0005, "loss": 0.8807, "step": 391200 }, { "epoch": 30.69, "learning_rate": 0.0005, "loss": 0.8958, "step": 391300 }, { "epoch": 30.69, "learning_rate": 0.0005, "loss": 0.8668, "step": 391400 }, { "epoch": 30.7, "learning_rate": 0.0005, "loss": 0.8647, "step": 391500 }, { "epoch": 30.71, "learning_rate": 0.0005, "loss": 0.8656, "step": 391600 }, { "epoch": 30.72, "learning_rate": 0.0005, "loss": 0.8991, "step": 391700 }, { "epoch": 30.72, "learning_rate": 0.0005, "loss": 0.8732, "step": 391800 }, { "epoch": 30.73, "learning_rate": 0.0005, "loss": 0.8672, "step": 391900 }, { "epoch": 30.74, "learning_rate": 0.0005, "loss": 0.8937, "step": 392000 }, { "epoch": 30.75, "learning_rate": 0.0005, "loss": 0.8924, "step": 392100 }, { "epoch": 30.76, "learning_rate": 0.0005, "loss": 0.8955, "step": 392200 }, { "epoch": 30.76, "learning_rate": 0.0005, "loss": 0.8815, "step": 392300 }, { "epoch": 30.77, "learning_rate": 0.0005, "loss": 0.8734, "step": 392400 }, { "epoch": 30.78, "learning_rate": 0.0005, "loss": 0.8741, "step": 392500 }, { "epoch": 30.79, "learning_rate": 0.0005, "loss": 0.8778, "step": 392600 }, { "epoch": 30.8, "learning_rate": 0.0005, "loss": 0.8865, "step": 392700 }, { "epoch": 30.8, "learning_rate": 0.0005, "loss": 0.8772, "step": 392800 }, { "epoch": 30.81, "learning_rate": 0.0005, "loss": 0.8612, "step": 392900 }, { "epoch": 30.82, "learning_rate": 0.0005, "loss": 0.8835, "step": 393000 }, { "epoch": 30.83, "learning_rate": 0.0005, "loss": 0.8639, "step": 393100 }, { "epoch": 30.83, "learning_rate": 0.0005, "loss": 0.871, "step": 393200 }, { "epoch": 30.84, "learning_rate": 0.0005, "loss": 0.8982, "step": 393300 }, { "epoch": 30.85, "learning_rate": 0.0005, "loss": 0.8853, "step": 393400 }, { "epoch": 30.86, "learning_rate": 0.0005, "loss": 0.8718, "step": 393500 }, { "epoch": 30.87, "learning_rate": 0.0005, "loss": 0.9022, "step": 393600 }, { "epoch": 30.87, "learning_rate": 0.0005, "loss": 0.8702, "step": 393700 }, { "epoch": 30.88, "learning_rate": 0.0005, "loss": 0.9148, "step": 393800 }, { "epoch": 30.89, "learning_rate": 0.0005, "loss": 0.9045, "step": 393900 }, { "epoch": 30.9, "learning_rate": 0.0005, "loss": 0.8866, "step": 394000 }, { "epoch": 30.9, "learning_rate": 0.0005, "loss": 0.8754, "step": 394100 }, { "epoch": 30.91, "learning_rate": 0.0005, "loss": 0.8849, "step": 394200 }, { "epoch": 30.92, "learning_rate": 0.0005, "loss": 0.8729, "step": 394300 }, { "epoch": 30.93, "learning_rate": 0.0005, "loss": 0.8926, "step": 394400 }, { "epoch": 30.94, "learning_rate": 0.0005, "loss": 0.8843, "step": 394500 }, { "epoch": 30.94, "learning_rate": 0.0005, "loss": 0.8856, "step": 394600 }, { "epoch": 30.95, "learning_rate": 0.0005, "loss": 0.9011, "step": 394700 }, { "epoch": 30.96, "learning_rate": 0.0005, "loss": 0.8963, "step": 394800 }, { "epoch": 30.97, "learning_rate": 0.0005, "loss": 0.8861, "step": 394900 }, { "epoch": 30.98, "learning_rate": 0.0005, "loss": 0.875, "step": 395000 }, { "epoch": 30.98, "learning_rate": 0.0005, "loss": 0.8904, "step": 395100 }, { "epoch": 30.99, "learning_rate": 0.0005, "loss": 0.8788, "step": 395200 }, { "epoch": 31.0, "learning_rate": 0.0005, "loss": 0.901, "step": 395300 }, { "epoch": 31.01, "learning_rate": 0.0005, "loss": 0.8241, "step": 395400 }, { "epoch": 31.01, "learning_rate": 0.0005, "loss": 0.8258, "step": 395500 }, { "epoch": 31.02, "learning_rate": 0.0005, "loss": 0.8048, "step": 395600 }, { "epoch": 31.03, "learning_rate": 0.0005, "loss": 0.8037, "step": 395700 }, { "epoch": 31.04, "learning_rate": 0.0005, "loss": 0.8181, "step": 395800 }, { "epoch": 31.05, "learning_rate": 0.0005, "loss": 0.8227, "step": 395900 }, { "epoch": 31.05, "learning_rate": 0.0005, "loss": 0.826, "step": 396000 }, { "epoch": 31.06, "learning_rate": 0.0005, "loss": 0.8066, "step": 396100 }, { "epoch": 31.07, "learning_rate": 0.0005, "loss": 0.8147, "step": 396200 }, { "epoch": 31.08, "learning_rate": 0.0005, "loss": 0.8222, "step": 396300 }, { "epoch": 31.09, "learning_rate": 0.0005, "loss": 0.8213, "step": 396400 }, { "epoch": 31.09, "learning_rate": 0.0005, "loss": 0.8357, "step": 396500 }, { "epoch": 31.1, "learning_rate": 0.0005, "loss": 0.8089, "step": 396600 }, { "epoch": 31.11, "learning_rate": 0.0005, "loss": 0.8465, "step": 396700 }, { "epoch": 31.12, "learning_rate": 0.0005, "loss": 0.8177, "step": 396800 }, { "epoch": 31.12, "learning_rate": 0.0005, "loss": 0.8205, "step": 396900 }, { "epoch": 31.13, "learning_rate": 0.0005, "loss": 0.8268, "step": 397000 }, { "epoch": 31.14, "learning_rate": 0.0005, "loss": 0.8385, "step": 397100 }, { "epoch": 31.15, "learning_rate": 0.0005, "loss": 0.8185, "step": 397200 }, { "epoch": 31.16, "learning_rate": 0.0005, "loss": 0.8325, "step": 397300 }, { "epoch": 31.16, "learning_rate": 0.0005, "loss": 0.8368, "step": 397400 }, { "epoch": 31.17, "learning_rate": 0.0005, "loss": 0.824, "step": 397500 }, { "epoch": 31.18, "learning_rate": 0.0005, "loss": 0.8224, "step": 397600 }, { "epoch": 31.19, "learning_rate": 0.0005, "loss": 0.8478, "step": 397700 }, { "epoch": 31.2, "learning_rate": 0.0005, "loss": 0.8418, "step": 397800 }, { "epoch": 31.2, "learning_rate": 0.0005, "loss": 0.831, "step": 397900 }, { "epoch": 31.21, "learning_rate": 0.0005, "loss": 0.8311, "step": 398000 }, { "epoch": 31.22, "learning_rate": 0.0005, "loss": 0.8241, "step": 398100 }, { "epoch": 31.23, "learning_rate": 0.0005, "loss": 0.8347, "step": 398200 }, { "epoch": 31.23, "learning_rate": 0.0005, "loss": 0.8421, "step": 398300 }, { "epoch": 31.24, "learning_rate": 0.0005, "loss": 0.8371, "step": 398400 }, { "epoch": 31.25, "learning_rate": 0.0005, "loss": 0.8403, "step": 398500 }, { "epoch": 31.26, "learning_rate": 0.0005, "loss": 0.8554, "step": 398600 }, { "epoch": 31.27, "learning_rate": 0.0005, "loss": 0.8366, "step": 398700 }, { "epoch": 31.27, "learning_rate": 0.0005, "loss": 0.835, "step": 398800 }, { "epoch": 31.28, "learning_rate": 0.0005, "loss": 0.8565, "step": 398900 }, { "epoch": 31.29, "learning_rate": 0.0005, "loss": 0.8365, "step": 399000 }, { "epoch": 31.3, "learning_rate": 0.0005, "loss": 0.8549, "step": 399100 }, { "epoch": 31.3, "learning_rate": 0.0005, "loss": 0.8254, "step": 399200 }, { "epoch": 31.31, "learning_rate": 0.0005, "loss": 0.8566, "step": 399300 }, { "epoch": 31.32, "learning_rate": 0.0005, "loss": 0.8628, "step": 399400 }, { "epoch": 31.33, "learning_rate": 0.0005, "loss": 0.8504, "step": 399500 }, { "epoch": 31.34, "learning_rate": 0.0005, "loss": 0.8688, "step": 399600 }, { "epoch": 31.34, "learning_rate": 0.0005, "loss": 0.8524, "step": 399700 }, { "epoch": 31.35, "learning_rate": 0.0005, "loss": 0.8486, "step": 399800 }, { "epoch": 31.36, "learning_rate": 0.0005, "loss": 0.8542, "step": 399900 }, { "epoch": 31.37, "learning_rate": 0.0005, "loss": 0.8505, "step": 400000 }, { "epoch": 31.37, "eval_gen_len": 18.719872870133308, "eval_loss": 2.595099925994873, "eval_rouge1": 35.7844, "eval_rouge2": 14.8342, "eval_rougeL": 29.5181, "eval_rougeLsum": 29.5218, "eval_runtime": 342.3534, "eval_samples_per_second": 33.086, "eval_steps_per_second": 2.068, "step": 400000 }, { "epoch": 31.38, "learning_rate": 0.0005, "loss": 0.8618, "step": 400100 }, { "epoch": 31.38, "learning_rate": 0.0005, "loss": 0.8535, "step": 400200 }, { "epoch": 31.39, "learning_rate": 0.0005, "loss": 0.8547, "step": 400300 }, { "epoch": 31.4, "learning_rate": 0.0005, "loss": 0.8452, "step": 400400 }, { "epoch": 31.41, "learning_rate": 0.0005, "loss": 0.8375, "step": 400500 }, { "epoch": 31.41, "learning_rate": 0.0005, "loss": 0.8443, "step": 400600 }, { "epoch": 31.42, "learning_rate": 0.0005, "loss": 0.8542, "step": 400700 }, { "epoch": 31.43, "learning_rate": 0.0005, "loss": 0.8519, "step": 400800 }, { "epoch": 31.44, "learning_rate": 0.0005, "loss": 0.8681, "step": 400900 }, { "epoch": 31.45, "learning_rate": 0.0005, "loss": 0.8433, "step": 401000 }, { "epoch": 31.45, "learning_rate": 0.0005, "loss": 0.8371, "step": 401100 }, { "epoch": 31.46, "learning_rate": 0.0005, "loss": 0.8632, "step": 401200 }, { "epoch": 31.47, "learning_rate": 0.0005, "loss": 0.8532, "step": 401300 }, { "epoch": 31.48, "learning_rate": 0.0005, "loss": 0.8427, "step": 401400 }, { "epoch": 31.49, "learning_rate": 0.0005, "loss": 0.8553, "step": 401500 }, { "epoch": 31.49, "learning_rate": 0.0005, "loss": 0.8598, "step": 401600 }, { "epoch": 31.5, "learning_rate": 0.0005, "loss": 0.8512, "step": 401700 }, { "epoch": 31.51, "learning_rate": 0.0005, "loss": 0.8387, "step": 401800 }, { "epoch": 31.52, "learning_rate": 0.0005, "loss": 0.855, "step": 401900 }, { "epoch": 31.52, "learning_rate": 0.0005, "loss": 0.8589, "step": 402000 }, { "epoch": 31.53, "learning_rate": 0.0005, "loss": 0.8537, "step": 402100 }, { "epoch": 31.54, "learning_rate": 0.0005, "loss": 0.8422, "step": 402200 }, { "epoch": 31.55, "learning_rate": 0.0005, "loss": 0.8759, "step": 402300 }, { "epoch": 31.56, "learning_rate": 0.0005, "loss": 0.8618, "step": 402400 }, { "epoch": 31.56, "learning_rate": 0.0005, "loss": 0.859, "step": 402500 }, { "epoch": 31.57, "learning_rate": 0.0005, "loss": 0.8693, "step": 402600 }, { "epoch": 31.58, "learning_rate": 0.0005, "loss": 0.863, "step": 402700 }, { "epoch": 31.59, "learning_rate": 0.0005, "loss": 0.8679, "step": 402800 }, { "epoch": 31.6, "learning_rate": 0.0005, "loss": 0.866, "step": 402900 }, { "epoch": 31.6, "learning_rate": 0.0005, "loss": 0.8508, "step": 403000 }, { "epoch": 31.61, "learning_rate": 0.0005, "loss": 0.851, "step": 403100 }, { "epoch": 31.62, "learning_rate": 0.0005, "loss": 0.8433, "step": 403200 }, { "epoch": 31.63, "learning_rate": 0.0005, "loss": 0.8641, "step": 403300 }, { "epoch": 31.63, "learning_rate": 0.0005, "loss": 0.8781, "step": 403400 }, { "epoch": 31.64, "learning_rate": 0.0005, "loss": 0.8403, "step": 403500 }, { "epoch": 31.65, "learning_rate": 0.0005, "loss": 0.8734, "step": 403600 }, { "epoch": 31.66, "learning_rate": 0.0005, "loss": 0.8626, "step": 403700 }, { "epoch": 31.67, "learning_rate": 0.0005, "loss": 0.8661, "step": 403800 }, { "epoch": 31.67, "learning_rate": 0.0005, "loss": 0.8631, "step": 403900 }, { "epoch": 31.68, "learning_rate": 0.0005, "loss": 0.8629, "step": 404000 }, { "epoch": 31.69, "learning_rate": 0.0005, "loss": 0.8507, "step": 404100 }, { "epoch": 31.7, "learning_rate": 0.0005, "loss": 0.8746, "step": 404200 }, { "epoch": 31.7, "learning_rate": 0.0005, "loss": 0.8681, "step": 404300 }, { "epoch": 31.71, "learning_rate": 0.0005, "loss": 0.8592, "step": 404400 }, { "epoch": 31.72, "learning_rate": 0.0005, "loss": 0.8675, "step": 404500 }, { "epoch": 31.73, "learning_rate": 0.0005, "loss": 0.8671, "step": 404600 }, { "epoch": 31.74, "learning_rate": 0.0005, "loss": 0.8652, "step": 404700 }, { "epoch": 31.74, "learning_rate": 0.0005, "loss": 0.8773, "step": 404800 }, { "epoch": 31.75, "learning_rate": 0.0005, "loss": 0.8654, "step": 404900 }, { "epoch": 31.76, "learning_rate": 0.0005, "loss": 0.8605, "step": 405000 }, { "epoch": 31.77, "learning_rate": 0.0005, "loss": 0.8804, "step": 405100 }, { "epoch": 31.78, "learning_rate": 0.0005, "loss": 0.8659, "step": 405200 }, { "epoch": 31.78, "learning_rate": 0.0005, "loss": 0.8667, "step": 405300 }, { "epoch": 31.79, "learning_rate": 0.0005, "loss": 0.8781, "step": 405400 }, { "epoch": 31.8, "learning_rate": 0.0005, "loss": 0.8708, "step": 405500 }, { "epoch": 31.81, "learning_rate": 0.0005, "loss": 0.8592, "step": 405600 }, { "epoch": 31.81, "learning_rate": 0.0005, "loss": 0.8699, "step": 405700 }, { "epoch": 31.82, "learning_rate": 0.0005, "loss": 0.8521, "step": 405800 }, { "epoch": 31.83, "learning_rate": 0.0005, "loss": 0.8488, "step": 405900 }, { "epoch": 31.84, "learning_rate": 0.0005, "loss": 0.8702, "step": 406000 }, { "epoch": 31.85, "learning_rate": 0.0005, "loss": 0.8754, "step": 406100 }, { "epoch": 31.85, "learning_rate": 0.0005, "loss": 0.8887, "step": 406200 }, { "epoch": 31.86, "learning_rate": 0.0005, "loss": 0.8962, "step": 406300 }, { "epoch": 31.87, "learning_rate": 0.0005, "loss": 0.8809, "step": 406400 }, { "epoch": 31.88, "learning_rate": 0.0005, "loss": 0.8716, "step": 406500 }, { "epoch": 31.89, "learning_rate": 0.0005, "loss": 0.8661, "step": 406600 }, { "epoch": 31.89, "learning_rate": 0.0005, "loss": 0.8727, "step": 406700 }, { "epoch": 31.9, "learning_rate": 0.0005, "loss": 0.8766, "step": 406800 }, { "epoch": 31.91, "learning_rate": 0.0005, "loss": 0.8715, "step": 406900 }, { "epoch": 31.92, "learning_rate": 0.0005, "loss": 0.8755, "step": 407000 }, { "epoch": 31.92, "learning_rate": 0.0005, "loss": 0.8814, "step": 407100 }, { "epoch": 31.93, "learning_rate": 0.0005, "loss": 0.8662, "step": 407200 }, { "epoch": 31.94, "learning_rate": 0.0005, "loss": 0.8679, "step": 407300 }, { "epoch": 31.95, "learning_rate": 0.0005, "loss": 0.884, "step": 407400 }, { "epoch": 31.96, "learning_rate": 0.0005, "loss": 0.8806, "step": 407500 }, { "epoch": 31.96, "learning_rate": 0.0005, "loss": 0.8633, "step": 407600 }, { "epoch": 31.97, "learning_rate": 0.0005, "loss": 0.876, "step": 407700 }, { "epoch": 31.98, "learning_rate": 0.0005, "loss": 0.8903, "step": 407800 }, { "epoch": 31.99, "learning_rate": 0.0005, "loss": 0.8641, "step": 407900 }, { "epoch": 31.99, "learning_rate": 0.0005, "loss": 0.8814, "step": 408000 }, { "epoch": 32.0, "learning_rate": 0.0005, "loss": 0.8659, "step": 408100 }, { "epoch": 32.01, "learning_rate": 0.0005, "loss": 0.8103, "step": 408200 }, { "epoch": 32.02, "learning_rate": 0.0005, "loss": 0.8034, "step": 408300 }, { "epoch": 32.03, "learning_rate": 0.0005, "loss": 0.8117, "step": 408400 }, { "epoch": 32.03, "learning_rate": 0.0005, "loss": 0.8234, "step": 408500 }, { "epoch": 32.04, "learning_rate": 0.0005, "loss": 0.8076, "step": 408600 }, { "epoch": 32.05, "learning_rate": 0.0005, "loss": 0.8231, "step": 408700 }, { "epoch": 32.06, "learning_rate": 0.0005, "loss": 0.8245, "step": 408800 }, { "epoch": 32.07, "learning_rate": 0.0005, "loss": 0.8092, "step": 408900 }, { "epoch": 32.07, "learning_rate": 0.0005, "loss": 0.8033, "step": 409000 }, { "epoch": 32.08, "learning_rate": 0.0005, "loss": 0.802, "step": 409100 }, { "epoch": 32.09, "learning_rate": 0.0005, "loss": 0.8198, "step": 409200 }, { "epoch": 32.1, "learning_rate": 0.0005, "loss": 0.8133, "step": 409300 }, { "epoch": 32.1, "learning_rate": 0.0005, "loss": 0.8177, "step": 409400 }, { "epoch": 32.11, "learning_rate": 0.0005, "loss": 0.8127, "step": 409500 }, { "epoch": 32.12, "learning_rate": 0.0005, "loss": 0.8241, "step": 409600 }, { "epoch": 32.13, "learning_rate": 0.0005, "loss": 0.8276, "step": 409700 }, { "epoch": 32.14, "learning_rate": 0.0005, "loss": 0.815, "step": 409800 }, { "epoch": 32.14, "learning_rate": 0.0005, "loss": 0.8279, "step": 409900 }, { "epoch": 32.15, "learning_rate": 0.0005, "loss": 0.8233, "step": 410000 }, { "epoch": 32.15, "eval_gen_len": 18.735410964951, "eval_loss": 2.6317381858825684, "eval_rouge1": 35.8198, "eval_rouge2": 14.9264, "eval_rougeL": 29.5674, "eval_rougeLsum": 29.564, "eval_runtime": 343.4071, "eval_samples_per_second": 32.984, "eval_steps_per_second": 2.062, "step": 410000 }, { "epoch": 32.16, "learning_rate": 0.0005, "loss": 0.8139, "step": 410100 }, { "epoch": 32.17, "learning_rate": 0.0005, "loss": 0.823, "step": 410200 }, { "epoch": 32.18, "learning_rate": 0.0005, "loss": 0.819, "step": 410300 }, { "epoch": 32.18, "learning_rate": 0.0005, "loss": 0.8439, "step": 410400 }, { "epoch": 32.19, "learning_rate": 0.0005, "loss": 0.8349, "step": 410500 }, { "epoch": 32.2, "learning_rate": 0.0005, "loss": 0.8028, "step": 410600 }, { "epoch": 32.21, "learning_rate": 0.0005, "loss": 0.8309, "step": 410700 }, { "epoch": 32.21, "learning_rate": 0.0005, "loss": 0.8243, "step": 410800 }, { "epoch": 32.22, "learning_rate": 0.0005, "loss": 0.8322, "step": 410900 }, { "epoch": 32.23, "learning_rate": 0.0005, "loss": 0.816, "step": 411000 }, { "epoch": 32.24, "learning_rate": 0.0005, "loss": 0.8437, "step": 411100 }, { "epoch": 32.25, "learning_rate": 0.0005, "loss": 0.8385, "step": 411200 }, { "epoch": 32.25, "learning_rate": 0.0005, "loss": 0.8055, "step": 411300 }, { "epoch": 32.26, "learning_rate": 0.0005, "loss": 0.8231, "step": 411400 }, { "epoch": 32.27, "learning_rate": 0.0005, "loss": 0.8337, "step": 411500 }, { "epoch": 32.28, "learning_rate": 0.0005, "loss": 0.8319, "step": 411600 }, { "epoch": 32.29, "learning_rate": 0.0005, "loss": 0.8245, "step": 411700 }, { "epoch": 32.29, "learning_rate": 0.0005, "loss": 0.8285, "step": 411800 }, { "epoch": 32.3, "learning_rate": 0.0005, "loss": 0.8207, "step": 411900 }, { "epoch": 32.31, "learning_rate": 0.0005, "loss": 0.8266, "step": 412000 }, { "epoch": 32.32, "learning_rate": 0.0005, "loss": 0.832, "step": 412100 }, { "epoch": 32.32, "learning_rate": 0.0005, "loss": 0.84, "step": 412200 }, { "epoch": 32.33, "learning_rate": 0.0005, "loss": 0.8291, "step": 412300 }, { "epoch": 32.34, "learning_rate": 0.0005, "loss": 0.8189, "step": 412400 }, { "epoch": 32.35, "learning_rate": 0.0005, "loss": 0.8504, "step": 412500 }, { "epoch": 32.36, "learning_rate": 0.0005, "loss": 0.843, "step": 412600 }, { "epoch": 32.36, "learning_rate": 0.0005, "loss": 0.8379, "step": 412700 }, { "epoch": 32.37, "learning_rate": 0.0005, "loss": 0.8225, "step": 412800 }, { "epoch": 32.38, "learning_rate": 0.0005, "loss": 0.8354, "step": 412900 }, { "epoch": 32.39, "learning_rate": 0.0005, "loss": 0.8359, "step": 413000 }, { "epoch": 32.39, "learning_rate": 0.0005, "loss": 0.8427, "step": 413100 }, { "epoch": 32.4, "learning_rate": 0.0005, "loss": 0.8146, "step": 413200 }, { "epoch": 32.41, "learning_rate": 0.0005, "loss": 0.8387, "step": 413300 }, { "epoch": 32.42, "learning_rate": 0.0005, "loss": 0.8524, "step": 413400 }, { "epoch": 32.43, "learning_rate": 0.0005, "loss": 0.8341, "step": 413500 }, { "epoch": 32.43, "learning_rate": 0.0005, "loss": 0.8378, "step": 413600 }, { "epoch": 32.44, "learning_rate": 0.0005, "loss": 0.8534, "step": 413700 }, { "epoch": 32.45, "learning_rate": 0.0005, "loss": 0.8507, "step": 413800 }, { "epoch": 32.46, "learning_rate": 0.0005, "loss": 0.8479, "step": 413900 }, { "epoch": 32.47, "learning_rate": 0.0005, "loss": 0.8138, "step": 414000 }, { "epoch": 32.47, "learning_rate": 0.0005, "loss": 0.8471, "step": 414100 }, { "epoch": 32.48, "learning_rate": 0.0005, "loss": 0.8454, "step": 414200 }, { "epoch": 32.49, "learning_rate": 0.0005, "loss": 0.8327, "step": 414300 }, { "epoch": 32.5, "learning_rate": 0.0005, "loss": 0.8529, "step": 414400 }, { "epoch": 32.5, "learning_rate": 0.0005, "loss": 0.8436, "step": 414500 }, { "epoch": 32.51, "learning_rate": 0.0005, "loss": 0.8353, "step": 414600 }, { "epoch": 32.52, "learning_rate": 0.0005, "loss": 0.8485, "step": 414700 }, { "epoch": 32.53, "learning_rate": 0.0005, "loss": 0.8327, "step": 414800 }, { "epoch": 32.54, "learning_rate": 0.0005, "loss": 0.832, "step": 414900 }, { "epoch": 32.54, "learning_rate": 0.0005, "loss": 0.8455, "step": 415000 }, { "epoch": 32.55, "learning_rate": 0.0005, "loss": 0.8402, "step": 415100 }, { "epoch": 32.56, "learning_rate": 0.0005, "loss": 0.8399, "step": 415200 }, { "epoch": 32.57, "learning_rate": 0.0005, "loss": 0.8634, "step": 415300 }, { "epoch": 32.58, "learning_rate": 0.0005, "loss": 0.8414, "step": 415400 }, { "epoch": 32.58, "learning_rate": 0.0005, "loss": 0.848, "step": 415500 }, { "epoch": 32.59, "learning_rate": 0.0005, "loss": 0.8433, "step": 415600 }, { "epoch": 32.6, "learning_rate": 0.0005, "loss": 0.85, "step": 415700 }, { "epoch": 32.61, "learning_rate": 0.0005, "loss": 0.8562, "step": 415800 }, { "epoch": 32.61, "learning_rate": 0.0005, "loss": 0.8374, "step": 415900 }, { "epoch": 32.62, "learning_rate": 0.0005, "loss": 0.8537, "step": 416000 }, { "epoch": 32.63, "learning_rate": 0.0005, "loss": 0.8536, "step": 416100 }, { "epoch": 32.64, "learning_rate": 0.0005, "loss": 0.8637, "step": 416200 }, { "epoch": 32.65, "learning_rate": 0.0005, "loss": 0.8491, "step": 416300 }, { "epoch": 32.65, "learning_rate": 0.0005, "loss": 0.851, "step": 416400 }, { "epoch": 32.66, "learning_rate": 0.0005, "loss": 0.8657, "step": 416500 }, { "epoch": 32.67, "learning_rate": 0.0005, "loss": 0.8447, "step": 416600 }, { "epoch": 32.68, "learning_rate": 0.0005, "loss": 0.8502, "step": 416700 }, { "epoch": 32.69, "learning_rate": 0.0005, "loss": 0.8609, "step": 416800 }, { "epoch": 32.69, "learning_rate": 0.0005, "loss": 0.8596, "step": 416900 }, { "epoch": 32.7, "learning_rate": 0.0005, "loss": 0.8572, "step": 417000 }, { "epoch": 32.71, "learning_rate": 0.0005, "loss": 0.8593, "step": 417100 }, { "epoch": 32.72, "learning_rate": 0.0005, "loss": 0.8561, "step": 417200 }, { "epoch": 32.72, "learning_rate": 0.0005, "loss": 0.8424, "step": 417300 }, { "epoch": 32.73, "learning_rate": 0.0005, "loss": 0.8594, "step": 417400 }, { "epoch": 32.74, "learning_rate": 0.0005, "loss": 0.8809, "step": 417500 }, { "epoch": 32.75, "learning_rate": 0.0005, "loss": 0.8634, "step": 417600 }, { "epoch": 32.76, "learning_rate": 0.0005, "loss": 0.8524, "step": 417700 }, { "epoch": 32.76, "learning_rate": 0.0005, "loss": 0.8551, "step": 417800 }, { "epoch": 32.77, "learning_rate": 0.0005, "loss": 0.8659, "step": 417900 }, { "epoch": 32.78, "learning_rate": 0.0005, "loss": 0.8441, "step": 418000 }, { "epoch": 32.79, "learning_rate": 0.0005, "loss": 0.8558, "step": 418100 }, { "epoch": 32.79, "learning_rate": 0.0005, "loss": 0.8747, "step": 418200 }, { "epoch": 32.8, "learning_rate": 0.0005, "loss": 0.8671, "step": 418300 }, { "epoch": 32.81, "learning_rate": 0.0005, "loss": 0.8614, "step": 418400 }, { "epoch": 32.82, "learning_rate": 0.0005, "loss": 0.8528, "step": 418500 }, { "epoch": 32.83, "learning_rate": 0.0005, "loss": 0.8658, "step": 418600 }, { "epoch": 32.83, "learning_rate": 0.0005, "loss": 0.8742, "step": 418700 }, { "epoch": 32.84, "learning_rate": 0.0005, "loss": 0.868, "step": 418800 }, { "epoch": 32.85, "learning_rate": 0.0005, "loss": 0.8659, "step": 418900 }, { "epoch": 32.86, "learning_rate": 0.0005, "loss": 0.8639, "step": 419000 }, { "epoch": 32.87, "learning_rate": 0.0005, "loss": 0.8603, "step": 419100 }, { "epoch": 32.87, "learning_rate": 0.0005, "loss": 0.8698, "step": 419200 }, { "epoch": 32.88, "learning_rate": 0.0005, "loss": 0.8699, "step": 419300 }, { "epoch": 32.89, "learning_rate": 0.0005, "loss": 0.8617, "step": 419400 }, { "epoch": 32.9, "learning_rate": 0.0005, "loss": 0.8572, "step": 419500 }, { "epoch": 32.9, "learning_rate": 0.0005, "loss": 0.8419, "step": 419600 }, { "epoch": 32.91, "learning_rate": 0.0005, "loss": 0.8746, "step": 419700 }, { "epoch": 32.92, "learning_rate": 0.0005, "loss": 0.8786, "step": 419800 }, { "epoch": 32.93, "learning_rate": 0.0005, "loss": 0.8527, "step": 419900 }, { "epoch": 32.94, "learning_rate": 0.0005, "loss": 0.8825, "step": 420000 }, { "epoch": 32.94, "eval_gen_len": 18.7486536593979, "eval_loss": 2.5763282775878906, "eval_rouge1": 35.6833, "eval_rouge2": 14.7874, "eval_rougeL": 29.531, "eval_rougeLsum": 29.5185, "eval_runtime": 344.2647, "eval_samples_per_second": 32.902, "eval_steps_per_second": 2.057, "step": 420000 }, { "epoch": 32.94, "learning_rate": 0.0005, "loss": 0.8753, "step": 420100 }, { "epoch": 32.95, "learning_rate": 0.0005, "loss": 0.8629, "step": 420200 }, { "epoch": 32.96, "learning_rate": 0.0005, "loss": 0.8628, "step": 420300 }, { "epoch": 32.97, "learning_rate": 0.0005, "loss": 0.8622, "step": 420400 }, { "epoch": 32.98, "learning_rate": 0.0005, "loss": 0.8761, "step": 420500 }, { "epoch": 32.98, "learning_rate": 0.0005, "loss": 0.8817, "step": 420600 }, { "epoch": 32.99, "learning_rate": 0.0005, "loss": 0.8628, "step": 420700 }, { "epoch": 33.0, "learning_rate": 0.0005, "loss": 0.8663, "step": 420800 }, { "epoch": 33.01, "learning_rate": 0.0005, "loss": 0.8209, "step": 420900 }, { "epoch": 33.01, "learning_rate": 0.0005, "loss": 0.7882, "step": 421000 }, { "epoch": 33.02, "learning_rate": 0.0005, "loss": 0.7939, "step": 421100 }, { "epoch": 33.03, "learning_rate": 0.0005, "loss": 0.7939, "step": 421200 }, { "epoch": 33.04, "learning_rate": 0.0005, "loss": 0.7872, "step": 421300 }, { "epoch": 33.05, "learning_rate": 0.0005, "loss": 0.7913, "step": 421400 }, { "epoch": 33.05, "learning_rate": 0.0005, "loss": 0.8159, "step": 421500 }, { "epoch": 33.06, "learning_rate": 0.0005, "loss": 0.7871, "step": 421600 }, { "epoch": 33.07, "learning_rate": 0.0005, "loss": 0.7965, "step": 421700 }, { "epoch": 33.08, "learning_rate": 0.0005, "loss": 0.7984, "step": 421800 }, { "epoch": 33.09, "learning_rate": 0.0005, "loss": 0.8078, "step": 421900 }, { "epoch": 33.09, "learning_rate": 0.0005, "loss": 0.8111, "step": 422000 }, { "epoch": 33.1, "learning_rate": 0.0005, "loss": 0.7984, "step": 422100 }, { "epoch": 33.11, "learning_rate": 0.0005, "loss": 0.8112, "step": 422200 }, { "epoch": 33.12, "learning_rate": 0.0005, "loss": 0.8259, "step": 422300 }, { "epoch": 33.12, "learning_rate": 0.0005, "loss": 0.8179, "step": 422400 }, { "epoch": 33.13, "learning_rate": 0.0005, "loss": 0.8379, "step": 422500 }, { "epoch": 33.14, "learning_rate": 0.0005, "loss": 0.8086, "step": 422600 }, { "epoch": 33.15, "learning_rate": 0.0005, "loss": 0.8182, "step": 422700 }, { "epoch": 33.16, "learning_rate": 0.0005, "loss": 0.8001, "step": 422800 }, { "epoch": 33.16, "learning_rate": 0.0005, "loss": 0.8129, "step": 422900 }, { "epoch": 33.17, "learning_rate": 0.0005, "loss": 0.8063, "step": 423000 }, { "epoch": 33.18, "learning_rate": 0.0005, "loss": 0.8167, "step": 423100 }, { "epoch": 33.19, "learning_rate": 0.0005, "loss": 0.8182, "step": 423200 }, { "epoch": 33.19, "learning_rate": 0.0005, "loss": 0.8283, "step": 423300 }, { "epoch": 33.2, "learning_rate": 0.0005, "loss": 0.807, "step": 423400 }, { "epoch": 33.21, "learning_rate": 0.0005, "loss": 0.8323, "step": 423500 }, { "epoch": 33.22, "learning_rate": 0.0005, "loss": 0.8145, "step": 423600 }, { "epoch": 33.23, "learning_rate": 0.0005, "loss": 0.7967, "step": 423700 }, { "epoch": 33.23, "learning_rate": 0.0005, "loss": 0.8177, "step": 423800 }, { "epoch": 33.24, "learning_rate": 0.0005, "loss": 0.8056, "step": 423900 }, { "epoch": 33.25, "learning_rate": 0.0005, "loss": 0.8275, "step": 424000 }, { "epoch": 33.26, "learning_rate": 0.0005, "loss": 0.8216, "step": 424100 }, { "epoch": 33.27, "learning_rate": 0.0005, "loss": 0.8307, "step": 424200 }, { "epoch": 33.27, "learning_rate": 0.0005, "loss": 0.8285, "step": 424300 }, { "epoch": 33.28, "learning_rate": 0.0005, "loss": 0.8256, "step": 424400 }, { "epoch": 33.29, "learning_rate": 0.0005, "loss": 0.8234, "step": 424500 }, { "epoch": 33.3, "learning_rate": 0.0005, "loss": 0.8097, "step": 424600 }, { "epoch": 33.3, "learning_rate": 0.0005, "loss": 0.8142, "step": 424700 }, { "epoch": 33.31, "learning_rate": 0.0005, "loss": 0.8241, "step": 424800 }, { "epoch": 33.32, "learning_rate": 0.0005, "loss": 0.8156, "step": 424900 }, { "epoch": 33.33, "learning_rate": 0.0005, "loss": 0.8056, "step": 425000 }, { "epoch": 33.34, "learning_rate": 0.0005, "loss": 0.8278, "step": 425100 }, { "epoch": 33.34, "learning_rate": 0.0005, "loss": 0.817, "step": 425200 }, { "epoch": 33.35, "learning_rate": 0.0005, "loss": 0.8169, "step": 425300 }, { "epoch": 33.36, "learning_rate": 0.0005, "loss": 0.8139, "step": 425400 }, { "epoch": 33.37, "learning_rate": 0.0005, "loss": 0.8209, "step": 425500 }, { "epoch": 33.38, "learning_rate": 0.0005, "loss": 0.8239, "step": 425600 }, { "epoch": 33.38, "learning_rate": 0.0005, "loss": 0.8167, "step": 425700 }, { "epoch": 33.39, "learning_rate": 0.0005, "loss": 0.8227, "step": 425800 }, { "epoch": 33.4, "learning_rate": 0.0005, "loss": 0.834, "step": 425900 }, { "epoch": 33.41, "learning_rate": 0.0005, "loss": 0.8476, "step": 426000 }, { "epoch": 33.41, "learning_rate": 0.0005, "loss": 0.8246, "step": 426100 }, { "epoch": 33.42, "learning_rate": 0.0005, "loss": 0.8288, "step": 426200 }, { "epoch": 33.43, "learning_rate": 0.0005, "loss": 0.823, "step": 426300 }, { "epoch": 33.44, "learning_rate": 0.0005, "loss": 0.8368, "step": 426400 }, { "epoch": 33.45, "learning_rate": 0.0005, "loss": 0.8374, "step": 426500 }, { "epoch": 33.45, "learning_rate": 0.0005, "loss": 0.8432, "step": 426600 }, { "epoch": 33.46, "learning_rate": 0.0005, "loss": 0.8436, "step": 426700 }, { "epoch": 33.47, "learning_rate": 0.0005, "loss": 0.8309, "step": 426800 }, { "epoch": 33.48, "learning_rate": 0.0005, "loss": 0.8322, "step": 426900 }, { "epoch": 33.48, "learning_rate": 0.0005, "loss": 0.8413, "step": 427000 }, { "epoch": 33.49, "learning_rate": 0.0005, "loss": 0.8403, "step": 427100 }, { "epoch": 33.5, "learning_rate": 0.0005, "loss": 0.8394, "step": 427200 }, { "epoch": 33.51, "learning_rate": 0.0005, "loss": 0.843, "step": 427300 }, { "epoch": 33.52, "learning_rate": 0.0005, "loss": 0.8253, "step": 427400 }, { "epoch": 33.52, "learning_rate": 0.0005, "loss": 0.8426, "step": 427500 }, { "epoch": 33.53, "learning_rate": 0.0005, "loss": 0.8413, "step": 427600 }, { "epoch": 33.54, "learning_rate": 0.0005, "loss": 0.8456, "step": 427700 }, { "epoch": 33.55, "learning_rate": 0.0005, "loss": 0.8459, "step": 427800 }, { "epoch": 33.56, "learning_rate": 0.0005, "loss": 0.8189, "step": 427900 }, { "epoch": 33.56, "learning_rate": 0.0005, "loss": 0.8424, "step": 428000 }, { "epoch": 33.57, "learning_rate": 0.0005, "loss": 0.8268, "step": 428100 }, { "epoch": 33.58, "learning_rate": 0.0005, "loss": 0.8405, "step": 428200 }, { "epoch": 33.59, "learning_rate": 0.0005, "loss": 0.8277, "step": 428300 }, { "epoch": 33.59, "learning_rate": 0.0005, "loss": 0.8327, "step": 428400 }, { "epoch": 33.6, "learning_rate": 0.0005, "loss": 0.8342, "step": 428500 }, { "epoch": 33.61, "learning_rate": 0.0005, "loss": 0.8367, "step": 428600 }, { "epoch": 33.62, "learning_rate": 0.0005, "loss": 0.829, "step": 428700 }, { "epoch": 33.63, "learning_rate": 0.0005, "loss": 0.8265, "step": 428800 }, { "epoch": 33.63, "learning_rate": 0.0005, "loss": 0.8416, "step": 428900 }, { "epoch": 33.64, "learning_rate": 0.0005, "loss": 0.8204, "step": 429000 }, { "epoch": 33.65, "learning_rate": 0.0005, "loss": 0.834, "step": 429100 }, { "epoch": 33.66, "learning_rate": 0.0005, "loss": 0.8527, "step": 429200 }, { "epoch": 33.67, "learning_rate": 0.0005, "loss": 0.8515, "step": 429300 }, { "epoch": 33.67, "learning_rate": 0.0005, "loss": 0.8335, "step": 429400 }, { "epoch": 33.68, "learning_rate": 0.0005, "loss": 0.8428, "step": 429500 }, { "epoch": 33.69, "learning_rate": 0.0005, "loss": 0.8431, "step": 429600 }, { "epoch": 33.7, "learning_rate": 0.0005, "loss": 0.8593, "step": 429700 }, { "epoch": 33.7, "learning_rate": 0.0005, "loss": 0.8496, "step": 429800 }, { "epoch": 33.71, "learning_rate": 0.0005, "loss": 0.8791, "step": 429900 }, { "epoch": 33.72, "learning_rate": 0.0005, "loss": 0.8553, "step": 430000 }, { "epoch": 33.72, "eval_gen_len": 18.7672817162532, "eval_loss": 2.5715715885162354, "eval_rouge1": 35.804, "eval_rouge2": 14.8203, "eval_rougeL": 29.528, "eval_rougeLsum": 29.5216, "eval_runtime": 342.741, "eval_samples_per_second": 33.048, "eval_steps_per_second": 2.066, "step": 430000 }, { "epoch": 33.73, "learning_rate": 0.0005, "loss": 0.852, "step": 430100 }, { "epoch": 33.74, "learning_rate": 0.0005, "loss": 0.8455, "step": 430200 }, { "epoch": 33.74, "learning_rate": 0.0005, "loss": 0.86, "step": 430300 }, { "epoch": 33.75, "learning_rate": 0.0005, "loss": 0.8403, "step": 430400 }, { "epoch": 33.76, "learning_rate": 0.0005, "loss": 0.8532, "step": 430500 }, { "epoch": 33.77, "learning_rate": 0.0005, "loss": 0.8397, "step": 430600 }, { "epoch": 33.78, "learning_rate": 0.0005, "loss": 0.8421, "step": 430700 }, { "epoch": 33.78, "learning_rate": 0.0005, "loss": 0.8572, "step": 430800 }, { "epoch": 33.79, "learning_rate": 0.0005, "loss": 0.8428, "step": 430900 }, { "epoch": 33.8, "learning_rate": 0.0005, "loss": 0.8125, "step": 431000 }, { "epoch": 33.81, "learning_rate": 0.0005, "loss": 0.8541, "step": 431100 }, { "epoch": 33.81, "learning_rate": 0.0005, "loss": 0.8338, "step": 431200 }, { "epoch": 33.82, "learning_rate": 0.0005, "loss": 0.8576, "step": 431300 }, { "epoch": 33.83, "learning_rate": 0.0005, "loss": 0.8411, "step": 431400 }, { "epoch": 33.84, "learning_rate": 0.0005, "loss": 0.8495, "step": 431500 }, { "epoch": 33.85, "learning_rate": 0.0005, "loss": 0.8603, "step": 431600 }, { "epoch": 33.85, "learning_rate": 0.0005, "loss": 0.8696, "step": 431700 }, { "epoch": 33.86, "learning_rate": 0.0005, "loss": 0.8594, "step": 431800 }, { "epoch": 33.87, "learning_rate": 0.0005, "loss": 0.8386, "step": 431900 }, { "epoch": 33.88, "learning_rate": 0.0005, "loss": 0.8496, "step": 432000 }, { "epoch": 33.88, "learning_rate": 0.0005, "loss": 0.8465, "step": 432100 }, { "epoch": 33.89, "learning_rate": 0.0005, "loss": 0.8594, "step": 432200 }, { "epoch": 33.9, "learning_rate": 0.0005, "loss": 0.854, "step": 432300 }, { "epoch": 33.91, "learning_rate": 0.0005, "loss": 0.8447, "step": 432400 }, { "epoch": 33.92, "learning_rate": 0.0005, "loss": 0.8608, "step": 432500 }, { "epoch": 33.92, "learning_rate": 0.0005, "loss": 0.8572, "step": 432600 }, { "epoch": 33.93, "learning_rate": 0.0005, "loss": 0.8624, "step": 432700 }, { "epoch": 33.94, "learning_rate": 0.0005, "loss": 0.8635, "step": 432800 }, { "epoch": 33.95, "learning_rate": 0.0005, "loss": 0.8521, "step": 432900 }, { "epoch": 33.96, "learning_rate": 0.0005, "loss": 0.8615, "step": 433000 }, { "epoch": 33.96, "learning_rate": 0.0005, "loss": 0.835, "step": 433100 }, { "epoch": 33.97, "learning_rate": 0.0005, "loss": 0.8391, "step": 433200 }, { "epoch": 33.98, "learning_rate": 0.0005, "loss": 0.844, "step": 433300 }, { "epoch": 33.99, "learning_rate": 0.0005, "loss": 0.8657, "step": 433400 }, { "epoch": 33.99, "learning_rate": 0.0005, "loss": 0.8686, "step": 433500 }, { "epoch": 34.0, "learning_rate": 0.0005, "loss": 0.8344, "step": 433600 }, { "epoch": 34.01, "learning_rate": 0.0005, "loss": 0.7766, "step": 433700 }, { "epoch": 34.02, "learning_rate": 0.0005, "loss": 0.7651, "step": 433800 }, { "epoch": 34.03, "learning_rate": 0.0005, "loss": 0.7894, "step": 433900 }, { "epoch": 34.03, "learning_rate": 0.0005, "loss": 0.783, "step": 434000 }, { "epoch": 34.04, "learning_rate": 0.0005, "loss": 0.7997, "step": 434100 }, { "epoch": 34.05, "learning_rate": 0.0005, "loss": 0.7916, "step": 434200 }, { "epoch": 34.06, "learning_rate": 0.0005, "loss": 0.806, "step": 434300 }, { "epoch": 34.07, "learning_rate": 0.0005, "loss": 0.7905, "step": 434400 }, { "epoch": 34.07, "learning_rate": 0.0005, "loss": 0.7827, "step": 434500 }, { "epoch": 34.08, "learning_rate": 0.0005, "loss": 0.7867, "step": 434600 }, { "epoch": 34.09, "learning_rate": 0.0005, "loss": 0.7875, "step": 434700 }, { "epoch": 34.1, "learning_rate": 0.0005, "loss": 0.798, "step": 434800 }, { "epoch": 34.1, "learning_rate": 0.0005, "loss": 0.8086, "step": 434900 }, { "epoch": 34.11, "learning_rate": 0.0005, "loss": 0.7837, "step": 435000 }, { "epoch": 34.12, "learning_rate": 0.0005, "loss": 0.8006, "step": 435100 }, { "epoch": 34.13, "learning_rate": 0.0005, "loss": 0.7904, "step": 435200 }, { "epoch": 34.14, "learning_rate": 0.0005, "loss": 0.7889, "step": 435300 }, { "epoch": 34.14, "learning_rate": 0.0005, "loss": 0.8047, "step": 435400 }, { "epoch": 34.15, "learning_rate": 0.0005, "loss": 0.8064, "step": 435500 }, { "epoch": 34.16, "learning_rate": 0.0005, "loss": 0.7976, "step": 435600 }, { "epoch": 34.17, "learning_rate": 0.0005, "loss": 0.8184, "step": 435700 }, { "epoch": 34.18, "learning_rate": 0.0005, "loss": 0.8029, "step": 435800 }, { "epoch": 34.18, "learning_rate": 0.0005, "loss": 0.814, "step": 435900 }, { "epoch": 34.19, "learning_rate": 0.0005, "loss": 0.8016, "step": 436000 }, { "epoch": 34.2, "learning_rate": 0.0005, "loss": 0.8048, "step": 436100 }, { "epoch": 34.21, "learning_rate": 0.0005, "loss": 0.8168, "step": 436200 }, { "epoch": 34.21, "learning_rate": 0.0005, "loss": 0.8007, "step": 436300 }, { "epoch": 34.22, "learning_rate": 0.0005, "loss": 0.7969, "step": 436400 }, { "epoch": 34.23, "learning_rate": 0.0005, "loss": 0.8047, "step": 436500 }, { "epoch": 34.24, "learning_rate": 0.0005, "loss": 0.81, "step": 436600 }, { "epoch": 34.25, "learning_rate": 0.0005, "loss": 0.8079, "step": 436700 }, { "epoch": 34.25, "learning_rate": 0.0005, "loss": 0.7928, "step": 436800 }, { "epoch": 34.26, "learning_rate": 0.0005, "loss": 0.8157, "step": 436900 }, { "epoch": 34.27, "learning_rate": 0.0005, "loss": 0.8159, "step": 437000 }, { "epoch": 34.28, "learning_rate": 0.0005, "loss": 0.8248, "step": 437100 }, { "epoch": 34.28, "learning_rate": 0.0005, "loss": 0.8182, "step": 437200 }, { "epoch": 34.29, "learning_rate": 0.0005, "loss": 0.8099, "step": 437300 }, { "epoch": 34.3, "learning_rate": 0.0005, "loss": 0.8223, "step": 437400 }, { "epoch": 34.31, "learning_rate": 0.0005, "loss": 0.8014, "step": 437500 }, { "epoch": 34.32, "learning_rate": 0.0005, "loss": 0.8313, "step": 437600 }, { "epoch": 34.32, "learning_rate": 0.0005, "loss": 0.8054, "step": 437700 }, { "epoch": 34.33, "learning_rate": 0.0005, "loss": 0.8245, "step": 437800 }, { "epoch": 34.34, "learning_rate": 0.0005, "loss": 0.8153, "step": 437900 }, { "epoch": 34.35, "learning_rate": 0.0005, "loss": 0.8125, "step": 438000 }, { "epoch": 34.36, "learning_rate": 0.0005, "loss": 0.8074, "step": 438100 }, { "epoch": 34.36, "learning_rate": 0.0005, "loss": 0.8155, "step": 438200 }, { "epoch": 34.37, "learning_rate": 0.0005, "loss": 0.8164, "step": 438300 }, { "epoch": 34.38, "learning_rate": 0.0005, "loss": 0.8073, "step": 438400 }, { "epoch": 34.39, "learning_rate": 0.0005, "loss": 0.83, "step": 438500 }, { "epoch": 34.39, "learning_rate": 0.0005, "loss": 0.8174, "step": 438600 }, { "epoch": 34.4, "learning_rate": 0.0005, "loss": 0.8199, "step": 438700 }, { "epoch": 34.41, "learning_rate": 0.0005, "loss": 0.8117, "step": 438800 }, { "epoch": 34.42, "learning_rate": 0.0005, "loss": 0.8318, "step": 438900 }, { "epoch": 34.43, "learning_rate": 0.0005, "loss": 0.8325, "step": 439000 }, { "epoch": 34.43, "learning_rate": 0.0005, "loss": 0.815, "step": 439100 }, { "epoch": 34.44, "learning_rate": 0.0005, "loss": 0.8214, "step": 439200 }, { "epoch": 34.45, "learning_rate": 0.0005, "loss": 0.8253, "step": 439300 }, { "epoch": 34.46, "learning_rate": 0.0005, "loss": 0.841, "step": 439400 }, { "epoch": 34.47, "learning_rate": 0.0005, "loss": 0.8074, "step": 439500 }, { "epoch": 34.47, "learning_rate": 0.0005, "loss": 0.8385, "step": 439600 }, { "epoch": 34.48, "learning_rate": 0.0005, "loss": 0.822, "step": 439700 }, { "epoch": 34.49, "learning_rate": 0.0005, "loss": 0.8159, "step": 439800 }, { "epoch": 34.5, "learning_rate": 0.0005, "loss": 0.8264, "step": 439900 }, { "epoch": 34.5, "learning_rate": 0.0005, "loss": 0.8183, "step": 440000 }, { "epoch": 34.5, "eval_gen_len": 18.763838615697008, "eval_loss": 2.6066930294036865, "eval_rouge1": 35.8093, "eval_rouge2": 14.8115, "eval_rougeL": 29.5759, "eval_rougeLsum": 29.5784, "eval_runtime": 344.3419, "eval_samples_per_second": 32.895, "eval_steps_per_second": 2.056, "step": 440000 }, { "epoch": 34.51, "learning_rate": 0.0005, "loss": 0.812, "step": 440100 }, { "epoch": 34.52, "learning_rate": 0.0005, "loss": 0.84, "step": 440200 }, { "epoch": 34.53, "learning_rate": 0.0005, "loss": 0.8214, "step": 440300 }, { "epoch": 34.54, "learning_rate": 0.0005, "loss": 0.8131, "step": 440400 }, { "epoch": 34.54, "learning_rate": 0.0005, "loss": 0.8257, "step": 440500 }, { "epoch": 34.55, "learning_rate": 0.0005, "loss": 0.831, "step": 440600 }, { "epoch": 34.56, "learning_rate": 0.0005, "loss": 0.8385, "step": 440700 }, { "epoch": 34.57, "learning_rate": 0.0005, "loss": 0.8258, "step": 440800 }, { "epoch": 34.57, "learning_rate": 0.0005, "loss": 0.8128, "step": 440900 }, { "epoch": 34.58, "learning_rate": 0.0005, "loss": 0.8396, "step": 441000 }, { "epoch": 34.59, "learning_rate": 0.0005, "loss": 0.797, "step": 441100 }, { "epoch": 34.6, "learning_rate": 0.0005, "loss": 0.8389, "step": 441200 }, { "epoch": 34.61, "learning_rate": 0.0005, "loss": 0.8328, "step": 441300 }, { "epoch": 34.61, "learning_rate": 0.0005, "loss": 0.8357, "step": 441400 }, { "epoch": 34.62, "learning_rate": 0.0005, "loss": 0.8416, "step": 441500 }, { "epoch": 34.63, "learning_rate": 0.0005, "loss": 0.8182, "step": 441600 }, { "epoch": 34.64, "learning_rate": 0.0005, "loss": 0.827, "step": 441700 }, { "epoch": 34.65, "learning_rate": 0.0005, "loss": 0.8377, "step": 441800 }, { "epoch": 34.65, "learning_rate": 0.0005, "loss": 0.8376, "step": 441900 }, { "epoch": 34.66, "learning_rate": 0.0005, "loss": 0.8373, "step": 442000 }, { "epoch": 34.67, "learning_rate": 0.0005, "loss": 0.8355, "step": 442100 }, { "epoch": 34.68, "learning_rate": 0.0005, "loss": 0.831, "step": 442200 }, { "epoch": 34.68, "learning_rate": 0.0005, "loss": 0.8319, "step": 442300 }, { "epoch": 34.69, "learning_rate": 0.0005, "loss": 0.838, "step": 442400 }, { "epoch": 34.7, "learning_rate": 0.0005, "loss": 0.8341, "step": 442500 }, { "epoch": 34.71, "learning_rate": 0.0005, "loss": 0.8517, "step": 442600 }, { "epoch": 34.72, "learning_rate": 0.0005, "loss": 0.8445, "step": 442700 }, { "epoch": 34.72, "learning_rate": 0.0005, "loss": 0.8481, "step": 442800 }, { "epoch": 34.73, "learning_rate": 0.0005, "loss": 0.8378, "step": 442900 }, { "epoch": 34.74, "learning_rate": 0.0005, "loss": 0.8282, "step": 443000 }, { "epoch": 34.75, "learning_rate": 0.0005, "loss": 0.8313, "step": 443100 }, { "epoch": 34.76, "learning_rate": 0.0005, "loss": 0.8331, "step": 443200 }, { "epoch": 34.76, "learning_rate": 0.0005, "loss": 0.8289, "step": 443300 }, { "epoch": 34.77, "learning_rate": 0.0005, "loss": 0.8486, "step": 443400 }, { "epoch": 34.78, "learning_rate": 0.0005, "loss": 0.8296, "step": 443500 }, { "epoch": 34.79, "learning_rate": 0.0005, "loss": 0.825, "step": 443600 }, { "epoch": 34.79, "learning_rate": 0.0005, "loss": 0.8518, "step": 443700 }, { "epoch": 34.8, "learning_rate": 0.0005, "loss": 0.841, "step": 443800 }, { "epoch": 34.81, "learning_rate": 0.0005, "loss": 0.8487, "step": 443900 }, { "epoch": 34.82, "learning_rate": 0.0005, "loss": 0.84, "step": 444000 }, { "epoch": 34.83, "learning_rate": 0.0005, "loss": 0.8461, "step": 444100 }, { "epoch": 34.83, "learning_rate": 0.0005, "loss": 0.8311, "step": 444200 }, { "epoch": 34.84, "learning_rate": 0.0005, "loss": 0.839, "step": 444300 }, { "epoch": 34.85, "learning_rate": 0.0005, "loss": 0.8496, "step": 444400 }, { "epoch": 34.86, "learning_rate": 0.0005, "loss": 0.8523, "step": 444500 }, { "epoch": 34.87, "learning_rate": 0.0005, "loss": 0.8429, "step": 444600 }, { "epoch": 34.87, "learning_rate": 0.0005, "loss": 0.8418, "step": 444700 }, { "epoch": 34.88, "learning_rate": 0.0005, "loss": 0.8368, "step": 444800 }, { "epoch": 34.89, "learning_rate": 0.0005, "loss": 0.8457, "step": 444900 }, { "epoch": 34.9, "learning_rate": 0.0005, "loss": 0.853, "step": 445000 }, { "epoch": 34.9, "learning_rate": 0.0005, "loss": 0.8323, "step": 445100 }, { "epoch": 34.91, "learning_rate": 0.0005, "loss": 0.8477, "step": 445200 }, { "epoch": 34.92, "learning_rate": 0.0005, "loss": 0.8549, "step": 445300 }, { "epoch": 34.93, "learning_rate": 0.0005, "loss": 0.8326, "step": 445400 }, { "epoch": 34.94, "learning_rate": 0.0005, "loss": 0.8582, "step": 445500 }, { "epoch": 34.94, "learning_rate": 0.0005, "loss": 0.8332, "step": 445600 }, { "epoch": 34.95, "learning_rate": 0.0005, "loss": 0.8394, "step": 445700 }, { "epoch": 34.96, "learning_rate": 0.0005, "loss": 0.8373, "step": 445800 }, { "epoch": 34.97, "learning_rate": 0.0005, "loss": 0.8423, "step": 445900 }, { "epoch": 34.97, "learning_rate": 0.0005, "loss": 0.8352, "step": 446000 }, { "epoch": 34.98, "learning_rate": 0.0005, "loss": 0.8462, "step": 446100 }, { "epoch": 34.99, "learning_rate": 0.0005, "loss": 0.8499, "step": 446200 }, { "epoch": 35.0, "learning_rate": 0.0005, "loss": 0.8428, "step": 446300 }, { "epoch": 35.01, "learning_rate": 0.0005, "loss": 0.7978, "step": 446400 }, { "epoch": 35.01, "learning_rate": 0.0005, "loss": 0.7763, "step": 446500 }, { "epoch": 35.02, "learning_rate": 0.0005, "loss": 0.7864, "step": 446600 }, { "epoch": 35.03, "learning_rate": 0.0005, "loss": 0.7784, "step": 446700 }, { "epoch": 35.04, "learning_rate": 0.0005, "loss": 0.7811, "step": 446800 }, { "epoch": 35.05, "learning_rate": 0.0005, "loss": 0.7874, "step": 446900 }, { "epoch": 35.05, "learning_rate": 0.0005, "loss": 0.7826, "step": 447000 }, { "epoch": 35.06, "learning_rate": 0.0005, "loss": 0.7766, "step": 447100 }, { "epoch": 35.07, "learning_rate": 0.0005, "loss": 0.7829, "step": 447200 }, { "epoch": 35.08, "learning_rate": 0.0005, "loss": 0.7731, "step": 447300 }, { "epoch": 35.08, "learning_rate": 0.0005, "loss": 0.7956, "step": 447400 }, { "epoch": 35.09, "learning_rate": 0.0005, "loss": 0.7856, "step": 447500 }, { "epoch": 35.1, "learning_rate": 0.0005, "loss": 0.7957, "step": 447600 }, { "epoch": 35.11, "learning_rate": 0.0005, "loss": 0.7681, "step": 447700 }, { "epoch": 35.12, "learning_rate": 0.0005, "loss": 0.7797, "step": 447800 }, { "epoch": 35.12, "learning_rate": 0.0005, "loss": 0.7957, "step": 447900 }, { "epoch": 35.13, "learning_rate": 0.0005, "loss": 0.7806, "step": 448000 }, { "epoch": 35.14, "learning_rate": 0.0005, "loss": 0.8021, "step": 448100 }, { "epoch": 35.15, "learning_rate": 0.0005, "loss": 0.7923, "step": 448200 }, { "epoch": 35.16, "learning_rate": 0.0005, "loss": 0.8019, "step": 448300 }, { "epoch": 35.16, "learning_rate": 0.0005, "loss": 0.7935, "step": 448400 }, { "epoch": 35.17, "learning_rate": 0.0005, "loss": 0.7987, "step": 448500 }, { "epoch": 35.18, "learning_rate": 0.0005, "loss": 0.798, "step": 448600 }, { "epoch": 35.19, "learning_rate": 0.0005, "loss": 0.7904, "step": 448700 }, { "epoch": 35.19, "learning_rate": 0.0005, "loss": 0.8029, "step": 448800 }, { "epoch": 35.2, "learning_rate": 0.0005, "loss": 0.7862, "step": 448900 }, { "epoch": 35.21, "learning_rate": 0.0005, "loss": 0.7884, "step": 449000 }, { "epoch": 35.22, "learning_rate": 0.0005, "loss": 0.7895, "step": 449100 }, { "epoch": 35.23, "learning_rate": 0.0005, "loss": 0.7948, "step": 449200 }, { "epoch": 35.23, "learning_rate": 0.0005, "loss": 0.7953, "step": 449300 }, { "epoch": 35.24, "learning_rate": 0.0005, "loss": 0.807, "step": 449400 }, { "epoch": 35.25, "learning_rate": 0.0005, "loss": 0.8105, "step": 449500 }, { "epoch": 35.26, "learning_rate": 0.0005, "loss": 0.7959, "step": 449600 }, { "epoch": 35.27, "learning_rate": 0.0005, "loss": 0.8048, "step": 449700 }, { "epoch": 35.27, "learning_rate": 0.0005, "loss": 0.7986, "step": 449800 }, { "epoch": 35.28, "learning_rate": 0.0005, "loss": 0.8037, "step": 449900 }, { "epoch": 35.29, "learning_rate": 0.0005, "loss": 0.8095, "step": 450000 }, { "epoch": 35.29, "eval_gen_len": 18.77716959477355, "eval_loss": 2.638524293899536, "eval_rouge1": 35.734, "eval_rouge2": 14.7352, "eval_rougeL": 29.5245, "eval_rougeLsum": 29.5196, "eval_runtime": 343.351, "eval_samples_per_second": 32.99, "eval_steps_per_second": 2.062, "step": 450000 }, { "epoch": 35.3, "learning_rate": 0.0005, "loss": 0.8028, "step": 450100 }, { "epoch": 35.3, "learning_rate": 0.0005, "loss": 0.8031, "step": 450200 }, { "epoch": 35.31, "learning_rate": 0.0005, "loss": 0.8257, "step": 450300 }, { "epoch": 35.32, "learning_rate": 0.0005, "loss": 0.8024, "step": 450400 }, { "epoch": 35.33, "learning_rate": 0.0005, "loss": 0.8068, "step": 450500 }, { "epoch": 35.34, "learning_rate": 0.0005, "loss": 0.803, "step": 450600 }, { "epoch": 35.34, "learning_rate": 0.0005, "loss": 0.8003, "step": 450700 }, { "epoch": 35.35, "learning_rate": 0.0005, "loss": 0.8013, "step": 450800 }, { "epoch": 35.36, "learning_rate": 0.0005, "loss": 0.8163, "step": 450900 }, { "epoch": 35.37, "learning_rate": 0.0005, "loss": 0.7973, "step": 451000 }, { "epoch": 35.37, "learning_rate": 0.0005, "loss": 0.7942, "step": 451100 }, { "epoch": 35.38, "learning_rate": 0.0005, "loss": 0.803, "step": 451200 }, { "epoch": 35.39, "learning_rate": 0.0005, "loss": 0.7992, "step": 451300 }, { "epoch": 35.4, "learning_rate": 0.0005, "loss": 0.8124, "step": 451400 }, { "epoch": 35.41, "learning_rate": 0.0005, "loss": 0.7982, "step": 451500 }, { "epoch": 35.41, "learning_rate": 0.0005, "loss": 0.8111, "step": 451600 }, { "epoch": 35.42, "learning_rate": 0.0005, "loss": 0.7959, "step": 451700 }, { "epoch": 35.43, "learning_rate": 0.0005, "loss": 0.8066, "step": 451800 }, { "epoch": 35.44, "learning_rate": 0.0005, "loss": 0.8037, "step": 451900 }, { "epoch": 35.45, "learning_rate": 0.0005, "loss": 0.8163, "step": 452000 }, { "epoch": 35.45, "learning_rate": 0.0005, "loss": 0.8144, "step": 452100 }, { "epoch": 35.46, "learning_rate": 0.0005, "loss": 0.7978, "step": 452200 }, { "epoch": 35.47, "learning_rate": 0.0005, "loss": 0.8063, "step": 452300 }, { "epoch": 35.48, "learning_rate": 0.0005, "loss": 0.8142, "step": 452400 }, { "epoch": 35.48, "learning_rate": 0.0005, "loss": 0.8319, "step": 452500 }, { "epoch": 35.49, "learning_rate": 0.0005, "loss": 0.8081, "step": 452600 }, { "epoch": 35.5, "learning_rate": 0.0005, "loss": 0.8118, "step": 452700 }, { "epoch": 35.51, "learning_rate": 0.0005, "loss": 0.822, "step": 452800 }, { "epoch": 35.52, "learning_rate": 0.0005, "loss": 0.8152, "step": 452900 }, { "epoch": 35.52, "learning_rate": 0.0005, "loss": 0.8046, "step": 453000 }, { "epoch": 35.53, "learning_rate": 0.0005, "loss": 0.8165, "step": 453100 }, { "epoch": 35.54, "learning_rate": 0.0005, "loss": 0.81, "step": 453200 }, { "epoch": 35.55, "learning_rate": 0.0005, "loss": 0.7984, "step": 453300 }, { "epoch": 35.56, "learning_rate": 0.0005, "loss": 0.806, "step": 453400 }, { "epoch": 35.56, "learning_rate": 0.0005, "loss": 0.8164, "step": 453500 }, { "epoch": 35.57, "learning_rate": 0.0005, "loss": 0.813, "step": 453600 }, { "epoch": 35.58, "learning_rate": 0.0005, "loss": 0.8072, "step": 453700 }, { "epoch": 35.59, "learning_rate": 0.0005, "loss": 0.8135, "step": 453800 }, { "epoch": 35.59, "learning_rate": 0.0005, "loss": 0.8073, "step": 453900 }, { "epoch": 35.6, "learning_rate": 0.0005, "loss": 0.815, "step": 454000 }, { "epoch": 35.61, "learning_rate": 0.0005, "loss": 0.8239, "step": 454100 }, { "epoch": 35.62, "learning_rate": 0.0005, "loss": 0.8274, "step": 454200 }, { "epoch": 35.63, "learning_rate": 0.0005, "loss": 0.8192, "step": 454300 }, { "epoch": 35.63, "learning_rate": 0.0005, "loss": 0.8422, "step": 454400 }, { "epoch": 35.64, "learning_rate": 0.0005, "loss": 0.8196, "step": 454500 }, { "epoch": 35.65, "learning_rate": 0.0005, "loss": 0.8139, "step": 454600 }, { "epoch": 35.66, "learning_rate": 0.0005, "loss": 0.8382, "step": 454700 }, { "epoch": 35.66, "learning_rate": 0.0005, "loss": 0.8197, "step": 454800 }, { "epoch": 35.67, "learning_rate": 0.0005, "loss": 0.8385, "step": 454900 }, { "epoch": 35.68, "learning_rate": 0.0005, "loss": 0.8177, "step": 455000 }, { "epoch": 35.69, "learning_rate": 0.0005, "loss": 0.814, "step": 455100 }, { "epoch": 35.7, "learning_rate": 0.0005, "loss": 0.8306, "step": 455200 }, { "epoch": 35.7, "learning_rate": 0.0005, "loss": 0.8461, "step": 455300 }, { "epoch": 35.71, "learning_rate": 0.0005, "loss": 0.8416, "step": 455400 }, { "epoch": 35.72, "learning_rate": 0.0005, "loss": 0.8048, "step": 455500 }, { "epoch": 35.73, "learning_rate": 0.0005, "loss": 0.8334, "step": 455600 }, { "epoch": 35.74, "learning_rate": 0.0005, "loss": 0.8359, "step": 455700 }, { "epoch": 35.74, "learning_rate": 0.0005, "loss": 0.8078, "step": 455800 }, { "epoch": 35.75, "learning_rate": 0.0005, "loss": 0.8337, "step": 455900 }, { "epoch": 35.76, "learning_rate": 0.0005, "loss": 0.827, "step": 456000 }, { "epoch": 35.77, "learning_rate": 0.0005, "loss": 0.8268, "step": 456100 }, { "epoch": 35.77, "learning_rate": 0.0005, "loss": 0.811, "step": 456200 }, { "epoch": 35.78, "learning_rate": 0.0005, "loss": 0.833, "step": 456300 }, { "epoch": 35.79, "learning_rate": 0.0005, "loss": 0.839, "step": 456400 }, { "epoch": 35.8, "learning_rate": 0.0005, "loss": 0.837, "step": 456500 }, { "epoch": 35.81, "learning_rate": 0.0005, "loss": 0.8271, "step": 456600 }, { "epoch": 35.81, "learning_rate": 0.0005, "loss": 0.8288, "step": 456700 }, { "epoch": 35.82, "learning_rate": 0.0005, "loss": 0.8316, "step": 456800 }, { "epoch": 35.83, "learning_rate": 0.0005, "loss": 0.8124, "step": 456900 }, { "epoch": 35.84, "learning_rate": 0.0005, "loss": 0.8154, "step": 457000 }, { "epoch": 35.85, "learning_rate": 0.0005, "loss": 0.8378, "step": 457100 }, { "epoch": 35.85, "learning_rate": 0.0005, "loss": 0.8521, "step": 457200 }, { "epoch": 35.86, "learning_rate": 0.0005, "loss": 0.8235, "step": 457300 }, { "epoch": 35.87, "learning_rate": 0.0005, "loss": 0.84, "step": 457400 }, { "epoch": 35.88, "learning_rate": 0.0005, "loss": 0.8393, "step": 457500 }, { "epoch": 35.88, "learning_rate": 0.0005, "loss": 0.8429, "step": 457600 }, { "epoch": 35.89, "learning_rate": 0.0005, "loss": 0.8259, "step": 457700 }, { "epoch": 35.9, "learning_rate": 0.0005, "loss": 0.8353, "step": 457800 }, { "epoch": 35.91, "learning_rate": 0.0005, "loss": 0.8472, "step": 457900 }, { "epoch": 35.92, "learning_rate": 0.0005, "loss": 0.8356, "step": 458000 }, { "epoch": 35.92, "learning_rate": 0.0005, "loss": 0.8406, "step": 458100 }, { "epoch": 35.93, "learning_rate": 0.0005, "loss": 0.8403, "step": 458200 }, { "epoch": 35.94, "learning_rate": 0.0005, "loss": 0.8295, "step": 458300 }, { "epoch": 35.95, "learning_rate": 0.0005, "loss": 0.8491, "step": 458400 }, { "epoch": 35.96, "learning_rate": 0.0005, "loss": 0.8041, "step": 458500 }, { "epoch": 35.96, "learning_rate": 0.0005, "loss": 0.8446, "step": 458600 }, { "epoch": 35.97, "learning_rate": 0.0005, "loss": 0.8517, "step": 458700 }, { "epoch": 35.98, "learning_rate": 0.0005, "loss": 0.8476, "step": 458800 }, { "epoch": 35.99, "learning_rate": 0.0005, "loss": 0.8332, "step": 458900 }, { "epoch": 35.99, "learning_rate": 0.0005, "loss": 0.8464, "step": 459000 }, { "epoch": 36.0, "learning_rate": 0.0005, "loss": 0.8185, "step": 459100 }, { "epoch": 36.01, "learning_rate": 0.0005, "loss": 0.7558, "step": 459200 }, { "epoch": 36.02, "learning_rate": 0.0005, "loss": 0.7795, "step": 459300 }, { "epoch": 36.03, "learning_rate": 0.0005, "loss": 0.7633, "step": 459400 }, { "epoch": 36.03, "learning_rate": 0.0005, "loss": 0.7774, "step": 459500 }, { "epoch": 36.04, "learning_rate": 0.0005, "loss": 0.786, "step": 459600 }, { "epoch": 36.05, "learning_rate": 0.0005, "loss": 0.7673, "step": 459700 }, { "epoch": 36.06, "learning_rate": 0.0005, "loss": 0.7816, "step": 459800 }, { "epoch": 36.06, "learning_rate": 0.0005, "loss": 0.7744, "step": 459900 }, { "epoch": 36.07, "learning_rate": 0.0005, "loss": 0.7855, "step": 460000 }, { "epoch": 36.07, "eval_gen_len": 18.767546570142137, "eval_loss": 2.670743465423584, "eval_rouge1": 35.881, "eval_rouge2": 14.9235, "eval_rougeL": 29.6308, "eval_rougeLsum": 29.6326, "eval_runtime": 357.5591, "eval_samples_per_second": 31.679, "eval_steps_per_second": 1.98, "step": 460000 }, { "epoch": 36.08, "learning_rate": 0.0005, "loss": 0.7765, "step": 460100 }, { "epoch": 36.09, "learning_rate": 0.0005, "loss": 0.7711, "step": 460200 }, { "epoch": 36.1, "learning_rate": 0.0005, "loss": 0.7955, "step": 460300 }, { "epoch": 36.1, "learning_rate": 0.0005, "loss": 0.7766, "step": 460400 }, { "epoch": 36.11, "learning_rate": 0.0005, "loss": 0.7848, "step": 460500 }, { "epoch": 36.12, "learning_rate": 0.0005, "loss": 0.7788, "step": 460600 }, { "epoch": 36.13, "learning_rate": 0.0005, "loss": 0.7853, "step": 460700 }, { "epoch": 36.14, "learning_rate": 0.0005, "loss": 0.7926, "step": 460800 }, { "epoch": 36.14, "learning_rate": 0.0005, "loss": 0.7713, "step": 460900 }, { "epoch": 36.15, "learning_rate": 0.0005, "loss": 0.7747, "step": 461000 }, { "epoch": 36.16, "learning_rate": 0.0005, "loss": 0.7857, "step": 461100 }, { "epoch": 36.17, "learning_rate": 0.0005, "loss": 0.7767, "step": 461200 }, { "epoch": 36.17, "learning_rate": 0.0005, "loss": 0.7687, "step": 461300 }, { "epoch": 36.18, "learning_rate": 0.0005, "loss": 0.7738, "step": 461400 }, { "epoch": 36.19, "learning_rate": 0.0005, "loss": 0.7698, "step": 461500 }, { "epoch": 36.2, "learning_rate": 0.0005, "loss": 0.7822, "step": 461600 }, { "epoch": 36.21, "learning_rate": 0.0005, "loss": 0.796, "step": 461700 }, { "epoch": 36.21, "learning_rate": 0.0005, "loss": 0.7873, "step": 461800 }, { "epoch": 36.22, "learning_rate": 0.0005, "loss": 0.7893, "step": 461900 }, { "epoch": 36.23, "learning_rate": 0.0005, "loss": 0.7887, "step": 462000 }, { "epoch": 36.24, "learning_rate": 0.0005, "loss": 0.7934, "step": 462100 }, { "epoch": 36.25, "learning_rate": 0.0005, "loss": 0.8007, "step": 462200 }, { "epoch": 36.25, "learning_rate": 0.0005, "loss": 0.7955, "step": 462300 }, { "epoch": 36.26, "learning_rate": 0.0005, "loss": 0.7928, "step": 462400 }, { "epoch": 36.27, "learning_rate": 0.0005, "loss": 0.7973, "step": 462500 }, { "epoch": 36.28, "learning_rate": 0.0005, "loss": 0.8038, "step": 462600 }, { "epoch": 36.28, "learning_rate": 0.0005, "loss": 0.7919, "step": 462700 }, { "epoch": 36.29, "learning_rate": 0.0005, "loss": 0.8013, "step": 462800 }, { "epoch": 36.3, "learning_rate": 0.0005, "loss": 0.7972, "step": 462900 }, { "epoch": 36.31, "learning_rate": 0.0005, "loss": 0.7906, "step": 463000 }, { "epoch": 36.32, "learning_rate": 0.0005, "loss": 0.7877, "step": 463100 }, { "epoch": 36.32, "learning_rate": 0.0005, "loss": 0.7763, "step": 463200 }, { "epoch": 36.33, "learning_rate": 0.0005, "loss": 0.8125, "step": 463300 }, { "epoch": 36.34, "learning_rate": 0.0005, "loss": 0.7908, "step": 463400 }, { "epoch": 36.35, "learning_rate": 0.0005, "loss": 0.7881, "step": 463500 }, { "epoch": 36.36, "learning_rate": 0.0005, "loss": 0.7967, "step": 463600 }, { "epoch": 36.36, "learning_rate": 0.0005, "loss": 0.813, "step": 463700 }, { "epoch": 36.37, "learning_rate": 0.0005, "loss": 0.7929, "step": 463800 }, { "epoch": 36.38, "learning_rate": 0.0005, "loss": 0.8054, "step": 463900 }, { "epoch": 36.39, "learning_rate": 0.0005, "loss": 0.8029, "step": 464000 }, { "epoch": 36.39, "learning_rate": 0.0005, "loss": 0.7978, "step": 464100 }, { "epoch": 36.4, "learning_rate": 0.0005, "loss": 0.8142, "step": 464200 }, { "epoch": 36.41, "learning_rate": 0.0005, "loss": 0.7955, "step": 464300 }, { "epoch": 36.42, "learning_rate": 0.0005, "loss": 0.7899, "step": 464400 }, { "epoch": 36.43, "learning_rate": 0.0005, "loss": 0.8003, "step": 464500 }, { "epoch": 36.43, "learning_rate": 0.0005, "loss": 0.8066, "step": 464600 }, { "epoch": 36.44, "learning_rate": 0.0005, "loss": 0.7888, "step": 464700 }, { "epoch": 36.45, "learning_rate": 0.0005, "loss": 0.794, "step": 464800 }, { "epoch": 36.46, "learning_rate": 0.0005, "loss": 0.7944, "step": 464900 }, { "epoch": 36.46, "learning_rate": 0.0005, "loss": 0.8059, "step": 465000 }, { "epoch": 36.47, "learning_rate": 0.0005, "loss": 0.8035, "step": 465100 }, { "epoch": 36.48, "learning_rate": 0.0005, "loss": 0.7903, "step": 465200 }, { "epoch": 36.49, "learning_rate": 0.0005, "loss": 0.7972, "step": 465300 }, { "epoch": 36.5, "learning_rate": 0.0005, "loss": 0.8111, "step": 465400 }, { "epoch": 36.5, "learning_rate": 0.0005, "loss": 0.7994, "step": 465500 }, { "epoch": 36.51, "learning_rate": 0.0005, "loss": 0.8135, "step": 465600 }, { "epoch": 36.52, "learning_rate": 0.0005, "loss": 0.8025, "step": 465700 }, { "epoch": 36.53, "learning_rate": 0.0005, "loss": 0.802, "step": 465800 }, { "epoch": 36.54, "learning_rate": 0.0005, "loss": 0.7956, "step": 465900 }, { "epoch": 36.54, "learning_rate": 0.0005, "loss": 0.8108, "step": 466000 }, { "epoch": 36.55, "learning_rate": 0.0005, "loss": 0.8147, "step": 466100 }, { "epoch": 36.56, "learning_rate": 0.0005, "loss": 0.8047, "step": 466200 }, { "epoch": 36.57, "learning_rate": 0.0005, "loss": 0.8173, "step": 466300 }, { "epoch": 36.57, "learning_rate": 0.0005, "loss": 0.8066, "step": 466400 }, { "epoch": 36.58, "learning_rate": 0.0005, "loss": 0.8079, "step": 466500 }, { "epoch": 36.59, "learning_rate": 0.0005, "loss": 0.7877, "step": 466600 }, { "epoch": 36.6, "learning_rate": 0.0005, "loss": 0.8162, "step": 466700 }, { "epoch": 36.61, "learning_rate": 0.0005, "loss": 0.8069, "step": 466800 }, { "epoch": 36.61, "learning_rate": 0.0005, "loss": 0.8191, "step": 466900 }, { "epoch": 36.62, "learning_rate": 0.0005, "loss": 0.807, "step": 467000 }, { "epoch": 36.63, "learning_rate": 0.0005, "loss": 0.8132, "step": 467100 }, { "epoch": 36.64, "learning_rate": 0.0005, "loss": 0.7967, "step": 467200 }, { "epoch": 36.65, "learning_rate": 0.0005, "loss": 0.8126, "step": 467300 }, { "epoch": 36.65, "learning_rate": 0.0005, "loss": 0.8263, "step": 467400 }, { "epoch": 36.66, "learning_rate": 0.0005, "loss": 0.822, "step": 467500 }, { "epoch": 36.67, "learning_rate": 0.0005, "loss": 0.8118, "step": 467600 }, { "epoch": 36.68, "learning_rate": 0.0005, "loss": 0.8003, "step": 467700 }, { "epoch": 36.68, "learning_rate": 0.0005, "loss": 0.8103, "step": 467800 }, { "epoch": 36.69, "learning_rate": 0.0005, "loss": 0.8028, "step": 467900 }, { "epoch": 36.7, "learning_rate": 0.0005, "loss": 0.8207, "step": 468000 }, { "epoch": 36.71, "learning_rate": 0.0005, "loss": 0.8168, "step": 468100 }, { "epoch": 36.72, "learning_rate": 0.0005, "loss": 0.8461, "step": 468200 }, { "epoch": 36.72, "learning_rate": 0.0005, "loss": 0.8257, "step": 468300 }, { "epoch": 36.73, "learning_rate": 0.0005, "loss": 0.8007, "step": 468400 }, { "epoch": 36.74, "learning_rate": 0.0005, "loss": 0.8206, "step": 468500 }, { "epoch": 36.75, "learning_rate": 0.0005, "loss": 0.8275, "step": 468600 }, { "epoch": 36.76, "learning_rate": 0.0005, "loss": 0.8306, "step": 468700 }, { "epoch": 36.76, "learning_rate": 0.0005, "loss": 0.7945, "step": 468800 }, { "epoch": 36.77, "learning_rate": 0.0005, "loss": 0.8237, "step": 468900 }, { "epoch": 36.78, "learning_rate": 0.0005, "loss": 0.8024, "step": 469000 }, { "epoch": 36.79, "learning_rate": 0.0005, "loss": 0.8356, "step": 469100 }, { "epoch": 36.79, "learning_rate": 0.0005, "loss": 0.8196, "step": 469200 }, { "epoch": 36.8, "learning_rate": 0.0005, "loss": 0.8132, "step": 469300 }, { "epoch": 36.81, "learning_rate": 0.0005, "loss": 0.8178, "step": 469400 }, { "epoch": 36.82, "learning_rate": 0.0005, "loss": 0.817, "step": 469500 }, { "epoch": 36.83, "learning_rate": 0.0005, "loss": 0.8178, "step": 469600 }, { "epoch": 36.83, "learning_rate": 0.0005, "loss": 0.8326, "step": 469700 }, { "epoch": 36.84, "learning_rate": 0.0005, "loss": 0.8334, "step": 469800 }, { "epoch": 36.85, "learning_rate": 0.0005, "loss": 0.8169, "step": 469900 }, { "epoch": 36.86, "learning_rate": 0.0005, "loss": 0.8477, "step": 470000 }, { "epoch": 36.86, "eval_gen_len": 18.735852388099232, "eval_loss": 2.60902738571167, "eval_rouge1": 35.7078, "eval_rouge2": 14.7556, "eval_rougeL": 29.5286, "eval_rougeLsum": 29.5256, "eval_runtime": 378.8936, "eval_samples_per_second": 29.895, "eval_steps_per_second": 1.869, "step": 470000 }, { "epoch": 36.86, "learning_rate": 0.0005, "loss": 0.8233, "step": 470100 }, { "epoch": 36.87, "learning_rate": 0.0005, "loss": 0.8172, "step": 470200 }, { "epoch": 36.88, "learning_rate": 0.0005, "loss": 0.8357, "step": 470300 }, { "epoch": 36.89, "learning_rate": 0.0005, "loss": 0.8409, "step": 470400 }, { "epoch": 36.9, "learning_rate": 0.0005, "loss": 0.8167, "step": 470500 }, { "epoch": 36.9, "learning_rate": 0.0005, "loss": 0.8222, "step": 470600 }, { "epoch": 36.91, "learning_rate": 0.0005, "loss": 0.8258, "step": 470700 }, { "epoch": 36.92, "learning_rate": 0.0005, "loss": 0.8166, "step": 470800 }, { "epoch": 36.93, "learning_rate": 0.0005, "loss": 0.8391, "step": 470900 }, { "epoch": 36.94, "learning_rate": 0.0005, "loss": 0.8468, "step": 471000 }, { "epoch": 36.94, "learning_rate": 0.0005, "loss": 0.8225, "step": 471100 }, { "epoch": 36.95, "learning_rate": 0.0005, "loss": 0.8308, "step": 471200 }, { "epoch": 36.96, "learning_rate": 0.0005, "loss": 0.8239, "step": 471300 }, { "epoch": 36.97, "learning_rate": 0.0005, "loss": 0.8429, "step": 471400 }, { "epoch": 36.97, "learning_rate": 0.0005, "loss": 0.8231, "step": 471500 }, { "epoch": 36.98, "learning_rate": 0.0005, "loss": 0.8362, "step": 471600 }, { "epoch": 36.99, "learning_rate": 0.0005, "loss": 0.8313, "step": 471700 }, { "epoch": 37.0, "learning_rate": 0.0005, "loss": 0.8286, "step": 471800 }, { "epoch": 37.01, "learning_rate": 0.0005, "loss": 0.7754, "step": 471900 }, { "epoch": 37.01, "learning_rate": 0.0005, "loss": 0.7617, "step": 472000 }, { "epoch": 37.02, "learning_rate": 0.0005, "loss": 0.7606, "step": 472100 }, { "epoch": 37.03, "learning_rate": 0.0005, "loss": 0.7469, "step": 472200 }, { "epoch": 37.04, "learning_rate": 0.0005, "loss": 0.7591, "step": 472300 }, { "epoch": 37.05, "learning_rate": 0.0005, "loss": 0.7543, "step": 472400 }, { "epoch": 37.05, "learning_rate": 0.0005, "loss": 0.7544, "step": 472500 }, { "epoch": 37.06, "learning_rate": 0.0005, "loss": 0.7685, "step": 472600 }, { "epoch": 37.07, "learning_rate": 0.0005, "loss": 0.7577, "step": 472700 }, { "epoch": 37.08, "learning_rate": 0.0005, "loss": 0.7495, "step": 472800 }, { "epoch": 37.08, "learning_rate": 0.0005, "loss": 0.7498, "step": 472900 }, { "epoch": 37.09, "learning_rate": 0.0005, "loss": 0.7712, "step": 473000 }, { "epoch": 37.1, "learning_rate": 0.0005, "loss": 0.7464, "step": 473100 }, { "epoch": 37.11, "learning_rate": 0.0005, "loss": 0.7627, "step": 473200 }, { "epoch": 37.12, "learning_rate": 0.0005, "loss": 0.7591, "step": 473300 }, { "epoch": 37.12, "learning_rate": 0.0005, "loss": 0.7803, "step": 473400 }, { "epoch": 37.13, "learning_rate": 0.0005, "loss": 0.7655, "step": 473500 }, { "epoch": 37.14, "learning_rate": 0.0005, "loss": 0.7818, "step": 473600 }, { "epoch": 37.15, "learning_rate": 0.0005, "loss": 0.7823, "step": 473700 }, { "epoch": 37.15, "learning_rate": 0.0005, "loss": 0.7796, "step": 473800 }, { "epoch": 37.16, "learning_rate": 0.0005, "loss": 0.7766, "step": 473900 }, { "epoch": 37.17, "learning_rate": 0.0005, "loss": 0.7712, "step": 474000 }, { "epoch": 37.18, "learning_rate": 0.0005, "loss": 0.7847, "step": 474100 }, { "epoch": 37.19, "learning_rate": 0.0005, "loss": 0.7692, "step": 474200 }, { "epoch": 37.19, "learning_rate": 0.0005, "loss": 0.7817, "step": 474300 }, { "epoch": 37.2, "learning_rate": 0.0005, "loss": 0.7833, "step": 474400 }, { "epoch": 37.21, "learning_rate": 0.0005, "loss": 0.7743, "step": 474500 }, { "epoch": 37.22, "learning_rate": 0.0005, "loss": 0.7668, "step": 474600 }, { "epoch": 37.23, "learning_rate": 0.0005, "loss": 0.7677, "step": 474700 }, { "epoch": 37.23, "learning_rate": 0.0005, "loss": 0.7812, "step": 474800 }, { "epoch": 37.24, "learning_rate": 0.0005, "loss": 0.7863, "step": 474900 }, { "epoch": 37.25, "learning_rate": 0.0005, "loss": 0.781, "step": 475000 }, { "epoch": 37.26, "learning_rate": 0.0005, "loss": 0.779, "step": 475100 }, { "epoch": 37.26, "learning_rate": 0.0005, "loss": 0.7852, "step": 475200 }, { "epoch": 37.27, "learning_rate": 0.0005, "loss": 0.7828, "step": 475300 }, { "epoch": 37.28, "learning_rate": 0.0005, "loss": 0.7738, "step": 475400 }, { "epoch": 37.29, "learning_rate": 0.0005, "loss": 0.7747, "step": 475500 }, { "epoch": 37.3, "learning_rate": 0.0005, "loss": 0.7884, "step": 475600 }, { "epoch": 37.3, "learning_rate": 0.0005, "loss": 0.7846, "step": 475700 }, { "epoch": 37.31, "learning_rate": 0.0005, "loss": 0.7871, "step": 475800 }, { "epoch": 37.32, "learning_rate": 0.0005, "loss": 0.78, "step": 475900 }, { "epoch": 37.33, "learning_rate": 0.0005, "loss": 0.7907, "step": 476000 }, { "epoch": 37.34, "learning_rate": 0.0005, "loss": 0.7768, "step": 476100 }, { "epoch": 37.34, "learning_rate": 0.0005, "loss": 0.7854, "step": 476200 }, { "epoch": 37.35, "learning_rate": 0.0005, "loss": 0.7853, "step": 476300 }, { "epoch": 37.36, "learning_rate": 0.0005, "loss": 0.7806, "step": 476400 }, { "epoch": 37.37, "learning_rate": 0.0005, "loss": 0.7993, "step": 476500 }, { "epoch": 37.37, "learning_rate": 0.0005, "loss": 0.7925, "step": 476600 }, { "epoch": 37.38, "learning_rate": 0.0005, "loss": 0.7759, "step": 476700 }, { "epoch": 37.39, "learning_rate": 0.0005, "loss": 0.7989, "step": 476800 }, { "epoch": 37.4, "learning_rate": 0.0005, "loss": 0.7949, "step": 476900 }, { "epoch": 37.41, "learning_rate": 0.0005, "loss": 0.7947, "step": 477000 }, { "epoch": 37.41, "learning_rate": 0.0005, "loss": 0.7873, "step": 477100 }, { "epoch": 37.42, "learning_rate": 0.0005, "loss": 0.8054, "step": 477200 }, { "epoch": 37.43, "learning_rate": 0.0005, "loss": 0.8011, "step": 477300 }, { "epoch": 37.44, "learning_rate": 0.0005, "loss": 0.7734, "step": 477400 }, { "epoch": 37.45, "learning_rate": 0.0005, "loss": 0.7926, "step": 477500 }, { "epoch": 37.45, "learning_rate": 0.0005, "loss": 0.8041, "step": 477600 }, { "epoch": 37.46, "learning_rate": 0.0005, "loss": 0.7928, "step": 477700 }, { "epoch": 37.47, "learning_rate": 0.0005, "loss": 0.7862, "step": 477800 }, { "epoch": 37.48, "learning_rate": 0.0005, "loss": 0.8035, "step": 477900 }, { "epoch": 37.48, "learning_rate": 0.0005, "loss": 0.791, "step": 478000 }, { "epoch": 37.49, "learning_rate": 0.0005, "loss": 0.7836, "step": 478100 }, { "epoch": 37.5, "learning_rate": 0.0005, "loss": 0.803, "step": 478200 }, { "epoch": 37.51, "learning_rate": 0.0005, "loss": 0.7891, "step": 478300 }, { "epoch": 37.52, "learning_rate": 0.0005, "loss": 0.7887, "step": 478400 }, { "epoch": 37.52, "learning_rate": 0.0005, "loss": 0.7961, "step": 478500 }, { "epoch": 37.53, "learning_rate": 0.0005, "loss": 0.8055, "step": 478600 }, { "epoch": 37.54, "learning_rate": 0.0005, "loss": 0.8102, "step": 478700 }, { "epoch": 37.55, "learning_rate": 0.0005, "loss": 0.8058, "step": 478800 }, { "epoch": 37.55, "learning_rate": 0.0005, "loss": 0.8053, "step": 478900 }, { "epoch": 37.56, "learning_rate": 0.0005, "loss": 0.7839, "step": 479000 }, { "epoch": 37.57, "learning_rate": 0.0005, "loss": 0.8064, "step": 479100 }, { "epoch": 37.58, "learning_rate": 0.0005, "loss": 0.7746, "step": 479200 }, { "epoch": 37.59, "learning_rate": 0.0005, "loss": 0.8084, "step": 479300 }, { "epoch": 37.59, "learning_rate": 0.0005, "loss": 0.8039, "step": 479400 }, { "epoch": 37.6, "learning_rate": 0.0005, "loss": 0.8147, "step": 479500 }, { "epoch": 37.61, "learning_rate": 0.0005, "loss": 0.7917, "step": 479600 }, { "epoch": 37.62, "learning_rate": 0.0005, "loss": 0.7955, "step": 479700 }, { "epoch": 37.63, "learning_rate": 0.0005, "loss": 0.8174, "step": 479800 }, { "epoch": 37.63, "learning_rate": 0.0005, "loss": 0.8039, "step": 479900 }, { "epoch": 37.64, "learning_rate": 0.0005, "loss": 0.819, "step": 480000 }, { "epoch": 37.64, "eval_gen_len": 18.777787587181074, "eval_loss": 2.638998508453369, "eval_rouge1": 35.7961, "eval_rouge2": 14.8984, "eval_rougeL": 29.577, "eval_rougeLsum": 29.574, "eval_runtime": 381.3863, "eval_samples_per_second": 29.7, "eval_steps_per_second": 1.856, "step": 480000 }, { "epoch": 37.65, "learning_rate": 0.0005, "loss": 0.8115, "step": 480100 }, { "epoch": 37.66, "learning_rate": 0.0005, "loss": 0.7937, "step": 480200 }, { "epoch": 37.66, "learning_rate": 0.0005, "loss": 0.8177, "step": 480300 }, { "epoch": 37.67, "learning_rate": 0.0005, "loss": 0.8204, "step": 480400 }, { "epoch": 37.68, "learning_rate": 0.0005, "loss": 0.8079, "step": 480500 }, { "epoch": 37.69, "learning_rate": 0.0005, "loss": 0.7922, "step": 480600 }, { "epoch": 37.7, "learning_rate": 0.0005, "loss": 0.8047, "step": 480700 }, { "epoch": 37.7, "learning_rate": 0.0005, "loss": 0.7963, "step": 480800 }, { "epoch": 37.71, "learning_rate": 0.0005, "loss": 0.7966, "step": 480900 }, { "epoch": 37.72, "learning_rate": 0.0005, "loss": 0.817, "step": 481000 }, { "epoch": 37.73, "learning_rate": 0.0005, "loss": 0.8118, "step": 481100 }, { "epoch": 37.74, "learning_rate": 0.0005, "loss": 0.7964, "step": 481200 }, { "epoch": 37.74, "learning_rate": 0.0005, "loss": 0.8205, "step": 481300 }, { "epoch": 37.75, "learning_rate": 0.0005, "loss": 0.8132, "step": 481400 }, { "epoch": 37.76, "learning_rate": 0.0005, "loss": 0.8093, "step": 481500 }, { "epoch": 37.77, "learning_rate": 0.0005, "loss": 0.8237, "step": 481600 }, { "epoch": 37.77, "learning_rate": 0.0005, "loss": 0.8174, "step": 481700 }, { "epoch": 37.78, "learning_rate": 0.0005, "loss": 0.7873, "step": 481800 }, { "epoch": 37.79, "learning_rate": 0.0005, "loss": 0.8104, "step": 481900 }, { "epoch": 37.8, "learning_rate": 0.0005, "loss": 0.8336, "step": 482000 }, { "epoch": 37.81, "learning_rate": 0.0005, "loss": 0.8169, "step": 482100 }, { "epoch": 37.81, "learning_rate": 0.0005, "loss": 0.8128, "step": 482200 }, { "epoch": 37.82, "learning_rate": 0.0005, "loss": 0.8081, "step": 482300 }, { "epoch": 37.83, "learning_rate": 0.0005, "loss": 0.7982, "step": 482400 }, { "epoch": 37.84, "learning_rate": 0.0005, "loss": 0.8139, "step": 482500 }, { "epoch": 37.85, "learning_rate": 0.0005, "loss": 0.82, "step": 482600 }, { "epoch": 37.85, "learning_rate": 0.0005, "loss": 0.8118, "step": 482700 }, { "epoch": 37.86, "learning_rate": 0.0005, "loss": 0.8155, "step": 482800 }, { "epoch": 37.87, "learning_rate": 0.0005, "loss": 0.8242, "step": 482900 }, { "epoch": 37.88, "learning_rate": 0.0005, "loss": 0.8231, "step": 483000 }, { "epoch": 37.88, "learning_rate": 0.0005, "loss": 0.8049, "step": 483100 }, { "epoch": 37.89, "learning_rate": 0.0005, "loss": 0.8155, "step": 483200 }, { "epoch": 37.9, "learning_rate": 0.0005, "loss": 0.8248, "step": 483300 }, { "epoch": 37.91, "learning_rate": 0.0005, "loss": 0.8062, "step": 483400 }, { "epoch": 37.92, "learning_rate": 0.0005, "loss": 0.8294, "step": 483500 }, { "epoch": 37.92, "learning_rate": 0.0005, "loss": 0.8376, "step": 483600 }, { "epoch": 37.93, "learning_rate": 0.0005, "loss": 0.8136, "step": 483700 }, { "epoch": 37.94, "learning_rate": 0.0005, "loss": 0.8185, "step": 483800 }, { "epoch": 37.95, "learning_rate": 0.0005, "loss": 0.8062, "step": 483900 }, { "epoch": 37.95, "learning_rate": 0.0005, "loss": 0.8199, "step": 484000 }, { "epoch": 37.96, "learning_rate": 0.0005, "loss": 0.8214, "step": 484100 }, { "epoch": 37.97, "learning_rate": 0.0005, "loss": 0.8285, "step": 484200 }, { "epoch": 37.98, "learning_rate": 0.0005, "loss": 0.8259, "step": 484300 }, { "epoch": 37.99, "learning_rate": 0.0005, "loss": 0.8283, "step": 484400 }, { "epoch": 37.99, "learning_rate": 0.0005, "loss": 0.8267, "step": 484500 }, { "epoch": 38.0, "learning_rate": 0.0005, "loss": 0.7963, "step": 484600 }, { "epoch": 38.01, "learning_rate": 0.0005, "loss": 0.7441, "step": 484700 }, { "epoch": 38.02, "learning_rate": 0.0005, "loss": 0.764, "step": 484800 }, { "epoch": 38.03, "learning_rate": 0.0005, "loss": 0.7441, "step": 484900 }, { "epoch": 38.03, "learning_rate": 0.0005, "loss": 0.7467, "step": 485000 }, { "epoch": 38.04, "learning_rate": 0.0005, "loss": 0.7483, "step": 485100 }, { "epoch": 38.05, "learning_rate": 0.0005, "loss": 0.7541, "step": 485200 }, { "epoch": 38.06, "learning_rate": 0.0005, "loss": 0.7637, "step": 485300 }, { "epoch": 38.06, "learning_rate": 0.0005, "loss": 0.7627, "step": 485400 }, { "epoch": 38.07, "learning_rate": 0.0005, "loss": 0.7428, "step": 485500 }, { "epoch": 38.08, "learning_rate": 0.0005, "loss": 0.7634, "step": 485600 }, { "epoch": 38.09, "learning_rate": 0.0005, "loss": 0.754, "step": 485700 }, { "epoch": 38.1, "learning_rate": 0.0005, "loss": 0.764, "step": 485800 }, { "epoch": 38.1, "learning_rate": 0.0005, "loss": 0.7703, "step": 485900 }, { "epoch": 38.11, "learning_rate": 0.0005, "loss": 0.7607, "step": 486000 }, { "epoch": 38.12, "learning_rate": 0.0005, "loss": 0.7593, "step": 486100 }, { "epoch": 38.13, "learning_rate": 0.0005, "loss": 0.7632, "step": 486200 }, { "epoch": 38.14, "learning_rate": 0.0005, "loss": 0.7546, "step": 486300 }, { "epoch": 38.14, "learning_rate": 0.0005, "loss": 0.781, "step": 486400 }, { "epoch": 38.15, "learning_rate": 0.0005, "loss": 0.7796, "step": 486500 }, { "epoch": 38.16, "learning_rate": 0.0005, "loss": 0.7885, "step": 486600 }, { "epoch": 38.17, "learning_rate": 0.0005, "loss": 0.7649, "step": 486700 }, { "epoch": 38.17, "learning_rate": 0.0005, "loss": 0.7591, "step": 486800 }, { "epoch": 38.18, "learning_rate": 0.0005, "loss": 0.7552, "step": 486900 }, { "epoch": 38.19, "learning_rate": 0.0005, "loss": 0.7642, "step": 487000 }, { "epoch": 38.2, "learning_rate": 0.0005, "loss": 0.7598, "step": 487100 }, { "epoch": 38.21, "learning_rate": 0.0005, "loss": 0.7733, "step": 487200 }, { "epoch": 38.21, "learning_rate": 0.0005, "loss": 0.7665, "step": 487300 }, { "epoch": 38.22, "learning_rate": 0.0005, "loss": 0.7725, "step": 487400 }, { "epoch": 38.23, "learning_rate": 0.0005, "loss": 0.765, "step": 487500 }, { "epoch": 38.24, "learning_rate": 0.0005, "loss": 0.7849, "step": 487600 }, { "epoch": 38.24, "learning_rate": 0.0005, "loss": 0.774, "step": 487700 }, { "epoch": 38.25, "learning_rate": 0.0005, "loss": 0.7727, "step": 487800 }, { "epoch": 38.26, "learning_rate": 0.0005, "loss": 0.7863, "step": 487900 }, { "epoch": 38.27, "learning_rate": 0.0005, "loss": 0.7672, "step": 488000 }, { "epoch": 38.28, "learning_rate": 0.0005, "loss": 0.7788, "step": 488100 }, { "epoch": 38.28, "learning_rate": 0.0005, "loss": 0.7728, "step": 488200 }, { "epoch": 38.29, "learning_rate": 0.0005, "loss": 0.7769, "step": 488300 }, { "epoch": 38.3, "learning_rate": 0.0005, "loss": 0.7859, "step": 488400 }, { "epoch": 38.31, "learning_rate": 0.0005, "loss": 0.7622, "step": 488500 }, { "epoch": 38.32, "learning_rate": 0.0005, "loss": 0.7606, "step": 488600 }, { "epoch": 38.32, "learning_rate": 0.0005, "loss": 0.7834, "step": 488700 }, { "epoch": 38.33, "learning_rate": 0.0005, "loss": 0.7724, "step": 488800 }, { "epoch": 38.34, "learning_rate": 0.0005, "loss": 0.7873, "step": 488900 }, { "epoch": 38.35, "learning_rate": 0.0005, "loss": 0.7833, "step": 489000 }, { "epoch": 38.35, "learning_rate": 0.0005, "loss": 0.7713, "step": 489100 }, { "epoch": 38.36, "learning_rate": 0.0005, "loss": 0.771, "step": 489200 }, { "epoch": 38.37, "learning_rate": 0.0005, "loss": 0.7899, "step": 489300 }, { "epoch": 38.38, "learning_rate": 0.0005, "loss": 0.7779, "step": 489400 }, { "epoch": 38.39, "learning_rate": 0.0005, "loss": 0.7835, "step": 489500 }, { "epoch": 38.39, "learning_rate": 0.0005, "loss": 0.7883, "step": 489600 }, { "epoch": 38.4, "learning_rate": 0.0005, "loss": 0.773, "step": 489700 }, { "epoch": 38.41, "learning_rate": 0.0005, "loss": 0.7867, "step": 489800 }, { "epoch": 38.42, "learning_rate": 0.0005, "loss": 0.7703, "step": 489900 }, { "epoch": 38.43, "learning_rate": 0.0005, "loss": 0.7805, "step": 490000 }, { "epoch": 38.43, "eval_gen_len": 18.730908448839056, "eval_loss": 2.6787099838256836, "eval_rouge1": 35.6705, "eval_rouge2": 14.71, "eval_rougeL": 29.4423, "eval_rougeLsum": 29.4417, "eval_runtime": 382.2597, "eval_samples_per_second": 29.632, "eval_steps_per_second": 1.852, "step": 490000 }, { "epoch": 38.43, "learning_rate": 0.0005, "loss": 0.7859, "step": 490100 }, { "epoch": 38.44, "learning_rate": 0.0005, "loss": 0.7903, "step": 490200 }, { "epoch": 38.45, "learning_rate": 0.0005, "loss": 0.7715, "step": 490300 }, { "epoch": 38.46, "learning_rate": 0.0005, "loss": 0.7713, "step": 490400 }, { "epoch": 38.46, "learning_rate": 0.0005, "loss": 0.7675, "step": 490500 }, { "epoch": 38.47, "learning_rate": 0.0005, "loss": 0.7855, "step": 490600 }, { "epoch": 38.48, "learning_rate": 0.0005, "loss": 0.7847, "step": 490700 }, { "epoch": 38.49, "learning_rate": 0.0005, "loss": 0.7773, "step": 490800 }, { "epoch": 38.5, "learning_rate": 0.0005, "loss": 0.7874, "step": 490900 }, { "epoch": 38.5, "learning_rate": 0.0005, "loss": 0.7904, "step": 491000 }, { "epoch": 38.51, "learning_rate": 0.0005, "loss": 0.7847, "step": 491100 }, { "epoch": 38.52, "learning_rate": 0.0005, "loss": 0.7626, "step": 491200 }, { "epoch": 38.53, "learning_rate": 0.0005, "loss": 0.8014, "step": 491300 }, { "epoch": 38.54, "learning_rate": 0.0005, "loss": 0.7699, "step": 491400 }, { "epoch": 38.54, "learning_rate": 0.0005, "loss": 0.7917, "step": 491500 }, { "epoch": 38.55, "learning_rate": 0.0005, "loss": 0.7804, "step": 491600 }, { "epoch": 38.56, "learning_rate": 0.0005, "loss": 0.7782, "step": 491700 }, { "epoch": 38.57, "learning_rate": 0.0005, "loss": 0.7905, "step": 491800 }, { "epoch": 38.57, "learning_rate": 0.0005, "loss": 0.8008, "step": 491900 }, { "epoch": 38.58, "learning_rate": 0.0005, "loss": 0.7952, "step": 492000 }, { "epoch": 38.59, "learning_rate": 0.0005, "loss": 0.7836, "step": 492100 }, { "epoch": 38.6, "learning_rate": 0.0005, "loss": 0.7916, "step": 492200 }, { "epoch": 38.61, "learning_rate": 0.0005, "loss": 0.8032, "step": 492300 }, { "epoch": 38.61, "learning_rate": 0.0005, "loss": 0.7988, "step": 492400 }, { "epoch": 38.62, "learning_rate": 0.0005, "loss": 0.8059, "step": 492500 }, { "epoch": 38.63, "learning_rate": 0.0005, "loss": 0.8002, "step": 492600 }, { "epoch": 38.64, "learning_rate": 0.0005, "loss": 0.8033, "step": 492700 }, { "epoch": 38.64, "learning_rate": 0.0005, "loss": 0.8018, "step": 492800 }, { "epoch": 38.65, "learning_rate": 0.0005, "loss": 0.7887, "step": 492900 }, { "epoch": 38.66, "learning_rate": 0.0005, "loss": 0.7899, "step": 493000 }, { "epoch": 38.67, "learning_rate": 0.0005, "loss": 0.787, "step": 493100 }, { "epoch": 38.68, "learning_rate": 0.0005, "loss": 0.7851, "step": 493200 }, { "epoch": 38.68, "learning_rate": 0.0005, "loss": 0.8015, "step": 493300 }, { "epoch": 38.69, "learning_rate": 0.0005, "loss": 0.8058, "step": 493400 }, { "epoch": 38.7, "learning_rate": 0.0005, "loss": 0.8148, "step": 493500 }, { "epoch": 38.71, "learning_rate": 0.0005, "loss": 0.7937, "step": 493600 }, { "epoch": 38.72, "learning_rate": 0.0005, "loss": 0.7883, "step": 493700 }, { "epoch": 38.72, "learning_rate": 0.0005, "loss": 0.7989, "step": 493800 }, { "epoch": 38.73, "learning_rate": 0.0005, "loss": 0.8148, "step": 493900 }, { "epoch": 38.74, "learning_rate": 0.0005, "loss": 0.793, "step": 494000 }, { "epoch": 38.75, "learning_rate": 0.0005, "loss": 0.7904, "step": 494100 }, { "epoch": 38.75, "learning_rate": 0.0005, "loss": 0.8014, "step": 494200 }, { "epoch": 38.76, "learning_rate": 0.0005, "loss": 0.8086, "step": 494300 }, { "epoch": 38.77, "learning_rate": 0.0005, "loss": 0.7984, "step": 494400 }, { "epoch": 38.78, "learning_rate": 0.0005, "loss": 0.7979, "step": 494500 }, { "epoch": 38.79, "learning_rate": 0.0005, "loss": 0.7977, "step": 494600 }, { "epoch": 38.79, "learning_rate": 0.0005, "loss": 0.7853, "step": 494700 }, { "epoch": 38.8, "learning_rate": 0.0005, "loss": 0.7845, "step": 494800 }, { "epoch": 38.81, "learning_rate": 0.0005, "loss": 0.8026, "step": 494900 }, { "epoch": 38.82, "learning_rate": 0.0005, "loss": 0.8048, "step": 495000 }, { "epoch": 38.83, "learning_rate": 0.0005, "loss": 0.8033, "step": 495100 }, { "epoch": 38.83, "learning_rate": 0.0005, "loss": 0.7907, "step": 495200 }, { "epoch": 38.84, "learning_rate": 0.0005, "loss": 0.809, "step": 495300 }, { "epoch": 38.85, "learning_rate": 0.0005, "loss": 0.7964, "step": 495400 }, { "epoch": 38.86, "learning_rate": 0.0005, "loss": 0.8103, "step": 495500 }, { "epoch": 38.86, "learning_rate": 0.0005, "loss": 0.8152, "step": 495600 }, { "epoch": 38.87, "learning_rate": 0.0005, "loss": 0.797, "step": 495700 }, { "epoch": 38.88, "learning_rate": 0.0005, "loss": 0.8186, "step": 495800 }, { "epoch": 38.89, "learning_rate": 0.0005, "loss": 0.7971, "step": 495900 }, { "epoch": 38.9, "learning_rate": 0.0005, "loss": 0.8047, "step": 496000 }, { "epoch": 38.9, "learning_rate": 0.0005, "loss": 0.8059, "step": 496100 }, { "epoch": 38.91, "learning_rate": 0.0005, "loss": 0.825, "step": 496200 }, { "epoch": 38.92, "learning_rate": 0.0005, "loss": 0.8032, "step": 496300 }, { "epoch": 38.93, "learning_rate": 0.0005, "loss": 0.8058, "step": 496400 }, { "epoch": 38.94, "learning_rate": 0.0005, "loss": 0.8043, "step": 496500 }, { "epoch": 38.94, "learning_rate": 0.0005, "loss": 0.8195, "step": 496600 }, { "epoch": 38.95, "learning_rate": 0.0005, "loss": 0.7943, "step": 496700 }, { "epoch": 38.96, "learning_rate": 0.0005, "loss": 0.8087, "step": 496800 }, { "epoch": 38.97, "learning_rate": 0.0005, "loss": 0.8194, "step": 496900 }, { "epoch": 38.97, "learning_rate": 0.0005, "loss": 0.8167, "step": 497000 }, { "epoch": 38.98, "learning_rate": 0.0005, "loss": 0.7995, "step": 497100 }, { "epoch": 38.99, "learning_rate": 0.0005, "loss": 0.8111, "step": 497200 }, { "epoch": 39.0, "learning_rate": 0.0005, "loss": 0.8107, "step": 497300 }, { "epoch": 39.01, "learning_rate": 0.0005, "loss": 0.7537, "step": 497400 }, { "epoch": 39.01, "learning_rate": 0.0005, "loss": 0.735, "step": 497500 }, { "epoch": 39.02, "learning_rate": 0.0005, "loss": 0.7464, "step": 497600 }, { "epoch": 39.03, "learning_rate": 0.0005, "loss": 0.7368, "step": 497700 }, { "epoch": 39.04, "learning_rate": 0.0005, "loss": 0.7398, "step": 497800 }, { "epoch": 39.04, "learning_rate": 0.0005, "loss": 0.7409, "step": 497900 }, { "epoch": 39.05, "learning_rate": 0.0005, "loss": 0.735, "step": 498000 }, { "epoch": 39.06, "learning_rate": 0.0005, "loss": 0.7382, "step": 498100 }, { "epoch": 39.07, "learning_rate": 0.0005, "loss": 0.7348, "step": 498200 }, { "epoch": 39.08, "learning_rate": 0.0005, "loss": 0.7364, "step": 498300 }, { "epoch": 39.08, "learning_rate": 0.0005, "loss": 0.7536, "step": 498400 }, { "epoch": 39.09, "learning_rate": 0.0005, "loss": 0.7668, "step": 498500 }, { "epoch": 39.1, "learning_rate": 0.0005, "loss": 0.7428, "step": 498600 }, { "epoch": 39.11, "learning_rate": 0.0005, "loss": 0.7446, "step": 498700 }, { "epoch": 39.12, "learning_rate": 0.0005, "loss": 0.7583, "step": 498800 }, { "epoch": 39.12, "learning_rate": 0.0005, "loss": 0.7429, "step": 498900 }, { "epoch": 39.13, "learning_rate": 0.0005, "loss": 0.7537, "step": 499000 }, { "epoch": 39.14, "learning_rate": 0.0005, "loss": 0.7436, "step": 499100 }, { "epoch": 39.15, "learning_rate": 0.0005, "loss": 0.7541, "step": 499200 }, { "epoch": 39.15, "learning_rate": 0.0005, "loss": 0.7643, "step": 499300 }, { "epoch": 39.16, "learning_rate": 0.0005, "loss": 0.745, "step": 499400 }, { "epoch": 39.17, "learning_rate": 0.0005, "loss": 0.7421, "step": 499500 }, { "epoch": 39.18, "learning_rate": 0.0005, "loss": 0.7588, "step": 499600 }, { "epoch": 39.19, "learning_rate": 0.0005, "loss": 0.7629, "step": 499700 }, { "epoch": 39.19, "learning_rate": 0.0005, "loss": 0.7602, "step": 499800 }, { "epoch": 39.2, "learning_rate": 0.0005, "loss": 0.7571, "step": 499900 }, { "epoch": 39.21, "learning_rate": 0.0005, "loss": 0.7541, "step": 500000 }, { "epoch": 39.21, "eval_gen_len": 18.742297166063388, "eval_loss": 2.7122867107391357, "eval_rouge1": 35.952, "eval_rouge2": 15.0205, "eval_rougeL": 29.7389, "eval_rougeLsum": 29.7331, "eval_runtime": 382.2157, "eval_samples_per_second": 29.635, "eval_steps_per_second": 1.852, "step": 500000 } ], "max_steps": 637600, "num_train_epochs": 50, "total_flos": 2.1183059506360812e+18, "trial_name": null, "trial_params": null }