{ "best_metric": 63.1011, "best_model_checkpoint": "output_train_bart_large_local/checkpoint-48000", "epoch": 2.0123153700647967, "global_step": 50000, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.04, "learning_rate": 3.9842240824211213e-07, "loss": 1.8309, "step": 500 }, { "epoch": 0.08, "learning_rate": 8.008692852543465e-07, "loss": 1.0322, "step": 1000 }, { "epoch": 0.08, "eval_bertscore/f1": 0.7264, "eval_bertscore/precision": 0.7345, "eval_bertscore/recall": 0.7214, "eval_mean_prediction_length_characters": 779.505, "eval_mean_prediction_length_tokens": 167.969, "eval_num_predicted": 1000, "eval_rouge/geometric_mean": 45.7604, "eval_rouge/rouge1": 60.4328, "eval_rouge/rouge2": 39.8155, "eval_rouge/rougeL": 39.824, "eval_rouge/rougeLsum": 57.577, "eval_runtime": 2363.9821, "eval_samples_per_second": 0.423, "eval_steps_per_second": 0.423, "step": 1000 }, { "epoch": 0.12, "learning_rate": 1.203316162266581e-06, "loss": 0.8078, "step": 1500 }, { "epoch": 0.16, "learning_rate": 1.6057630392788153e-06, "loss": 0.7401, "step": 2000 }, { "epoch": 0.16, "eval_bertscore/f1": 0.767, "eval_bertscore/precision": 0.7735, "eval_bertscore/recall": 0.7627, "eval_mean_prediction_length_characters": 746.987, "eval_mean_prediction_length_tokens": 167.637, "eval_num_predicted": 1000, "eval_rouge/geometric_mean": 49.7632, "eval_rouge/rouge1": 64.5596, "eval_rouge/rouge2": 43.5883, "eval_rouge/rougeL": 43.7919, "eval_rouge/rougeLsum": 62.268, "eval_runtime": 2390.2778, "eval_samples_per_second": 0.418, "eval_steps_per_second": 0.418, "step": 2000 }, { "epoch": 0.2, "learning_rate": 2.00820991629105e-06, "loss": 0.6952, "step": 2500 }, { "epoch": 0.24, "learning_rate": 2.4106567933032843e-06, "loss": 0.6654, "step": 3000 }, { "epoch": 0.24, "eval_bertscore/f1": 0.775, "eval_bertscore/precision": 0.7801, "eval_bertscore/recall": 0.7719, "eval_mean_prediction_length_characters": 750.739, "eval_mean_prediction_length_tokens": 167.441, "eval_num_predicted": 1000, "eval_rouge/geometric_mean": 51.1255, "eval_rouge/rouge1": 65.8167, "eval_rouge/rouge2": 45.0215, "eval_rouge/rougeL": 45.0979, "eval_rouge/rougeLsum": 63.4694, "eval_runtime": 2400.8698, "eval_samples_per_second": 0.417, "eval_steps_per_second": 0.417, "step": 3000 }, { "epoch": 0.28, "learning_rate": 2.8131036703155183e-06, "loss": 0.6532, "step": 3500 }, { "epoch": 0.32, "learning_rate": 3.215550547327753e-06, "loss": 0.6421, "step": 4000 }, { "epoch": 0.32, "eval_bertscore/f1": 0.778, "eval_bertscore/precision": 0.7803, "eval_bertscore/recall": 0.7778, "eval_mean_prediction_length_characters": 787.437, "eval_mean_prediction_length_tokens": 175.145, "eval_num_predicted": 1000, "eval_rouge/geometric_mean": 51.6325, "eval_rouge/rouge1": 66.6546, "eval_rouge/rouge2": 45.5613, "eval_rouge/rougeL": 45.3256, "eval_rouge/rougeLsum": 64.2944, "eval_runtime": 2500.2502, "eval_samples_per_second": 0.4, "eval_steps_per_second": 0.4, "step": 4000 }, { "epoch": 0.36, "learning_rate": 3.6179974243399875e-06, "loss": 0.6253, "step": 4500 }, { "epoch": 0.4, "learning_rate": 4.0196394075981975e-06, "loss": 0.62, "step": 5000 }, { "epoch": 0.4, "eval_bertscore/f1": 0.7774, "eval_bertscore/precision": 0.7652, "eval_bertscore/recall": 0.7921, "eval_mean_prediction_length_characters": 948.673, "eval_mean_prediction_length_tokens": 206.93, "eval_num_predicted": 1000, "eval_rouge/geometric_mean": 51.4967, "eval_rouge/rouge1": 67.0983, "eval_rouge/rouge2": 45.6949, "eval_rouge/rougeL": 44.541, "eval_rouge/rougeLsum": 64.5765, "eval_runtime": 2980.0062, "eval_samples_per_second": 0.336, "eval_steps_per_second": 0.336, "step": 5000 }, { "epoch": 0.44, "learning_rate": 4.4220862846104314e-06, "loss": 0.6041, "step": 5500 }, { "epoch": 0.48, "learning_rate": 4.824533161622666e-06, "loss": 0.5949, "step": 6000 }, { "epoch": 0.48, "eval_bertscore/f1": 0.7803, "eval_bertscore/precision": 0.7732, "eval_bertscore/recall": 0.7895, "eval_mean_prediction_length_characters": 879.72, "eval_mean_prediction_length_tokens": 192.817, "eval_num_predicted": 1000, "eval_rouge/geometric_mean": 52.1495, "eval_rouge/rouge1": 67.3707, "eval_rouge/rouge2": 46.4072, "eval_rouge/rougeL": 45.3622, "eval_rouge/rougeLsum": 65.0424, "eval_runtime": 2666.0858, "eval_samples_per_second": 0.375, "eval_steps_per_second": 0.375, "step": 6000 }, { "epoch": 0.52, "learning_rate": 5.2269800386349e-06, "loss": 0.5762, "step": 6500 }, { "epoch": 0.56, "learning_rate": 5.629426915647135e-06, "loss": 0.5719, "step": 7000 }, { "epoch": 0.56, "eval_bertscore/f1": 0.7838, "eval_bertscore/precision": 0.7832, "eval_bertscore/recall": 0.7866, "eval_mean_prediction_length_characters": 832.624, "eval_mean_prediction_length_tokens": 183.52, "eval_num_predicted": 1000, "eval_rouge/geometric_mean": 52.9627, "eval_rouge/rouge1": 67.8554, "eval_rouge/rouge2": 47.0328, "eval_rouge/rougeL": 46.5505, "eval_rouge/rougeLsum": 65.5148, "eval_runtime": 2493.7707, "eval_samples_per_second": 0.401, "eval_steps_per_second": 0.401, "step": 7000 }, { "epoch": 0.6, "learning_rate": 6.031068898905345e-06, "loss": 0.5718, "step": 7500 }, { "epoch": 0.64, "learning_rate": 6.433515775917579e-06, "loss": 0.5541, "step": 8000 }, { "epoch": 0.64, "eval_bertscore/f1": 0.7856, "eval_bertscore/precision": 0.7835, "eval_bertscore/recall": 0.7897, "eval_mean_prediction_length_characters": 831.437, "eval_mean_prediction_length_tokens": 184.28, "eval_num_predicted": 1000, "eval_rouge/geometric_mean": 53.234, "eval_rouge/rouge1": 68.2648, "eval_rouge/rouge2": 47.5788, "eval_rouge/rougeL": 46.447, "eval_rouge/rougeLsum": 65.9056, "eval_runtime": 2476.0861, "eval_samples_per_second": 0.404, "eval_steps_per_second": 0.404, "step": 8000 }, { "epoch": 0.68, "learning_rate": 6.835962652929814e-06, "loss": 0.5621, "step": 8500 }, { "epoch": 0.72, "learning_rate": 7.238409529942049e-06, "loss": 0.5509, "step": 9000 }, { "epoch": 0.72, "eval_bertscore/f1": 0.7876, "eval_bertscore/precision": 0.787, "eval_bertscore/recall": 0.79, "eval_mean_prediction_length_characters": 825.034, "eval_mean_prediction_length_tokens": 181.78, "eval_num_predicted": 1000, "eval_rouge/geometric_mean": 53.6596, "eval_rouge/rouge1": 68.4619, "eval_rouge/rouge2": 47.9553, "eval_rouge/rougeL": 47.0605, "eval_rouge/rougeLsum": 66.229, "eval_runtime": 2413.4877, "eval_samples_per_second": 0.414, "eval_steps_per_second": 0.414, "step": 9000 }, { "epoch": 0.76, "learning_rate": 7.640856406954282e-06, "loss": 0.5319, "step": 9500 }, { "epoch": 0.8, "learning_rate": 8.042498390212493e-06, "loss": 0.5317, "step": 10000 }, { "epoch": 0.8, "eval_bertscore/f1": 0.7801, "eval_bertscore/precision": 0.7653, "eval_bertscore/recall": 0.7976, "eval_mean_prediction_length_characters": 983.333, "eval_mean_prediction_length_tokens": 212.132, "eval_num_predicted": 1000, "eval_rouge/geometric_mean": 52.2204, "eval_rouge/rouge1": 67.4114, "eval_rouge/rouge2": 46.8366, "eval_rouge/rougeL": 45.1025, "eval_rouge/rougeLsum": 64.9737, "eval_runtime": 2850.8965, "eval_samples_per_second": 0.351, "eval_steps_per_second": 0.351, "step": 10000 }, { "epoch": 0.85, "learning_rate": 8.444945267224727e-06, "loss": 0.5246, "step": 10500 }, { "epoch": 0.89, "learning_rate": 8.847392144236962e-06, "loss": 0.5306, "step": 11000 }, { "epoch": 0.89, "eval_bertscore/f1": 0.7812, "eval_bertscore/precision": 0.7727, "eval_bertscore/recall": 0.7923, "eval_mean_prediction_length_characters": 929.756, "eval_mean_prediction_length_tokens": 198.992, "eval_num_predicted": 1000, "eval_rouge/geometric_mean": 52.9021, "eval_rouge/rouge1": 67.7205, "eval_rouge/rouge2": 47.5736, "eval_rouge/rougeL": 45.955, "eval_rouge/rougeLsum": 65.2918, "eval_runtime": 2641.4953, "eval_samples_per_second": 0.379, "eval_steps_per_second": 0.379, "step": 11000 }, { "epoch": 0.93, "learning_rate": 9.249839021249196e-06, "loss": 0.5118, "step": 11500 }, { "epoch": 0.97, "learning_rate": 9.65228589826143e-06, "loss": 0.5125, "step": 12000 }, { "epoch": 0.97, "eval_bertscore/f1": 0.7924, "eval_bertscore/precision": 0.8025, "eval_bertscore/recall": 0.7847, "eval_mean_prediction_length_characters": 739.515, "eval_mean_prediction_length_tokens": 166.617, "eval_num_predicted": 1000, "eval_rouge/geometric_mean": 54.3827, "eval_rouge/rouge1": 68.5657, "eval_rouge/rouge2": 48.8496, "eval_rouge/rougeL": 48.0192, "eval_rouge/rougeLsum": 66.6464, "eval_runtime": 2173.7087, "eval_samples_per_second": 0.46, "eval_steps_per_second": 0.46, "step": 12000 }, { "epoch": 1.01, "learning_rate": 9.993918580525149e-06, "loss": 0.4953, "step": 12500 }, { "epoch": 1.05, "learning_rate": 9.949202260857123e-06, "loss": 0.4559, "step": 13000 }, { "epoch": 1.05, "eval_bertscore/f1": 0.7876, "eval_bertscore/precision": 0.782, "eval_bertscore/recall": 0.7953, "eval_mean_prediction_length_characters": 872.104, "eval_mean_prediction_length_tokens": 188.836, "eval_num_predicted": 1000, "eval_rouge/geometric_mean": 54.0286, "eval_rouge/rouge1": 68.9929, "eval_rouge/rouge2": 48.683, "eval_rouge/rougeL": 46.9557, "eval_rouge/rougeLsum": 66.5892, "eval_runtime": 2531.5352, "eval_samples_per_second": 0.395, "eval_steps_per_second": 0.395, "step": 13000 }, { "epoch": 1.09, "learning_rate": 9.904575373828433e-06, "loss": 0.4572, "step": 13500 }, { "epoch": 1.13, "learning_rate": 9.859859054160406e-06, "loss": 0.4455, "step": 14000 }, { "epoch": 1.13, "eval_bertscore/f1": 0.792, "eval_bertscore/precision": 0.7832, "eval_bertscore/recall": 0.8031, "eval_mean_prediction_length_characters": 921.871, "eval_mean_prediction_length_tokens": 200.026, "eval_num_predicted": 1000, "eval_rouge/geometric_mean": 54.5933, "eval_rouge/rouge1": 69.3876, "eval_rouge/rouge2": 49.1971, "eval_rouge/rougeL": 47.6645, "eval_rouge/rougeLsum": 67.1636, "eval_runtime": 2729.0713, "eval_samples_per_second": 0.366, "eval_steps_per_second": 0.366, "step": 14000 }, { "epoch": 1.17, "learning_rate": 9.81514273449238e-06, "loss": 0.446, "step": 14500 }, { "epoch": 1.21, "learning_rate": 9.770426414824355e-06, "loss": 0.4376, "step": 15000 }, { "epoch": 1.21, "eval_bertscore/f1": 0.7943, "eval_bertscore/precision": 0.7909, "eval_bertscore/recall": 0.7996, "eval_mean_prediction_length_characters": 856.751, "eval_mean_prediction_length_tokens": 186.271, "eval_num_predicted": 1000, "eval_rouge/geometric_mean": 55.0336, "eval_rouge/rouge1": 69.6177, "eval_rouge/rouge2": 49.6842, "eval_rouge/rougeL": 48.1889, "eval_rouge/rougeLsum": 67.4597, "eval_runtime": 2554.3498, "eval_samples_per_second": 0.391, "eval_steps_per_second": 0.391, "step": 15000 }, { "epoch": 1.25, "learning_rate": 9.725710095156328e-06, "loss": 0.4338, "step": 15500 }, { "epoch": 1.29, "learning_rate": 9.680993775488303e-06, "loss": 0.4333, "step": 16000 }, { "epoch": 1.29, "eval_bertscore/f1": 0.7941, "eval_bertscore/precision": 0.7901, "eval_bertscore/recall": 0.8003, "eval_mean_prediction_length_characters": 857.762, "eval_mean_prediction_length_tokens": 185.042, "eval_num_predicted": 1000, "eval_rouge/geometric_mean": 55.3969, "eval_rouge/rouge1": 69.5818, "eval_rouge/rouge2": 50.0439, "eval_rouge/rougeL": 48.8213, "eval_rouge/rougeLsum": 67.3532, "eval_runtime": 2568.5908, "eval_samples_per_second": 0.389, "eval_steps_per_second": 0.389, "step": 16000 }, { "epoch": 1.33, "learning_rate": 9.636366888459613e-06, "loss": 0.4224, "step": 16500 }, { "epoch": 1.37, "learning_rate": 9.591650568791587e-06, "loss": 0.4229, "step": 17000 }, { "epoch": 1.37, "eval_bertscore/f1": 0.7968, "eval_bertscore/precision": 0.7936, "eval_bertscore/recall": 0.802, "eval_mean_prediction_length_characters": 850.064, "eval_mean_prediction_length_tokens": 186.438, "eval_num_predicted": 1000, "eval_rouge/geometric_mean": 56.2479, "eval_rouge/rouge1": 70.2657, "eval_rouge/rouge2": 50.9461, "eval_rouge/rougeL": 49.7123, "eval_rouge/rougeLsum": 68.1758, "eval_runtime": 2596.5556, "eval_samples_per_second": 0.385, "eval_steps_per_second": 0.385, "step": 17000 }, { "epoch": 1.41, "learning_rate": 9.54693424912356e-06, "loss": 0.4215, "step": 17500 }, { "epoch": 1.45, "learning_rate": 9.502217929455535e-06, "loss": 0.4145, "step": 18000 }, { "epoch": 1.45, "eval_bertscore/f1": 0.7982, "eval_bertscore/precision": 0.7974, "eval_bertscore/recall": 0.801, "eval_mean_prediction_length_characters": 821.211, "eval_mean_prediction_length_tokens": 179.198, "eval_num_predicted": 1000, "eval_rouge/geometric_mean": 56.2997, "eval_rouge/rouge1": 70.2035, "eval_rouge/rouge2": 51.1438, "eval_rouge/rougeL": 49.7011, "eval_rouge/rougeLsum": 68.1054, "eval_runtime": 2500.5637, "eval_samples_per_second": 0.4, "eval_steps_per_second": 0.4, "step": 18000 }, { "epoch": 1.49, "learning_rate": 9.457501609787508e-06, "loss": 0.4108, "step": 18500 }, { "epoch": 1.53, "learning_rate": 9.41287472275882e-06, "loss": 0.413, "step": 19000 }, { "epoch": 1.53, "eval_bertscore/f1": 0.7981, "eval_bertscore/precision": 0.7992, "eval_bertscore/recall": 0.799, "eval_mean_prediction_length_characters": 806.834, "eval_mean_prediction_length_tokens": 176.24, "eval_num_predicted": 1000, "eval_rouge/geometric_mean": 56.5708, "eval_rouge/rouge1": 70.2439, "eval_rouge/rouge2": 51.4876, "eval_rouge/rougeL": 50.0572, "eval_rouge/rougeLsum": 68.1941, "eval_runtime": 2443.1073, "eval_samples_per_second": 0.409, "eval_steps_per_second": 0.409, "step": 19000 }, { "epoch": 1.57, "learning_rate": 9.368158403090792e-06, "loss": 0.4064, "step": 19500 }, { "epoch": 1.61, "learning_rate": 9.323442083422767e-06, "loss": 0.4004, "step": 20000 }, { "epoch": 1.61, "eval_bertscore/f1": 0.8022, "eval_bertscore/precision": 0.7971, "eval_bertscore/recall": 0.8093, "eval_mean_prediction_length_characters": 860.077, "eval_mean_prediction_length_tokens": 188.397, "eval_num_predicted": 1000, "eval_rouge/geometric_mean": 57.2862, "eval_rouge/rouge1": 71.0513, "eval_rouge/rouge2": 52.0715, "eval_rouge/rougeL": 50.8134, "eval_rouge/rougeLsum": 69.0639, "eval_runtime": 2596.2685, "eval_samples_per_second": 0.385, "eval_steps_per_second": 0.385, "step": 20000 }, { "epoch": 1.65, "learning_rate": 9.278725763754741e-06, "loss": 0.3953, "step": 20500 }, { "epoch": 1.69, "learning_rate": 9.234009444086714e-06, "loss": 0.3905, "step": 21000 }, { "epoch": 1.69, "eval_bertscore/f1": 0.8009, "eval_bertscore/precision": 0.792, "eval_bertscore/recall": 0.812, "eval_mean_prediction_length_characters": 922.278, "eval_mean_prediction_length_tokens": 199.619, "eval_num_predicted": 1000, "eval_rouge/geometric_mean": 56.8514, "eval_rouge/rouge1": 70.8436, "eval_rouge/rouge2": 51.8156, "eval_rouge/rougeL": 50.0567, "eval_rouge/rougeLsum": 68.8517, "eval_runtime": 2741.6372, "eval_samples_per_second": 0.365, "eval_steps_per_second": 0.365, "step": 21000 }, { "epoch": 1.73, "learning_rate": 9.189382557058024e-06, "loss": 0.3834, "step": 21500 }, { "epoch": 1.77, "learning_rate": 9.144666237389999e-06, "loss": 0.3772, "step": 22000 }, { "epoch": 1.77, "eval_bertscore/f1": 0.802, "eval_bertscore/precision": 0.8014, "eval_bertscore/recall": 0.8048, "eval_mean_prediction_length_characters": 821.195, "eval_mean_prediction_length_tokens": 179.279, "eval_num_predicted": 1000, "eval_rouge/geometric_mean": 57.3719, "eval_rouge/rouge1": 70.4754, "eval_rouge/rouge2": 52.2981, "eval_rouge/rougeL": 51.2358, "eval_rouge/rougeLsum": 68.5129, "eval_runtime": 2484.708, "eval_samples_per_second": 0.402, "eval_steps_per_second": 0.402, "step": 22000 }, { "epoch": 1.81, "learning_rate": 9.099949917721973e-06, "loss": 0.3796, "step": 22500 }, { "epoch": 1.85, "learning_rate": 9.055233598053946e-06, "loss": 0.3745, "step": 23000 }, { "epoch": 1.85, "eval_bertscore/f1": 0.8077, "eval_bertscore/precision": 0.8096, "eval_bertscore/recall": 0.8076, "eval_mean_prediction_length_characters": 797.37, "eval_mean_prediction_length_tokens": 176.263, "eval_num_predicted": 1000, "eval_rouge/geometric_mean": 58.2667, "eval_rouge/rouge1": 71.7377, "eval_rouge/rouge2": 53.2732, "eval_rouge/rougeL": 51.7614, "eval_rouge/rougeLsum": 69.9972, "eval_runtime": 2452.4348, "eval_samples_per_second": 0.408, "eval_steps_per_second": 0.408, "step": 23000 }, { "epoch": 1.89, "learning_rate": 9.01051727838592e-06, "loss": 0.3705, "step": 23500 }, { "epoch": 1.93, "learning_rate": 8.96589039135723e-06, "loss": 0.3709, "step": 24000 }, { "epoch": 1.93, "eval_bertscore/f1": 0.8108, "eval_bertscore/precision": 0.8141, "eval_bertscore/recall": 0.8094, "eval_mean_prediction_length_characters": 798.81, "eval_mean_prediction_length_tokens": 176.217, "eval_num_predicted": 1000, "eval_rouge/geometric_mean": 58.6206, "eval_rouge/rouge1": 72.0193, "eval_rouge/rouge2": 53.6884, "eval_rouge/rougeL": 52.098, "eval_rouge/rougeLsum": 70.3419, "eval_runtime": 2449.0615, "eval_samples_per_second": 0.408, "eval_steps_per_second": 0.408, "step": 24000 }, { "epoch": 1.97, "learning_rate": 8.92126350432854e-06, "loss": 0.3638, "step": 24500 }, { "epoch": 2.01, "learning_rate": 8.876547184660515e-06, "loss": 0.3461, "step": 25000 }, { "epoch": 2.01, "eval_bertscore/f1": 0.8092, "eval_bertscore/precision": 0.8022, "eval_bertscore/recall": 0.8182, "eval_mean_prediction_length_characters": 891.59, "eval_mean_prediction_length_tokens": 193.579, "eval_num_predicted": 1000, "eval_rouge/geometric_mean": 58.5159, "eval_rouge/rouge1": 72.4344, "eval_rouge/rouge2": 54.0016, "eval_rouge/rougeL": 51.2235, "eval_rouge/rougeLsum": 70.5176, "eval_runtime": 2696.2641, "eval_samples_per_second": 0.371, "eval_steps_per_second": 0.371, "step": 25000 }, { "epoch": 2.05, "learning_rate": 8.83183086499249e-06, "loss": 0.2926, "step": 25500 }, { "epoch": 2.09, "learning_rate": 8.787114545324463e-06, "loss": 0.2935, "step": 26000 }, { "epoch": 2.09, "eval_bertscore/f1": 0.8099, "eval_bertscore/precision": 0.806, "eval_bertscore/recall": 0.8158, "eval_mean_prediction_length_characters": 858.323, "eval_mean_prediction_length_tokens": 186.494, "eval_num_predicted": 1000, "eval_rouge/geometric_mean": 59.0206, "eval_rouge/rouge1": 72.3233, "eval_rouge/rouge2": 54.4823, "eval_rouge/rougeL": 52.1769, "eval_rouge/rougeLsum": 70.5191, "eval_runtime": 2616.4872, "eval_samples_per_second": 0.382, "eval_steps_per_second": 0.382, "step": 26000 }, { "epoch": 2.13, "learning_rate": 8.742398225656437e-06, "loss": 0.2945, "step": 26500 }, { "epoch": 2.17, "learning_rate": 8.69768190598841e-06, "loss": 0.2859, "step": 27000 }, { "epoch": 2.17, "eval_bertscore/f1": 0.8021, "eval_bertscore/precision": 0.7967, "eval_bertscore/recall": 0.8098, "eval_mean_prediction_length_characters": 862.623, "eval_mean_prediction_length_tokens": 187.108, "eval_num_predicted": 1000, "eval_rouge/geometric_mean": 58.0972, "eval_rouge/rouge1": 71.3169, "eval_rouge/rouge2": 53.5502, "eval_rouge/rougeL": 51.3466, "eval_rouge/rougeLsum": 69.328, "eval_runtime": 2591.3634, "eval_samples_per_second": 0.386, "eval_steps_per_second": 0.386, "step": 27000 }, { "epoch": 2.21, "learning_rate": 8.65305501895972e-06, "loss": 0.2913, "step": 27500 }, { "epoch": 2.25, "learning_rate": 8.608338699291695e-06, "loss": 0.2871, "step": 28000 }, { "epoch": 2.25, "eval_bertscore/f1": 0.8125, "eval_bertscore/precision": 0.81, "eval_bertscore/recall": 0.8167, "eval_mean_prediction_length_characters": 839.348, "eval_mean_prediction_length_tokens": 182.08, "eval_num_predicted": 1000, "eval_rouge/geometric_mean": 59.5943, "eval_rouge/rouge1": 72.7757, "eval_rouge/rouge2": 55.0591, "eval_rouge/rougeL": 52.82, "eval_rouge/rougeLsum": 70.9482, "eval_runtime": 2411.5587, "eval_samples_per_second": 0.415, "eval_steps_per_second": 0.415, "step": 28000 }, { "epoch": 2.29, "learning_rate": 8.563622379623669e-06, "loss": 0.2858, "step": 28500 }, { "epoch": 2.33, "learning_rate": 8.518906059955642e-06, "loss": 0.2849, "step": 29000 }, { "epoch": 2.33, "eval_bertscore/f1": 0.8151, "eval_bertscore/precision": 0.8177, "eval_bertscore/recall": 0.8143, "eval_mean_prediction_length_characters": 801.734, "eval_mean_prediction_length_tokens": 175.958, "eval_num_predicted": 1000, "eval_rouge/geometric_mean": 60.3823, "eval_rouge/rouge1": 73.1311, "eval_rouge/rouge2": 56.0185, "eval_rouge/rougeL": 53.7397, "eval_rouge/rougeLsum": 71.4246, "eval_runtime": 2330.5152, "eval_samples_per_second": 0.429, "eval_steps_per_second": 0.429, "step": 29000 }, { "epoch": 2.37, "learning_rate": 8.474189740287617e-06, "loss": 0.2787, "step": 29500 }, { "epoch": 2.41, "learning_rate": 8.429473420619591e-06, "loss": 0.2819, "step": 30000 }, { "epoch": 2.41, "eval_bertscore/f1": 0.8142, "eval_bertscore/precision": 0.8077, "eval_bertscore/recall": 0.8228, "eval_mean_prediction_length_characters": 894.063, "eval_mean_prediction_length_tokens": 192.74, "eval_num_predicted": 1000, "eval_rouge/geometric_mean": 60.1592, "eval_rouge/rouge1": 73.1791, "eval_rouge/rouge2": 55.8507, "eval_rouge/rougeL": 53.2711, "eval_rouge/rougeLsum": 71.4419, "eval_runtime": 2537.8153, "eval_samples_per_second": 0.394, "eval_steps_per_second": 0.394, "step": 30000 }, { "epoch": 2.45, "learning_rate": 8.384757100951564e-06, "loss": 0.2799, "step": 30500 }, { "epoch": 2.5, "learning_rate": 8.340040781283539e-06, "loss": 0.2776, "step": 31000 }, { "epoch": 2.5, "eval_bertscore/f1": 0.8054, "eval_bertscore/precision": 0.7925, "eval_bertscore/recall": 0.821, "eval_mean_prediction_length_characters": 967.121, "eval_mean_prediction_length_tokens": 206.944, "eval_num_predicted": 1000, "eval_rouge/geometric_mean": 58.7521, "eval_rouge/rouge1": 71.9074, "eval_rouge/rouge2": 54.5099, "eval_rouge/rougeL": 51.7393, "eval_rouge/rougeLsum": 69.9728, "eval_runtime": 2713.6577, "eval_samples_per_second": 0.369, "eval_steps_per_second": 0.369, "step": 31000 }, { "epoch": 2.54, "learning_rate": 8.295413894254849e-06, "loss": 0.2761, "step": 31500 }, { "epoch": 2.58, "learning_rate": 8.250697574586823e-06, "loss": 0.2742, "step": 32000 }, { "epoch": 2.58, "eval_bertscore/f1": 0.8187, "eval_bertscore/precision": 0.8125, "eval_bertscore/recall": 0.827, "eval_mean_prediction_length_characters": 880.74, "eval_mean_prediction_length_tokens": 191.221, "eval_num_predicted": 1000, "eval_rouge/geometric_mean": 60.9358, "eval_rouge/rouge1": 73.8216, "eval_rouge/rouge2": 56.8015, "eval_rouge/rougeL": 53.9603, "eval_rouge/rougeLsum": 72.1193, "eval_runtime": 2491.9164, "eval_samples_per_second": 0.401, "eval_steps_per_second": 0.401, "step": 32000 }, { "epoch": 2.62, "learning_rate": 8.205981254918796e-06, "loss": 0.2737, "step": 32500 }, { "epoch": 2.66, "learning_rate": 8.16126493525077e-06, "loss": 0.2663, "step": 33000 }, { "epoch": 2.66, "eval_bertscore/f1": 0.8215, "eval_bertscore/precision": 0.8174, "eval_bertscore/recall": 0.8272, "eval_mean_prediction_length_characters": 862.393, "eval_mean_prediction_length_tokens": 187.552, "eval_num_predicted": 1000, "eval_rouge/geometric_mean": 61.7551, "eval_rouge/rouge1": 74.5811, "eval_rouge/rouge2": 57.6565, "eval_rouge/rougeL": 54.7699, "eval_rouge/rougeLsum": 72.9841, "eval_runtime": 2430.2491, "eval_samples_per_second": 0.411, "eval_steps_per_second": 0.411, "step": 33000 }, { "epoch": 2.7, "learning_rate": 8.116548615582743e-06, "loss": 0.2636, "step": 33500 }, { "epoch": 2.74, "learning_rate": 8.071921728554053e-06, "loss": 0.2672, "step": 34000 }, { "epoch": 2.74, "eval_bertscore/f1": 0.8194, "eval_bertscore/precision": 0.8133, "eval_bertscore/recall": 0.8274, "eval_mean_prediction_length_characters": 893.688, "eval_mean_prediction_length_tokens": 193.593, "eval_num_predicted": 1000, "eval_rouge/geometric_mean": 61.3651, "eval_rouge/rouge1": 74.2269, "eval_rouge/rouge2": 57.4497, "eval_rouge/rougeL": 54.1897, "eval_rouge/rougeLsum": 72.5834, "eval_runtime": 2499.9317, "eval_samples_per_second": 0.4, "eval_steps_per_second": 0.4, "step": 34000 }, { "epoch": 1.39, "learning_rate": 9.569096202090125e-06, "loss": 0.2366, "step": 34500 }, { "epoch": 1.41, "learning_rate": 9.54673714242274e-06, "loss": 0.2405, "step": 35000 }, { "epoch": 1.41, "eval_bertscore/f1": 0.8145, "eval_bertscore/precision": 0.8054, "eval_bertscore/recall": 0.8256, "eval_mean_prediction_length_characters": 927.422, "eval_mean_prediction_length_tokens": 200.478, "eval_num_predicted": 1000, "eval_rouge/geometric_mean": 60.0655, "eval_rouge/rouge1": 73.2741, "eval_rouge/rouge2": 55.8854, "eval_rouge/rougeL": 52.9208, "eval_rouge/rougeLsum": 71.6192, "eval_runtime": 2620.9045, "eval_samples_per_second": 0.382, "eval_steps_per_second": 0.382, "step": 35000 }, { "epoch": 1.43, "learning_rate": 9.524378082755352e-06, "loss": 0.2489, "step": 35500 }, { "epoch": 1.45, "learning_rate": 9.502019023087967e-06, "loss": 0.2428, "step": 36000 }, { "epoch": 1.45, "eval_bertscore/f1": 0.8171, "eval_bertscore/precision": 0.8136, "eval_bertscore/recall": 0.8226, "eval_mean_prediction_length_characters": 847.398, "eval_mean_prediction_length_tokens": 183.753, "eval_num_predicted": 1000, "eval_rouge/geometric_mean": 60.8081, "eval_rouge/rouge1": 73.4983, "eval_rouge/rouge2": 56.5892, "eval_rouge/rougeL": 54.0596, "eval_rouge/rougeLsum": 71.9013, "eval_runtime": 2373.5033, "eval_samples_per_second": 0.421, "eval_steps_per_second": 0.421, "step": 36000 }, { "epoch": 1.47, "learning_rate": 9.47965996342058e-06, "loss": 0.2511, "step": 36500 }, { "epoch": 1.49, "learning_rate": 9.457300903753192e-06, "loss": 0.2416, "step": 37000 }, { "epoch": 1.49, "eval_bertscore/f1": 0.8155, "eval_bertscore/precision": 0.8154, "eval_bertscore/recall": 0.8179, "eval_mean_prediction_length_characters": 850.697, "eval_mean_prediction_length_tokens": 185.038, "eval_num_predicted": 1000, "eval_rouge/geometric_mean": 60.5988, "eval_rouge/rouge1": 72.9601, "eval_rouge/rouge2": 56.3526, "eval_rouge/rougeL": 54.1244, "eval_rouge/rougeLsum": 71.3786, "eval_runtime": 2440.9193, "eval_samples_per_second": 0.41, "eval_steps_per_second": 0.41, "step": 37000 }, { "epoch": 1.51, "learning_rate": 9.43498656220514e-06, "loss": 0.2404, "step": 37500 }, { "epoch": 1.53, "learning_rate": 9.412627502537755e-06, "loss": 0.2597, "step": 38000 }, { "epoch": 1.53, "eval_bertscore/f1": 0.8146, "eval_bertscore/precision": 0.8118, "eval_bertscore/recall": 0.8194, "eval_mean_prediction_length_characters": 856.43, "eval_mean_prediction_length_tokens": 186.628, "eval_num_predicted": 1000, "eval_rouge/geometric_mean": 60.5122, "eval_rouge/rouge1": 73.1873, "eval_rouge/rouge2": 56.3588, "eval_rouge/rougeL": 53.7195, "eval_rouge/rougeLsum": 71.5458, "eval_runtime": 2434.8964, "eval_samples_per_second": 0.411, "eval_steps_per_second": 0.411, "step": 38000 }, { "epoch": 1.55, "learning_rate": 9.390268442870366e-06, "loss": 0.2436, "step": 38500 }, { "epoch": 1.57, "learning_rate": 9.367954101322315e-06, "loss": 0.2526, "step": 39000 }, { "epoch": 1.57, "eval_bertscore/f1": 0.8206, "eval_bertscore/precision": 0.8186, "eval_bertscore/recall": 0.8245, "eval_mean_prediction_length_characters": 843.759, "eval_mean_prediction_length_tokens": 183.492, "eval_num_predicted": 1000, "eval_rouge/geometric_mean": 61.4692, "eval_rouge/rouge1": 74.1108, "eval_rouge/rouge2": 57.3353, "eval_rouge/rougeL": 54.6598, "eval_rouge/rougeLsum": 72.5858, "eval_runtime": 2441.5715, "eval_samples_per_second": 0.41, "eval_steps_per_second": 0.41, "step": 39000 }, { "epoch": 1.59, "learning_rate": 9.34559504165493e-06, "loss": 0.248, "step": 39500 }, { "epoch": 1.61, "learning_rate": 9.323235981987542e-06, "loss": 0.2431, "step": 40000 }, { "epoch": 1.61, "eval_bertscore/f1": 0.8214, "eval_bertscore/precision": 0.8186, "eval_bertscore/recall": 0.826, "eval_mean_prediction_length_characters": 854.807, "eval_mean_prediction_length_tokens": 186.06, "eval_num_predicted": 1000, "eval_rouge/geometric_mean": 61.6121, "eval_rouge/rouge1": 74.3063, "eval_rouge/rouge2": 57.6124, "eval_rouge/rougeL": 54.6331, "eval_rouge/rougeLsum": 72.7609, "eval_runtime": 2483.1491, "eval_samples_per_second": 0.403, "eval_steps_per_second": 0.403, "step": 40000 }, { "epoch": 1.63, "learning_rate": 9.300876922320155e-06, "loss": 0.2413, "step": 40500 }, { "epoch": 1.65, "learning_rate": 9.27851786265277e-06, "loss": 0.2556, "step": 41000 }, { "epoch": 1.65, "eval_bertscore/f1": 0.8212, "eval_bertscore/precision": 0.8166, "eval_bertscore/recall": 0.8277, "eval_mean_prediction_length_characters": 871.94, "eval_mean_prediction_length_tokens": 189.412, "eval_num_predicted": 1000, "eval_rouge/geometric_mean": 61.6287, "eval_rouge/rouge1": 74.4291, "eval_rouge/rouge2": 57.5628, "eval_rouge/rougeL": 54.6343, "eval_rouge/rougeLsum": 72.7972, "eval_runtime": 2504.8302, "eval_samples_per_second": 0.399, "eval_steps_per_second": 0.399, "step": 41000 }, { "epoch": 1.67, "learning_rate": 9.256158802985382e-06, "loss": 0.2414, "step": 41500 }, { "epoch": 1.69, "learning_rate": 9.233799743317997e-06, "loss": 0.2493, "step": 42000 }, { "epoch": 1.69, "eval_bertscore/f1": 0.8202, "eval_bertscore/precision": 0.8115, "eval_bertscore/recall": 0.831, "eval_mean_prediction_length_characters": 919.152, "eval_mean_prediction_length_tokens": 199.285, "eval_num_predicted": 1000, "eval_rouge/geometric_mean": 61.6474, "eval_rouge/rouge1": 74.2897, "eval_rouge/rouge2": 57.7571, "eval_rouge/rougeL": 54.6022, "eval_rouge/rougeLsum": 72.7195, "eval_runtime": 2638.78, "eval_samples_per_second": 0.379, "eval_steps_per_second": 0.379, "step": 42000 }, { "epoch": 1.71, "learning_rate": 9.21144068365061e-06, "loss": 0.2467, "step": 42500 }, { "epoch": 1.73, "learning_rate": 9.189081623983222e-06, "loss": 0.2421, "step": 43000 }, { "epoch": 1.73, "eval_bertscore/f1": 0.823, "eval_bertscore/precision": 0.8258, "eval_bertscore/recall": 0.8222, "eval_mean_prediction_length_characters": 803.212, "eval_mean_prediction_length_tokens": 176.172, "eval_num_predicted": 1000, "eval_rouge/geometric_mean": 61.9161, "eval_rouge/rouge1": 74.3746, "eval_rouge/rouge2": 57.9399, "eval_rouge/rougeL": 55.0818, "eval_rouge/rougeLsum": 72.8967, "eval_runtime": 2327.757, "eval_samples_per_second": 0.43, "eval_steps_per_second": 0.43, "step": 43000 }, { "epoch": 1.75, "learning_rate": 9.166722564315837e-06, "loss": 0.2478, "step": 43500 }, { "epoch": 1.77, "learning_rate": 9.14436350464845e-06, "loss": 0.229, "step": 44000 }, { "epoch": 1.77, "eval_bertscore/f1": 0.8212, "eval_bertscore/precision": 0.822, "eval_bertscore/recall": 0.8227, "eval_mean_prediction_length_characters": 819.677, "eval_mean_prediction_length_tokens": 178.389, "eval_num_predicted": 1000, "eval_rouge/geometric_mean": 61.8804, "eval_rouge/rouge1": 74.0116, "eval_rouge/rouge2": 57.9086, "eval_rouge/rougeL": 55.2862, "eval_rouge/rougeLsum": 72.466, "eval_runtime": 2366.2663, "eval_samples_per_second": 0.423, "eval_steps_per_second": 0.423, "step": 44000 }, { "epoch": 1.79, "learning_rate": 9.122049163100398e-06, "loss": 0.2398, "step": 44500 }, { "epoch": 1.81, "learning_rate": 9.09969010343301e-06, "loss": 0.2429, "step": 45000 }, { "epoch": 1.81, "eval_bertscore/f1": 0.8243, "eval_bertscore/precision": 0.8174, "eval_bertscore/recall": 0.8333, "eval_mean_prediction_length_characters": 896.966, "eval_mean_prediction_length_tokens": 195.349, "eval_num_predicted": 1000, "eval_rouge/geometric_mean": 62.2879, "eval_rouge/rouge1": 74.8367, "eval_rouge/rouge2": 58.4326, "eval_rouge/rougeL": 55.2638, "eval_rouge/rougeLsum": 73.3944, "eval_runtime": 2574.2161, "eval_samples_per_second": 0.388, "eval_steps_per_second": 0.388, "step": 45000 }, { "epoch": 1.83, "learning_rate": 9.077331043765623e-06, "loss": 0.2355, "step": 45500 }, { "epoch": 1.85, "learning_rate": 9.054971984098238e-06, "loss": 0.2375, "step": 46000 }, { "epoch": 1.85, "eval_bertscore/f1": 0.824, "eval_bertscore/precision": 0.82, "eval_bertscore/recall": 0.8303, "eval_mean_prediction_length_characters": 873.321, "eval_mean_prediction_length_tokens": 190.408, "eval_num_predicted": 1000, "eval_rouge/geometric_mean": 62.1689, "eval_rouge/rouge1": 74.7062, "eval_rouge/rouge2": 58.2978, "eval_rouge/rougeL": 55.1711, "eval_rouge/rougeLsum": 73.2347, "eval_runtime": 2618.1437, "eval_samples_per_second": 0.382, "eval_steps_per_second": 0.382, "step": 46000 }, { "epoch": 1.87, "learning_rate": 9.032657642550186e-06, "loss": 0.2425, "step": 46500 }, { "epoch": 1.89, "learning_rate": 9.010298582882799e-06, "loss": 0.228, "step": 47000 }, { "epoch": 1.89, "eval_bertscore/f1": 0.8258, "eval_bertscore/precision": 0.8189, "eval_bertscore/recall": 0.8348, "eval_mean_prediction_length_characters": 904.242, "eval_mean_prediction_length_tokens": 196.813, "eval_num_predicted": 1000, "eval_rouge/geometric_mean": 62.6657, "eval_rouge/rouge1": 75.0484, "eval_rouge/rouge2": 59.0138, "eval_rouge/rougeL": 55.5642, "eval_rouge/rougeLsum": 73.6606, "eval_runtime": 2709.9214, "eval_samples_per_second": 0.369, "eval_steps_per_second": 0.369, "step": 47000 }, { "epoch": 1.91, "learning_rate": 8.987939523215412e-06, "loss": 0.2388, "step": 47500 }, { "epoch": 1.93, "learning_rate": 8.965580463548026e-06, "loss": 0.2398, "step": 48000 }, { "epoch": 1.93, "eval_bertscore/f1": 0.8287, "eval_bertscore/precision": 0.8255, "eval_bertscore/recall": 0.8338, "eval_mean_prediction_length_characters": 861.415, "eval_mean_prediction_length_tokens": 187.846, "eval_num_predicted": 1000, "eval_rouge/geometric_mean": 63.1011, "eval_rouge/rouge1": 75.5807, "eval_rouge/rouge2": 59.457, "eval_rouge/rougeL": 55.9108, "eval_rouge/rougeLsum": 74.1699, "eval_runtime": 2601.0783, "eval_samples_per_second": 0.384, "eval_steps_per_second": 0.384, "step": 48000 }, { "epoch": 1.95, "learning_rate": 8.943221403880639e-06, "loss": 0.2399, "step": 48500 }, { "epoch": 1.97, "learning_rate": 8.920862344213252e-06, "loss": 0.2373, "step": 49000 }, { "epoch": 1.97, "eval_bertscore/f1": 0.8253, "eval_bertscore/precision": 0.8244, "eval_bertscore/recall": 0.828, "eval_mean_prediction_length_characters": 840.263, "eval_mean_prediction_length_tokens": 182.473, "eval_num_predicted": 1000, "eval_rouge/geometric_mean": 62.7461, "eval_rouge/rouge1": 74.8987, "eval_rouge/rouge2": 59.0142, "eval_rouge/rougeL": 55.8894, "eval_rouge/rougeLsum": 73.4189, "eval_runtime": 2549.6944, "eval_samples_per_second": 0.392, "eval_steps_per_second": 0.392, "step": 49000 }, { "epoch": 1.99, "learning_rate": 8.8985480026652e-06, "loss": 0.2397, "step": 49500 }, { "epoch": 2.01, "learning_rate": 8.876233661117149e-06, "loss": 0.2057, "step": 50000 }, { "epoch": 2.01, "eval_bertscore/f1": 0.8258, "eval_bertscore/precision": 0.8222, "eval_bertscore/recall": 0.8313, "eval_mean_prediction_length_characters": 857.49, "eval_mean_prediction_length_tokens": 186.547, "eval_num_predicted": 1000, "eval_rouge/geometric_mean": 62.8166, "eval_rouge/rouge1": 75.1042, "eval_rouge/rouge2": 59.0815, "eval_rouge/rougeL": 55.8608, "eval_rouge/rougeLsum": 73.5713, "eval_runtime": 2602.9106, "eval_samples_per_second": 0.384, "eval_steps_per_second": 0.384, "step": 50000 } ], "max_steps": 248470, "num_train_epochs": 10, "total_flos": 6.239894115827712e+16, "trial_name": null, "trial_params": null }