{ "best_metric": null, "best_model_checkpoint": null, "epoch": 17.770034843205575, "global_step": 5100, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.89, "learning_rate": 2.4902343750000002e-05, "loss": 3.791, "step": 255 }, { "epoch": 0.89, "eval_average_rogue": 0.2125, "eval_loss": 3.328416347503662, "eval_rouge1_fmeasure": 0.3288, "eval_rouge1_precision": 0.3115, "eval_rouge1_recall": 0.3637, "eval_rouge2_fmeasure": 0.06, "eval_rouge2_precision": 0.0569, "eval_rouge2_recall": 0.0664, "eval_rougeL_fmeasure": 0.145, "eval_rougeL_precision": 0.1366, "eval_rougeL_recall": 0.1631, "eval_rougeLsum_fmeasure": 0.3161, "eval_rougeLsum_precision": 0.2997, "eval_rougeLsum_recall": 0.3496, "eval_runtime": 3350.2964, "eval_samples_per_second": 0.038, "eval_steps_per_second": 0.038, "step": 255 }, { "epoch": 1.78, "learning_rate": 4.9804687500000004e-05, "loss": 3.3793, "step": 510 }, { "epoch": 1.78, "eval_average_rogue": 0.2109, "eval_loss": 3.268017292022705, "eval_rouge1_fmeasure": 0.3285, "eval_rouge1_precision": 0.3068, "eval_rouge1_recall": 0.3699, "eval_rouge2_fmeasure": 0.0571, "eval_rouge2_precision": 0.053, "eval_rouge2_recall": 0.065, "eval_rougeL_fmeasure": 0.1417, "eval_rougeL_precision": 0.1315, "eval_rougeL_recall": 0.1631, "eval_rougeLsum_fmeasure": 0.3163, "eval_rougeLsum_precision": 0.2954, "eval_rougeLsum_recall": 0.356, "eval_runtime": 3338.0119, "eval_samples_per_second": 0.038, "eval_steps_per_second": 0.038, "step": 510 }, { "epoch": 2.67, "learning_rate": 4.758033664881408e-05, "loss": 3.1826, "step": 765 }, { "epoch": 2.67, "eval_average_rogue": 0.214, "eval_loss": 3.245297431945801, "eval_rouge1_fmeasure": 0.3293, "eval_rouge1_precision": 0.3099, "eval_rouge1_recall": 0.3668, "eval_rouge2_fmeasure": 0.0644, "eval_rouge2_precision": 0.0605, "eval_rouge2_recall": 0.0717, "eval_rougeL_fmeasure": 0.145, "eval_rougeL_precision": 0.1356, "eval_rougeL_recall": 0.1647, "eval_rougeLsum_fmeasure": 0.3173, "eval_rougeLsum_precision": 0.2986, "eval_rougeLsum_recall": 0.3531, "eval_runtime": 3330.8537, "eval_samples_per_second": 0.038, "eval_steps_per_second": 0.038, "step": 765 }, { "epoch": 3.55, "learning_rate": 4.5141545524101e-05, "loss": 3.0133, "step": 1020 }, { "epoch": 3.55, "eval_average_rogue": 0.1948, "eval_loss": 3.2580819129943848, "eval_rouge1_fmeasure": 0.2983, "eval_rouge1_precision": 0.2893, "eval_rouge1_recall": 0.3239, "eval_rouge2_fmeasure": 0.0545, "eval_rouge2_precision": 0.0527, "eval_rouge2_recall": 0.0591, "eval_rougeL_fmeasure": 0.1397, "eval_rougeL_precision": 0.1356, "eval_rougeL_recall": 0.1534, "eval_rougeLsum_fmeasure": 0.2865, "eval_rougeLsum_precision": 0.2776, "eval_rougeLsum_recall": 0.3105, "eval_runtime": 3289.5263, "eval_samples_per_second": 0.039, "eval_steps_per_second": 0.039, "step": 1020 }, { "epoch": 4.44, "learning_rate": 4.2702754399387915e-05, "loss": 2.8569, "step": 1275 }, { "epoch": 4.44, "eval_average_rogue": 0.2256, "eval_loss": 3.2716007232666016, "eval_rouge1_fmeasure": 0.3491, "eval_rouge1_precision": 0.3326, "eval_rouge1_recall": 0.3835, "eval_rouge2_fmeasure": 0.0671, "eval_rouge2_precision": 0.0641, "eval_rouge2_recall": 0.0732, "eval_rougeL_fmeasure": 0.1516, "eval_rougeL_precision": 0.1436, "eval_rougeL_recall": 0.1701, "eval_rougeLsum_fmeasure": 0.3346, "eval_rougeLsum_precision": 0.3189, "eval_rougeLsum_recall": 0.368, "eval_runtime": 3297.3203, "eval_samples_per_second": 0.039, "eval_steps_per_second": 0.039, "step": 1275 }, { "epoch": 5.33, "learning_rate": 4.026396327467483e-05, "loss": 2.7008, "step": 1530 }, { "epoch": 5.33, "eval_average_rogue": 0.2285, "eval_loss": 3.313244104385376, "eval_rouge1_fmeasure": 0.3531, "eval_rouge1_precision": 0.3326, "eval_rouge1_recall": 0.3925, "eval_rouge2_fmeasure": 0.0693, "eval_rouge2_precision": 0.0653, "eval_rouge2_recall": 0.0772, "eval_rougeL_fmeasure": 0.1521, "eval_rougeL_precision": 0.1427, "eval_rougeL_recall": 0.1725, "eval_rougeLsum_fmeasure": 0.3396, "eval_rougeLsum_precision": 0.3202, "eval_rougeLsum_recall": 0.3776, "eval_runtime": 3297.2909, "eval_samples_per_second": 0.039, "eval_steps_per_second": 0.039, "step": 1530 }, { "epoch": 6.22, "learning_rate": 3.7825172149961744e-05, "loss": 2.5657, "step": 1785 }, { "epoch": 6.22, "eval_average_rogue": 0.2241, "eval_loss": 3.349586009979248, "eval_rouge1_fmeasure": 0.3473, "eval_rouge1_precision": 0.3291, "eval_rouge1_recall": 0.385, "eval_rouge2_fmeasure": 0.0658, "eval_rouge2_precision": 0.0622, "eval_rouge2_recall": 0.0731, "eval_rougeL_fmeasure": 0.1504, "eval_rougeL_precision": 0.1416, "eval_rougeL_recall": 0.17, "eval_rougeLsum_fmeasure": 0.3326, "eval_rougeLsum_precision": 0.3154, "eval_rougeLsum_recall": 0.3683, "eval_runtime": 3292.4741, "eval_samples_per_second": 0.039, "eval_steps_per_second": 0.039, "step": 1785 }, { "epoch": 7.11, "learning_rate": 3.538638102524866e-05, "loss": 2.4327, "step": 2040 }, { "epoch": 7.11, "eval_average_rogue": 0.2298, "eval_loss": 3.437542200088501, "eval_rouge1_fmeasure": 0.3551, "eval_rouge1_precision": 0.3347, "eval_rouge1_recall": 0.3946, "eval_rouge2_fmeasure": 0.0703, "eval_rouge2_precision": 0.0664, "eval_rouge2_recall": 0.0778, "eval_rougeL_fmeasure": 0.1527, "eval_rougeL_precision": 0.1433, "eval_rougeL_recall": 0.1732, "eval_rougeLsum_fmeasure": 0.341, "eval_rougeLsum_precision": 0.3218, "eval_rougeLsum_recall": 0.3788, "eval_runtime": 3284.3798, "eval_samples_per_second": 0.039, "eval_steps_per_second": 0.039, "step": 2040 }, { "epoch": 8.0, "learning_rate": 3.294758990053558e-05, "loss": 2.3352, "step": 2295 }, { "epoch": 8.0, "eval_average_rogue": 0.2284, "eval_loss": 3.4355413913726807, "eval_rouge1_fmeasure": 0.3551, "eval_rouge1_precision": 0.3334, "eval_rouge1_recall": 0.397, "eval_rouge2_fmeasure": 0.0682, "eval_rouge2_precision": 0.064, "eval_rouge2_recall": 0.0762, "eval_rougeL_fmeasure": 0.1507, "eval_rougeL_precision": 0.1406, "eval_rougeL_recall": 0.1721, "eval_rougeLsum_fmeasure": 0.3396, "eval_rougeLsum_precision": 0.3189, "eval_rougeLsum_recall": 0.3797, "eval_runtime": 3319.5759, "eval_samples_per_second": 0.039, "eval_steps_per_second": 0.039, "step": 2295 }, { "epoch": 8.89, "learning_rate": 3.0508798775822494e-05, "loss": 2.2068, "step": 2550 }, { "epoch": 8.89, "eval_average_rogue": 0.2319, "eval_loss": 3.517580032348633, "eval_rouge1_fmeasure": 0.3593, "eval_rouge1_precision": 0.3378, "eval_rouge1_recall": 0.4005, "eval_rouge2_fmeasure": 0.0711, "eval_rouge2_precision": 0.0669, "eval_rouge2_recall": 0.0793, "eval_rougeL_fmeasure": 0.153, "eval_rougeL_precision": 0.1431, "eval_rougeL_recall": 0.1738, "eval_rougeLsum_fmeasure": 0.3441, "eval_rougeLsum_precision": 0.3239, "eval_rougeLsum_recall": 0.383, "eval_runtime": 3268.2597, "eval_samples_per_second": 0.039, "eval_steps_per_second": 0.039, "step": 2550 }, { "epoch": 9.77, "learning_rate": 2.8070007651109415e-05, "loss": 2.1177, "step": 2805 }, { "epoch": 9.77, "eval_average_rogue": 0.2322, "eval_loss": 3.5942444801330566, "eval_rouge1_fmeasure": 0.3615, "eval_rouge1_precision": 0.3407, "eval_rouge1_recall": 0.402, "eval_rouge2_fmeasure": 0.0691, "eval_rouge2_precision": 0.0652, "eval_rouge2_recall": 0.0771, "eval_rougeL_fmeasure": 0.1516, "eval_rougeL_precision": 0.1422, "eval_rougeL_recall": 0.1722, "eval_rougeLsum_fmeasure": 0.3465, "eval_rougeLsum_precision": 0.3267, "eval_rougeLsum_recall": 0.3853, "eval_runtime": 3329.9183, "eval_samples_per_second": 0.038, "eval_steps_per_second": 0.038, "step": 2805 }, { "epoch": 10.66, "learning_rate": 2.563121652639633e-05, "loss": 2.0452, "step": 3060 }, { "epoch": 10.66, "eval_average_rogue": 0.2329, "eval_loss": 3.618927001953125, "eval_rouge1_fmeasure": 0.3627, "eval_rouge1_precision": 0.3451, "eval_rouge1_recall": 0.3992, "eval_rouge2_fmeasure": 0.0688, "eval_rouge2_precision": 0.0655, "eval_rouge2_recall": 0.0756, "eval_rougeL_fmeasure": 0.152, "eval_rougeL_precision": 0.1437, "eval_rougeL_recall": 0.1712, "eval_rougeLsum_fmeasure": 0.3482, "eval_rougeLsum_precision": 0.3317, "eval_rougeLsum_recall": 0.383, "eval_runtime": 3288.5395, "eval_samples_per_second": 0.039, "eval_steps_per_second": 0.039, "step": 3060 }, { "epoch": 11.55, "learning_rate": 2.3192425401683247e-05, "loss": 1.9276, "step": 3315 }, { "epoch": 11.55, "eval_average_rogue": 0.235, "eval_loss": 3.685786247253418, "eval_rouge1_fmeasure": 0.3662, "eval_rouge1_precision": 0.3468, "eval_rouge1_recall": 0.4051, "eval_rouge2_fmeasure": 0.0706, "eval_rouge2_precision": 0.0668, "eval_rouge2_recall": 0.0788, "eval_rougeL_fmeasure": 0.1525, "eval_rougeL_precision": 0.1437, "eval_rougeL_recall": 0.1723, "eval_rougeLsum_fmeasure": 0.3508, "eval_rougeLsum_precision": 0.3328, "eval_rougeLsum_recall": 0.3883, "eval_runtime": 3283.2817, "eval_samples_per_second": 0.039, "eval_steps_per_second": 0.039, "step": 3315 }, { "epoch": 12.44, "learning_rate": 2.0753634276970162e-05, "loss": 1.9006, "step": 3570 }, { "epoch": 12.44, "eval_average_rogue": 0.2331, "eval_loss": 3.7175817489624023, "eval_rouge1_fmeasure": 0.3627, "eval_rouge1_precision": 0.3429, "eval_rouge1_recall": 0.4025, "eval_rouge2_fmeasure": 0.0698, "eval_rouge2_precision": 0.0659, "eval_rouge2_recall": 0.0783, "eval_rougeL_fmeasure": 0.152, "eval_rougeL_precision": 0.1429, "eval_rougeL_recall": 0.1722, "eval_rougeLsum_fmeasure": 0.3477, "eval_rougeLsum_precision": 0.329, "eval_rougeLsum_recall": 0.386, "eval_runtime": 3384.322, "eval_samples_per_second": 0.038, "eval_steps_per_second": 0.038, "step": 3570 }, { "epoch": 13.33, "learning_rate": 1.8314843152257076e-05, "loss": 1.8247, "step": 3825 }, { "epoch": 13.33, "eval_average_rogue": 0.2358, "eval_loss": 3.7242326736450195, "eval_rouge1_fmeasure": 0.3679, "eval_rouge1_precision": 0.3481, "eval_rouge1_recall": 0.4077, "eval_rouge2_fmeasure": 0.0698, "eval_rouge2_precision": 0.066, "eval_rouge2_recall": 0.0773, "eval_rougeL_fmeasure": 0.1537, "eval_rougeL_precision": 0.1444, "eval_rougeL_recall": 0.1741, "eval_rougeLsum_fmeasure": 0.3517, "eval_rougeLsum_precision": 0.3323, "eval_rougeLsum_recall": 0.3894, "eval_runtime": 3438.3144, "eval_samples_per_second": 0.037, "eval_steps_per_second": 0.037, "step": 3825 }, { "epoch": 14.22, "learning_rate": 1.5876052027543994e-05, "loss": 1.7352, "step": 4080 }, { "epoch": 14.22, "eval_average_rogue": 0.2336, "eval_loss": 3.790210723876953, "eval_rouge1_fmeasure": 0.3648, "eval_rouge1_precision": 0.3457, "eval_rouge1_recall": 0.4025, "eval_rouge2_fmeasure": 0.0702, "eval_rouge2_precision": 0.0667, "eval_rouge2_recall": 0.0776, "eval_rougeL_fmeasure": 0.1508, "eval_rougeL_precision": 0.1422, "eval_rougeL_recall": 0.1702, "eval_rougeLsum_fmeasure": 0.3486, "eval_rougeLsum_precision": 0.3307, "eval_rougeLsum_recall": 0.3849, "eval_runtime": 3429.0398, "eval_samples_per_second": 0.037, "eval_steps_per_second": 0.037, "step": 4080 }, { "epoch": 15.1, "learning_rate": 1.3437260902830912e-05, "loss": 1.7091, "step": 4335 }, { "epoch": 15.1, "eval_average_rogue": 0.2351, "eval_loss": 3.8391542434692383, "eval_rouge1_fmeasure": 0.3664, "eval_rouge1_precision": 0.346, "eval_rouge1_recall": 0.4069, "eval_rouge2_fmeasure": 0.0706, "eval_rouge2_precision": 0.0666, "eval_rouge2_recall": 0.0786, "eval_rougeL_fmeasure": 0.1527, "eval_rougeL_precision": 0.1435, "eval_rougeL_recall": 0.1728, "eval_rougeLsum_fmeasure": 0.3509, "eval_rougeLsum_precision": 0.331, "eval_rougeLsum_recall": 0.3892, "eval_runtime": 3445.0892, "eval_samples_per_second": 0.037, "eval_steps_per_second": 0.037, "step": 4335 }, { "epoch": 15.99, "learning_rate": 1.0998469778117827e-05, "loss": 1.654, "step": 4590 }, { "epoch": 15.99, "eval_average_rogue": 0.2347, "eval_loss": 3.8251237869262695, "eval_rouge1_fmeasure": 0.3674, "eval_rouge1_precision": 0.3475, "eval_rouge1_recall": 0.4065, "eval_rouge2_fmeasure": 0.0692, "eval_rouge2_precision": 0.0655, "eval_rouge2_recall": 0.0767, "eval_rougeL_fmeasure": 0.1515, "eval_rougeL_precision": 0.1425, "eval_rougeL_recall": 0.1713, "eval_rougeLsum_fmeasure": 0.3508, "eval_rougeLsum_precision": 0.3318, "eval_rougeLsum_recall": 0.3883, "eval_runtime": 3431.7145, "eval_samples_per_second": 0.037, "eval_steps_per_second": 0.037, "step": 4590 }, { "epoch": 16.88, "learning_rate": 8.559678653404744e-06, "loss": 1.6034, "step": 4845 }, { "epoch": 16.88, "eval_average_rogue": 0.2342, "eval_loss": 3.8599014282226562, "eval_rouge1_fmeasure": 0.3653, "eval_rouge1_precision": 0.3449, "eval_rouge1_recall": 0.4056, "eval_rouge2_fmeasure": 0.0694, "eval_rouge2_precision": 0.0655, "eval_rouge2_recall": 0.0771, "eval_rougeL_fmeasure": 0.1531, "eval_rougeL_precision": 0.1438, "eval_rougeL_recall": 0.1738, "eval_rougeLsum_fmeasure": 0.3491, "eval_rougeLsum_precision": 0.3295, "eval_rougeLsum_recall": 0.3877, "eval_runtime": 3434.4477, "eval_samples_per_second": 0.037, "eval_steps_per_second": 0.037, "step": 4845 }, { "epoch": 17.77, "learning_rate": 6.120887528691661e-06, "loss": 1.5801, "step": 5100 }, { "epoch": 17.77, "eval_average_rogue": 0.2336, "eval_loss": 3.8798491954803467, "eval_rouge1_fmeasure": 0.3647, "eval_rouge1_precision": 0.3432, "eval_rouge1_recall": 0.4057, "eval_rouge2_fmeasure": 0.0682, "eval_rouge2_precision": 0.0643, "eval_rouge2_recall": 0.0758, "eval_rougeL_fmeasure": 0.1523, "eval_rougeL_precision": 0.1426, "eval_rougeL_recall": 0.1732, "eval_rougeLsum_fmeasure": 0.3491, "eval_rougeLsum_precision": 0.329, "eval_rougeLsum_recall": 0.3882, "eval_runtime": 3446.3776, "eval_samples_per_second": 0.037, "eval_steps_per_second": 0.037, "step": 5100 } ], "max_steps": 5740, "num_train_epochs": 20, "total_flos": 1.37710446575616e+16, "trial_name": null, "trial_params": null }