|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 17.770034843205575, |
|
"global_step": 5100, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.89, |
|
"learning_rate": 2.4902343750000002e-05, |
|
"loss": 3.791, |
|
"step": 255 |
|
}, |
|
{ |
|
"epoch": 0.89, |
|
"eval_average_rogue": 0.2125, |
|
"eval_loss": 3.328416347503662, |
|
"eval_rouge1_fmeasure": 0.3288, |
|
"eval_rouge1_precision": 0.3115, |
|
"eval_rouge1_recall": 0.3637, |
|
"eval_rouge2_fmeasure": 0.06, |
|
"eval_rouge2_precision": 0.0569, |
|
"eval_rouge2_recall": 0.0664, |
|
"eval_rougeL_fmeasure": 0.145, |
|
"eval_rougeL_precision": 0.1366, |
|
"eval_rougeL_recall": 0.1631, |
|
"eval_rougeLsum_fmeasure": 0.3161, |
|
"eval_rougeLsum_precision": 0.2997, |
|
"eval_rougeLsum_recall": 0.3496, |
|
"eval_runtime": 3350.2964, |
|
"eval_samples_per_second": 0.038, |
|
"eval_steps_per_second": 0.038, |
|
"step": 255 |
|
}, |
|
{ |
|
"epoch": 1.78, |
|
"learning_rate": 4.9804687500000004e-05, |
|
"loss": 3.3793, |
|
"step": 510 |
|
}, |
|
{ |
|
"epoch": 1.78, |
|
"eval_average_rogue": 0.2109, |
|
"eval_loss": 3.268017292022705, |
|
"eval_rouge1_fmeasure": 0.3285, |
|
"eval_rouge1_precision": 0.3068, |
|
"eval_rouge1_recall": 0.3699, |
|
"eval_rouge2_fmeasure": 0.0571, |
|
"eval_rouge2_precision": 0.053, |
|
"eval_rouge2_recall": 0.065, |
|
"eval_rougeL_fmeasure": 0.1417, |
|
"eval_rougeL_precision": 0.1315, |
|
"eval_rougeL_recall": 0.1631, |
|
"eval_rougeLsum_fmeasure": 0.3163, |
|
"eval_rougeLsum_precision": 0.2954, |
|
"eval_rougeLsum_recall": 0.356, |
|
"eval_runtime": 3338.0119, |
|
"eval_samples_per_second": 0.038, |
|
"eval_steps_per_second": 0.038, |
|
"step": 510 |
|
}, |
|
{ |
|
"epoch": 2.67, |
|
"learning_rate": 4.758033664881408e-05, |
|
"loss": 3.1826, |
|
"step": 765 |
|
}, |
|
{ |
|
"epoch": 2.67, |
|
"eval_average_rogue": 0.214, |
|
"eval_loss": 3.245297431945801, |
|
"eval_rouge1_fmeasure": 0.3293, |
|
"eval_rouge1_precision": 0.3099, |
|
"eval_rouge1_recall": 0.3668, |
|
"eval_rouge2_fmeasure": 0.0644, |
|
"eval_rouge2_precision": 0.0605, |
|
"eval_rouge2_recall": 0.0717, |
|
"eval_rougeL_fmeasure": 0.145, |
|
"eval_rougeL_precision": 0.1356, |
|
"eval_rougeL_recall": 0.1647, |
|
"eval_rougeLsum_fmeasure": 0.3173, |
|
"eval_rougeLsum_precision": 0.2986, |
|
"eval_rougeLsum_recall": 0.3531, |
|
"eval_runtime": 3330.8537, |
|
"eval_samples_per_second": 0.038, |
|
"eval_steps_per_second": 0.038, |
|
"step": 765 |
|
}, |
|
{ |
|
"epoch": 3.55, |
|
"learning_rate": 4.5141545524101e-05, |
|
"loss": 3.0133, |
|
"step": 1020 |
|
}, |
|
{ |
|
"epoch": 3.55, |
|
"eval_average_rogue": 0.1948, |
|
"eval_loss": 3.2580819129943848, |
|
"eval_rouge1_fmeasure": 0.2983, |
|
"eval_rouge1_precision": 0.2893, |
|
"eval_rouge1_recall": 0.3239, |
|
"eval_rouge2_fmeasure": 0.0545, |
|
"eval_rouge2_precision": 0.0527, |
|
"eval_rouge2_recall": 0.0591, |
|
"eval_rougeL_fmeasure": 0.1397, |
|
"eval_rougeL_precision": 0.1356, |
|
"eval_rougeL_recall": 0.1534, |
|
"eval_rougeLsum_fmeasure": 0.2865, |
|
"eval_rougeLsum_precision": 0.2776, |
|
"eval_rougeLsum_recall": 0.3105, |
|
"eval_runtime": 3289.5263, |
|
"eval_samples_per_second": 0.039, |
|
"eval_steps_per_second": 0.039, |
|
"step": 1020 |
|
}, |
|
{ |
|
"epoch": 4.44, |
|
"learning_rate": 4.2702754399387915e-05, |
|
"loss": 2.8569, |
|
"step": 1275 |
|
}, |
|
{ |
|
"epoch": 4.44, |
|
"eval_average_rogue": 0.2256, |
|
"eval_loss": 3.2716007232666016, |
|
"eval_rouge1_fmeasure": 0.3491, |
|
"eval_rouge1_precision": 0.3326, |
|
"eval_rouge1_recall": 0.3835, |
|
"eval_rouge2_fmeasure": 0.0671, |
|
"eval_rouge2_precision": 0.0641, |
|
"eval_rouge2_recall": 0.0732, |
|
"eval_rougeL_fmeasure": 0.1516, |
|
"eval_rougeL_precision": 0.1436, |
|
"eval_rougeL_recall": 0.1701, |
|
"eval_rougeLsum_fmeasure": 0.3346, |
|
"eval_rougeLsum_precision": 0.3189, |
|
"eval_rougeLsum_recall": 0.368, |
|
"eval_runtime": 3297.3203, |
|
"eval_samples_per_second": 0.039, |
|
"eval_steps_per_second": 0.039, |
|
"step": 1275 |
|
}, |
|
{ |
|
"epoch": 5.33, |
|
"learning_rate": 4.026396327467483e-05, |
|
"loss": 2.7008, |
|
"step": 1530 |
|
}, |
|
{ |
|
"epoch": 5.33, |
|
"eval_average_rogue": 0.2285, |
|
"eval_loss": 3.313244104385376, |
|
"eval_rouge1_fmeasure": 0.3531, |
|
"eval_rouge1_precision": 0.3326, |
|
"eval_rouge1_recall": 0.3925, |
|
"eval_rouge2_fmeasure": 0.0693, |
|
"eval_rouge2_precision": 0.0653, |
|
"eval_rouge2_recall": 0.0772, |
|
"eval_rougeL_fmeasure": 0.1521, |
|
"eval_rougeL_precision": 0.1427, |
|
"eval_rougeL_recall": 0.1725, |
|
"eval_rougeLsum_fmeasure": 0.3396, |
|
"eval_rougeLsum_precision": 0.3202, |
|
"eval_rougeLsum_recall": 0.3776, |
|
"eval_runtime": 3297.2909, |
|
"eval_samples_per_second": 0.039, |
|
"eval_steps_per_second": 0.039, |
|
"step": 1530 |
|
}, |
|
{ |
|
"epoch": 6.22, |
|
"learning_rate": 3.7825172149961744e-05, |
|
"loss": 2.5657, |
|
"step": 1785 |
|
}, |
|
{ |
|
"epoch": 6.22, |
|
"eval_average_rogue": 0.2241, |
|
"eval_loss": 3.349586009979248, |
|
"eval_rouge1_fmeasure": 0.3473, |
|
"eval_rouge1_precision": 0.3291, |
|
"eval_rouge1_recall": 0.385, |
|
"eval_rouge2_fmeasure": 0.0658, |
|
"eval_rouge2_precision": 0.0622, |
|
"eval_rouge2_recall": 0.0731, |
|
"eval_rougeL_fmeasure": 0.1504, |
|
"eval_rougeL_precision": 0.1416, |
|
"eval_rougeL_recall": 0.17, |
|
"eval_rougeLsum_fmeasure": 0.3326, |
|
"eval_rougeLsum_precision": 0.3154, |
|
"eval_rougeLsum_recall": 0.3683, |
|
"eval_runtime": 3292.4741, |
|
"eval_samples_per_second": 0.039, |
|
"eval_steps_per_second": 0.039, |
|
"step": 1785 |
|
}, |
|
{ |
|
"epoch": 7.11, |
|
"learning_rate": 3.538638102524866e-05, |
|
"loss": 2.4327, |
|
"step": 2040 |
|
}, |
|
{ |
|
"epoch": 7.11, |
|
"eval_average_rogue": 0.2298, |
|
"eval_loss": 3.437542200088501, |
|
"eval_rouge1_fmeasure": 0.3551, |
|
"eval_rouge1_precision": 0.3347, |
|
"eval_rouge1_recall": 0.3946, |
|
"eval_rouge2_fmeasure": 0.0703, |
|
"eval_rouge2_precision": 0.0664, |
|
"eval_rouge2_recall": 0.0778, |
|
"eval_rougeL_fmeasure": 0.1527, |
|
"eval_rougeL_precision": 0.1433, |
|
"eval_rougeL_recall": 0.1732, |
|
"eval_rougeLsum_fmeasure": 0.341, |
|
"eval_rougeLsum_precision": 0.3218, |
|
"eval_rougeLsum_recall": 0.3788, |
|
"eval_runtime": 3284.3798, |
|
"eval_samples_per_second": 0.039, |
|
"eval_steps_per_second": 0.039, |
|
"step": 2040 |
|
}, |
|
{ |
|
"epoch": 8.0, |
|
"learning_rate": 3.294758990053558e-05, |
|
"loss": 2.3352, |
|
"step": 2295 |
|
}, |
|
{ |
|
"epoch": 8.0, |
|
"eval_average_rogue": 0.2284, |
|
"eval_loss": 3.4355413913726807, |
|
"eval_rouge1_fmeasure": 0.3551, |
|
"eval_rouge1_precision": 0.3334, |
|
"eval_rouge1_recall": 0.397, |
|
"eval_rouge2_fmeasure": 0.0682, |
|
"eval_rouge2_precision": 0.064, |
|
"eval_rouge2_recall": 0.0762, |
|
"eval_rougeL_fmeasure": 0.1507, |
|
"eval_rougeL_precision": 0.1406, |
|
"eval_rougeL_recall": 0.1721, |
|
"eval_rougeLsum_fmeasure": 0.3396, |
|
"eval_rougeLsum_precision": 0.3189, |
|
"eval_rougeLsum_recall": 0.3797, |
|
"eval_runtime": 3319.5759, |
|
"eval_samples_per_second": 0.039, |
|
"eval_steps_per_second": 0.039, |
|
"step": 2295 |
|
}, |
|
{ |
|
"epoch": 8.89, |
|
"learning_rate": 3.0508798775822494e-05, |
|
"loss": 2.2068, |
|
"step": 2550 |
|
}, |
|
{ |
|
"epoch": 8.89, |
|
"eval_average_rogue": 0.2319, |
|
"eval_loss": 3.517580032348633, |
|
"eval_rouge1_fmeasure": 0.3593, |
|
"eval_rouge1_precision": 0.3378, |
|
"eval_rouge1_recall": 0.4005, |
|
"eval_rouge2_fmeasure": 0.0711, |
|
"eval_rouge2_precision": 0.0669, |
|
"eval_rouge2_recall": 0.0793, |
|
"eval_rougeL_fmeasure": 0.153, |
|
"eval_rougeL_precision": 0.1431, |
|
"eval_rougeL_recall": 0.1738, |
|
"eval_rougeLsum_fmeasure": 0.3441, |
|
"eval_rougeLsum_precision": 0.3239, |
|
"eval_rougeLsum_recall": 0.383, |
|
"eval_runtime": 3268.2597, |
|
"eval_samples_per_second": 0.039, |
|
"eval_steps_per_second": 0.039, |
|
"step": 2550 |
|
}, |
|
{ |
|
"epoch": 9.77, |
|
"learning_rate": 2.8070007651109415e-05, |
|
"loss": 2.1177, |
|
"step": 2805 |
|
}, |
|
{ |
|
"epoch": 9.77, |
|
"eval_average_rogue": 0.2322, |
|
"eval_loss": 3.5942444801330566, |
|
"eval_rouge1_fmeasure": 0.3615, |
|
"eval_rouge1_precision": 0.3407, |
|
"eval_rouge1_recall": 0.402, |
|
"eval_rouge2_fmeasure": 0.0691, |
|
"eval_rouge2_precision": 0.0652, |
|
"eval_rouge2_recall": 0.0771, |
|
"eval_rougeL_fmeasure": 0.1516, |
|
"eval_rougeL_precision": 0.1422, |
|
"eval_rougeL_recall": 0.1722, |
|
"eval_rougeLsum_fmeasure": 0.3465, |
|
"eval_rougeLsum_precision": 0.3267, |
|
"eval_rougeLsum_recall": 0.3853, |
|
"eval_runtime": 3329.9183, |
|
"eval_samples_per_second": 0.038, |
|
"eval_steps_per_second": 0.038, |
|
"step": 2805 |
|
}, |
|
{ |
|
"epoch": 10.66, |
|
"learning_rate": 2.563121652639633e-05, |
|
"loss": 2.0452, |
|
"step": 3060 |
|
}, |
|
{ |
|
"epoch": 10.66, |
|
"eval_average_rogue": 0.2329, |
|
"eval_loss": 3.618927001953125, |
|
"eval_rouge1_fmeasure": 0.3627, |
|
"eval_rouge1_precision": 0.3451, |
|
"eval_rouge1_recall": 0.3992, |
|
"eval_rouge2_fmeasure": 0.0688, |
|
"eval_rouge2_precision": 0.0655, |
|
"eval_rouge2_recall": 0.0756, |
|
"eval_rougeL_fmeasure": 0.152, |
|
"eval_rougeL_precision": 0.1437, |
|
"eval_rougeL_recall": 0.1712, |
|
"eval_rougeLsum_fmeasure": 0.3482, |
|
"eval_rougeLsum_precision": 0.3317, |
|
"eval_rougeLsum_recall": 0.383, |
|
"eval_runtime": 3288.5395, |
|
"eval_samples_per_second": 0.039, |
|
"eval_steps_per_second": 0.039, |
|
"step": 3060 |
|
}, |
|
{ |
|
"epoch": 11.55, |
|
"learning_rate": 2.3192425401683247e-05, |
|
"loss": 1.9276, |
|
"step": 3315 |
|
}, |
|
{ |
|
"epoch": 11.55, |
|
"eval_average_rogue": 0.235, |
|
"eval_loss": 3.685786247253418, |
|
"eval_rouge1_fmeasure": 0.3662, |
|
"eval_rouge1_precision": 0.3468, |
|
"eval_rouge1_recall": 0.4051, |
|
"eval_rouge2_fmeasure": 0.0706, |
|
"eval_rouge2_precision": 0.0668, |
|
"eval_rouge2_recall": 0.0788, |
|
"eval_rougeL_fmeasure": 0.1525, |
|
"eval_rougeL_precision": 0.1437, |
|
"eval_rougeL_recall": 0.1723, |
|
"eval_rougeLsum_fmeasure": 0.3508, |
|
"eval_rougeLsum_precision": 0.3328, |
|
"eval_rougeLsum_recall": 0.3883, |
|
"eval_runtime": 3283.2817, |
|
"eval_samples_per_second": 0.039, |
|
"eval_steps_per_second": 0.039, |
|
"step": 3315 |
|
}, |
|
{ |
|
"epoch": 12.44, |
|
"learning_rate": 2.0753634276970162e-05, |
|
"loss": 1.9006, |
|
"step": 3570 |
|
}, |
|
{ |
|
"epoch": 12.44, |
|
"eval_average_rogue": 0.2331, |
|
"eval_loss": 3.7175817489624023, |
|
"eval_rouge1_fmeasure": 0.3627, |
|
"eval_rouge1_precision": 0.3429, |
|
"eval_rouge1_recall": 0.4025, |
|
"eval_rouge2_fmeasure": 0.0698, |
|
"eval_rouge2_precision": 0.0659, |
|
"eval_rouge2_recall": 0.0783, |
|
"eval_rougeL_fmeasure": 0.152, |
|
"eval_rougeL_precision": 0.1429, |
|
"eval_rougeL_recall": 0.1722, |
|
"eval_rougeLsum_fmeasure": 0.3477, |
|
"eval_rougeLsum_precision": 0.329, |
|
"eval_rougeLsum_recall": 0.386, |
|
"eval_runtime": 3384.322, |
|
"eval_samples_per_second": 0.038, |
|
"eval_steps_per_second": 0.038, |
|
"step": 3570 |
|
}, |
|
{ |
|
"epoch": 13.33, |
|
"learning_rate": 1.8314843152257076e-05, |
|
"loss": 1.8247, |
|
"step": 3825 |
|
}, |
|
{ |
|
"epoch": 13.33, |
|
"eval_average_rogue": 0.2358, |
|
"eval_loss": 3.7242326736450195, |
|
"eval_rouge1_fmeasure": 0.3679, |
|
"eval_rouge1_precision": 0.3481, |
|
"eval_rouge1_recall": 0.4077, |
|
"eval_rouge2_fmeasure": 0.0698, |
|
"eval_rouge2_precision": 0.066, |
|
"eval_rouge2_recall": 0.0773, |
|
"eval_rougeL_fmeasure": 0.1537, |
|
"eval_rougeL_precision": 0.1444, |
|
"eval_rougeL_recall": 0.1741, |
|
"eval_rougeLsum_fmeasure": 0.3517, |
|
"eval_rougeLsum_precision": 0.3323, |
|
"eval_rougeLsum_recall": 0.3894, |
|
"eval_runtime": 3438.3144, |
|
"eval_samples_per_second": 0.037, |
|
"eval_steps_per_second": 0.037, |
|
"step": 3825 |
|
}, |
|
{ |
|
"epoch": 14.22, |
|
"learning_rate": 1.5876052027543994e-05, |
|
"loss": 1.7352, |
|
"step": 4080 |
|
}, |
|
{ |
|
"epoch": 14.22, |
|
"eval_average_rogue": 0.2336, |
|
"eval_loss": 3.790210723876953, |
|
"eval_rouge1_fmeasure": 0.3648, |
|
"eval_rouge1_precision": 0.3457, |
|
"eval_rouge1_recall": 0.4025, |
|
"eval_rouge2_fmeasure": 0.0702, |
|
"eval_rouge2_precision": 0.0667, |
|
"eval_rouge2_recall": 0.0776, |
|
"eval_rougeL_fmeasure": 0.1508, |
|
"eval_rougeL_precision": 0.1422, |
|
"eval_rougeL_recall": 0.1702, |
|
"eval_rougeLsum_fmeasure": 0.3486, |
|
"eval_rougeLsum_precision": 0.3307, |
|
"eval_rougeLsum_recall": 0.3849, |
|
"eval_runtime": 3429.0398, |
|
"eval_samples_per_second": 0.037, |
|
"eval_steps_per_second": 0.037, |
|
"step": 4080 |
|
}, |
|
{ |
|
"epoch": 15.1, |
|
"learning_rate": 1.3437260902830912e-05, |
|
"loss": 1.7091, |
|
"step": 4335 |
|
}, |
|
{ |
|
"epoch": 15.1, |
|
"eval_average_rogue": 0.2351, |
|
"eval_loss": 3.8391542434692383, |
|
"eval_rouge1_fmeasure": 0.3664, |
|
"eval_rouge1_precision": 0.346, |
|
"eval_rouge1_recall": 0.4069, |
|
"eval_rouge2_fmeasure": 0.0706, |
|
"eval_rouge2_precision": 0.0666, |
|
"eval_rouge2_recall": 0.0786, |
|
"eval_rougeL_fmeasure": 0.1527, |
|
"eval_rougeL_precision": 0.1435, |
|
"eval_rougeL_recall": 0.1728, |
|
"eval_rougeLsum_fmeasure": 0.3509, |
|
"eval_rougeLsum_precision": 0.331, |
|
"eval_rougeLsum_recall": 0.3892, |
|
"eval_runtime": 3445.0892, |
|
"eval_samples_per_second": 0.037, |
|
"eval_steps_per_second": 0.037, |
|
"step": 4335 |
|
}, |
|
{ |
|
"epoch": 15.99, |
|
"learning_rate": 1.0998469778117827e-05, |
|
"loss": 1.654, |
|
"step": 4590 |
|
}, |
|
{ |
|
"epoch": 15.99, |
|
"eval_average_rogue": 0.2347, |
|
"eval_loss": 3.8251237869262695, |
|
"eval_rouge1_fmeasure": 0.3674, |
|
"eval_rouge1_precision": 0.3475, |
|
"eval_rouge1_recall": 0.4065, |
|
"eval_rouge2_fmeasure": 0.0692, |
|
"eval_rouge2_precision": 0.0655, |
|
"eval_rouge2_recall": 0.0767, |
|
"eval_rougeL_fmeasure": 0.1515, |
|
"eval_rougeL_precision": 0.1425, |
|
"eval_rougeL_recall": 0.1713, |
|
"eval_rougeLsum_fmeasure": 0.3508, |
|
"eval_rougeLsum_precision": 0.3318, |
|
"eval_rougeLsum_recall": 0.3883, |
|
"eval_runtime": 3431.7145, |
|
"eval_samples_per_second": 0.037, |
|
"eval_steps_per_second": 0.037, |
|
"step": 4590 |
|
}, |
|
{ |
|
"epoch": 16.88, |
|
"learning_rate": 8.559678653404744e-06, |
|
"loss": 1.6034, |
|
"step": 4845 |
|
}, |
|
{ |
|
"epoch": 16.88, |
|
"eval_average_rogue": 0.2342, |
|
"eval_loss": 3.8599014282226562, |
|
"eval_rouge1_fmeasure": 0.3653, |
|
"eval_rouge1_precision": 0.3449, |
|
"eval_rouge1_recall": 0.4056, |
|
"eval_rouge2_fmeasure": 0.0694, |
|
"eval_rouge2_precision": 0.0655, |
|
"eval_rouge2_recall": 0.0771, |
|
"eval_rougeL_fmeasure": 0.1531, |
|
"eval_rougeL_precision": 0.1438, |
|
"eval_rougeL_recall": 0.1738, |
|
"eval_rougeLsum_fmeasure": 0.3491, |
|
"eval_rougeLsum_precision": 0.3295, |
|
"eval_rougeLsum_recall": 0.3877, |
|
"eval_runtime": 3434.4477, |
|
"eval_samples_per_second": 0.037, |
|
"eval_steps_per_second": 0.037, |
|
"step": 4845 |
|
}, |
|
{ |
|
"epoch": 17.77, |
|
"learning_rate": 6.120887528691661e-06, |
|
"loss": 1.5801, |
|
"step": 5100 |
|
}, |
|
{ |
|
"epoch": 17.77, |
|
"eval_average_rogue": 0.2336, |
|
"eval_loss": 3.8798491954803467, |
|
"eval_rouge1_fmeasure": 0.3647, |
|
"eval_rouge1_precision": 0.3432, |
|
"eval_rouge1_recall": 0.4057, |
|
"eval_rouge2_fmeasure": 0.0682, |
|
"eval_rouge2_precision": 0.0643, |
|
"eval_rouge2_recall": 0.0758, |
|
"eval_rougeL_fmeasure": 0.1523, |
|
"eval_rougeL_precision": 0.1426, |
|
"eval_rougeL_recall": 0.1732, |
|
"eval_rougeLsum_fmeasure": 0.3491, |
|
"eval_rougeLsum_precision": 0.329, |
|
"eval_rougeLsum_recall": 0.3882, |
|
"eval_runtime": 3446.3776, |
|
"eval_samples_per_second": 0.037, |
|
"eval_steps_per_second": 0.037, |
|
"step": 5100 |
|
} |
|
], |
|
"max_steps": 5740, |
|
"num_train_epochs": 20, |
|
"total_flos": 1.37710446575616e+16, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|