script2sumFinal / trainer_state.json
grizzlypath26's picture
Initial Upload
5de6049
raw
history blame
16.8 kB
{
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 17.770034843205575,
"global_step": 5100,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.89,
"learning_rate": 2.4902343750000002e-05,
"loss": 3.791,
"step": 255
},
{
"epoch": 0.89,
"eval_average_rogue": 0.2125,
"eval_loss": 3.328416347503662,
"eval_rouge1_fmeasure": 0.3288,
"eval_rouge1_precision": 0.3115,
"eval_rouge1_recall": 0.3637,
"eval_rouge2_fmeasure": 0.06,
"eval_rouge2_precision": 0.0569,
"eval_rouge2_recall": 0.0664,
"eval_rougeL_fmeasure": 0.145,
"eval_rougeL_precision": 0.1366,
"eval_rougeL_recall": 0.1631,
"eval_rougeLsum_fmeasure": 0.3161,
"eval_rougeLsum_precision": 0.2997,
"eval_rougeLsum_recall": 0.3496,
"eval_runtime": 3350.2964,
"eval_samples_per_second": 0.038,
"eval_steps_per_second": 0.038,
"step": 255
},
{
"epoch": 1.78,
"learning_rate": 4.9804687500000004e-05,
"loss": 3.3793,
"step": 510
},
{
"epoch": 1.78,
"eval_average_rogue": 0.2109,
"eval_loss": 3.268017292022705,
"eval_rouge1_fmeasure": 0.3285,
"eval_rouge1_precision": 0.3068,
"eval_rouge1_recall": 0.3699,
"eval_rouge2_fmeasure": 0.0571,
"eval_rouge2_precision": 0.053,
"eval_rouge2_recall": 0.065,
"eval_rougeL_fmeasure": 0.1417,
"eval_rougeL_precision": 0.1315,
"eval_rougeL_recall": 0.1631,
"eval_rougeLsum_fmeasure": 0.3163,
"eval_rougeLsum_precision": 0.2954,
"eval_rougeLsum_recall": 0.356,
"eval_runtime": 3338.0119,
"eval_samples_per_second": 0.038,
"eval_steps_per_second": 0.038,
"step": 510
},
{
"epoch": 2.67,
"learning_rate": 4.758033664881408e-05,
"loss": 3.1826,
"step": 765
},
{
"epoch": 2.67,
"eval_average_rogue": 0.214,
"eval_loss": 3.245297431945801,
"eval_rouge1_fmeasure": 0.3293,
"eval_rouge1_precision": 0.3099,
"eval_rouge1_recall": 0.3668,
"eval_rouge2_fmeasure": 0.0644,
"eval_rouge2_precision": 0.0605,
"eval_rouge2_recall": 0.0717,
"eval_rougeL_fmeasure": 0.145,
"eval_rougeL_precision": 0.1356,
"eval_rougeL_recall": 0.1647,
"eval_rougeLsum_fmeasure": 0.3173,
"eval_rougeLsum_precision": 0.2986,
"eval_rougeLsum_recall": 0.3531,
"eval_runtime": 3330.8537,
"eval_samples_per_second": 0.038,
"eval_steps_per_second": 0.038,
"step": 765
},
{
"epoch": 3.55,
"learning_rate": 4.5141545524101e-05,
"loss": 3.0133,
"step": 1020
},
{
"epoch": 3.55,
"eval_average_rogue": 0.1948,
"eval_loss": 3.2580819129943848,
"eval_rouge1_fmeasure": 0.2983,
"eval_rouge1_precision": 0.2893,
"eval_rouge1_recall": 0.3239,
"eval_rouge2_fmeasure": 0.0545,
"eval_rouge2_precision": 0.0527,
"eval_rouge2_recall": 0.0591,
"eval_rougeL_fmeasure": 0.1397,
"eval_rougeL_precision": 0.1356,
"eval_rougeL_recall": 0.1534,
"eval_rougeLsum_fmeasure": 0.2865,
"eval_rougeLsum_precision": 0.2776,
"eval_rougeLsum_recall": 0.3105,
"eval_runtime": 3289.5263,
"eval_samples_per_second": 0.039,
"eval_steps_per_second": 0.039,
"step": 1020
},
{
"epoch": 4.44,
"learning_rate": 4.2702754399387915e-05,
"loss": 2.8569,
"step": 1275
},
{
"epoch": 4.44,
"eval_average_rogue": 0.2256,
"eval_loss": 3.2716007232666016,
"eval_rouge1_fmeasure": 0.3491,
"eval_rouge1_precision": 0.3326,
"eval_rouge1_recall": 0.3835,
"eval_rouge2_fmeasure": 0.0671,
"eval_rouge2_precision": 0.0641,
"eval_rouge2_recall": 0.0732,
"eval_rougeL_fmeasure": 0.1516,
"eval_rougeL_precision": 0.1436,
"eval_rougeL_recall": 0.1701,
"eval_rougeLsum_fmeasure": 0.3346,
"eval_rougeLsum_precision": 0.3189,
"eval_rougeLsum_recall": 0.368,
"eval_runtime": 3297.3203,
"eval_samples_per_second": 0.039,
"eval_steps_per_second": 0.039,
"step": 1275
},
{
"epoch": 5.33,
"learning_rate": 4.026396327467483e-05,
"loss": 2.7008,
"step": 1530
},
{
"epoch": 5.33,
"eval_average_rogue": 0.2285,
"eval_loss": 3.313244104385376,
"eval_rouge1_fmeasure": 0.3531,
"eval_rouge1_precision": 0.3326,
"eval_rouge1_recall": 0.3925,
"eval_rouge2_fmeasure": 0.0693,
"eval_rouge2_precision": 0.0653,
"eval_rouge2_recall": 0.0772,
"eval_rougeL_fmeasure": 0.1521,
"eval_rougeL_precision": 0.1427,
"eval_rougeL_recall": 0.1725,
"eval_rougeLsum_fmeasure": 0.3396,
"eval_rougeLsum_precision": 0.3202,
"eval_rougeLsum_recall": 0.3776,
"eval_runtime": 3297.2909,
"eval_samples_per_second": 0.039,
"eval_steps_per_second": 0.039,
"step": 1530
},
{
"epoch": 6.22,
"learning_rate": 3.7825172149961744e-05,
"loss": 2.5657,
"step": 1785
},
{
"epoch": 6.22,
"eval_average_rogue": 0.2241,
"eval_loss": 3.349586009979248,
"eval_rouge1_fmeasure": 0.3473,
"eval_rouge1_precision": 0.3291,
"eval_rouge1_recall": 0.385,
"eval_rouge2_fmeasure": 0.0658,
"eval_rouge2_precision": 0.0622,
"eval_rouge2_recall": 0.0731,
"eval_rougeL_fmeasure": 0.1504,
"eval_rougeL_precision": 0.1416,
"eval_rougeL_recall": 0.17,
"eval_rougeLsum_fmeasure": 0.3326,
"eval_rougeLsum_precision": 0.3154,
"eval_rougeLsum_recall": 0.3683,
"eval_runtime": 3292.4741,
"eval_samples_per_second": 0.039,
"eval_steps_per_second": 0.039,
"step": 1785
},
{
"epoch": 7.11,
"learning_rate": 3.538638102524866e-05,
"loss": 2.4327,
"step": 2040
},
{
"epoch": 7.11,
"eval_average_rogue": 0.2298,
"eval_loss": 3.437542200088501,
"eval_rouge1_fmeasure": 0.3551,
"eval_rouge1_precision": 0.3347,
"eval_rouge1_recall": 0.3946,
"eval_rouge2_fmeasure": 0.0703,
"eval_rouge2_precision": 0.0664,
"eval_rouge2_recall": 0.0778,
"eval_rougeL_fmeasure": 0.1527,
"eval_rougeL_precision": 0.1433,
"eval_rougeL_recall": 0.1732,
"eval_rougeLsum_fmeasure": 0.341,
"eval_rougeLsum_precision": 0.3218,
"eval_rougeLsum_recall": 0.3788,
"eval_runtime": 3284.3798,
"eval_samples_per_second": 0.039,
"eval_steps_per_second": 0.039,
"step": 2040
},
{
"epoch": 8.0,
"learning_rate": 3.294758990053558e-05,
"loss": 2.3352,
"step": 2295
},
{
"epoch": 8.0,
"eval_average_rogue": 0.2284,
"eval_loss": 3.4355413913726807,
"eval_rouge1_fmeasure": 0.3551,
"eval_rouge1_precision": 0.3334,
"eval_rouge1_recall": 0.397,
"eval_rouge2_fmeasure": 0.0682,
"eval_rouge2_precision": 0.064,
"eval_rouge2_recall": 0.0762,
"eval_rougeL_fmeasure": 0.1507,
"eval_rougeL_precision": 0.1406,
"eval_rougeL_recall": 0.1721,
"eval_rougeLsum_fmeasure": 0.3396,
"eval_rougeLsum_precision": 0.3189,
"eval_rougeLsum_recall": 0.3797,
"eval_runtime": 3319.5759,
"eval_samples_per_second": 0.039,
"eval_steps_per_second": 0.039,
"step": 2295
},
{
"epoch": 8.89,
"learning_rate": 3.0508798775822494e-05,
"loss": 2.2068,
"step": 2550
},
{
"epoch": 8.89,
"eval_average_rogue": 0.2319,
"eval_loss": 3.517580032348633,
"eval_rouge1_fmeasure": 0.3593,
"eval_rouge1_precision": 0.3378,
"eval_rouge1_recall": 0.4005,
"eval_rouge2_fmeasure": 0.0711,
"eval_rouge2_precision": 0.0669,
"eval_rouge2_recall": 0.0793,
"eval_rougeL_fmeasure": 0.153,
"eval_rougeL_precision": 0.1431,
"eval_rougeL_recall": 0.1738,
"eval_rougeLsum_fmeasure": 0.3441,
"eval_rougeLsum_precision": 0.3239,
"eval_rougeLsum_recall": 0.383,
"eval_runtime": 3268.2597,
"eval_samples_per_second": 0.039,
"eval_steps_per_second": 0.039,
"step": 2550
},
{
"epoch": 9.77,
"learning_rate": 2.8070007651109415e-05,
"loss": 2.1177,
"step": 2805
},
{
"epoch": 9.77,
"eval_average_rogue": 0.2322,
"eval_loss": 3.5942444801330566,
"eval_rouge1_fmeasure": 0.3615,
"eval_rouge1_precision": 0.3407,
"eval_rouge1_recall": 0.402,
"eval_rouge2_fmeasure": 0.0691,
"eval_rouge2_precision": 0.0652,
"eval_rouge2_recall": 0.0771,
"eval_rougeL_fmeasure": 0.1516,
"eval_rougeL_precision": 0.1422,
"eval_rougeL_recall": 0.1722,
"eval_rougeLsum_fmeasure": 0.3465,
"eval_rougeLsum_precision": 0.3267,
"eval_rougeLsum_recall": 0.3853,
"eval_runtime": 3329.9183,
"eval_samples_per_second": 0.038,
"eval_steps_per_second": 0.038,
"step": 2805
},
{
"epoch": 10.66,
"learning_rate": 2.563121652639633e-05,
"loss": 2.0452,
"step": 3060
},
{
"epoch": 10.66,
"eval_average_rogue": 0.2329,
"eval_loss": 3.618927001953125,
"eval_rouge1_fmeasure": 0.3627,
"eval_rouge1_precision": 0.3451,
"eval_rouge1_recall": 0.3992,
"eval_rouge2_fmeasure": 0.0688,
"eval_rouge2_precision": 0.0655,
"eval_rouge2_recall": 0.0756,
"eval_rougeL_fmeasure": 0.152,
"eval_rougeL_precision": 0.1437,
"eval_rougeL_recall": 0.1712,
"eval_rougeLsum_fmeasure": 0.3482,
"eval_rougeLsum_precision": 0.3317,
"eval_rougeLsum_recall": 0.383,
"eval_runtime": 3288.5395,
"eval_samples_per_second": 0.039,
"eval_steps_per_second": 0.039,
"step": 3060
},
{
"epoch": 11.55,
"learning_rate": 2.3192425401683247e-05,
"loss": 1.9276,
"step": 3315
},
{
"epoch": 11.55,
"eval_average_rogue": 0.235,
"eval_loss": 3.685786247253418,
"eval_rouge1_fmeasure": 0.3662,
"eval_rouge1_precision": 0.3468,
"eval_rouge1_recall": 0.4051,
"eval_rouge2_fmeasure": 0.0706,
"eval_rouge2_precision": 0.0668,
"eval_rouge2_recall": 0.0788,
"eval_rougeL_fmeasure": 0.1525,
"eval_rougeL_precision": 0.1437,
"eval_rougeL_recall": 0.1723,
"eval_rougeLsum_fmeasure": 0.3508,
"eval_rougeLsum_precision": 0.3328,
"eval_rougeLsum_recall": 0.3883,
"eval_runtime": 3283.2817,
"eval_samples_per_second": 0.039,
"eval_steps_per_second": 0.039,
"step": 3315
},
{
"epoch": 12.44,
"learning_rate": 2.0753634276970162e-05,
"loss": 1.9006,
"step": 3570
},
{
"epoch": 12.44,
"eval_average_rogue": 0.2331,
"eval_loss": 3.7175817489624023,
"eval_rouge1_fmeasure": 0.3627,
"eval_rouge1_precision": 0.3429,
"eval_rouge1_recall": 0.4025,
"eval_rouge2_fmeasure": 0.0698,
"eval_rouge2_precision": 0.0659,
"eval_rouge2_recall": 0.0783,
"eval_rougeL_fmeasure": 0.152,
"eval_rougeL_precision": 0.1429,
"eval_rougeL_recall": 0.1722,
"eval_rougeLsum_fmeasure": 0.3477,
"eval_rougeLsum_precision": 0.329,
"eval_rougeLsum_recall": 0.386,
"eval_runtime": 3384.322,
"eval_samples_per_second": 0.038,
"eval_steps_per_second": 0.038,
"step": 3570
},
{
"epoch": 13.33,
"learning_rate": 1.8314843152257076e-05,
"loss": 1.8247,
"step": 3825
},
{
"epoch": 13.33,
"eval_average_rogue": 0.2358,
"eval_loss": 3.7242326736450195,
"eval_rouge1_fmeasure": 0.3679,
"eval_rouge1_precision": 0.3481,
"eval_rouge1_recall": 0.4077,
"eval_rouge2_fmeasure": 0.0698,
"eval_rouge2_precision": 0.066,
"eval_rouge2_recall": 0.0773,
"eval_rougeL_fmeasure": 0.1537,
"eval_rougeL_precision": 0.1444,
"eval_rougeL_recall": 0.1741,
"eval_rougeLsum_fmeasure": 0.3517,
"eval_rougeLsum_precision": 0.3323,
"eval_rougeLsum_recall": 0.3894,
"eval_runtime": 3438.3144,
"eval_samples_per_second": 0.037,
"eval_steps_per_second": 0.037,
"step": 3825
},
{
"epoch": 14.22,
"learning_rate": 1.5876052027543994e-05,
"loss": 1.7352,
"step": 4080
},
{
"epoch": 14.22,
"eval_average_rogue": 0.2336,
"eval_loss": 3.790210723876953,
"eval_rouge1_fmeasure": 0.3648,
"eval_rouge1_precision": 0.3457,
"eval_rouge1_recall": 0.4025,
"eval_rouge2_fmeasure": 0.0702,
"eval_rouge2_precision": 0.0667,
"eval_rouge2_recall": 0.0776,
"eval_rougeL_fmeasure": 0.1508,
"eval_rougeL_precision": 0.1422,
"eval_rougeL_recall": 0.1702,
"eval_rougeLsum_fmeasure": 0.3486,
"eval_rougeLsum_precision": 0.3307,
"eval_rougeLsum_recall": 0.3849,
"eval_runtime": 3429.0398,
"eval_samples_per_second": 0.037,
"eval_steps_per_second": 0.037,
"step": 4080
},
{
"epoch": 15.1,
"learning_rate": 1.3437260902830912e-05,
"loss": 1.7091,
"step": 4335
},
{
"epoch": 15.1,
"eval_average_rogue": 0.2351,
"eval_loss": 3.8391542434692383,
"eval_rouge1_fmeasure": 0.3664,
"eval_rouge1_precision": 0.346,
"eval_rouge1_recall": 0.4069,
"eval_rouge2_fmeasure": 0.0706,
"eval_rouge2_precision": 0.0666,
"eval_rouge2_recall": 0.0786,
"eval_rougeL_fmeasure": 0.1527,
"eval_rougeL_precision": 0.1435,
"eval_rougeL_recall": 0.1728,
"eval_rougeLsum_fmeasure": 0.3509,
"eval_rougeLsum_precision": 0.331,
"eval_rougeLsum_recall": 0.3892,
"eval_runtime": 3445.0892,
"eval_samples_per_second": 0.037,
"eval_steps_per_second": 0.037,
"step": 4335
},
{
"epoch": 15.99,
"learning_rate": 1.0998469778117827e-05,
"loss": 1.654,
"step": 4590
},
{
"epoch": 15.99,
"eval_average_rogue": 0.2347,
"eval_loss": 3.8251237869262695,
"eval_rouge1_fmeasure": 0.3674,
"eval_rouge1_precision": 0.3475,
"eval_rouge1_recall": 0.4065,
"eval_rouge2_fmeasure": 0.0692,
"eval_rouge2_precision": 0.0655,
"eval_rouge2_recall": 0.0767,
"eval_rougeL_fmeasure": 0.1515,
"eval_rougeL_precision": 0.1425,
"eval_rougeL_recall": 0.1713,
"eval_rougeLsum_fmeasure": 0.3508,
"eval_rougeLsum_precision": 0.3318,
"eval_rougeLsum_recall": 0.3883,
"eval_runtime": 3431.7145,
"eval_samples_per_second": 0.037,
"eval_steps_per_second": 0.037,
"step": 4590
},
{
"epoch": 16.88,
"learning_rate": 8.559678653404744e-06,
"loss": 1.6034,
"step": 4845
},
{
"epoch": 16.88,
"eval_average_rogue": 0.2342,
"eval_loss": 3.8599014282226562,
"eval_rouge1_fmeasure": 0.3653,
"eval_rouge1_precision": 0.3449,
"eval_rouge1_recall": 0.4056,
"eval_rouge2_fmeasure": 0.0694,
"eval_rouge2_precision": 0.0655,
"eval_rouge2_recall": 0.0771,
"eval_rougeL_fmeasure": 0.1531,
"eval_rougeL_precision": 0.1438,
"eval_rougeL_recall": 0.1738,
"eval_rougeLsum_fmeasure": 0.3491,
"eval_rougeLsum_precision": 0.3295,
"eval_rougeLsum_recall": 0.3877,
"eval_runtime": 3434.4477,
"eval_samples_per_second": 0.037,
"eval_steps_per_second": 0.037,
"step": 4845
},
{
"epoch": 17.77,
"learning_rate": 6.120887528691661e-06,
"loss": 1.5801,
"step": 5100
},
{
"epoch": 17.77,
"eval_average_rogue": 0.2336,
"eval_loss": 3.8798491954803467,
"eval_rouge1_fmeasure": 0.3647,
"eval_rouge1_precision": 0.3432,
"eval_rouge1_recall": 0.4057,
"eval_rouge2_fmeasure": 0.0682,
"eval_rouge2_precision": 0.0643,
"eval_rouge2_recall": 0.0758,
"eval_rougeL_fmeasure": 0.1523,
"eval_rougeL_precision": 0.1426,
"eval_rougeL_recall": 0.1732,
"eval_rougeLsum_fmeasure": 0.3491,
"eval_rougeLsum_precision": 0.329,
"eval_rougeLsum_recall": 0.3882,
"eval_runtime": 3446.3776,
"eval_samples_per_second": 0.037,
"eval_steps_per_second": 0.037,
"step": 5100
}
],
"max_steps": 5740,
"num_train_epochs": 20,
"total_flos": 1.37710446575616e+16,
"trial_name": null,
"trial_params": null
}