zerofec-qa2claim-t5-base / trainer_state.json
khuangaf
first commit
636af75
raw
history blame
55.1 kB
{
"best_metric": 92.6157,
"best_model_checkpoint": "qa2claim-base/checkpoint-12000",
"epoch": 1.5512736773350753,
"global_step": 38000,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.0,
"learning_rate": 0.0,
"loss": 3.2255,
"step": 1
},
{
"epoch": 0.0,
"learning_rate": 5.8800000000000005e-06,
"loss": 3.2465,
"step": 100
},
{
"epoch": 0.01,
"learning_rate": 1.182e-05,
"loss": 2.6401,
"step": 200
},
{
"epoch": 0.01,
"learning_rate": 1.782e-05,
"loss": 2.2893,
"step": 300
},
{
"epoch": 0.02,
"learning_rate": 2.3820000000000002e-05,
"loss": 2.1163,
"step": 400
},
{
"epoch": 0.02,
"learning_rate": 2.982e-05,
"loss": 2.0047,
"step": 500
},
{
"epoch": 0.02,
"learning_rate": 2.997105527638191e-05,
"loss": 1.9368,
"step": 600
},
{
"epoch": 0.03,
"learning_rate": 2.9940904522613068e-05,
"loss": 1.9029,
"step": 700
},
{
"epoch": 0.03,
"learning_rate": 2.9910753768844223e-05,
"loss": 1.8627,
"step": 800
},
{
"epoch": 0.04,
"learning_rate": 2.988090452261307e-05,
"loss": 1.8403,
"step": 900
},
{
"epoch": 0.04,
"learning_rate": 2.9850753768844224e-05,
"loss": 1.835,
"step": 1000
},
{
"epoch": 0.04,
"learning_rate": 2.9820603015075376e-05,
"loss": 1.8027,
"step": 1100
},
{
"epoch": 0.05,
"learning_rate": 2.979045226130653e-05,
"loss": 1.8176,
"step": 1200
},
{
"epoch": 0.05,
"learning_rate": 2.976030150753769e-05,
"loss": 1.7983,
"step": 1300
},
{
"epoch": 0.06,
"learning_rate": 2.9730150753768845e-05,
"loss": 1.778,
"step": 1400
},
{
"epoch": 0.06,
"learning_rate": 2.97e-05,
"loss": 1.7764,
"step": 1500
},
{
"epoch": 0.07,
"learning_rate": 2.9669849246231156e-05,
"loss": 1.7599,
"step": 1600
},
{
"epoch": 0.07,
"learning_rate": 2.9639698492462314e-05,
"loss": 1.7572,
"step": 1700
},
{
"epoch": 0.07,
"learning_rate": 2.960954773869347e-05,
"loss": 1.7773,
"step": 1800
},
{
"epoch": 0.08,
"learning_rate": 2.9579396984924625e-05,
"loss": 1.7301,
"step": 1900
},
{
"epoch": 0.08,
"learning_rate": 2.954924623115578e-05,
"loss": 1.7536,
"step": 2000
},
{
"epoch": 0.08,
"eval_bleu": 75.15293608723576,
"eval_gen_len": 16.288,
"eval_loss": 1.6770071983337402,
"eval_meteor": 0.8848959776023978,
"eval_rouge1": 92.6205,
"eval_rouge2": 86.7136,
"eval_rougeL": 89.2742,
"eval_rougeLsum": 89.2914,
"eval_runtime": 29.1055,
"eval_samples_per_second": 17.179,
"eval_steps_per_second": 2.165,
"step": 2000
},
{
"epoch": 0.09,
"learning_rate": 2.951909547738694e-05,
"loss": 1.7313,
"step": 2100
},
{
"epoch": 0.09,
"learning_rate": 2.948894472361809e-05,
"loss": 1.7308,
"step": 2200
},
{
"epoch": 0.09,
"learning_rate": 2.9458793969849246e-05,
"loss": 1.7186,
"step": 2300
},
{
"epoch": 0.1,
"learning_rate": 2.94286432160804e-05,
"loss": 1.7262,
"step": 2400
},
{
"epoch": 0.1,
"learning_rate": 2.9398492462311556e-05,
"loss": 1.702,
"step": 2500
},
{
"epoch": 0.11,
"learning_rate": 2.9368341708542715e-05,
"loss": 1.7107,
"step": 2600
},
{
"epoch": 0.11,
"learning_rate": 2.933819095477387e-05,
"loss": 1.7187,
"step": 2700
},
{
"epoch": 0.11,
"learning_rate": 2.9308040201005025e-05,
"loss": 1.707,
"step": 2800
},
{
"epoch": 0.12,
"learning_rate": 2.927788944723618e-05,
"loss": 1.7019,
"step": 2900
},
{
"epoch": 0.12,
"learning_rate": 2.924773869346734e-05,
"loss": 1.6977,
"step": 3000
},
{
"epoch": 0.13,
"learning_rate": 2.9217587939698495e-05,
"loss": 1.7047,
"step": 3100
},
{
"epoch": 0.13,
"learning_rate": 2.918743718592965e-05,
"loss": 1.6943,
"step": 3200
},
{
"epoch": 0.13,
"learning_rate": 2.9157286432160802e-05,
"loss": 1.6795,
"step": 3300
},
{
"epoch": 0.14,
"learning_rate": 2.912713567839196e-05,
"loss": 1.6781,
"step": 3400
},
{
"epoch": 0.14,
"learning_rate": 2.9096984924623116e-05,
"loss": 1.7098,
"step": 3500
},
{
"epoch": 0.15,
"learning_rate": 2.906683417085427e-05,
"loss": 1.6774,
"step": 3600
},
{
"epoch": 0.15,
"learning_rate": 2.9036683417085426e-05,
"loss": 1.6911,
"step": 3700
},
{
"epoch": 0.16,
"learning_rate": 2.9006532663316585e-05,
"loss": 1.6933,
"step": 3800
},
{
"epoch": 0.16,
"learning_rate": 2.8976683417085427e-05,
"loss": 1.6742,
"step": 3900
},
{
"epoch": 0.16,
"learning_rate": 2.8946532663316586e-05,
"loss": 1.6804,
"step": 4000
},
{
"epoch": 0.16,
"eval_bleu": 77.38072312145013,
"eval_gen_len": 16.306,
"eval_loss": 1.6234748363494873,
"eval_meteor": 0.892630886417938,
"eval_rouge1": 93.2399,
"eval_rouge2": 88.3086,
"eval_rougeL": 90.3444,
"eval_rougeLsum": 90.3735,
"eval_runtime": 25.4106,
"eval_samples_per_second": 19.677,
"eval_steps_per_second": 2.479,
"step": 4000
},
{
"epoch": 0.17,
"learning_rate": 2.891638190954774e-05,
"loss": 1.673,
"step": 4100
},
{
"epoch": 0.17,
"learning_rate": 2.8886231155778896e-05,
"loss": 1.6754,
"step": 4200
},
{
"epoch": 0.18,
"learning_rate": 2.885608040201005e-05,
"loss": 1.6668,
"step": 4300
},
{
"epoch": 0.18,
"learning_rate": 2.8825929648241207e-05,
"loss": 1.6759,
"step": 4400
},
{
"epoch": 0.18,
"learning_rate": 2.8795778894472362e-05,
"loss": 1.6526,
"step": 4500
},
{
"epoch": 0.19,
"learning_rate": 2.8765628140703517e-05,
"loss": 1.6654,
"step": 4600
},
{
"epoch": 0.19,
"learning_rate": 2.8735477386934672e-05,
"loss": 1.6466,
"step": 4700
},
{
"epoch": 0.2,
"learning_rate": 2.870532663316583e-05,
"loss": 1.6789,
"step": 4800
},
{
"epoch": 0.2,
"learning_rate": 2.8675175879396986e-05,
"loss": 1.6583,
"step": 4900
},
{
"epoch": 0.2,
"learning_rate": 2.864502512562814e-05,
"loss": 1.6639,
"step": 5000
},
{
"epoch": 0.21,
"learning_rate": 2.8614874371859297e-05,
"loss": 1.6464,
"step": 5100
},
{
"epoch": 0.21,
"learning_rate": 2.8584723618090452e-05,
"loss": 1.6654,
"step": 5200
},
{
"epoch": 0.22,
"learning_rate": 2.8554874371859297e-05,
"loss": 1.6364,
"step": 5300
},
{
"epoch": 0.22,
"learning_rate": 2.8525025125628143e-05,
"loss": 1.6325,
"step": 5400
},
{
"epoch": 0.22,
"learning_rate": 2.8494874371859298e-05,
"loss": 1.6496,
"step": 5500
},
{
"epoch": 0.23,
"learning_rate": 2.846502512562814e-05,
"loss": 1.6542,
"step": 5600
},
{
"epoch": 0.23,
"learning_rate": 2.8434874371859295e-05,
"loss": 1.6418,
"step": 5700
},
{
"epoch": 0.24,
"learning_rate": 2.8404723618090454e-05,
"loss": 1.6421,
"step": 5800
},
{
"epoch": 0.24,
"learning_rate": 2.8374874371859296e-05,
"loss": 1.6427,
"step": 5900
},
{
"epoch": 0.24,
"learning_rate": 2.8344723618090454e-05,
"loss": 1.6423,
"step": 6000
},
{
"epoch": 0.24,
"eval_bleu": 78.69642597526826,
"eval_gen_len": 16.314,
"eval_loss": 1.593881607055664,
"eval_meteor": 0.8979075563574866,
"eval_rouge1": 93.7314,
"eval_rouge2": 89.4698,
"eval_rougeL": 91.3989,
"eval_rougeLsum": 91.4076,
"eval_runtime": 25.6849,
"eval_samples_per_second": 19.467,
"eval_steps_per_second": 2.453,
"step": 6000
},
{
"epoch": 0.25,
"learning_rate": 2.831457286432161e-05,
"loss": 1.6491,
"step": 6100
},
{
"epoch": 0.25,
"learning_rate": 2.8284422110552765e-05,
"loss": 1.6533,
"step": 6200
},
{
"epoch": 0.26,
"learning_rate": 2.825427135678392e-05,
"loss": 1.6501,
"step": 6300
},
{
"epoch": 0.26,
"learning_rate": 2.8224120603015075e-05,
"loss": 1.6463,
"step": 6400
},
{
"epoch": 0.27,
"learning_rate": 2.8193969849246234e-05,
"loss": 1.6341,
"step": 6500
},
{
"epoch": 0.27,
"learning_rate": 2.816381909547739e-05,
"loss": 1.6424,
"step": 6600
},
{
"epoch": 0.27,
"learning_rate": 2.813366834170854e-05,
"loss": 1.6399,
"step": 6700
},
{
"epoch": 0.28,
"learning_rate": 2.8103517587939697e-05,
"loss": 1.6362,
"step": 6800
},
{
"epoch": 0.28,
"learning_rate": 2.8073366834170855e-05,
"loss": 1.6357,
"step": 6900
},
{
"epoch": 0.29,
"learning_rate": 2.804321608040201e-05,
"loss": 1.6284,
"step": 7000
},
{
"epoch": 0.29,
"learning_rate": 2.8013065326633166e-05,
"loss": 1.6333,
"step": 7100
},
{
"epoch": 0.29,
"learning_rate": 2.798291457286432e-05,
"loss": 1.6414,
"step": 7200
},
{
"epoch": 0.3,
"learning_rate": 2.795276381909548e-05,
"loss": 1.6166,
"step": 7300
},
{
"epoch": 0.3,
"learning_rate": 2.7922613065326635e-05,
"loss": 1.6368,
"step": 7400
},
{
"epoch": 0.31,
"learning_rate": 2.789246231155779e-05,
"loss": 1.6313,
"step": 7500
},
{
"epoch": 0.31,
"learning_rate": 2.7862311557788945e-05,
"loss": 1.6241,
"step": 7600
},
{
"epoch": 0.31,
"learning_rate": 2.7832160804020104e-05,
"loss": 1.619,
"step": 7700
},
{
"epoch": 0.32,
"learning_rate": 2.7802010050251256e-05,
"loss": 1.6339,
"step": 7800
},
{
"epoch": 0.32,
"learning_rate": 2.777185929648241e-05,
"loss": 1.6152,
"step": 7900
},
{
"epoch": 0.33,
"learning_rate": 2.7741708542713567e-05,
"loss": 1.6253,
"step": 8000
},
{
"epoch": 0.33,
"eval_bleu": 80.00978050192599,
"eval_gen_len": 16.226,
"eval_loss": 1.5748662948608398,
"eval_meteor": 0.9021016869942787,
"eval_rouge1": 94.2752,
"eval_rouge2": 90.7004,
"eval_rougeL": 92.2246,
"eval_rougeLsum": 92.2489,
"eval_runtime": 25.2021,
"eval_samples_per_second": 19.84,
"eval_steps_per_second": 2.5,
"step": 8000
},
{
"epoch": 0.33,
"learning_rate": 2.7711557788944725e-05,
"loss": 1.6291,
"step": 8100
},
{
"epoch": 0.33,
"learning_rate": 2.768140703517588e-05,
"loss": 1.6288,
"step": 8200
},
{
"epoch": 0.34,
"learning_rate": 2.7651557788944726e-05,
"loss": 1.6158,
"step": 8300
},
{
"epoch": 0.34,
"learning_rate": 2.762140703517588e-05,
"loss": 1.6285,
"step": 8400
},
{
"epoch": 0.35,
"learning_rate": 2.7591256281407036e-05,
"loss": 1.6231,
"step": 8500
},
{
"epoch": 0.35,
"learning_rate": 2.756110552763819e-05,
"loss": 1.6237,
"step": 8600
},
{
"epoch": 0.36,
"learning_rate": 2.753095477386935e-05,
"loss": 1.6059,
"step": 8700
},
{
"epoch": 0.36,
"learning_rate": 2.7500804020100505e-05,
"loss": 1.6094,
"step": 8800
},
{
"epoch": 0.36,
"learning_rate": 2.7470954773869347e-05,
"loss": 1.6092,
"step": 8900
},
{
"epoch": 0.37,
"learning_rate": 2.744110552763819e-05,
"loss": 1.6162,
"step": 9000
},
{
"epoch": 0.37,
"learning_rate": 2.7410954773869348e-05,
"loss": 1.6059,
"step": 9100
},
{
"epoch": 0.38,
"learning_rate": 2.7380804020100503e-05,
"loss": 1.6135,
"step": 9200
},
{
"epoch": 0.38,
"learning_rate": 2.735065326633166e-05,
"loss": 1.6144,
"step": 9300
},
{
"epoch": 0.38,
"learning_rate": 2.7320804020100504e-05,
"loss": 1.6288,
"step": 9400
},
{
"epoch": 0.39,
"learning_rate": 2.729065326633166e-05,
"loss": 1.6098,
"step": 9500
},
{
"epoch": 0.39,
"learning_rate": 2.7260804020100504e-05,
"loss": 1.617,
"step": 9600
},
{
"epoch": 0.4,
"learning_rate": 2.723065326633166e-05,
"loss": 1.6079,
"step": 9700
},
{
"epoch": 0.4,
"learning_rate": 2.7200502512562815e-05,
"loss": 1.611,
"step": 9800
},
{
"epoch": 0.4,
"learning_rate": 2.7170351758793974e-05,
"loss": 1.5935,
"step": 9900
},
{
"epoch": 0.41,
"learning_rate": 2.7140201005025125e-05,
"loss": 1.5965,
"step": 10000
},
{
"epoch": 0.41,
"eval_bleu": 80.29588431741519,
"eval_gen_len": 16.244,
"eval_loss": 1.5651723146438599,
"eval_meteor": 0.9028810347439424,
"eval_rouge1": 94.3213,
"eval_rouge2": 90.869,
"eval_rougeL": 92.4221,
"eval_rougeLsum": 92.4429,
"eval_runtime": 25.551,
"eval_samples_per_second": 19.569,
"eval_steps_per_second": 2.466,
"step": 10000
},
{
"epoch": 0.41,
"learning_rate": 2.711035175879397e-05,
"loss": 1.611,
"step": 10100
},
{
"epoch": 0.42,
"learning_rate": 2.7080201005025126e-05,
"loss": 1.6066,
"step": 10200
},
{
"epoch": 0.42,
"learning_rate": 2.705005025125628e-05,
"loss": 1.6028,
"step": 10300
},
{
"epoch": 0.42,
"learning_rate": 2.7019899497487437e-05,
"loss": 1.6192,
"step": 10400
},
{
"epoch": 0.43,
"learning_rate": 2.6989748743718595e-05,
"loss": 1.6114,
"step": 10500
},
{
"epoch": 0.43,
"learning_rate": 2.695959798994975e-05,
"loss": 1.6043,
"step": 10600
},
{
"epoch": 0.44,
"learning_rate": 2.6929447236180906e-05,
"loss": 1.5949,
"step": 10700
},
{
"epoch": 0.44,
"learning_rate": 2.689929648241206e-05,
"loss": 1.6017,
"step": 10800
},
{
"epoch": 0.44,
"learning_rate": 2.6869145728643216e-05,
"loss": 1.6001,
"step": 10900
},
{
"epoch": 0.45,
"learning_rate": 2.683929648241206e-05,
"loss": 1.5983,
"step": 11000
},
{
"epoch": 0.45,
"learning_rate": 2.6809145728643213e-05,
"loss": 1.5914,
"step": 11100
},
{
"epoch": 0.46,
"learning_rate": 2.6778994974874372e-05,
"loss": 1.5997,
"step": 11200
},
{
"epoch": 0.46,
"learning_rate": 2.6748844221105527e-05,
"loss": 1.5962,
"step": 11300
},
{
"epoch": 0.47,
"learning_rate": 2.6718693467336683e-05,
"loss": 1.5946,
"step": 11400
},
{
"epoch": 0.47,
"learning_rate": 2.6688542713567838e-05,
"loss": 1.5969,
"step": 11500
},
{
"epoch": 0.47,
"learning_rate": 2.6658391959798997e-05,
"loss": 1.6132,
"step": 11600
},
{
"epoch": 0.48,
"learning_rate": 2.6628241206030152e-05,
"loss": 1.5893,
"step": 11700
},
{
"epoch": 0.48,
"learning_rate": 2.6598090452261307e-05,
"loss": 1.6123,
"step": 11800
},
{
"epoch": 0.49,
"learning_rate": 2.6567939698492462e-05,
"loss": 1.5975,
"step": 11900
},
{
"epoch": 0.49,
"learning_rate": 2.653778894472362e-05,
"loss": 1.5908,
"step": 12000
},
{
"epoch": 0.49,
"eval_bleu": 80.65979156040467,
"eval_gen_len": 16.216,
"eval_loss": 1.561901330947876,
"eval_meteor": 0.9045236466427484,
"eval_rouge1": 94.5279,
"eval_rouge2": 91.2374,
"eval_rougeL": 92.5949,
"eval_rougeLsum": 92.6157,
"eval_runtime": 25.5485,
"eval_samples_per_second": 19.571,
"eval_steps_per_second": 2.466,
"step": 12000
},
{
"epoch": 0.49,
"learning_rate": 2.6508241206030153e-05,
"loss": 1.6137,
"step": 12100
},
{
"epoch": 0.5,
"learning_rate": 2.6478090452261305e-05,
"loss": 2.1155,
"step": 12200
},
{
"epoch": 0.5,
"learning_rate": 2.644793969849246e-05,
"loss": 2.6991,
"step": 12300
},
{
"epoch": 0.51,
"learning_rate": 2.641778894472362e-05,
"loss": 2.8329,
"step": 12400
},
{
"epoch": 0.51,
"learning_rate": 2.6387638190954774e-05,
"loss": 2.8749,
"step": 12500
},
{
"epoch": 0.51,
"learning_rate": 2.635748743718593e-05,
"loss": 2.8538,
"step": 12600
},
{
"epoch": 0.52,
"learning_rate": 2.6327336683417085e-05,
"loss": 2.8509,
"step": 12700
},
{
"epoch": 0.52,
"learning_rate": 2.6297185929648243e-05,
"loss": 2.8037,
"step": 12800
},
{
"epoch": 0.53,
"learning_rate": 2.62670351758794e-05,
"loss": 2.8414,
"step": 12900
},
{
"epoch": 0.53,
"learning_rate": 2.6236884422110554e-05,
"loss": 2.8386,
"step": 13000
},
{
"epoch": 0.53,
"learning_rate": 2.620673366834171e-05,
"loss": 2.8152,
"step": 13100
},
{
"epoch": 0.54,
"learning_rate": 2.6176582914572868e-05,
"loss": 2.7805,
"step": 13200
},
{
"epoch": 0.54,
"learning_rate": 2.6146432160804023e-05,
"loss": 2.7591,
"step": 13300
},
{
"epoch": 0.55,
"learning_rate": 2.6116281407035175e-05,
"loss": 2.7748,
"step": 13400
},
{
"epoch": 0.55,
"learning_rate": 2.608613065326633e-05,
"loss": 2.773,
"step": 13500
},
{
"epoch": 0.56,
"learning_rate": 2.605597989949749e-05,
"loss": 2.7776,
"step": 13600
},
{
"epoch": 0.56,
"learning_rate": 2.602613065326633e-05,
"loss": 2.752,
"step": 13700
},
{
"epoch": 0.56,
"learning_rate": 2.599597989949749e-05,
"loss": 2.7383,
"step": 13800
},
{
"epoch": 0.57,
"learning_rate": 2.5965829145728645e-05,
"loss": 2.7375,
"step": 13900
},
{
"epoch": 0.57,
"learning_rate": 2.59356783919598e-05,
"loss": 2.7502,
"step": 14000
},
{
"epoch": 0.57,
"eval_bleu": 76.79295767834411,
"eval_gen_len": 16.252,
"eval_loss": 2.6775336265563965,
"eval_meteor": 0.890923129283697,
"eval_rouge1": 92.9876,
"eval_rouge2": 88.6491,
"eval_rougeL": 91.2913,
"eval_rougeLsum": 91.2855,
"eval_runtime": 25.3646,
"eval_samples_per_second": 19.712,
"eval_steps_per_second": 2.484,
"step": 14000
},
{
"epoch": 0.58,
"learning_rate": 2.5905527638190955e-05,
"loss": 2.7346,
"step": 14100
},
{
"epoch": 0.58,
"learning_rate": 2.5875376884422114e-05,
"loss": 2.7492,
"step": 14200
},
{
"epoch": 0.58,
"learning_rate": 2.584522613065327e-05,
"loss": 2.7273,
"step": 14300
},
{
"epoch": 0.59,
"learning_rate": 2.5815075376884424e-05,
"loss": 2.7279,
"step": 14400
},
{
"epoch": 0.59,
"learning_rate": 2.5784924623115576e-05,
"loss": 2.7241,
"step": 14500
},
{
"epoch": 0.6,
"learning_rate": 2.575477386934673e-05,
"loss": 2.7457,
"step": 14600
},
{
"epoch": 0.6,
"learning_rate": 2.572462311557789e-05,
"loss": 2.7347,
"step": 14700
},
{
"epoch": 0.6,
"learning_rate": 2.5694472361809045e-05,
"loss": 2.7168,
"step": 14800
},
{
"epoch": 0.61,
"learning_rate": 2.56643216080402e-05,
"loss": 2.7086,
"step": 14900
},
{
"epoch": 0.61,
"learning_rate": 2.5634170854271356e-05,
"loss": 2.7265,
"step": 15000
},
{
"epoch": 0.62,
"learning_rate": 2.5604020100502515e-05,
"loss": 2.7228,
"step": 15100
},
{
"epoch": 0.62,
"learning_rate": 2.557386934673367e-05,
"loss": 2.7089,
"step": 15200
},
{
"epoch": 0.62,
"learning_rate": 2.5543718592964825e-05,
"loss": 2.6962,
"step": 15300
},
{
"epoch": 0.63,
"learning_rate": 2.551356783919598e-05,
"loss": 2.7067,
"step": 15400
},
{
"epoch": 0.63,
"learning_rate": 2.548341708542714e-05,
"loss": 2.7016,
"step": 15500
},
{
"epoch": 0.64,
"learning_rate": 2.545326633165829e-05,
"loss": 2.6746,
"step": 15600
},
{
"epoch": 0.64,
"learning_rate": 2.5423115577889446e-05,
"loss": 2.6954,
"step": 15700
},
{
"epoch": 0.65,
"learning_rate": 2.53929648241206e-05,
"loss": 2.6972,
"step": 15800
},
{
"epoch": 0.65,
"learning_rate": 2.536281407035176e-05,
"loss": 2.6871,
"step": 15900
},
{
"epoch": 0.65,
"learning_rate": 2.5332663316582915e-05,
"loss": 2.7134,
"step": 16000
},
{
"epoch": 0.65,
"eval_bleu": 78.02775295125716,
"eval_gen_len": 16.242,
"eval_loss": 2.631150484085083,
"eval_meteor": 0.8935096956153317,
"eval_rouge1": 93.4121,
"eval_rouge2": 89.2356,
"eval_rougeL": 91.593,
"eval_rougeLsum": 91.6198,
"eval_runtime": 25.3474,
"eval_samples_per_second": 19.726,
"eval_steps_per_second": 2.485,
"step": 16000
},
{
"epoch": 0.66,
"learning_rate": 2.530251256281407e-05,
"loss": 2.7203,
"step": 16100
},
{
"epoch": 0.66,
"learning_rate": 2.5272361809045226e-05,
"loss": 2.7245,
"step": 16200
},
{
"epoch": 0.67,
"learning_rate": 2.5242211055276385e-05,
"loss": 2.7023,
"step": 16300
},
{
"epoch": 0.67,
"learning_rate": 2.521206030150754e-05,
"loss": 2.7224,
"step": 16400
},
{
"epoch": 0.67,
"learning_rate": 2.5181909547738695e-05,
"loss": 2.6802,
"step": 16500
},
{
"epoch": 0.68,
"learning_rate": 2.515175879396985e-05,
"loss": 2.6996,
"step": 16600
},
{
"epoch": 0.68,
"learning_rate": 2.5121608040201006e-05,
"loss": 2.681,
"step": 16700
},
{
"epoch": 0.69,
"learning_rate": 2.509145728643216e-05,
"loss": 2.6895,
"step": 16800
},
{
"epoch": 0.69,
"learning_rate": 2.5061306532663316e-05,
"loss": 2.698,
"step": 16900
},
{
"epoch": 0.69,
"learning_rate": 2.503115577889447e-05,
"loss": 2.6802,
"step": 17000
},
{
"epoch": 0.7,
"learning_rate": 2.5001005025125627e-05,
"loss": 2.6914,
"step": 17100
},
{
"epoch": 0.7,
"learning_rate": 2.4970854271356785e-05,
"loss": 2.7011,
"step": 17200
},
{
"epoch": 0.71,
"learning_rate": 2.494070351758794e-05,
"loss": 2.6659,
"step": 17300
},
{
"epoch": 0.71,
"learning_rate": 2.4910552763819096e-05,
"loss": 2.6905,
"step": 17400
},
{
"epoch": 0.71,
"learning_rate": 2.488040201005025e-05,
"loss": 2.6557,
"step": 17500
},
{
"epoch": 0.72,
"learning_rate": 2.485025125628141e-05,
"loss": 2.6648,
"step": 17600
},
{
"epoch": 0.72,
"learning_rate": 2.4820100502512565e-05,
"loss": 2.6954,
"step": 17700
},
{
"epoch": 0.73,
"learning_rate": 2.478994974874372e-05,
"loss": 2.682,
"step": 17800
},
{
"epoch": 0.73,
"learning_rate": 2.4759798994974872e-05,
"loss": 2.6767,
"step": 17900
},
{
"epoch": 0.73,
"learning_rate": 2.472964824120603e-05,
"loss": 2.6865,
"step": 18000
},
{
"epoch": 0.73,
"eval_bleu": 78.46041201212769,
"eval_gen_len": 16.228,
"eval_loss": 2.584897756576538,
"eval_meteor": 0.8947853545083831,
"eval_rouge1": 93.4999,
"eval_rouge2": 89.2925,
"eval_rougeL": 91.7008,
"eval_rougeLsum": 91.7289,
"eval_runtime": 24.8047,
"eval_samples_per_second": 20.157,
"eval_steps_per_second": 2.54,
"step": 18000
},
{
"epoch": 0.74,
"learning_rate": 2.4699497487437186e-05,
"loss": 2.6607,
"step": 18100
},
{
"epoch": 0.74,
"learning_rate": 2.466934673366834e-05,
"loss": 2.6692,
"step": 18200
},
{
"epoch": 0.75,
"learning_rate": 2.4639195979899497e-05,
"loss": 2.6633,
"step": 18300
},
{
"epoch": 0.75,
"learning_rate": 2.4609045226130655e-05,
"loss": 2.6655,
"step": 18400
},
{
"epoch": 0.76,
"learning_rate": 2.457889447236181e-05,
"loss": 2.6623,
"step": 18500
},
{
"epoch": 0.76,
"learning_rate": 2.4548743718592966e-05,
"loss": 2.6679,
"step": 18600
},
{
"epoch": 0.76,
"learning_rate": 2.451859296482412e-05,
"loss": 2.674,
"step": 18700
},
{
"epoch": 0.77,
"learning_rate": 2.448844221105528e-05,
"loss": 2.6719,
"step": 18800
},
{
"epoch": 0.77,
"learning_rate": 2.4458291457286435e-05,
"loss": 2.6832,
"step": 18900
},
{
"epoch": 0.78,
"learning_rate": 2.4428140703517587e-05,
"loss": 2.6776,
"step": 19000
},
{
"epoch": 0.78,
"learning_rate": 2.4397989949748742e-05,
"loss": 2.6682,
"step": 19100
},
{
"epoch": 0.78,
"learning_rate": 2.4367839195979898e-05,
"loss": 2.6956,
"step": 19200
},
{
"epoch": 0.79,
"learning_rate": 2.4337688442211056e-05,
"loss": 2.6586,
"step": 19300
},
{
"epoch": 0.79,
"learning_rate": 2.430753768844221e-05,
"loss": 2.6666,
"step": 19400
},
{
"epoch": 0.8,
"learning_rate": 2.4277386934673367e-05,
"loss": 2.6663,
"step": 19500
},
{
"epoch": 0.8,
"learning_rate": 2.4247236180904522e-05,
"loss": 2.6638,
"step": 19600
},
{
"epoch": 0.8,
"learning_rate": 2.421708542713568e-05,
"loss": 2.6112,
"step": 19700
},
{
"epoch": 0.81,
"learning_rate": 2.4186934673366836e-05,
"loss": 1.9824,
"step": 19800
},
{
"epoch": 0.81,
"learning_rate": 2.415708542713568e-05,
"loss": 1.7259,
"step": 19900
},
{
"epoch": 0.82,
"learning_rate": 2.4126934673366836e-05,
"loss": 2.2838,
"step": 20000
},
{
"epoch": 0.82,
"eval_bleu": 77.92153227866292,
"eval_gen_len": 16.232,
"eval_loss": 2.501798629760742,
"eval_meteor": 0.8944690987739373,
"eval_rouge1": 93.4893,
"eval_rouge2": 89.2497,
"eval_rougeL": 91.4623,
"eval_rougeLsum": 91.51,
"eval_runtime": 25.1905,
"eval_samples_per_second": 19.849,
"eval_steps_per_second": 2.501,
"step": 20000
},
{
"epoch": 0.82,
"learning_rate": 2.409678391959799e-05,
"loss": 2.6248,
"step": 20100
},
{
"epoch": 0.82,
"learning_rate": 2.4066633165829144e-05,
"loss": 2.6643,
"step": 20200
},
{
"epoch": 0.83,
"learning_rate": 2.4036482412060302e-05,
"loss": 2.642,
"step": 20300
},
{
"epoch": 0.83,
"learning_rate": 2.4006331658291458e-05,
"loss": 2.67,
"step": 20400
},
{
"epoch": 0.84,
"learning_rate": 2.3976180904522613e-05,
"loss": 2.6563,
"step": 20500
},
{
"epoch": 0.84,
"learning_rate": 2.3946030150753768e-05,
"loss": 2.6873,
"step": 20600
},
{
"epoch": 0.85,
"learning_rate": 2.3915879396984927e-05,
"loss": 2.65,
"step": 20700
},
{
"epoch": 0.85,
"learning_rate": 2.3885728643216082e-05,
"loss": 2.6635,
"step": 20800
},
{
"epoch": 0.85,
"learning_rate": 2.3855577889447237e-05,
"loss": 2.6443,
"step": 20900
},
{
"epoch": 0.86,
"learning_rate": 2.3825427135678393e-05,
"loss": 2.6504,
"step": 21000
},
{
"epoch": 0.86,
"learning_rate": 2.379527638190955e-05,
"loss": 2.6425,
"step": 21100
},
{
"epoch": 0.87,
"learning_rate": 2.3765125628140703e-05,
"loss": 2.6774,
"step": 21200
},
{
"epoch": 0.87,
"learning_rate": 2.373497487437186e-05,
"loss": 2.6575,
"step": 21300
},
{
"epoch": 0.87,
"learning_rate": 2.3704824120603014e-05,
"loss": 2.6542,
"step": 21400
},
{
"epoch": 0.88,
"learning_rate": 2.3674673366834172e-05,
"loss": 2.6508,
"step": 21500
},
{
"epoch": 0.88,
"learning_rate": 2.3644522613065328e-05,
"loss": 2.6648,
"step": 21600
},
{
"epoch": 0.89,
"learning_rate": 2.3614673366834173e-05,
"loss": 2.6623,
"step": 21700
},
{
"epoch": 0.89,
"learning_rate": 2.3584522613065328e-05,
"loss": 2.66,
"step": 21800
},
{
"epoch": 0.89,
"learning_rate": 2.3554371859296483e-05,
"loss": 2.6568,
"step": 21900
},
{
"epoch": 0.9,
"learning_rate": 2.352422110552764e-05,
"loss": 2.6591,
"step": 22000
},
{
"epoch": 0.9,
"eval_bleu": 79.49958192973479,
"eval_gen_len": 16.234,
"eval_loss": 2.5640199184417725,
"eval_meteor": 0.9025279208048376,
"eval_rouge1": 94.1823,
"eval_rouge2": 90.5552,
"eval_rougeL": 92.428,
"eval_rougeLsum": 92.4351,
"eval_runtime": 25.4062,
"eval_samples_per_second": 19.68,
"eval_steps_per_second": 2.48,
"step": 22000
},
{
"epoch": 0.9,
"learning_rate": 2.3494070351758794e-05,
"loss": 2.6622,
"step": 22100
},
{
"epoch": 0.91,
"learning_rate": 2.3463919597989953e-05,
"loss": 2.6638,
"step": 22200
},
{
"epoch": 0.91,
"learning_rate": 2.3433768844221108e-05,
"loss": 2.6521,
"step": 22300
},
{
"epoch": 0.91,
"learning_rate": 2.340361809045226e-05,
"loss": 2.657,
"step": 22400
},
{
"epoch": 0.92,
"learning_rate": 2.3373467336683415e-05,
"loss": 2.632,
"step": 22500
},
{
"epoch": 0.92,
"learning_rate": 2.3343316582914574e-05,
"loss": 2.6544,
"step": 22600
},
{
"epoch": 0.93,
"learning_rate": 2.331316582914573e-05,
"loss": 2.6429,
"step": 22700
},
{
"epoch": 0.93,
"learning_rate": 2.3283015075376884e-05,
"loss": 2.6534,
"step": 22800
},
{
"epoch": 0.93,
"learning_rate": 2.325286432160804e-05,
"loss": 2.6827,
"step": 22900
},
{
"epoch": 0.94,
"learning_rate": 2.3222713567839198e-05,
"loss": 2.6506,
"step": 23000
},
{
"epoch": 0.94,
"learning_rate": 2.3192562814070353e-05,
"loss": 2.6396,
"step": 23100
},
{
"epoch": 0.95,
"learning_rate": 2.316241206030151e-05,
"loss": 2.6777,
"step": 23200
},
{
"epoch": 0.95,
"learning_rate": 2.3132261306532664e-05,
"loss": 2.6548,
"step": 23300
},
{
"epoch": 0.96,
"learning_rate": 2.3102110552763823e-05,
"loss": 2.6735,
"step": 23400
},
{
"epoch": 0.96,
"learning_rate": 2.3071959798994974e-05,
"loss": 2.6713,
"step": 23500
},
{
"epoch": 0.96,
"learning_rate": 2.304180904522613e-05,
"loss": 2.6752,
"step": 23600
},
{
"epoch": 0.97,
"learning_rate": 2.3011658291457285e-05,
"loss": 2.6533,
"step": 23700
},
{
"epoch": 0.97,
"learning_rate": 2.2981507537688444e-05,
"loss": 2.6623,
"step": 23800
},
{
"epoch": 0.98,
"learning_rate": 2.29513567839196e-05,
"loss": 2.6596,
"step": 23900
},
{
"epoch": 0.98,
"learning_rate": 2.2921206030150754e-05,
"loss": 2.662,
"step": 24000
},
{
"epoch": 0.98,
"eval_bleu": 79.32255832423239,
"eval_gen_len": 16.256,
"eval_loss": 2.555393695831299,
"eval_meteor": 0.9010940538998614,
"eval_rouge1": 94.0079,
"eval_rouge2": 90.2547,
"eval_rougeL": 92.1916,
"eval_rougeLsum": 92.2075,
"eval_runtime": 25.4585,
"eval_samples_per_second": 19.64,
"eval_steps_per_second": 2.475,
"step": 24000
},
{
"epoch": 0.98,
"learning_rate": 2.289105527638191e-05,
"loss": 2.6431,
"step": 24100
},
{
"epoch": 0.99,
"learning_rate": 2.2860904522613068e-05,
"loss": 2.6577,
"step": 24200
},
{
"epoch": 0.99,
"learning_rate": 2.2830753768844223e-05,
"loss": 2.6452,
"step": 24300
},
{
"epoch": 1.0,
"learning_rate": 2.280060301507538e-05,
"loss": 2.6465,
"step": 24400
},
{
"epoch": 1.0,
"learning_rate": 2.2770452261306534e-05,
"loss": 2.6566,
"step": 24500
},
{
"epoch": 1.0,
"learning_rate": 2.2740301507537686e-05,
"loss": 2.6632,
"step": 24600
},
{
"epoch": 1.01,
"learning_rate": 2.2710150753768844e-05,
"loss": 2.6179,
"step": 24700
},
{
"epoch": 1.01,
"learning_rate": 2.268e-05,
"loss": 2.6355,
"step": 24800
},
{
"epoch": 1.02,
"learning_rate": 2.2649849246231155e-05,
"loss": 2.656,
"step": 24900
},
{
"epoch": 1.02,
"learning_rate": 2.261969849246231e-05,
"loss": 2.6485,
"step": 25000
},
{
"epoch": 1.02,
"learning_rate": 2.258954773869347e-05,
"loss": 2.6834,
"step": 25100
},
{
"epoch": 1.03,
"learning_rate": 2.2559396984924624e-05,
"loss": 2.6586,
"step": 25200
},
{
"epoch": 1.03,
"learning_rate": 2.252924623115578e-05,
"loss": 2.6245,
"step": 25300
},
{
"epoch": 1.04,
"learning_rate": 2.2499095477386935e-05,
"loss": 2.6439,
"step": 25400
},
{
"epoch": 1.04,
"learning_rate": 2.2468944723618093e-05,
"loss": 2.657,
"step": 25500
},
{
"epoch": 1.05,
"learning_rate": 2.243879396984925e-05,
"loss": 2.651,
"step": 25600
},
{
"epoch": 1.05,
"learning_rate": 2.2408643216080404e-05,
"loss": 2.5159,
"step": 25700
},
{
"epoch": 1.05,
"learning_rate": 2.2378492462311556e-05,
"loss": 1.8454,
"step": 25800
},
{
"epoch": 1.06,
"learning_rate": 2.2348341708542714e-05,
"loss": 1.7432,
"step": 25900
},
{
"epoch": 1.06,
"learning_rate": 2.2318793969849246e-05,
"loss": 1.8874,
"step": 26000
},
{
"epoch": 1.06,
"eval_bleu": 79.38237465310864,
"eval_gen_len": 16.272,
"eval_loss": 2.2466025352478027,
"eval_meteor": 0.9008152345684776,
"eval_rouge1": 94.0369,
"eval_rouge2": 90.1224,
"eval_rougeL": 91.9066,
"eval_rougeLsum": 91.944,
"eval_runtime": 24.8009,
"eval_samples_per_second": 20.161,
"eval_steps_per_second": 2.54,
"step": 26000
},
{
"epoch": 1.07,
"learning_rate": 2.22886432160804e-05,
"loss": 2.5847,
"step": 26100
},
{
"epoch": 1.07,
"learning_rate": 2.2258492462311557e-05,
"loss": 2.623,
"step": 26200
},
{
"epoch": 1.07,
"learning_rate": 2.2228341708542716e-05,
"loss": 2.665,
"step": 26300
},
{
"epoch": 1.08,
"learning_rate": 2.219819095477387e-05,
"loss": 2.647,
"step": 26400
},
{
"epoch": 1.08,
"learning_rate": 2.2168040201005026e-05,
"loss": 2.6653,
"step": 26500
},
{
"epoch": 1.09,
"learning_rate": 2.213788944723618e-05,
"loss": 2.6546,
"step": 26600
},
{
"epoch": 1.09,
"learning_rate": 2.210773869346734e-05,
"loss": 2.6471,
"step": 26700
},
{
"epoch": 1.09,
"learning_rate": 2.2077587939698495e-05,
"loss": 2.6871,
"step": 26800
},
{
"epoch": 1.1,
"learning_rate": 2.2047437185929647e-05,
"loss": 2.6601,
"step": 26900
},
{
"epoch": 1.1,
"learning_rate": 2.2017286432160802e-05,
"loss": 2.6541,
"step": 27000
},
{
"epoch": 1.11,
"learning_rate": 2.198713567839196e-05,
"loss": 2.6255,
"step": 27100
},
{
"epoch": 1.11,
"learning_rate": 2.1956984924623116e-05,
"loss": 2.6765,
"step": 27200
},
{
"epoch": 1.11,
"learning_rate": 2.192683417085427e-05,
"loss": 2.6448,
"step": 27300
},
{
"epoch": 1.12,
"learning_rate": 2.1896683417085427e-05,
"loss": 2.6667,
"step": 27400
},
{
"epoch": 1.12,
"learning_rate": 2.1866532663316582e-05,
"loss": 2.6544,
"step": 27500
},
{
"epoch": 1.13,
"learning_rate": 2.183638190954774e-05,
"loss": 2.6492,
"step": 27600
},
{
"epoch": 1.13,
"learning_rate": 2.1806231155778896e-05,
"loss": 2.6541,
"step": 27700
},
{
"epoch": 1.13,
"learning_rate": 2.177608040201005e-05,
"loss": 2.6601,
"step": 27800
},
{
"epoch": 1.14,
"learning_rate": 2.1745929648241207e-05,
"loss": 2.653,
"step": 27900
},
{
"epoch": 1.14,
"learning_rate": 2.1715778894472362e-05,
"loss": 2.6527,
"step": 28000
},
{
"epoch": 1.14,
"eval_bleu": 80.15532470386316,
"eval_gen_len": 16.238,
"eval_loss": 2.573094129562378,
"eval_meteor": 0.9042327142167804,
"eval_rouge1": 94.281,
"eval_rouge2": 90.8096,
"eval_rougeL": 92.559,
"eval_rougeLsum": 92.5681,
"eval_runtime": 25.5646,
"eval_samples_per_second": 19.558,
"eval_steps_per_second": 2.464,
"step": 28000
},
{
"epoch": 1.15,
"learning_rate": 2.1685628140703517e-05,
"loss": 2.6359,
"step": 28100
},
{
"epoch": 1.15,
"learning_rate": 2.1655477386934672e-05,
"loss": 2.6562,
"step": 28200
},
{
"epoch": 1.16,
"learning_rate": 2.1625326633165828e-05,
"loss": 2.6518,
"step": 28300
},
{
"epoch": 1.16,
"learning_rate": 2.1595175879396986e-05,
"loss": 2.6403,
"step": 28400
},
{
"epoch": 1.16,
"learning_rate": 2.156502512562814e-05,
"loss": 2.658,
"step": 28500
},
{
"epoch": 1.17,
"learning_rate": 2.1534874371859297e-05,
"loss": 2.6583,
"step": 28600
},
{
"epoch": 1.17,
"learning_rate": 2.1504723618090452e-05,
"loss": 2.6719,
"step": 28700
},
{
"epoch": 1.18,
"learning_rate": 2.147457286432161e-05,
"loss": 2.6451,
"step": 28800
},
{
"epoch": 1.18,
"learning_rate": 2.1444422110552766e-05,
"loss": 2.6701,
"step": 28900
},
{
"epoch": 1.18,
"learning_rate": 2.141427135678392e-05,
"loss": 2.6666,
"step": 29000
},
{
"epoch": 1.19,
"learning_rate": 2.1384120603015073e-05,
"loss": 2.6536,
"step": 29100
},
{
"epoch": 1.19,
"learning_rate": 2.1353969849246232e-05,
"loss": 2.6423,
"step": 29200
},
{
"epoch": 1.2,
"learning_rate": 2.1323819095477387e-05,
"loss": 2.6586,
"step": 29300
},
{
"epoch": 1.2,
"learning_rate": 2.1293668341708542e-05,
"loss": 2.6554,
"step": 29400
},
{
"epoch": 1.2,
"learning_rate": 2.1263517587939698e-05,
"loss": 2.6623,
"step": 29500
},
{
"epoch": 1.21,
"learning_rate": 2.1233366834170856e-05,
"loss": 2.6749,
"step": 29600
},
{
"epoch": 1.21,
"learning_rate": 2.120321608040201e-05,
"loss": 2.6446,
"step": 29700
},
{
"epoch": 1.22,
"learning_rate": 2.1173065326633167e-05,
"loss": 2.6764,
"step": 29800
},
{
"epoch": 1.22,
"learning_rate": 2.1142914572864322e-05,
"loss": 2.6522,
"step": 29900
},
{
"epoch": 1.22,
"learning_rate": 2.1112763819095477e-05,
"loss": 2.6618,
"step": 30000
},
{
"epoch": 1.22,
"eval_bleu": 79.74215483171986,
"eval_gen_len": 16.236,
"eval_loss": 2.4326839447021484,
"eval_meteor": 0.9044505502074225,
"eval_rouge1": 94.3369,
"eval_rouge2": 90.6586,
"eval_rougeL": 92.1744,
"eval_rougeLsum": 92.1895,
"eval_runtime": 25.4582,
"eval_samples_per_second": 19.64,
"eval_steps_per_second": 2.475,
"step": 30000
},
{
"epoch": 1.23,
"learning_rate": 2.1082613065326636e-05,
"loss": 1.977,
"step": 30100
},
{
"epoch": 1.23,
"learning_rate": 2.105246231155779e-05,
"loss": 1.731,
"step": 30200
},
{
"epoch": 1.24,
"learning_rate": 2.1022311557788943e-05,
"loss": 1.7164,
"step": 30300
},
{
"epoch": 1.24,
"learning_rate": 2.09921608040201e-05,
"loss": 1.6936,
"step": 30400
},
{
"epoch": 1.25,
"learning_rate": 2.0962010050251257e-05,
"loss": 1.6888,
"step": 30500
},
{
"epoch": 1.25,
"learning_rate": 2.0931859296482412e-05,
"loss": 1.7005,
"step": 30600
},
{
"epoch": 1.25,
"learning_rate": 2.0901708542713568e-05,
"loss": 1.6815,
"step": 30700
},
{
"epoch": 1.26,
"learning_rate": 2.0871557788944723e-05,
"loss": 1.6894,
"step": 30800
},
{
"epoch": 1.26,
"learning_rate": 2.0841708542713568e-05,
"loss": 1.6858,
"step": 30900
},
{
"epoch": 1.27,
"learning_rate": 2.0811557788944723e-05,
"loss": 2.3491,
"step": 31000
},
{
"epoch": 1.27,
"learning_rate": 2.0781407035175882e-05,
"loss": 2.6593,
"step": 31100
},
{
"epoch": 1.27,
"learning_rate": 2.0751256281407037e-05,
"loss": 2.6468,
"step": 31200
},
{
"epoch": 1.28,
"learning_rate": 2.0721407035175883e-05,
"loss": 2.6608,
"step": 31300
},
{
"epoch": 1.28,
"learning_rate": 2.069155778894472e-05,
"loss": 2.6587,
"step": 31400
},
{
"epoch": 1.29,
"learning_rate": 2.066140703517588e-05,
"loss": 2.6772,
"step": 31500
},
{
"epoch": 1.29,
"learning_rate": 2.0631256281407035e-05,
"loss": 2.6861,
"step": 31600
},
{
"epoch": 1.29,
"learning_rate": 2.060140703517588e-05,
"loss": 2.6909,
"step": 31700
},
{
"epoch": 1.3,
"learning_rate": 2.0571256281407036e-05,
"loss": 2.7178,
"step": 31800
},
{
"epoch": 1.3,
"learning_rate": 2.054110552763819e-05,
"loss": 2.7448,
"step": 31900
},
{
"epoch": 1.31,
"learning_rate": 2.0510954773869346e-05,
"loss": 2.7368,
"step": 32000
},
{
"epoch": 1.31,
"eval_bleu": 79.88380574425997,
"eval_gen_len": 16.228,
"eval_loss": 2.729801893234253,
"eval_meteor": 0.9013053514191987,
"eval_rouge1": 94.1293,
"eval_rouge2": 90.4052,
"eval_rougeL": 92.4025,
"eval_rougeLsum": 92.4078,
"eval_runtime": 25.6909,
"eval_samples_per_second": 19.462,
"eval_steps_per_second": 2.452,
"step": 32000
},
{
"epoch": 1.31,
"learning_rate": 2.048110552763819e-05,
"loss": 2.7485,
"step": 32100
},
{
"epoch": 1.31,
"learning_rate": 2.0450954773869347e-05,
"loss": 2.7678,
"step": 32200
},
{
"epoch": 1.32,
"learning_rate": 2.0420804020100506e-05,
"loss": 2.7518,
"step": 32300
},
{
"epoch": 1.32,
"learning_rate": 2.039065326633166e-05,
"loss": 2.7449,
"step": 32400
},
{
"epoch": 1.33,
"learning_rate": 2.0360502512562813e-05,
"loss": 2.7359,
"step": 32500
},
{
"epoch": 1.33,
"learning_rate": 2.0330351758793968e-05,
"loss": 2.7379,
"step": 32600
},
{
"epoch": 1.33,
"learning_rate": 2.0300201005025127e-05,
"loss": 2.743,
"step": 32700
},
{
"epoch": 1.34,
"learning_rate": 2.0270050251256282e-05,
"loss": 2.73,
"step": 32800
},
{
"epoch": 1.34,
"learning_rate": 2.0240201005025127e-05,
"loss": 2.7259,
"step": 32900
},
{
"epoch": 1.35,
"learning_rate": 2.0210050251256282e-05,
"loss": 2.6952,
"step": 33000
},
{
"epoch": 1.35,
"learning_rate": 2.0179899497487438e-05,
"loss": 2.6952,
"step": 33100
},
{
"epoch": 1.36,
"learning_rate": 2.0149748743718593e-05,
"loss": 2.6855,
"step": 33200
},
{
"epoch": 1.36,
"learning_rate": 2.011959798994975e-05,
"loss": 2.7112,
"step": 33300
},
{
"epoch": 1.36,
"learning_rate": 2.0089447236180907e-05,
"loss": 2.6988,
"step": 33400
},
{
"epoch": 1.37,
"learning_rate": 2.0059296482412062e-05,
"loss": 2.6994,
"step": 33500
},
{
"epoch": 1.37,
"learning_rate": 2.0029145728643214e-05,
"loss": 2.6774,
"step": 33600
},
{
"epoch": 1.38,
"learning_rate": 1.9998994974874373e-05,
"loss": 2.6856,
"step": 33700
},
{
"epoch": 1.38,
"learning_rate": 1.9968844221105528e-05,
"loss": 2.6875,
"step": 33800
},
{
"epoch": 1.38,
"learning_rate": 1.9938693467336683e-05,
"loss": 2.6905,
"step": 33900
},
{
"epoch": 1.39,
"learning_rate": 1.990854271356784e-05,
"loss": 2.6858,
"step": 34000
},
{
"epoch": 1.39,
"eval_bleu": 80.43692634867155,
"eval_gen_len": 16.234,
"eval_loss": 2.6981565952301025,
"eval_meteor": 0.9051906006959359,
"eval_rouge1": 94.4532,
"eval_rouge2": 91.0106,
"eval_rougeL": 92.3314,
"eval_rougeLsum": 92.3438,
"eval_runtime": 24.9557,
"eval_samples_per_second": 20.035,
"eval_steps_per_second": 2.524,
"step": 34000
},
{
"epoch": 1.39,
"learning_rate": 1.9878391959798994e-05,
"loss": 2.6896,
"step": 34100
},
{
"epoch": 1.4,
"learning_rate": 1.9848241206030152e-05,
"loss": 2.688,
"step": 34200
},
{
"epoch": 1.4,
"learning_rate": 1.9818090452261308e-05,
"loss": 2.6817,
"step": 34300
},
{
"epoch": 1.4,
"learning_rate": 1.9787939698492463e-05,
"loss": 2.6833,
"step": 34400
},
{
"epoch": 1.41,
"learning_rate": 1.9758090452261308e-05,
"loss": 2.6626,
"step": 34500
},
{
"epoch": 1.41,
"learning_rate": 1.9727939698492464e-05,
"loss": 2.6504,
"step": 34600
},
{
"epoch": 1.42,
"learning_rate": 1.9697788944723615e-05,
"loss": 2.6603,
"step": 34700
},
{
"epoch": 1.42,
"learning_rate": 1.9667638190954774e-05,
"loss": 2.6381,
"step": 34800
},
{
"epoch": 1.42,
"learning_rate": 1.963748743718593e-05,
"loss": 2.6358,
"step": 34900
},
{
"epoch": 1.43,
"learning_rate": 1.9607336683417085e-05,
"loss": 2.6053,
"step": 35000
},
{
"epoch": 1.43,
"learning_rate": 1.957718592964824e-05,
"loss": 2.6376,
"step": 35100
},
{
"epoch": 1.44,
"learning_rate": 1.95470351758794e-05,
"loss": 2.619,
"step": 35200
},
{
"epoch": 1.44,
"learning_rate": 1.9516884422110554e-05,
"loss": 2.6132,
"step": 35300
},
{
"epoch": 1.45,
"learning_rate": 1.948673366834171e-05,
"loss": 2.6241,
"step": 35400
},
{
"epoch": 1.45,
"learning_rate": 1.9456582914572864e-05,
"loss": 2.6128,
"step": 35500
},
{
"epoch": 1.45,
"learning_rate": 1.9426432160804023e-05,
"loss": 2.6169,
"step": 35600
},
{
"epoch": 1.46,
"learning_rate": 1.9396281407035178e-05,
"loss": 2.6206,
"step": 35700
},
{
"epoch": 1.46,
"learning_rate": 1.936613065326633e-05,
"loss": 2.5968,
"step": 35800
},
{
"epoch": 1.47,
"learning_rate": 1.9335979899497485e-05,
"loss": 2.6079,
"step": 35900
},
{
"epoch": 1.47,
"learning_rate": 1.9305829145728644e-05,
"loss": 2.6236,
"step": 36000
},
{
"epoch": 1.47,
"eval_bleu": 80.05686051541119,
"eval_gen_len": 16.246,
"eval_loss": 2.579113245010376,
"eval_meteor": 0.9032701849367213,
"eval_rouge1": 94.3329,
"eval_rouge2": 90.4972,
"eval_rougeL": 92.1838,
"eval_rougeLsum": 92.1803,
"eval_runtime": 25.6081,
"eval_samples_per_second": 19.525,
"eval_steps_per_second": 2.46,
"step": 36000
},
{
"epoch": 1.47,
"learning_rate": 1.92756783919598e-05,
"loss": 2.6189,
"step": 36100
},
{
"epoch": 1.48,
"learning_rate": 1.9245527638190955e-05,
"loss": 2.6045,
"step": 36200
},
{
"epoch": 1.48,
"learning_rate": 1.921537688442211e-05,
"loss": 2.6162,
"step": 36300
},
{
"epoch": 1.49,
"learning_rate": 1.918522613065327e-05,
"loss": 2.6158,
"step": 36400
},
{
"epoch": 1.49,
"learning_rate": 1.9155075376884424e-05,
"loss": 2.6142,
"step": 36500
},
{
"epoch": 1.49,
"learning_rate": 1.912492462311558e-05,
"loss": 2.6383,
"step": 36600
},
{
"epoch": 1.5,
"learning_rate": 1.9095075376884424e-05,
"loss": 2.6594,
"step": 36700
},
{
"epoch": 1.5,
"learning_rate": 1.906522613065327e-05,
"loss": 2.6318,
"step": 36800
},
{
"epoch": 1.51,
"learning_rate": 1.9035075376884425e-05,
"loss": 2.6514,
"step": 36900
},
{
"epoch": 1.51,
"learning_rate": 1.9004924623115577e-05,
"loss": 2.6613,
"step": 37000
},
{
"epoch": 1.51,
"learning_rate": 1.8974773869346732e-05,
"loss": 2.6357,
"step": 37100
},
{
"epoch": 1.52,
"learning_rate": 1.894462311557789e-05,
"loss": 2.6453,
"step": 37200
},
{
"epoch": 1.52,
"learning_rate": 1.8914773869346733e-05,
"loss": 2.6371,
"step": 37300
},
{
"epoch": 1.53,
"learning_rate": 1.888462311557789e-05,
"loss": 2.6155,
"step": 37400
},
{
"epoch": 1.53,
"learning_rate": 1.8854773869346733e-05,
"loss": 2.6097,
"step": 37500
},
{
"epoch": 1.53,
"learning_rate": 1.882522613065327e-05,
"loss": 2.6346,
"step": 37600
},
{
"epoch": 1.54,
"learning_rate": 1.8795075376884424e-05,
"loss": 2.6374,
"step": 37700
},
{
"epoch": 1.54,
"learning_rate": 1.876492462311558e-05,
"loss": 2.6266,
"step": 37800
},
{
"epoch": 1.55,
"learning_rate": 1.8734773869346734e-05,
"loss": 2.6392,
"step": 37900
},
{
"epoch": 1.55,
"learning_rate": 1.870462311557789e-05,
"loss": 2.6279,
"step": 38000
},
{
"epoch": 1.55,
"eval_bleu": 80.26404550464542,
"eval_gen_len": 16.212,
"eval_loss": 2.613689422607422,
"eval_meteor": 0.9019056042294404,
"eval_rouge1": 94.1522,
"eval_rouge2": 90.7421,
"eval_rougeL": 92.4197,
"eval_rougeLsum": 92.443,
"eval_runtime": 25.3778,
"eval_samples_per_second": 19.702,
"eval_steps_per_second": 2.482,
"step": 38000
}
],
"max_steps": 100000,
"num_train_epochs": 5,
"total_flos": 1.8512320510138778e+17,
"trial_name": null,
"trial_params": null
}