|
{ |
|
"best_metric": 92.6157, |
|
"best_model_checkpoint": "qa2claim-base/checkpoint-12000", |
|
"epoch": 1.5512736773350753, |
|
"global_step": 38000, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.0, |
|
"learning_rate": 0.0, |
|
"loss": 3.2255, |
|
"step": 1 |
|
}, |
|
{ |
|
"epoch": 0.0, |
|
"learning_rate": 5.8800000000000005e-06, |
|
"loss": 3.2465, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 1.182e-05, |
|
"loss": 2.6401, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 1.782e-05, |
|
"loss": 2.2893, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 2.3820000000000002e-05, |
|
"loss": 2.1163, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 2.982e-05, |
|
"loss": 2.0047, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 2.997105527638191e-05, |
|
"loss": 1.9368, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"learning_rate": 2.9940904522613068e-05, |
|
"loss": 1.9029, |
|
"step": 700 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"learning_rate": 2.9910753768844223e-05, |
|
"loss": 1.8627, |
|
"step": 800 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"learning_rate": 2.988090452261307e-05, |
|
"loss": 1.8403, |
|
"step": 900 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"learning_rate": 2.9850753768844224e-05, |
|
"loss": 1.835, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"learning_rate": 2.9820603015075376e-05, |
|
"loss": 1.8027, |
|
"step": 1100 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"learning_rate": 2.979045226130653e-05, |
|
"loss": 1.8176, |
|
"step": 1200 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"learning_rate": 2.976030150753769e-05, |
|
"loss": 1.7983, |
|
"step": 1300 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"learning_rate": 2.9730150753768845e-05, |
|
"loss": 1.778, |
|
"step": 1400 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"learning_rate": 2.97e-05, |
|
"loss": 1.7764, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"learning_rate": 2.9669849246231156e-05, |
|
"loss": 1.7599, |
|
"step": 1600 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"learning_rate": 2.9639698492462314e-05, |
|
"loss": 1.7572, |
|
"step": 1700 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"learning_rate": 2.960954773869347e-05, |
|
"loss": 1.7773, |
|
"step": 1800 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"learning_rate": 2.9579396984924625e-05, |
|
"loss": 1.7301, |
|
"step": 1900 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"learning_rate": 2.954924623115578e-05, |
|
"loss": 1.7536, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"eval_bleu": 75.15293608723576, |
|
"eval_gen_len": 16.288, |
|
"eval_loss": 1.6770071983337402, |
|
"eval_meteor": 0.8848959776023978, |
|
"eval_rouge1": 92.6205, |
|
"eval_rouge2": 86.7136, |
|
"eval_rougeL": 89.2742, |
|
"eval_rougeLsum": 89.2914, |
|
"eval_runtime": 29.1055, |
|
"eval_samples_per_second": 17.179, |
|
"eval_steps_per_second": 2.165, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 0.09, |
|
"learning_rate": 2.951909547738694e-05, |
|
"loss": 1.7313, |
|
"step": 2100 |
|
}, |
|
{ |
|
"epoch": 0.09, |
|
"learning_rate": 2.948894472361809e-05, |
|
"loss": 1.7308, |
|
"step": 2200 |
|
}, |
|
{ |
|
"epoch": 0.09, |
|
"learning_rate": 2.9458793969849246e-05, |
|
"loss": 1.7186, |
|
"step": 2300 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"learning_rate": 2.94286432160804e-05, |
|
"loss": 1.7262, |
|
"step": 2400 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"learning_rate": 2.9398492462311556e-05, |
|
"loss": 1.702, |
|
"step": 2500 |
|
}, |
|
{ |
|
"epoch": 0.11, |
|
"learning_rate": 2.9368341708542715e-05, |
|
"loss": 1.7107, |
|
"step": 2600 |
|
}, |
|
{ |
|
"epoch": 0.11, |
|
"learning_rate": 2.933819095477387e-05, |
|
"loss": 1.7187, |
|
"step": 2700 |
|
}, |
|
{ |
|
"epoch": 0.11, |
|
"learning_rate": 2.9308040201005025e-05, |
|
"loss": 1.707, |
|
"step": 2800 |
|
}, |
|
{ |
|
"epoch": 0.12, |
|
"learning_rate": 2.927788944723618e-05, |
|
"loss": 1.7019, |
|
"step": 2900 |
|
}, |
|
{ |
|
"epoch": 0.12, |
|
"learning_rate": 2.924773869346734e-05, |
|
"loss": 1.6977, |
|
"step": 3000 |
|
}, |
|
{ |
|
"epoch": 0.13, |
|
"learning_rate": 2.9217587939698495e-05, |
|
"loss": 1.7047, |
|
"step": 3100 |
|
}, |
|
{ |
|
"epoch": 0.13, |
|
"learning_rate": 2.918743718592965e-05, |
|
"loss": 1.6943, |
|
"step": 3200 |
|
}, |
|
{ |
|
"epoch": 0.13, |
|
"learning_rate": 2.9157286432160802e-05, |
|
"loss": 1.6795, |
|
"step": 3300 |
|
}, |
|
{ |
|
"epoch": 0.14, |
|
"learning_rate": 2.912713567839196e-05, |
|
"loss": 1.6781, |
|
"step": 3400 |
|
}, |
|
{ |
|
"epoch": 0.14, |
|
"learning_rate": 2.9096984924623116e-05, |
|
"loss": 1.7098, |
|
"step": 3500 |
|
}, |
|
{ |
|
"epoch": 0.15, |
|
"learning_rate": 2.906683417085427e-05, |
|
"loss": 1.6774, |
|
"step": 3600 |
|
}, |
|
{ |
|
"epoch": 0.15, |
|
"learning_rate": 2.9036683417085426e-05, |
|
"loss": 1.6911, |
|
"step": 3700 |
|
}, |
|
{ |
|
"epoch": 0.16, |
|
"learning_rate": 2.9006532663316585e-05, |
|
"loss": 1.6933, |
|
"step": 3800 |
|
}, |
|
{ |
|
"epoch": 0.16, |
|
"learning_rate": 2.8976683417085427e-05, |
|
"loss": 1.6742, |
|
"step": 3900 |
|
}, |
|
{ |
|
"epoch": 0.16, |
|
"learning_rate": 2.8946532663316586e-05, |
|
"loss": 1.6804, |
|
"step": 4000 |
|
}, |
|
{ |
|
"epoch": 0.16, |
|
"eval_bleu": 77.38072312145013, |
|
"eval_gen_len": 16.306, |
|
"eval_loss": 1.6234748363494873, |
|
"eval_meteor": 0.892630886417938, |
|
"eval_rouge1": 93.2399, |
|
"eval_rouge2": 88.3086, |
|
"eval_rougeL": 90.3444, |
|
"eval_rougeLsum": 90.3735, |
|
"eval_runtime": 25.4106, |
|
"eval_samples_per_second": 19.677, |
|
"eval_steps_per_second": 2.479, |
|
"step": 4000 |
|
}, |
|
{ |
|
"epoch": 0.17, |
|
"learning_rate": 2.891638190954774e-05, |
|
"loss": 1.673, |
|
"step": 4100 |
|
}, |
|
{ |
|
"epoch": 0.17, |
|
"learning_rate": 2.8886231155778896e-05, |
|
"loss": 1.6754, |
|
"step": 4200 |
|
}, |
|
{ |
|
"epoch": 0.18, |
|
"learning_rate": 2.885608040201005e-05, |
|
"loss": 1.6668, |
|
"step": 4300 |
|
}, |
|
{ |
|
"epoch": 0.18, |
|
"learning_rate": 2.8825929648241207e-05, |
|
"loss": 1.6759, |
|
"step": 4400 |
|
}, |
|
{ |
|
"epoch": 0.18, |
|
"learning_rate": 2.8795778894472362e-05, |
|
"loss": 1.6526, |
|
"step": 4500 |
|
}, |
|
{ |
|
"epoch": 0.19, |
|
"learning_rate": 2.8765628140703517e-05, |
|
"loss": 1.6654, |
|
"step": 4600 |
|
}, |
|
{ |
|
"epoch": 0.19, |
|
"learning_rate": 2.8735477386934672e-05, |
|
"loss": 1.6466, |
|
"step": 4700 |
|
}, |
|
{ |
|
"epoch": 0.2, |
|
"learning_rate": 2.870532663316583e-05, |
|
"loss": 1.6789, |
|
"step": 4800 |
|
}, |
|
{ |
|
"epoch": 0.2, |
|
"learning_rate": 2.8675175879396986e-05, |
|
"loss": 1.6583, |
|
"step": 4900 |
|
}, |
|
{ |
|
"epoch": 0.2, |
|
"learning_rate": 2.864502512562814e-05, |
|
"loss": 1.6639, |
|
"step": 5000 |
|
}, |
|
{ |
|
"epoch": 0.21, |
|
"learning_rate": 2.8614874371859297e-05, |
|
"loss": 1.6464, |
|
"step": 5100 |
|
}, |
|
{ |
|
"epoch": 0.21, |
|
"learning_rate": 2.8584723618090452e-05, |
|
"loss": 1.6654, |
|
"step": 5200 |
|
}, |
|
{ |
|
"epoch": 0.22, |
|
"learning_rate": 2.8554874371859297e-05, |
|
"loss": 1.6364, |
|
"step": 5300 |
|
}, |
|
{ |
|
"epoch": 0.22, |
|
"learning_rate": 2.8525025125628143e-05, |
|
"loss": 1.6325, |
|
"step": 5400 |
|
}, |
|
{ |
|
"epoch": 0.22, |
|
"learning_rate": 2.8494874371859298e-05, |
|
"loss": 1.6496, |
|
"step": 5500 |
|
}, |
|
{ |
|
"epoch": 0.23, |
|
"learning_rate": 2.846502512562814e-05, |
|
"loss": 1.6542, |
|
"step": 5600 |
|
}, |
|
{ |
|
"epoch": 0.23, |
|
"learning_rate": 2.8434874371859295e-05, |
|
"loss": 1.6418, |
|
"step": 5700 |
|
}, |
|
{ |
|
"epoch": 0.24, |
|
"learning_rate": 2.8404723618090454e-05, |
|
"loss": 1.6421, |
|
"step": 5800 |
|
}, |
|
{ |
|
"epoch": 0.24, |
|
"learning_rate": 2.8374874371859296e-05, |
|
"loss": 1.6427, |
|
"step": 5900 |
|
}, |
|
{ |
|
"epoch": 0.24, |
|
"learning_rate": 2.8344723618090454e-05, |
|
"loss": 1.6423, |
|
"step": 6000 |
|
}, |
|
{ |
|
"epoch": 0.24, |
|
"eval_bleu": 78.69642597526826, |
|
"eval_gen_len": 16.314, |
|
"eval_loss": 1.593881607055664, |
|
"eval_meteor": 0.8979075563574866, |
|
"eval_rouge1": 93.7314, |
|
"eval_rouge2": 89.4698, |
|
"eval_rougeL": 91.3989, |
|
"eval_rougeLsum": 91.4076, |
|
"eval_runtime": 25.6849, |
|
"eval_samples_per_second": 19.467, |
|
"eval_steps_per_second": 2.453, |
|
"step": 6000 |
|
}, |
|
{ |
|
"epoch": 0.25, |
|
"learning_rate": 2.831457286432161e-05, |
|
"loss": 1.6491, |
|
"step": 6100 |
|
}, |
|
{ |
|
"epoch": 0.25, |
|
"learning_rate": 2.8284422110552765e-05, |
|
"loss": 1.6533, |
|
"step": 6200 |
|
}, |
|
{ |
|
"epoch": 0.26, |
|
"learning_rate": 2.825427135678392e-05, |
|
"loss": 1.6501, |
|
"step": 6300 |
|
}, |
|
{ |
|
"epoch": 0.26, |
|
"learning_rate": 2.8224120603015075e-05, |
|
"loss": 1.6463, |
|
"step": 6400 |
|
}, |
|
{ |
|
"epoch": 0.27, |
|
"learning_rate": 2.8193969849246234e-05, |
|
"loss": 1.6341, |
|
"step": 6500 |
|
}, |
|
{ |
|
"epoch": 0.27, |
|
"learning_rate": 2.816381909547739e-05, |
|
"loss": 1.6424, |
|
"step": 6600 |
|
}, |
|
{ |
|
"epoch": 0.27, |
|
"learning_rate": 2.813366834170854e-05, |
|
"loss": 1.6399, |
|
"step": 6700 |
|
}, |
|
{ |
|
"epoch": 0.28, |
|
"learning_rate": 2.8103517587939697e-05, |
|
"loss": 1.6362, |
|
"step": 6800 |
|
}, |
|
{ |
|
"epoch": 0.28, |
|
"learning_rate": 2.8073366834170855e-05, |
|
"loss": 1.6357, |
|
"step": 6900 |
|
}, |
|
{ |
|
"epoch": 0.29, |
|
"learning_rate": 2.804321608040201e-05, |
|
"loss": 1.6284, |
|
"step": 7000 |
|
}, |
|
{ |
|
"epoch": 0.29, |
|
"learning_rate": 2.8013065326633166e-05, |
|
"loss": 1.6333, |
|
"step": 7100 |
|
}, |
|
{ |
|
"epoch": 0.29, |
|
"learning_rate": 2.798291457286432e-05, |
|
"loss": 1.6414, |
|
"step": 7200 |
|
}, |
|
{ |
|
"epoch": 0.3, |
|
"learning_rate": 2.795276381909548e-05, |
|
"loss": 1.6166, |
|
"step": 7300 |
|
}, |
|
{ |
|
"epoch": 0.3, |
|
"learning_rate": 2.7922613065326635e-05, |
|
"loss": 1.6368, |
|
"step": 7400 |
|
}, |
|
{ |
|
"epoch": 0.31, |
|
"learning_rate": 2.789246231155779e-05, |
|
"loss": 1.6313, |
|
"step": 7500 |
|
}, |
|
{ |
|
"epoch": 0.31, |
|
"learning_rate": 2.7862311557788945e-05, |
|
"loss": 1.6241, |
|
"step": 7600 |
|
}, |
|
{ |
|
"epoch": 0.31, |
|
"learning_rate": 2.7832160804020104e-05, |
|
"loss": 1.619, |
|
"step": 7700 |
|
}, |
|
{ |
|
"epoch": 0.32, |
|
"learning_rate": 2.7802010050251256e-05, |
|
"loss": 1.6339, |
|
"step": 7800 |
|
}, |
|
{ |
|
"epoch": 0.32, |
|
"learning_rate": 2.777185929648241e-05, |
|
"loss": 1.6152, |
|
"step": 7900 |
|
}, |
|
{ |
|
"epoch": 0.33, |
|
"learning_rate": 2.7741708542713567e-05, |
|
"loss": 1.6253, |
|
"step": 8000 |
|
}, |
|
{ |
|
"epoch": 0.33, |
|
"eval_bleu": 80.00978050192599, |
|
"eval_gen_len": 16.226, |
|
"eval_loss": 1.5748662948608398, |
|
"eval_meteor": 0.9021016869942787, |
|
"eval_rouge1": 94.2752, |
|
"eval_rouge2": 90.7004, |
|
"eval_rougeL": 92.2246, |
|
"eval_rougeLsum": 92.2489, |
|
"eval_runtime": 25.2021, |
|
"eval_samples_per_second": 19.84, |
|
"eval_steps_per_second": 2.5, |
|
"step": 8000 |
|
}, |
|
{ |
|
"epoch": 0.33, |
|
"learning_rate": 2.7711557788944725e-05, |
|
"loss": 1.6291, |
|
"step": 8100 |
|
}, |
|
{ |
|
"epoch": 0.33, |
|
"learning_rate": 2.768140703517588e-05, |
|
"loss": 1.6288, |
|
"step": 8200 |
|
}, |
|
{ |
|
"epoch": 0.34, |
|
"learning_rate": 2.7651557788944726e-05, |
|
"loss": 1.6158, |
|
"step": 8300 |
|
}, |
|
{ |
|
"epoch": 0.34, |
|
"learning_rate": 2.762140703517588e-05, |
|
"loss": 1.6285, |
|
"step": 8400 |
|
}, |
|
{ |
|
"epoch": 0.35, |
|
"learning_rate": 2.7591256281407036e-05, |
|
"loss": 1.6231, |
|
"step": 8500 |
|
}, |
|
{ |
|
"epoch": 0.35, |
|
"learning_rate": 2.756110552763819e-05, |
|
"loss": 1.6237, |
|
"step": 8600 |
|
}, |
|
{ |
|
"epoch": 0.36, |
|
"learning_rate": 2.753095477386935e-05, |
|
"loss": 1.6059, |
|
"step": 8700 |
|
}, |
|
{ |
|
"epoch": 0.36, |
|
"learning_rate": 2.7500804020100505e-05, |
|
"loss": 1.6094, |
|
"step": 8800 |
|
}, |
|
{ |
|
"epoch": 0.36, |
|
"learning_rate": 2.7470954773869347e-05, |
|
"loss": 1.6092, |
|
"step": 8900 |
|
}, |
|
{ |
|
"epoch": 0.37, |
|
"learning_rate": 2.744110552763819e-05, |
|
"loss": 1.6162, |
|
"step": 9000 |
|
}, |
|
{ |
|
"epoch": 0.37, |
|
"learning_rate": 2.7410954773869348e-05, |
|
"loss": 1.6059, |
|
"step": 9100 |
|
}, |
|
{ |
|
"epoch": 0.38, |
|
"learning_rate": 2.7380804020100503e-05, |
|
"loss": 1.6135, |
|
"step": 9200 |
|
}, |
|
{ |
|
"epoch": 0.38, |
|
"learning_rate": 2.735065326633166e-05, |
|
"loss": 1.6144, |
|
"step": 9300 |
|
}, |
|
{ |
|
"epoch": 0.38, |
|
"learning_rate": 2.7320804020100504e-05, |
|
"loss": 1.6288, |
|
"step": 9400 |
|
}, |
|
{ |
|
"epoch": 0.39, |
|
"learning_rate": 2.729065326633166e-05, |
|
"loss": 1.6098, |
|
"step": 9500 |
|
}, |
|
{ |
|
"epoch": 0.39, |
|
"learning_rate": 2.7260804020100504e-05, |
|
"loss": 1.617, |
|
"step": 9600 |
|
}, |
|
{ |
|
"epoch": 0.4, |
|
"learning_rate": 2.723065326633166e-05, |
|
"loss": 1.6079, |
|
"step": 9700 |
|
}, |
|
{ |
|
"epoch": 0.4, |
|
"learning_rate": 2.7200502512562815e-05, |
|
"loss": 1.611, |
|
"step": 9800 |
|
}, |
|
{ |
|
"epoch": 0.4, |
|
"learning_rate": 2.7170351758793974e-05, |
|
"loss": 1.5935, |
|
"step": 9900 |
|
}, |
|
{ |
|
"epoch": 0.41, |
|
"learning_rate": 2.7140201005025125e-05, |
|
"loss": 1.5965, |
|
"step": 10000 |
|
}, |
|
{ |
|
"epoch": 0.41, |
|
"eval_bleu": 80.29588431741519, |
|
"eval_gen_len": 16.244, |
|
"eval_loss": 1.5651723146438599, |
|
"eval_meteor": 0.9028810347439424, |
|
"eval_rouge1": 94.3213, |
|
"eval_rouge2": 90.869, |
|
"eval_rougeL": 92.4221, |
|
"eval_rougeLsum": 92.4429, |
|
"eval_runtime": 25.551, |
|
"eval_samples_per_second": 19.569, |
|
"eval_steps_per_second": 2.466, |
|
"step": 10000 |
|
}, |
|
{ |
|
"epoch": 0.41, |
|
"learning_rate": 2.711035175879397e-05, |
|
"loss": 1.611, |
|
"step": 10100 |
|
}, |
|
{ |
|
"epoch": 0.42, |
|
"learning_rate": 2.7080201005025126e-05, |
|
"loss": 1.6066, |
|
"step": 10200 |
|
}, |
|
{ |
|
"epoch": 0.42, |
|
"learning_rate": 2.705005025125628e-05, |
|
"loss": 1.6028, |
|
"step": 10300 |
|
}, |
|
{ |
|
"epoch": 0.42, |
|
"learning_rate": 2.7019899497487437e-05, |
|
"loss": 1.6192, |
|
"step": 10400 |
|
}, |
|
{ |
|
"epoch": 0.43, |
|
"learning_rate": 2.6989748743718595e-05, |
|
"loss": 1.6114, |
|
"step": 10500 |
|
}, |
|
{ |
|
"epoch": 0.43, |
|
"learning_rate": 2.695959798994975e-05, |
|
"loss": 1.6043, |
|
"step": 10600 |
|
}, |
|
{ |
|
"epoch": 0.44, |
|
"learning_rate": 2.6929447236180906e-05, |
|
"loss": 1.5949, |
|
"step": 10700 |
|
}, |
|
{ |
|
"epoch": 0.44, |
|
"learning_rate": 2.689929648241206e-05, |
|
"loss": 1.6017, |
|
"step": 10800 |
|
}, |
|
{ |
|
"epoch": 0.44, |
|
"learning_rate": 2.6869145728643216e-05, |
|
"loss": 1.6001, |
|
"step": 10900 |
|
}, |
|
{ |
|
"epoch": 0.45, |
|
"learning_rate": 2.683929648241206e-05, |
|
"loss": 1.5983, |
|
"step": 11000 |
|
}, |
|
{ |
|
"epoch": 0.45, |
|
"learning_rate": 2.6809145728643213e-05, |
|
"loss": 1.5914, |
|
"step": 11100 |
|
}, |
|
{ |
|
"epoch": 0.46, |
|
"learning_rate": 2.6778994974874372e-05, |
|
"loss": 1.5997, |
|
"step": 11200 |
|
}, |
|
{ |
|
"epoch": 0.46, |
|
"learning_rate": 2.6748844221105527e-05, |
|
"loss": 1.5962, |
|
"step": 11300 |
|
}, |
|
{ |
|
"epoch": 0.47, |
|
"learning_rate": 2.6718693467336683e-05, |
|
"loss": 1.5946, |
|
"step": 11400 |
|
}, |
|
{ |
|
"epoch": 0.47, |
|
"learning_rate": 2.6688542713567838e-05, |
|
"loss": 1.5969, |
|
"step": 11500 |
|
}, |
|
{ |
|
"epoch": 0.47, |
|
"learning_rate": 2.6658391959798997e-05, |
|
"loss": 1.6132, |
|
"step": 11600 |
|
}, |
|
{ |
|
"epoch": 0.48, |
|
"learning_rate": 2.6628241206030152e-05, |
|
"loss": 1.5893, |
|
"step": 11700 |
|
}, |
|
{ |
|
"epoch": 0.48, |
|
"learning_rate": 2.6598090452261307e-05, |
|
"loss": 1.6123, |
|
"step": 11800 |
|
}, |
|
{ |
|
"epoch": 0.49, |
|
"learning_rate": 2.6567939698492462e-05, |
|
"loss": 1.5975, |
|
"step": 11900 |
|
}, |
|
{ |
|
"epoch": 0.49, |
|
"learning_rate": 2.653778894472362e-05, |
|
"loss": 1.5908, |
|
"step": 12000 |
|
}, |
|
{ |
|
"epoch": 0.49, |
|
"eval_bleu": 80.65979156040467, |
|
"eval_gen_len": 16.216, |
|
"eval_loss": 1.561901330947876, |
|
"eval_meteor": 0.9045236466427484, |
|
"eval_rouge1": 94.5279, |
|
"eval_rouge2": 91.2374, |
|
"eval_rougeL": 92.5949, |
|
"eval_rougeLsum": 92.6157, |
|
"eval_runtime": 25.5485, |
|
"eval_samples_per_second": 19.571, |
|
"eval_steps_per_second": 2.466, |
|
"step": 12000 |
|
}, |
|
{ |
|
"epoch": 0.49, |
|
"learning_rate": 2.6508241206030153e-05, |
|
"loss": 1.6137, |
|
"step": 12100 |
|
}, |
|
{ |
|
"epoch": 0.5, |
|
"learning_rate": 2.6478090452261305e-05, |
|
"loss": 2.1155, |
|
"step": 12200 |
|
}, |
|
{ |
|
"epoch": 0.5, |
|
"learning_rate": 2.644793969849246e-05, |
|
"loss": 2.6991, |
|
"step": 12300 |
|
}, |
|
{ |
|
"epoch": 0.51, |
|
"learning_rate": 2.641778894472362e-05, |
|
"loss": 2.8329, |
|
"step": 12400 |
|
}, |
|
{ |
|
"epoch": 0.51, |
|
"learning_rate": 2.6387638190954774e-05, |
|
"loss": 2.8749, |
|
"step": 12500 |
|
}, |
|
{ |
|
"epoch": 0.51, |
|
"learning_rate": 2.635748743718593e-05, |
|
"loss": 2.8538, |
|
"step": 12600 |
|
}, |
|
{ |
|
"epoch": 0.52, |
|
"learning_rate": 2.6327336683417085e-05, |
|
"loss": 2.8509, |
|
"step": 12700 |
|
}, |
|
{ |
|
"epoch": 0.52, |
|
"learning_rate": 2.6297185929648243e-05, |
|
"loss": 2.8037, |
|
"step": 12800 |
|
}, |
|
{ |
|
"epoch": 0.53, |
|
"learning_rate": 2.62670351758794e-05, |
|
"loss": 2.8414, |
|
"step": 12900 |
|
}, |
|
{ |
|
"epoch": 0.53, |
|
"learning_rate": 2.6236884422110554e-05, |
|
"loss": 2.8386, |
|
"step": 13000 |
|
}, |
|
{ |
|
"epoch": 0.53, |
|
"learning_rate": 2.620673366834171e-05, |
|
"loss": 2.8152, |
|
"step": 13100 |
|
}, |
|
{ |
|
"epoch": 0.54, |
|
"learning_rate": 2.6176582914572868e-05, |
|
"loss": 2.7805, |
|
"step": 13200 |
|
}, |
|
{ |
|
"epoch": 0.54, |
|
"learning_rate": 2.6146432160804023e-05, |
|
"loss": 2.7591, |
|
"step": 13300 |
|
}, |
|
{ |
|
"epoch": 0.55, |
|
"learning_rate": 2.6116281407035175e-05, |
|
"loss": 2.7748, |
|
"step": 13400 |
|
}, |
|
{ |
|
"epoch": 0.55, |
|
"learning_rate": 2.608613065326633e-05, |
|
"loss": 2.773, |
|
"step": 13500 |
|
}, |
|
{ |
|
"epoch": 0.56, |
|
"learning_rate": 2.605597989949749e-05, |
|
"loss": 2.7776, |
|
"step": 13600 |
|
}, |
|
{ |
|
"epoch": 0.56, |
|
"learning_rate": 2.602613065326633e-05, |
|
"loss": 2.752, |
|
"step": 13700 |
|
}, |
|
{ |
|
"epoch": 0.56, |
|
"learning_rate": 2.599597989949749e-05, |
|
"loss": 2.7383, |
|
"step": 13800 |
|
}, |
|
{ |
|
"epoch": 0.57, |
|
"learning_rate": 2.5965829145728645e-05, |
|
"loss": 2.7375, |
|
"step": 13900 |
|
}, |
|
{ |
|
"epoch": 0.57, |
|
"learning_rate": 2.59356783919598e-05, |
|
"loss": 2.7502, |
|
"step": 14000 |
|
}, |
|
{ |
|
"epoch": 0.57, |
|
"eval_bleu": 76.79295767834411, |
|
"eval_gen_len": 16.252, |
|
"eval_loss": 2.6775336265563965, |
|
"eval_meteor": 0.890923129283697, |
|
"eval_rouge1": 92.9876, |
|
"eval_rouge2": 88.6491, |
|
"eval_rougeL": 91.2913, |
|
"eval_rougeLsum": 91.2855, |
|
"eval_runtime": 25.3646, |
|
"eval_samples_per_second": 19.712, |
|
"eval_steps_per_second": 2.484, |
|
"step": 14000 |
|
}, |
|
{ |
|
"epoch": 0.58, |
|
"learning_rate": 2.5905527638190955e-05, |
|
"loss": 2.7346, |
|
"step": 14100 |
|
}, |
|
{ |
|
"epoch": 0.58, |
|
"learning_rate": 2.5875376884422114e-05, |
|
"loss": 2.7492, |
|
"step": 14200 |
|
}, |
|
{ |
|
"epoch": 0.58, |
|
"learning_rate": 2.584522613065327e-05, |
|
"loss": 2.7273, |
|
"step": 14300 |
|
}, |
|
{ |
|
"epoch": 0.59, |
|
"learning_rate": 2.5815075376884424e-05, |
|
"loss": 2.7279, |
|
"step": 14400 |
|
}, |
|
{ |
|
"epoch": 0.59, |
|
"learning_rate": 2.5784924623115576e-05, |
|
"loss": 2.7241, |
|
"step": 14500 |
|
}, |
|
{ |
|
"epoch": 0.6, |
|
"learning_rate": 2.575477386934673e-05, |
|
"loss": 2.7457, |
|
"step": 14600 |
|
}, |
|
{ |
|
"epoch": 0.6, |
|
"learning_rate": 2.572462311557789e-05, |
|
"loss": 2.7347, |
|
"step": 14700 |
|
}, |
|
{ |
|
"epoch": 0.6, |
|
"learning_rate": 2.5694472361809045e-05, |
|
"loss": 2.7168, |
|
"step": 14800 |
|
}, |
|
{ |
|
"epoch": 0.61, |
|
"learning_rate": 2.56643216080402e-05, |
|
"loss": 2.7086, |
|
"step": 14900 |
|
}, |
|
{ |
|
"epoch": 0.61, |
|
"learning_rate": 2.5634170854271356e-05, |
|
"loss": 2.7265, |
|
"step": 15000 |
|
}, |
|
{ |
|
"epoch": 0.62, |
|
"learning_rate": 2.5604020100502515e-05, |
|
"loss": 2.7228, |
|
"step": 15100 |
|
}, |
|
{ |
|
"epoch": 0.62, |
|
"learning_rate": 2.557386934673367e-05, |
|
"loss": 2.7089, |
|
"step": 15200 |
|
}, |
|
{ |
|
"epoch": 0.62, |
|
"learning_rate": 2.5543718592964825e-05, |
|
"loss": 2.6962, |
|
"step": 15300 |
|
}, |
|
{ |
|
"epoch": 0.63, |
|
"learning_rate": 2.551356783919598e-05, |
|
"loss": 2.7067, |
|
"step": 15400 |
|
}, |
|
{ |
|
"epoch": 0.63, |
|
"learning_rate": 2.548341708542714e-05, |
|
"loss": 2.7016, |
|
"step": 15500 |
|
}, |
|
{ |
|
"epoch": 0.64, |
|
"learning_rate": 2.545326633165829e-05, |
|
"loss": 2.6746, |
|
"step": 15600 |
|
}, |
|
{ |
|
"epoch": 0.64, |
|
"learning_rate": 2.5423115577889446e-05, |
|
"loss": 2.6954, |
|
"step": 15700 |
|
}, |
|
{ |
|
"epoch": 0.65, |
|
"learning_rate": 2.53929648241206e-05, |
|
"loss": 2.6972, |
|
"step": 15800 |
|
}, |
|
{ |
|
"epoch": 0.65, |
|
"learning_rate": 2.536281407035176e-05, |
|
"loss": 2.6871, |
|
"step": 15900 |
|
}, |
|
{ |
|
"epoch": 0.65, |
|
"learning_rate": 2.5332663316582915e-05, |
|
"loss": 2.7134, |
|
"step": 16000 |
|
}, |
|
{ |
|
"epoch": 0.65, |
|
"eval_bleu": 78.02775295125716, |
|
"eval_gen_len": 16.242, |
|
"eval_loss": 2.631150484085083, |
|
"eval_meteor": 0.8935096956153317, |
|
"eval_rouge1": 93.4121, |
|
"eval_rouge2": 89.2356, |
|
"eval_rougeL": 91.593, |
|
"eval_rougeLsum": 91.6198, |
|
"eval_runtime": 25.3474, |
|
"eval_samples_per_second": 19.726, |
|
"eval_steps_per_second": 2.485, |
|
"step": 16000 |
|
}, |
|
{ |
|
"epoch": 0.66, |
|
"learning_rate": 2.530251256281407e-05, |
|
"loss": 2.7203, |
|
"step": 16100 |
|
}, |
|
{ |
|
"epoch": 0.66, |
|
"learning_rate": 2.5272361809045226e-05, |
|
"loss": 2.7245, |
|
"step": 16200 |
|
}, |
|
{ |
|
"epoch": 0.67, |
|
"learning_rate": 2.5242211055276385e-05, |
|
"loss": 2.7023, |
|
"step": 16300 |
|
}, |
|
{ |
|
"epoch": 0.67, |
|
"learning_rate": 2.521206030150754e-05, |
|
"loss": 2.7224, |
|
"step": 16400 |
|
}, |
|
{ |
|
"epoch": 0.67, |
|
"learning_rate": 2.5181909547738695e-05, |
|
"loss": 2.6802, |
|
"step": 16500 |
|
}, |
|
{ |
|
"epoch": 0.68, |
|
"learning_rate": 2.515175879396985e-05, |
|
"loss": 2.6996, |
|
"step": 16600 |
|
}, |
|
{ |
|
"epoch": 0.68, |
|
"learning_rate": 2.5121608040201006e-05, |
|
"loss": 2.681, |
|
"step": 16700 |
|
}, |
|
{ |
|
"epoch": 0.69, |
|
"learning_rate": 2.509145728643216e-05, |
|
"loss": 2.6895, |
|
"step": 16800 |
|
}, |
|
{ |
|
"epoch": 0.69, |
|
"learning_rate": 2.5061306532663316e-05, |
|
"loss": 2.698, |
|
"step": 16900 |
|
}, |
|
{ |
|
"epoch": 0.69, |
|
"learning_rate": 2.503115577889447e-05, |
|
"loss": 2.6802, |
|
"step": 17000 |
|
}, |
|
{ |
|
"epoch": 0.7, |
|
"learning_rate": 2.5001005025125627e-05, |
|
"loss": 2.6914, |
|
"step": 17100 |
|
}, |
|
{ |
|
"epoch": 0.7, |
|
"learning_rate": 2.4970854271356785e-05, |
|
"loss": 2.7011, |
|
"step": 17200 |
|
}, |
|
{ |
|
"epoch": 0.71, |
|
"learning_rate": 2.494070351758794e-05, |
|
"loss": 2.6659, |
|
"step": 17300 |
|
}, |
|
{ |
|
"epoch": 0.71, |
|
"learning_rate": 2.4910552763819096e-05, |
|
"loss": 2.6905, |
|
"step": 17400 |
|
}, |
|
{ |
|
"epoch": 0.71, |
|
"learning_rate": 2.488040201005025e-05, |
|
"loss": 2.6557, |
|
"step": 17500 |
|
}, |
|
{ |
|
"epoch": 0.72, |
|
"learning_rate": 2.485025125628141e-05, |
|
"loss": 2.6648, |
|
"step": 17600 |
|
}, |
|
{ |
|
"epoch": 0.72, |
|
"learning_rate": 2.4820100502512565e-05, |
|
"loss": 2.6954, |
|
"step": 17700 |
|
}, |
|
{ |
|
"epoch": 0.73, |
|
"learning_rate": 2.478994974874372e-05, |
|
"loss": 2.682, |
|
"step": 17800 |
|
}, |
|
{ |
|
"epoch": 0.73, |
|
"learning_rate": 2.4759798994974872e-05, |
|
"loss": 2.6767, |
|
"step": 17900 |
|
}, |
|
{ |
|
"epoch": 0.73, |
|
"learning_rate": 2.472964824120603e-05, |
|
"loss": 2.6865, |
|
"step": 18000 |
|
}, |
|
{ |
|
"epoch": 0.73, |
|
"eval_bleu": 78.46041201212769, |
|
"eval_gen_len": 16.228, |
|
"eval_loss": 2.584897756576538, |
|
"eval_meteor": 0.8947853545083831, |
|
"eval_rouge1": 93.4999, |
|
"eval_rouge2": 89.2925, |
|
"eval_rougeL": 91.7008, |
|
"eval_rougeLsum": 91.7289, |
|
"eval_runtime": 24.8047, |
|
"eval_samples_per_second": 20.157, |
|
"eval_steps_per_second": 2.54, |
|
"step": 18000 |
|
}, |
|
{ |
|
"epoch": 0.74, |
|
"learning_rate": 2.4699497487437186e-05, |
|
"loss": 2.6607, |
|
"step": 18100 |
|
}, |
|
{ |
|
"epoch": 0.74, |
|
"learning_rate": 2.466934673366834e-05, |
|
"loss": 2.6692, |
|
"step": 18200 |
|
}, |
|
{ |
|
"epoch": 0.75, |
|
"learning_rate": 2.4639195979899497e-05, |
|
"loss": 2.6633, |
|
"step": 18300 |
|
}, |
|
{ |
|
"epoch": 0.75, |
|
"learning_rate": 2.4609045226130655e-05, |
|
"loss": 2.6655, |
|
"step": 18400 |
|
}, |
|
{ |
|
"epoch": 0.76, |
|
"learning_rate": 2.457889447236181e-05, |
|
"loss": 2.6623, |
|
"step": 18500 |
|
}, |
|
{ |
|
"epoch": 0.76, |
|
"learning_rate": 2.4548743718592966e-05, |
|
"loss": 2.6679, |
|
"step": 18600 |
|
}, |
|
{ |
|
"epoch": 0.76, |
|
"learning_rate": 2.451859296482412e-05, |
|
"loss": 2.674, |
|
"step": 18700 |
|
}, |
|
{ |
|
"epoch": 0.77, |
|
"learning_rate": 2.448844221105528e-05, |
|
"loss": 2.6719, |
|
"step": 18800 |
|
}, |
|
{ |
|
"epoch": 0.77, |
|
"learning_rate": 2.4458291457286435e-05, |
|
"loss": 2.6832, |
|
"step": 18900 |
|
}, |
|
{ |
|
"epoch": 0.78, |
|
"learning_rate": 2.4428140703517587e-05, |
|
"loss": 2.6776, |
|
"step": 19000 |
|
}, |
|
{ |
|
"epoch": 0.78, |
|
"learning_rate": 2.4397989949748742e-05, |
|
"loss": 2.6682, |
|
"step": 19100 |
|
}, |
|
{ |
|
"epoch": 0.78, |
|
"learning_rate": 2.4367839195979898e-05, |
|
"loss": 2.6956, |
|
"step": 19200 |
|
}, |
|
{ |
|
"epoch": 0.79, |
|
"learning_rate": 2.4337688442211056e-05, |
|
"loss": 2.6586, |
|
"step": 19300 |
|
}, |
|
{ |
|
"epoch": 0.79, |
|
"learning_rate": 2.430753768844221e-05, |
|
"loss": 2.6666, |
|
"step": 19400 |
|
}, |
|
{ |
|
"epoch": 0.8, |
|
"learning_rate": 2.4277386934673367e-05, |
|
"loss": 2.6663, |
|
"step": 19500 |
|
}, |
|
{ |
|
"epoch": 0.8, |
|
"learning_rate": 2.4247236180904522e-05, |
|
"loss": 2.6638, |
|
"step": 19600 |
|
}, |
|
{ |
|
"epoch": 0.8, |
|
"learning_rate": 2.421708542713568e-05, |
|
"loss": 2.6112, |
|
"step": 19700 |
|
}, |
|
{ |
|
"epoch": 0.81, |
|
"learning_rate": 2.4186934673366836e-05, |
|
"loss": 1.9824, |
|
"step": 19800 |
|
}, |
|
{ |
|
"epoch": 0.81, |
|
"learning_rate": 2.415708542713568e-05, |
|
"loss": 1.7259, |
|
"step": 19900 |
|
}, |
|
{ |
|
"epoch": 0.82, |
|
"learning_rate": 2.4126934673366836e-05, |
|
"loss": 2.2838, |
|
"step": 20000 |
|
}, |
|
{ |
|
"epoch": 0.82, |
|
"eval_bleu": 77.92153227866292, |
|
"eval_gen_len": 16.232, |
|
"eval_loss": 2.501798629760742, |
|
"eval_meteor": 0.8944690987739373, |
|
"eval_rouge1": 93.4893, |
|
"eval_rouge2": 89.2497, |
|
"eval_rougeL": 91.4623, |
|
"eval_rougeLsum": 91.51, |
|
"eval_runtime": 25.1905, |
|
"eval_samples_per_second": 19.849, |
|
"eval_steps_per_second": 2.501, |
|
"step": 20000 |
|
}, |
|
{ |
|
"epoch": 0.82, |
|
"learning_rate": 2.409678391959799e-05, |
|
"loss": 2.6248, |
|
"step": 20100 |
|
}, |
|
{ |
|
"epoch": 0.82, |
|
"learning_rate": 2.4066633165829144e-05, |
|
"loss": 2.6643, |
|
"step": 20200 |
|
}, |
|
{ |
|
"epoch": 0.83, |
|
"learning_rate": 2.4036482412060302e-05, |
|
"loss": 2.642, |
|
"step": 20300 |
|
}, |
|
{ |
|
"epoch": 0.83, |
|
"learning_rate": 2.4006331658291458e-05, |
|
"loss": 2.67, |
|
"step": 20400 |
|
}, |
|
{ |
|
"epoch": 0.84, |
|
"learning_rate": 2.3976180904522613e-05, |
|
"loss": 2.6563, |
|
"step": 20500 |
|
}, |
|
{ |
|
"epoch": 0.84, |
|
"learning_rate": 2.3946030150753768e-05, |
|
"loss": 2.6873, |
|
"step": 20600 |
|
}, |
|
{ |
|
"epoch": 0.85, |
|
"learning_rate": 2.3915879396984927e-05, |
|
"loss": 2.65, |
|
"step": 20700 |
|
}, |
|
{ |
|
"epoch": 0.85, |
|
"learning_rate": 2.3885728643216082e-05, |
|
"loss": 2.6635, |
|
"step": 20800 |
|
}, |
|
{ |
|
"epoch": 0.85, |
|
"learning_rate": 2.3855577889447237e-05, |
|
"loss": 2.6443, |
|
"step": 20900 |
|
}, |
|
{ |
|
"epoch": 0.86, |
|
"learning_rate": 2.3825427135678393e-05, |
|
"loss": 2.6504, |
|
"step": 21000 |
|
}, |
|
{ |
|
"epoch": 0.86, |
|
"learning_rate": 2.379527638190955e-05, |
|
"loss": 2.6425, |
|
"step": 21100 |
|
}, |
|
{ |
|
"epoch": 0.87, |
|
"learning_rate": 2.3765125628140703e-05, |
|
"loss": 2.6774, |
|
"step": 21200 |
|
}, |
|
{ |
|
"epoch": 0.87, |
|
"learning_rate": 2.373497487437186e-05, |
|
"loss": 2.6575, |
|
"step": 21300 |
|
}, |
|
{ |
|
"epoch": 0.87, |
|
"learning_rate": 2.3704824120603014e-05, |
|
"loss": 2.6542, |
|
"step": 21400 |
|
}, |
|
{ |
|
"epoch": 0.88, |
|
"learning_rate": 2.3674673366834172e-05, |
|
"loss": 2.6508, |
|
"step": 21500 |
|
}, |
|
{ |
|
"epoch": 0.88, |
|
"learning_rate": 2.3644522613065328e-05, |
|
"loss": 2.6648, |
|
"step": 21600 |
|
}, |
|
{ |
|
"epoch": 0.89, |
|
"learning_rate": 2.3614673366834173e-05, |
|
"loss": 2.6623, |
|
"step": 21700 |
|
}, |
|
{ |
|
"epoch": 0.89, |
|
"learning_rate": 2.3584522613065328e-05, |
|
"loss": 2.66, |
|
"step": 21800 |
|
}, |
|
{ |
|
"epoch": 0.89, |
|
"learning_rate": 2.3554371859296483e-05, |
|
"loss": 2.6568, |
|
"step": 21900 |
|
}, |
|
{ |
|
"epoch": 0.9, |
|
"learning_rate": 2.352422110552764e-05, |
|
"loss": 2.6591, |
|
"step": 22000 |
|
}, |
|
{ |
|
"epoch": 0.9, |
|
"eval_bleu": 79.49958192973479, |
|
"eval_gen_len": 16.234, |
|
"eval_loss": 2.5640199184417725, |
|
"eval_meteor": 0.9025279208048376, |
|
"eval_rouge1": 94.1823, |
|
"eval_rouge2": 90.5552, |
|
"eval_rougeL": 92.428, |
|
"eval_rougeLsum": 92.4351, |
|
"eval_runtime": 25.4062, |
|
"eval_samples_per_second": 19.68, |
|
"eval_steps_per_second": 2.48, |
|
"step": 22000 |
|
}, |
|
{ |
|
"epoch": 0.9, |
|
"learning_rate": 2.3494070351758794e-05, |
|
"loss": 2.6622, |
|
"step": 22100 |
|
}, |
|
{ |
|
"epoch": 0.91, |
|
"learning_rate": 2.3463919597989953e-05, |
|
"loss": 2.6638, |
|
"step": 22200 |
|
}, |
|
{ |
|
"epoch": 0.91, |
|
"learning_rate": 2.3433768844221108e-05, |
|
"loss": 2.6521, |
|
"step": 22300 |
|
}, |
|
{ |
|
"epoch": 0.91, |
|
"learning_rate": 2.340361809045226e-05, |
|
"loss": 2.657, |
|
"step": 22400 |
|
}, |
|
{ |
|
"epoch": 0.92, |
|
"learning_rate": 2.3373467336683415e-05, |
|
"loss": 2.632, |
|
"step": 22500 |
|
}, |
|
{ |
|
"epoch": 0.92, |
|
"learning_rate": 2.3343316582914574e-05, |
|
"loss": 2.6544, |
|
"step": 22600 |
|
}, |
|
{ |
|
"epoch": 0.93, |
|
"learning_rate": 2.331316582914573e-05, |
|
"loss": 2.6429, |
|
"step": 22700 |
|
}, |
|
{ |
|
"epoch": 0.93, |
|
"learning_rate": 2.3283015075376884e-05, |
|
"loss": 2.6534, |
|
"step": 22800 |
|
}, |
|
{ |
|
"epoch": 0.93, |
|
"learning_rate": 2.325286432160804e-05, |
|
"loss": 2.6827, |
|
"step": 22900 |
|
}, |
|
{ |
|
"epoch": 0.94, |
|
"learning_rate": 2.3222713567839198e-05, |
|
"loss": 2.6506, |
|
"step": 23000 |
|
}, |
|
{ |
|
"epoch": 0.94, |
|
"learning_rate": 2.3192562814070353e-05, |
|
"loss": 2.6396, |
|
"step": 23100 |
|
}, |
|
{ |
|
"epoch": 0.95, |
|
"learning_rate": 2.316241206030151e-05, |
|
"loss": 2.6777, |
|
"step": 23200 |
|
}, |
|
{ |
|
"epoch": 0.95, |
|
"learning_rate": 2.3132261306532664e-05, |
|
"loss": 2.6548, |
|
"step": 23300 |
|
}, |
|
{ |
|
"epoch": 0.96, |
|
"learning_rate": 2.3102110552763823e-05, |
|
"loss": 2.6735, |
|
"step": 23400 |
|
}, |
|
{ |
|
"epoch": 0.96, |
|
"learning_rate": 2.3071959798994974e-05, |
|
"loss": 2.6713, |
|
"step": 23500 |
|
}, |
|
{ |
|
"epoch": 0.96, |
|
"learning_rate": 2.304180904522613e-05, |
|
"loss": 2.6752, |
|
"step": 23600 |
|
}, |
|
{ |
|
"epoch": 0.97, |
|
"learning_rate": 2.3011658291457285e-05, |
|
"loss": 2.6533, |
|
"step": 23700 |
|
}, |
|
{ |
|
"epoch": 0.97, |
|
"learning_rate": 2.2981507537688444e-05, |
|
"loss": 2.6623, |
|
"step": 23800 |
|
}, |
|
{ |
|
"epoch": 0.98, |
|
"learning_rate": 2.29513567839196e-05, |
|
"loss": 2.6596, |
|
"step": 23900 |
|
}, |
|
{ |
|
"epoch": 0.98, |
|
"learning_rate": 2.2921206030150754e-05, |
|
"loss": 2.662, |
|
"step": 24000 |
|
}, |
|
{ |
|
"epoch": 0.98, |
|
"eval_bleu": 79.32255832423239, |
|
"eval_gen_len": 16.256, |
|
"eval_loss": 2.555393695831299, |
|
"eval_meteor": 0.9010940538998614, |
|
"eval_rouge1": 94.0079, |
|
"eval_rouge2": 90.2547, |
|
"eval_rougeL": 92.1916, |
|
"eval_rougeLsum": 92.2075, |
|
"eval_runtime": 25.4585, |
|
"eval_samples_per_second": 19.64, |
|
"eval_steps_per_second": 2.475, |
|
"step": 24000 |
|
}, |
|
{ |
|
"epoch": 0.98, |
|
"learning_rate": 2.289105527638191e-05, |
|
"loss": 2.6431, |
|
"step": 24100 |
|
}, |
|
{ |
|
"epoch": 0.99, |
|
"learning_rate": 2.2860904522613068e-05, |
|
"loss": 2.6577, |
|
"step": 24200 |
|
}, |
|
{ |
|
"epoch": 0.99, |
|
"learning_rate": 2.2830753768844223e-05, |
|
"loss": 2.6452, |
|
"step": 24300 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"learning_rate": 2.280060301507538e-05, |
|
"loss": 2.6465, |
|
"step": 24400 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"learning_rate": 2.2770452261306534e-05, |
|
"loss": 2.6566, |
|
"step": 24500 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"learning_rate": 2.2740301507537686e-05, |
|
"loss": 2.6632, |
|
"step": 24600 |
|
}, |
|
{ |
|
"epoch": 1.01, |
|
"learning_rate": 2.2710150753768844e-05, |
|
"loss": 2.6179, |
|
"step": 24700 |
|
}, |
|
{ |
|
"epoch": 1.01, |
|
"learning_rate": 2.268e-05, |
|
"loss": 2.6355, |
|
"step": 24800 |
|
}, |
|
{ |
|
"epoch": 1.02, |
|
"learning_rate": 2.2649849246231155e-05, |
|
"loss": 2.656, |
|
"step": 24900 |
|
}, |
|
{ |
|
"epoch": 1.02, |
|
"learning_rate": 2.261969849246231e-05, |
|
"loss": 2.6485, |
|
"step": 25000 |
|
}, |
|
{ |
|
"epoch": 1.02, |
|
"learning_rate": 2.258954773869347e-05, |
|
"loss": 2.6834, |
|
"step": 25100 |
|
}, |
|
{ |
|
"epoch": 1.03, |
|
"learning_rate": 2.2559396984924624e-05, |
|
"loss": 2.6586, |
|
"step": 25200 |
|
}, |
|
{ |
|
"epoch": 1.03, |
|
"learning_rate": 2.252924623115578e-05, |
|
"loss": 2.6245, |
|
"step": 25300 |
|
}, |
|
{ |
|
"epoch": 1.04, |
|
"learning_rate": 2.2499095477386935e-05, |
|
"loss": 2.6439, |
|
"step": 25400 |
|
}, |
|
{ |
|
"epoch": 1.04, |
|
"learning_rate": 2.2468944723618093e-05, |
|
"loss": 2.657, |
|
"step": 25500 |
|
}, |
|
{ |
|
"epoch": 1.05, |
|
"learning_rate": 2.243879396984925e-05, |
|
"loss": 2.651, |
|
"step": 25600 |
|
}, |
|
{ |
|
"epoch": 1.05, |
|
"learning_rate": 2.2408643216080404e-05, |
|
"loss": 2.5159, |
|
"step": 25700 |
|
}, |
|
{ |
|
"epoch": 1.05, |
|
"learning_rate": 2.2378492462311556e-05, |
|
"loss": 1.8454, |
|
"step": 25800 |
|
}, |
|
{ |
|
"epoch": 1.06, |
|
"learning_rate": 2.2348341708542714e-05, |
|
"loss": 1.7432, |
|
"step": 25900 |
|
}, |
|
{ |
|
"epoch": 1.06, |
|
"learning_rate": 2.2318793969849246e-05, |
|
"loss": 1.8874, |
|
"step": 26000 |
|
}, |
|
{ |
|
"epoch": 1.06, |
|
"eval_bleu": 79.38237465310864, |
|
"eval_gen_len": 16.272, |
|
"eval_loss": 2.2466025352478027, |
|
"eval_meteor": 0.9008152345684776, |
|
"eval_rouge1": 94.0369, |
|
"eval_rouge2": 90.1224, |
|
"eval_rougeL": 91.9066, |
|
"eval_rougeLsum": 91.944, |
|
"eval_runtime": 24.8009, |
|
"eval_samples_per_second": 20.161, |
|
"eval_steps_per_second": 2.54, |
|
"step": 26000 |
|
}, |
|
{ |
|
"epoch": 1.07, |
|
"learning_rate": 2.22886432160804e-05, |
|
"loss": 2.5847, |
|
"step": 26100 |
|
}, |
|
{ |
|
"epoch": 1.07, |
|
"learning_rate": 2.2258492462311557e-05, |
|
"loss": 2.623, |
|
"step": 26200 |
|
}, |
|
{ |
|
"epoch": 1.07, |
|
"learning_rate": 2.2228341708542716e-05, |
|
"loss": 2.665, |
|
"step": 26300 |
|
}, |
|
{ |
|
"epoch": 1.08, |
|
"learning_rate": 2.219819095477387e-05, |
|
"loss": 2.647, |
|
"step": 26400 |
|
}, |
|
{ |
|
"epoch": 1.08, |
|
"learning_rate": 2.2168040201005026e-05, |
|
"loss": 2.6653, |
|
"step": 26500 |
|
}, |
|
{ |
|
"epoch": 1.09, |
|
"learning_rate": 2.213788944723618e-05, |
|
"loss": 2.6546, |
|
"step": 26600 |
|
}, |
|
{ |
|
"epoch": 1.09, |
|
"learning_rate": 2.210773869346734e-05, |
|
"loss": 2.6471, |
|
"step": 26700 |
|
}, |
|
{ |
|
"epoch": 1.09, |
|
"learning_rate": 2.2077587939698495e-05, |
|
"loss": 2.6871, |
|
"step": 26800 |
|
}, |
|
{ |
|
"epoch": 1.1, |
|
"learning_rate": 2.2047437185929647e-05, |
|
"loss": 2.6601, |
|
"step": 26900 |
|
}, |
|
{ |
|
"epoch": 1.1, |
|
"learning_rate": 2.2017286432160802e-05, |
|
"loss": 2.6541, |
|
"step": 27000 |
|
}, |
|
{ |
|
"epoch": 1.11, |
|
"learning_rate": 2.198713567839196e-05, |
|
"loss": 2.6255, |
|
"step": 27100 |
|
}, |
|
{ |
|
"epoch": 1.11, |
|
"learning_rate": 2.1956984924623116e-05, |
|
"loss": 2.6765, |
|
"step": 27200 |
|
}, |
|
{ |
|
"epoch": 1.11, |
|
"learning_rate": 2.192683417085427e-05, |
|
"loss": 2.6448, |
|
"step": 27300 |
|
}, |
|
{ |
|
"epoch": 1.12, |
|
"learning_rate": 2.1896683417085427e-05, |
|
"loss": 2.6667, |
|
"step": 27400 |
|
}, |
|
{ |
|
"epoch": 1.12, |
|
"learning_rate": 2.1866532663316582e-05, |
|
"loss": 2.6544, |
|
"step": 27500 |
|
}, |
|
{ |
|
"epoch": 1.13, |
|
"learning_rate": 2.183638190954774e-05, |
|
"loss": 2.6492, |
|
"step": 27600 |
|
}, |
|
{ |
|
"epoch": 1.13, |
|
"learning_rate": 2.1806231155778896e-05, |
|
"loss": 2.6541, |
|
"step": 27700 |
|
}, |
|
{ |
|
"epoch": 1.13, |
|
"learning_rate": 2.177608040201005e-05, |
|
"loss": 2.6601, |
|
"step": 27800 |
|
}, |
|
{ |
|
"epoch": 1.14, |
|
"learning_rate": 2.1745929648241207e-05, |
|
"loss": 2.653, |
|
"step": 27900 |
|
}, |
|
{ |
|
"epoch": 1.14, |
|
"learning_rate": 2.1715778894472362e-05, |
|
"loss": 2.6527, |
|
"step": 28000 |
|
}, |
|
{ |
|
"epoch": 1.14, |
|
"eval_bleu": 80.15532470386316, |
|
"eval_gen_len": 16.238, |
|
"eval_loss": 2.573094129562378, |
|
"eval_meteor": 0.9042327142167804, |
|
"eval_rouge1": 94.281, |
|
"eval_rouge2": 90.8096, |
|
"eval_rougeL": 92.559, |
|
"eval_rougeLsum": 92.5681, |
|
"eval_runtime": 25.5646, |
|
"eval_samples_per_second": 19.558, |
|
"eval_steps_per_second": 2.464, |
|
"step": 28000 |
|
}, |
|
{ |
|
"epoch": 1.15, |
|
"learning_rate": 2.1685628140703517e-05, |
|
"loss": 2.6359, |
|
"step": 28100 |
|
}, |
|
{ |
|
"epoch": 1.15, |
|
"learning_rate": 2.1655477386934672e-05, |
|
"loss": 2.6562, |
|
"step": 28200 |
|
}, |
|
{ |
|
"epoch": 1.16, |
|
"learning_rate": 2.1625326633165828e-05, |
|
"loss": 2.6518, |
|
"step": 28300 |
|
}, |
|
{ |
|
"epoch": 1.16, |
|
"learning_rate": 2.1595175879396986e-05, |
|
"loss": 2.6403, |
|
"step": 28400 |
|
}, |
|
{ |
|
"epoch": 1.16, |
|
"learning_rate": 2.156502512562814e-05, |
|
"loss": 2.658, |
|
"step": 28500 |
|
}, |
|
{ |
|
"epoch": 1.17, |
|
"learning_rate": 2.1534874371859297e-05, |
|
"loss": 2.6583, |
|
"step": 28600 |
|
}, |
|
{ |
|
"epoch": 1.17, |
|
"learning_rate": 2.1504723618090452e-05, |
|
"loss": 2.6719, |
|
"step": 28700 |
|
}, |
|
{ |
|
"epoch": 1.18, |
|
"learning_rate": 2.147457286432161e-05, |
|
"loss": 2.6451, |
|
"step": 28800 |
|
}, |
|
{ |
|
"epoch": 1.18, |
|
"learning_rate": 2.1444422110552766e-05, |
|
"loss": 2.6701, |
|
"step": 28900 |
|
}, |
|
{ |
|
"epoch": 1.18, |
|
"learning_rate": 2.141427135678392e-05, |
|
"loss": 2.6666, |
|
"step": 29000 |
|
}, |
|
{ |
|
"epoch": 1.19, |
|
"learning_rate": 2.1384120603015073e-05, |
|
"loss": 2.6536, |
|
"step": 29100 |
|
}, |
|
{ |
|
"epoch": 1.19, |
|
"learning_rate": 2.1353969849246232e-05, |
|
"loss": 2.6423, |
|
"step": 29200 |
|
}, |
|
{ |
|
"epoch": 1.2, |
|
"learning_rate": 2.1323819095477387e-05, |
|
"loss": 2.6586, |
|
"step": 29300 |
|
}, |
|
{ |
|
"epoch": 1.2, |
|
"learning_rate": 2.1293668341708542e-05, |
|
"loss": 2.6554, |
|
"step": 29400 |
|
}, |
|
{ |
|
"epoch": 1.2, |
|
"learning_rate": 2.1263517587939698e-05, |
|
"loss": 2.6623, |
|
"step": 29500 |
|
}, |
|
{ |
|
"epoch": 1.21, |
|
"learning_rate": 2.1233366834170856e-05, |
|
"loss": 2.6749, |
|
"step": 29600 |
|
}, |
|
{ |
|
"epoch": 1.21, |
|
"learning_rate": 2.120321608040201e-05, |
|
"loss": 2.6446, |
|
"step": 29700 |
|
}, |
|
{ |
|
"epoch": 1.22, |
|
"learning_rate": 2.1173065326633167e-05, |
|
"loss": 2.6764, |
|
"step": 29800 |
|
}, |
|
{ |
|
"epoch": 1.22, |
|
"learning_rate": 2.1142914572864322e-05, |
|
"loss": 2.6522, |
|
"step": 29900 |
|
}, |
|
{ |
|
"epoch": 1.22, |
|
"learning_rate": 2.1112763819095477e-05, |
|
"loss": 2.6618, |
|
"step": 30000 |
|
}, |
|
{ |
|
"epoch": 1.22, |
|
"eval_bleu": 79.74215483171986, |
|
"eval_gen_len": 16.236, |
|
"eval_loss": 2.4326839447021484, |
|
"eval_meteor": 0.9044505502074225, |
|
"eval_rouge1": 94.3369, |
|
"eval_rouge2": 90.6586, |
|
"eval_rougeL": 92.1744, |
|
"eval_rougeLsum": 92.1895, |
|
"eval_runtime": 25.4582, |
|
"eval_samples_per_second": 19.64, |
|
"eval_steps_per_second": 2.475, |
|
"step": 30000 |
|
}, |
|
{ |
|
"epoch": 1.23, |
|
"learning_rate": 2.1082613065326636e-05, |
|
"loss": 1.977, |
|
"step": 30100 |
|
}, |
|
{ |
|
"epoch": 1.23, |
|
"learning_rate": 2.105246231155779e-05, |
|
"loss": 1.731, |
|
"step": 30200 |
|
}, |
|
{ |
|
"epoch": 1.24, |
|
"learning_rate": 2.1022311557788943e-05, |
|
"loss": 1.7164, |
|
"step": 30300 |
|
}, |
|
{ |
|
"epoch": 1.24, |
|
"learning_rate": 2.09921608040201e-05, |
|
"loss": 1.6936, |
|
"step": 30400 |
|
}, |
|
{ |
|
"epoch": 1.25, |
|
"learning_rate": 2.0962010050251257e-05, |
|
"loss": 1.6888, |
|
"step": 30500 |
|
}, |
|
{ |
|
"epoch": 1.25, |
|
"learning_rate": 2.0931859296482412e-05, |
|
"loss": 1.7005, |
|
"step": 30600 |
|
}, |
|
{ |
|
"epoch": 1.25, |
|
"learning_rate": 2.0901708542713568e-05, |
|
"loss": 1.6815, |
|
"step": 30700 |
|
}, |
|
{ |
|
"epoch": 1.26, |
|
"learning_rate": 2.0871557788944723e-05, |
|
"loss": 1.6894, |
|
"step": 30800 |
|
}, |
|
{ |
|
"epoch": 1.26, |
|
"learning_rate": 2.0841708542713568e-05, |
|
"loss": 1.6858, |
|
"step": 30900 |
|
}, |
|
{ |
|
"epoch": 1.27, |
|
"learning_rate": 2.0811557788944723e-05, |
|
"loss": 2.3491, |
|
"step": 31000 |
|
}, |
|
{ |
|
"epoch": 1.27, |
|
"learning_rate": 2.0781407035175882e-05, |
|
"loss": 2.6593, |
|
"step": 31100 |
|
}, |
|
{ |
|
"epoch": 1.27, |
|
"learning_rate": 2.0751256281407037e-05, |
|
"loss": 2.6468, |
|
"step": 31200 |
|
}, |
|
{ |
|
"epoch": 1.28, |
|
"learning_rate": 2.0721407035175883e-05, |
|
"loss": 2.6608, |
|
"step": 31300 |
|
}, |
|
{ |
|
"epoch": 1.28, |
|
"learning_rate": 2.069155778894472e-05, |
|
"loss": 2.6587, |
|
"step": 31400 |
|
}, |
|
{ |
|
"epoch": 1.29, |
|
"learning_rate": 2.066140703517588e-05, |
|
"loss": 2.6772, |
|
"step": 31500 |
|
}, |
|
{ |
|
"epoch": 1.29, |
|
"learning_rate": 2.0631256281407035e-05, |
|
"loss": 2.6861, |
|
"step": 31600 |
|
}, |
|
{ |
|
"epoch": 1.29, |
|
"learning_rate": 2.060140703517588e-05, |
|
"loss": 2.6909, |
|
"step": 31700 |
|
}, |
|
{ |
|
"epoch": 1.3, |
|
"learning_rate": 2.0571256281407036e-05, |
|
"loss": 2.7178, |
|
"step": 31800 |
|
}, |
|
{ |
|
"epoch": 1.3, |
|
"learning_rate": 2.054110552763819e-05, |
|
"loss": 2.7448, |
|
"step": 31900 |
|
}, |
|
{ |
|
"epoch": 1.31, |
|
"learning_rate": 2.0510954773869346e-05, |
|
"loss": 2.7368, |
|
"step": 32000 |
|
}, |
|
{ |
|
"epoch": 1.31, |
|
"eval_bleu": 79.88380574425997, |
|
"eval_gen_len": 16.228, |
|
"eval_loss": 2.729801893234253, |
|
"eval_meteor": 0.9013053514191987, |
|
"eval_rouge1": 94.1293, |
|
"eval_rouge2": 90.4052, |
|
"eval_rougeL": 92.4025, |
|
"eval_rougeLsum": 92.4078, |
|
"eval_runtime": 25.6909, |
|
"eval_samples_per_second": 19.462, |
|
"eval_steps_per_second": 2.452, |
|
"step": 32000 |
|
}, |
|
{ |
|
"epoch": 1.31, |
|
"learning_rate": 2.048110552763819e-05, |
|
"loss": 2.7485, |
|
"step": 32100 |
|
}, |
|
{ |
|
"epoch": 1.31, |
|
"learning_rate": 2.0450954773869347e-05, |
|
"loss": 2.7678, |
|
"step": 32200 |
|
}, |
|
{ |
|
"epoch": 1.32, |
|
"learning_rate": 2.0420804020100506e-05, |
|
"loss": 2.7518, |
|
"step": 32300 |
|
}, |
|
{ |
|
"epoch": 1.32, |
|
"learning_rate": 2.039065326633166e-05, |
|
"loss": 2.7449, |
|
"step": 32400 |
|
}, |
|
{ |
|
"epoch": 1.33, |
|
"learning_rate": 2.0360502512562813e-05, |
|
"loss": 2.7359, |
|
"step": 32500 |
|
}, |
|
{ |
|
"epoch": 1.33, |
|
"learning_rate": 2.0330351758793968e-05, |
|
"loss": 2.7379, |
|
"step": 32600 |
|
}, |
|
{ |
|
"epoch": 1.33, |
|
"learning_rate": 2.0300201005025127e-05, |
|
"loss": 2.743, |
|
"step": 32700 |
|
}, |
|
{ |
|
"epoch": 1.34, |
|
"learning_rate": 2.0270050251256282e-05, |
|
"loss": 2.73, |
|
"step": 32800 |
|
}, |
|
{ |
|
"epoch": 1.34, |
|
"learning_rate": 2.0240201005025127e-05, |
|
"loss": 2.7259, |
|
"step": 32900 |
|
}, |
|
{ |
|
"epoch": 1.35, |
|
"learning_rate": 2.0210050251256282e-05, |
|
"loss": 2.6952, |
|
"step": 33000 |
|
}, |
|
{ |
|
"epoch": 1.35, |
|
"learning_rate": 2.0179899497487438e-05, |
|
"loss": 2.6952, |
|
"step": 33100 |
|
}, |
|
{ |
|
"epoch": 1.36, |
|
"learning_rate": 2.0149748743718593e-05, |
|
"loss": 2.6855, |
|
"step": 33200 |
|
}, |
|
{ |
|
"epoch": 1.36, |
|
"learning_rate": 2.011959798994975e-05, |
|
"loss": 2.7112, |
|
"step": 33300 |
|
}, |
|
{ |
|
"epoch": 1.36, |
|
"learning_rate": 2.0089447236180907e-05, |
|
"loss": 2.6988, |
|
"step": 33400 |
|
}, |
|
{ |
|
"epoch": 1.37, |
|
"learning_rate": 2.0059296482412062e-05, |
|
"loss": 2.6994, |
|
"step": 33500 |
|
}, |
|
{ |
|
"epoch": 1.37, |
|
"learning_rate": 2.0029145728643214e-05, |
|
"loss": 2.6774, |
|
"step": 33600 |
|
}, |
|
{ |
|
"epoch": 1.38, |
|
"learning_rate": 1.9998994974874373e-05, |
|
"loss": 2.6856, |
|
"step": 33700 |
|
}, |
|
{ |
|
"epoch": 1.38, |
|
"learning_rate": 1.9968844221105528e-05, |
|
"loss": 2.6875, |
|
"step": 33800 |
|
}, |
|
{ |
|
"epoch": 1.38, |
|
"learning_rate": 1.9938693467336683e-05, |
|
"loss": 2.6905, |
|
"step": 33900 |
|
}, |
|
{ |
|
"epoch": 1.39, |
|
"learning_rate": 1.990854271356784e-05, |
|
"loss": 2.6858, |
|
"step": 34000 |
|
}, |
|
{ |
|
"epoch": 1.39, |
|
"eval_bleu": 80.43692634867155, |
|
"eval_gen_len": 16.234, |
|
"eval_loss": 2.6981565952301025, |
|
"eval_meteor": 0.9051906006959359, |
|
"eval_rouge1": 94.4532, |
|
"eval_rouge2": 91.0106, |
|
"eval_rougeL": 92.3314, |
|
"eval_rougeLsum": 92.3438, |
|
"eval_runtime": 24.9557, |
|
"eval_samples_per_second": 20.035, |
|
"eval_steps_per_second": 2.524, |
|
"step": 34000 |
|
}, |
|
{ |
|
"epoch": 1.39, |
|
"learning_rate": 1.9878391959798994e-05, |
|
"loss": 2.6896, |
|
"step": 34100 |
|
}, |
|
{ |
|
"epoch": 1.4, |
|
"learning_rate": 1.9848241206030152e-05, |
|
"loss": 2.688, |
|
"step": 34200 |
|
}, |
|
{ |
|
"epoch": 1.4, |
|
"learning_rate": 1.9818090452261308e-05, |
|
"loss": 2.6817, |
|
"step": 34300 |
|
}, |
|
{ |
|
"epoch": 1.4, |
|
"learning_rate": 1.9787939698492463e-05, |
|
"loss": 2.6833, |
|
"step": 34400 |
|
}, |
|
{ |
|
"epoch": 1.41, |
|
"learning_rate": 1.9758090452261308e-05, |
|
"loss": 2.6626, |
|
"step": 34500 |
|
}, |
|
{ |
|
"epoch": 1.41, |
|
"learning_rate": 1.9727939698492464e-05, |
|
"loss": 2.6504, |
|
"step": 34600 |
|
}, |
|
{ |
|
"epoch": 1.42, |
|
"learning_rate": 1.9697788944723615e-05, |
|
"loss": 2.6603, |
|
"step": 34700 |
|
}, |
|
{ |
|
"epoch": 1.42, |
|
"learning_rate": 1.9667638190954774e-05, |
|
"loss": 2.6381, |
|
"step": 34800 |
|
}, |
|
{ |
|
"epoch": 1.42, |
|
"learning_rate": 1.963748743718593e-05, |
|
"loss": 2.6358, |
|
"step": 34900 |
|
}, |
|
{ |
|
"epoch": 1.43, |
|
"learning_rate": 1.9607336683417085e-05, |
|
"loss": 2.6053, |
|
"step": 35000 |
|
}, |
|
{ |
|
"epoch": 1.43, |
|
"learning_rate": 1.957718592964824e-05, |
|
"loss": 2.6376, |
|
"step": 35100 |
|
}, |
|
{ |
|
"epoch": 1.44, |
|
"learning_rate": 1.95470351758794e-05, |
|
"loss": 2.619, |
|
"step": 35200 |
|
}, |
|
{ |
|
"epoch": 1.44, |
|
"learning_rate": 1.9516884422110554e-05, |
|
"loss": 2.6132, |
|
"step": 35300 |
|
}, |
|
{ |
|
"epoch": 1.45, |
|
"learning_rate": 1.948673366834171e-05, |
|
"loss": 2.6241, |
|
"step": 35400 |
|
}, |
|
{ |
|
"epoch": 1.45, |
|
"learning_rate": 1.9456582914572864e-05, |
|
"loss": 2.6128, |
|
"step": 35500 |
|
}, |
|
{ |
|
"epoch": 1.45, |
|
"learning_rate": 1.9426432160804023e-05, |
|
"loss": 2.6169, |
|
"step": 35600 |
|
}, |
|
{ |
|
"epoch": 1.46, |
|
"learning_rate": 1.9396281407035178e-05, |
|
"loss": 2.6206, |
|
"step": 35700 |
|
}, |
|
{ |
|
"epoch": 1.46, |
|
"learning_rate": 1.936613065326633e-05, |
|
"loss": 2.5968, |
|
"step": 35800 |
|
}, |
|
{ |
|
"epoch": 1.47, |
|
"learning_rate": 1.9335979899497485e-05, |
|
"loss": 2.6079, |
|
"step": 35900 |
|
}, |
|
{ |
|
"epoch": 1.47, |
|
"learning_rate": 1.9305829145728644e-05, |
|
"loss": 2.6236, |
|
"step": 36000 |
|
}, |
|
{ |
|
"epoch": 1.47, |
|
"eval_bleu": 80.05686051541119, |
|
"eval_gen_len": 16.246, |
|
"eval_loss": 2.579113245010376, |
|
"eval_meteor": 0.9032701849367213, |
|
"eval_rouge1": 94.3329, |
|
"eval_rouge2": 90.4972, |
|
"eval_rougeL": 92.1838, |
|
"eval_rougeLsum": 92.1803, |
|
"eval_runtime": 25.6081, |
|
"eval_samples_per_second": 19.525, |
|
"eval_steps_per_second": 2.46, |
|
"step": 36000 |
|
}, |
|
{ |
|
"epoch": 1.47, |
|
"learning_rate": 1.92756783919598e-05, |
|
"loss": 2.6189, |
|
"step": 36100 |
|
}, |
|
{ |
|
"epoch": 1.48, |
|
"learning_rate": 1.9245527638190955e-05, |
|
"loss": 2.6045, |
|
"step": 36200 |
|
}, |
|
{ |
|
"epoch": 1.48, |
|
"learning_rate": 1.921537688442211e-05, |
|
"loss": 2.6162, |
|
"step": 36300 |
|
}, |
|
{ |
|
"epoch": 1.49, |
|
"learning_rate": 1.918522613065327e-05, |
|
"loss": 2.6158, |
|
"step": 36400 |
|
}, |
|
{ |
|
"epoch": 1.49, |
|
"learning_rate": 1.9155075376884424e-05, |
|
"loss": 2.6142, |
|
"step": 36500 |
|
}, |
|
{ |
|
"epoch": 1.49, |
|
"learning_rate": 1.912492462311558e-05, |
|
"loss": 2.6383, |
|
"step": 36600 |
|
}, |
|
{ |
|
"epoch": 1.5, |
|
"learning_rate": 1.9095075376884424e-05, |
|
"loss": 2.6594, |
|
"step": 36700 |
|
}, |
|
{ |
|
"epoch": 1.5, |
|
"learning_rate": 1.906522613065327e-05, |
|
"loss": 2.6318, |
|
"step": 36800 |
|
}, |
|
{ |
|
"epoch": 1.51, |
|
"learning_rate": 1.9035075376884425e-05, |
|
"loss": 2.6514, |
|
"step": 36900 |
|
}, |
|
{ |
|
"epoch": 1.51, |
|
"learning_rate": 1.9004924623115577e-05, |
|
"loss": 2.6613, |
|
"step": 37000 |
|
}, |
|
{ |
|
"epoch": 1.51, |
|
"learning_rate": 1.8974773869346732e-05, |
|
"loss": 2.6357, |
|
"step": 37100 |
|
}, |
|
{ |
|
"epoch": 1.52, |
|
"learning_rate": 1.894462311557789e-05, |
|
"loss": 2.6453, |
|
"step": 37200 |
|
}, |
|
{ |
|
"epoch": 1.52, |
|
"learning_rate": 1.8914773869346733e-05, |
|
"loss": 2.6371, |
|
"step": 37300 |
|
}, |
|
{ |
|
"epoch": 1.53, |
|
"learning_rate": 1.888462311557789e-05, |
|
"loss": 2.6155, |
|
"step": 37400 |
|
}, |
|
{ |
|
"epoch": 1.53, |
|
"learning_rate": 1.8854773869346733e-05, |
|
"loss": 2.6097, |
|
"step": 37500 |
|
}, |
|
{ |
|
"epoch": 1.53, |
|
"learning_rate": 1.882522613065327e-05, |
|
"loss": 2.6346, |
|
"step": 37600 |
|
}, |
|
{ |
|
"epoch": 1.54, |
|
"learning_rate": 1.8795075376884424e-05, |
|
"loss": 2.6374, |
|
"step": 37700 |
|
}, |
|
{ |
|
"epoch": 1.54, |
|
"learning_rate": 1.876492462311558e-05, |
|
"loss": 2.6266, |
|
"step": 37800 |
|
}, |
|
{ |
|
"epoch": 1.55, |
|
"learning_rate": 1.8734773869346734e-05, |
|
"loss": 2.6392, |
|
"step": 37900 |
|
}, |
|
{ |
|
"epoch": 1.55, |
|
"learning_rate": 1.870462311557789e-05, |
|
"loss": 2.6279, |
|
"step": 38000 |
|
}, |
|
{ |
|
"epoch": 1.55, |
|
"eval_bleu": 80.26404550464542, |
|
"eval_gen_len": 16.212, |
|
"eval_loss": 2.613689422607422, |
|
"eval_meteor": 0.9019056042294404, |
|
"eval_rouge1": 94.1522, |
|
"eval_rouge2": 90.7421, |
|
"eval_rougeL": 92.4197, |
|
"eval_rougeLsum": 92.443, |
|
"eval_runtime": 25.3778, |
|
"eval_samples_per_second": 19.702, |
|
"eval_steps_per_second": 2.482, |
|
"step": 38000 |
|
} |
|
], |
|
"max_steps": 100000, |
|
"num_train_epochs": 5, |
|
"total_flos": 1.8512320510138778e+17, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|