|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 16.995936461026968, |
|
"global_step": 3834, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 1.0, |
|
"eval_bleu": 69.52679870560905, |
|
"eval_loss": 0.16526192426681519, |
|
"eval_perfect_accuracy": 0.10141313383208644, |
|
"eval_runtime": 3340.3895, |
|
"eval_samples_per_second": 1.08, |
|
"eval_steps_per_second": 0.045, |
|
"eval_subword_accuracy": 0.5594287933042841, |
|
"eval_token_accuracy": 0.6964062297769533, |
|
"eval_word_accuracy": 0.4744973429025366, |
|
"step": 225 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"eval_bleu": 74.31204559162617, |
|
"eval_loss": 0.11631891876459122, |
|
"eval_perfect_accuracy": 0.14990302022720975, |
|
"eval_runtime": 3269.4249, |
|
"eval_samples_per_second": 1.104, |
|
"eval_steps_per_second": 0.046, |
|
"eval_subword_accuracy": 0.6142826689405244, |
|
"eval_token_accuracy": 0.7389895544659987, |
|
"eval_word_accuracy": 0.532967032967033, |
|
"step": 451 |
|
}, |
|
{ |
|
"epoch": 2.22, |
|
"learning_rate": 4.555555555555556e-05, |
|
"loss": 0.6045, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 3.0, |
|
"eval_bleu": 76.85411902074411, |
|
"eval_loss": 0.09909386187791824, |
|
"eval_perfect_accuracy": 0.18952618453865336, |
|
"eval_runtime": 3381.7443, |
|
"eval_samples_per_second": 1.067, |
|
"eval_steps_per_second": 0.045, |
|
"eval_subword_accuracy": 0.6512524901453821, |
|
"eval_token_accuracy": 0.7695926757936676, |
|
"eval_word_accuracy": 0.5709718317623064, |
|
"step": 676 |
|
}, |
|
{ |
|
"epoch": 4.0, |
|
"eval_bleu": 79.14575222404268, |
|
"eval_loss": 0.09123498946428299, |
|
"eval_perfect_accuracy": 0.21169298974785258, |
|
"eval_runtime": 3480.4117, |
|
"eval_samples_per_second": 1.037, |
|
"eval_steps_per_second": 0.043, |
|
"eval_subword_accuracy": 0.6773669070746654, |
|
"eval_token_accuracy": 0.7950973443948806, |
|
"eval_word_accuracy": 0.5997376762487704, |
|
"step": 902 |
|
}, |
|
{ |
|
"epoch": 4.43, |
|
"learning_rate": 4.111111111111111e-05, |
|
"loss": 0.1137, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 5.0, |
|
"eval_bleu": 80.22128752790911, |
|
"eval_loss": 0.08361164480447769, |
|
"eval_perfect_accuracy": 0.23330562482682182, |
|
"eval_runtime": 1727.4258, |
|
"eval_samples_per_second": 2.089, |
|
"eval_steps_per_second": 0.087, |
|
"eval_subword_accuracy": 0.693643785404865, |
|
"eval_token_accuracy": 0.8034821726091302, |
|
"eval_word_accuracy": 0.6188809276376962, |
|
"step": 1127 |
|
}, |
|
{ |
|
"epoch": 6.0, |
|
"eval_bleu": 80.47331007551738, |
|
"eval_loss": 0.07888099551200867, |
|
"eval_perfect_accuracy": 0.25270157938487114, |
|
"eval_runtime": 1706.4952, |
|
"eval_samples_per_second": 2.115, |
|
"eval_steps_per_second": 0.088, |
|
"eval_subword_accuracy": 0.6985128247973933, |
|
"eval_token_accuracy": 0.8036176790040939, |
|
"eval_word_accuracy": 0.6238234160841085, |
|
"step": 1353 |
|
}, |
|
{ |
|
"epoch": 6.65, |
|
"learning_rate": 3.6666666666666666e-05, |
|
"loss": 0.0886, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 7.0, |
|
"eval_bleu": 81.35496460228585, |
|
"eval_loss": 0.07820562273263931, |
|
"eval_perfect_accuracy": 0.257689110556941, |
|
"eval_runtime": 1696.8822, |
|
"eval_samples_per_second": 2.127, |
|
"eval_steps_per_second": 0.089, |
|
"eval_subword_accuracy": 0.7077631303620004, |
|
"eval_token_accuracy": 0.8164888051893702, |
|
"eval_word_accuracy": 0.6347371318469585, |
|
"step": 1579 |
|
}, |
|
{ |
|
"epoch": 8.0, |
|
"eval_bleu": 81.4155970043395, |
|
"eval_loss": 0.07889819890260696, |
|
"eval_perfect_accuracy": 0.2454973676918814, |
|
"eval_runtime": 1666.9192, |
|
"eval_samples_per_second": 2.165, |
|
"eval_steps_per_second": 0.091, |
|
"eval_subword_accuracy": 0.6984033613445378, |
|
"eval_token_accuracy": 0.8176890756302521, |
|
"eval_word_accuracy": 0.6222492535662943, |
|
"step": 1804 |
|
}, |
|
{ |
|
"epoch": 8.87, |
|
"learning_rate": 3.222222222222223e-05, |
|
"loss": 0.0768, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 9.0, |
|
"eval_bleu": 81.70774187369526, |
|
"eval_loss": 0.07279801368713379, |
|
"eval_perfect_accuracy": 0.27847049044056527, |
|
"eval_runtime": 1589.4085, |
|
"eval_samples_per_second": 2.271, |
|
"eval_steps_per_second": 0.095, |
|
"eval_subword_accuracy": 0.7123310810810811, |
|
"eval_token_accuracy": 0.8106841216216216, |
|
"eval_word_accuracy": 0.6412536119137586, |
|
"step": 2030 |
|
}, |
|
{ |
|
"epoch": 10.0, |
|
"eval_bleu": 82.60753055516308, |
|
"eval_loss": 0.07139766961336136, |
|
"eval_perfect_accuracy": 0.29897478525907456, |
|
"eval_runtime": 1630.9624, |
|
"eval_samples_per_second": 2.213, |
|
"eval_steps_per_second": 0.093, |
|
"eval_subword_accuracy": 0.7325649377766642, |
|
"eval_token_accuracy": 0.8289902280130294, |
|
"eval_word_accuracy": 0.6648324437352116, |
|
"step": 2255 |
|
}, |
|
{ |
|
"epoch": 11.0, |
|
"eval_bleu": 83.62166331521699, |
|
"eval_loss": 0.06930351257324219, |
|
"eval_perfect_accuracy": 0.30950401773344416, |
|
"eval_runtime": 1612.5857, |
|
"eval_samples_per_second": 2.238, |
|
"eval_steps_per_second": 0.094, |
|
"eval_subword_accuracy": 0.739740238184157, |
|
"eval_token_accuracy": 0.8358438109465123, |
|
"eval_word_accuracy": 0.6716549295774648, |
|
"step": 2481 |
|
}, |
|
{ |
|
"epoch": 11.08, |
|
"learning_rate": 2.777777777777778e-05, |
|
"loss": 0.0681, |
|
"step": 2500 |
|
}, |
|
{ |
|
"epoch": 12.0, |
|
"eval_bleu": 84.02807089662639, |
|
"eval_loss": 0.06798376142978668, |
|
"eval_perfect_accuracy": 0.3081185924078692, |
|
"eval_runtime": 1680.6877, |
|
"eval_samples_per_second": 2.147, |
|
"eval_steps_per_second": 0.09, |
|
"eval_subword_accuracy": 0.745770423991727, |
|
"eval_token_accuracy": 0.8428541882109617, |
|
"eval_word_accuracy": 0.6793930762489044, |
|
"step": 2707 |
|
}, |
|
{ |
|
"epoch": 13.0, |
|
"eval_bleu": 84.42354785113844, |
|
"eval_loss": 0.06762586534023285, |
|
"eval_perfect_accuracy": 0.31698531449154893, |
|
"eval_runtime": 1751.9665, |
|
"eval_samples_per_second": 2.06, |
|
"eval_steps_per_second": 0.086, |
|
"eval_subword_accuracy": 0.7447953313190679, |
|
"eval_token_accuracy": 0.8407350689127105, |
|
"eval_word_accuracy": 0.67777227994942, |
|
"step": 2932 |
|
}, |
|
{ |
|
"epoch": 13.3, |
|
"learning_rate": 2.3333333333333336e-05, |
|
"loss": 0.0619, |
|
"step": 3000 |
|
}, |
|
{ |
|
"epoch": 14.0, |
|
"eval_bleu": 84.59233008431931, |
|
"eval_loss": 0.06648800522089005, |
|
"eval_perfect_accuracy": 0.32696037683568857, |
|
"eval_runtime": 1738.746, |
|
"eval_samples_per_second": 2.076, |
|
"eval_steps_per_second": 0.087, |
|
"eval_subword_accuracy": 0.7536297454036935, |
|
"eval_token_accuracy": 0.8476946489532349, |
|
"eval_word_accuracy": 0.6883173734610123, |
|
"step": 3158 |
|
}, |
|
{ |
|
"epoch": 15.0, |
|
"eval_bleu": 84.49240456076029, |
|
"eval_loss": 0.06621185690164566, |
|
"eval_perfect_accuracy": 0.32917705735660846, |
|
"eval_runtime": 1720.559, |
|
"eval_samples_per_second": 2.098, |
|
"eval_steps_per_second": 0.088, |
|
"eval_subword_accuracy": 0.7496996063807748, |
|
"eval_token_accuracy": 0.8439196188108556, |
|
"eval_word_accuracy": 0.6839415256100242, |
|
"step": 3383 |
|
}, |
|
{ |
|
"epoch": 15.52, |
|
"learning_rate": 1.888888888888889e-05, |
|
"loss": 0.0574, |
|
"step": 3500 |
|
}, |
|
{ |
|
"epoch": 16.0, |
|
"eval_bleu": 85.02924908908608, |
|
"eval_loss": 0.06474177539348602, |
|
"eval_perfect_accuracy": 0.343585480742588, |
|
"eval_runtime": 1678.6132, |
|
"eval_samples_per_second": 2.15, |
|
"eval_steps_per_second": 0.09, |
|
"eval_subword_accuracy": 0.7591370348981107, |
|
"eval_token_accuracy": 0.8501773781041169, |
|
"eval_word_accuracy": 0.6949701058636388, |
|
"step": 3609 |
|
}, |
|
{ |
|
"epoch": 17.0, |
|
"eval_bleu": 84.79034637013908, |
|
"eval_loss": 0.06463250517845154, |
|
"eval_perfect_accuracy": 0.34968135217511775, |
|
"eval_runtime": 1711.9697, |
|
"eval_samples_per_second": 2.108, |
|
"eval_steps_per_second": 0.088, |
|
"eval_subword_accuracy": 0.762099607356892, |
|
"eval_token_accuracy": 0.8493490390576566, |
|
"eval_word_accuracy": 0.7003675462175655, |
|
"step": 3834 |
|
} |
|
], |
|
"max_steps": 5625, |
|
"num_train_epochs": 25, |
|
"total_flos": 6098403774431232.0, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|