flan-xl-gen5 / last-checkpoint /trainer_state.json
devvanshhh's picture
Training in progress, epoch 12, checkpoint
9ba1d10
{
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 12.0,
"eval_steps": 500,
"global_step": 3936,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 1.0,
"eval_gen_len": 15.577319587628866,
"eval_loss": 8.096635818481445,
"eval_rouge1": 16.5418,
"eval_rouge2": 10.3523,
"eval_rougeL": 13.972,
"eval_rougeLsum": 14.1918,
"eval_runtime": 157.1024,
"eval_samples_per_second": 1.852,
"eval_steps_per_second": 0.236,
"step": 328
},
{
"epoch": 1.52,
"learning_rate": 1.8394004282655248e-05,
"loss": 18.3143,
"step": 500
},
{
"epoch": 2.0,
"eval_gen_len": 8.896907216494846,
"eval_loss": 0.9259825944900513,
"eval_rouge1": 31.5806,
"eval_rouge2": 27.0287,
"eval_rougeL": 29.6916,
"eval_rougeLsum": 30.0327,
"eval_runtime": 155.4292,
"eval_samples_per_second": 1.872,
"eval_steps_per_second": 0.238,
"step": 656
},
{
"epoch": 3.0,
"eval_gen_len": 13.807560137457045,
"eval_loss": 0.7708158493041992,
"eval_rouge1": 22.6847,
"eval_rouge2": 15.805,
"eval_rougeL": 19.6336,
"eval_rougeLsum": 19.8945,
"eval_runtime": 155.3127,
"eval_samples_per_second": 1.874,
"eval_steps_per_second": 0.238,
"step": 984
},
{
"epoch": 3.05,
"learning_rate": 1.5717344753747325e-05,
"loss": 1.0739,
"step": 1000
},
{
"epoch": 4.0,
"eval_gen_len": 9.618556701030927,
"eval_loss": 0.7307576537132263,
"eval_rouge1": 35.1675,
"eval_rouge2": 27.3998,
"eval_rougeL": 31.8527,
"eval_rougeLsum": 32.0356,
"eval_runtime": 133.4505,
"eval_samples_per_second": 2.181,
"eval_steps_per_second": 0.277,
"step": 1312
},
{
"epoch": 4.57,
"learning_rate": 1.3040685224839403e-05,
"loss": 0.8085,
"step": 1500
},
{
"epoch": 5.0,
"eval_gen_len": 10.116838487972508,
"eval_loss": 0.708371102809906,
"eval_rouge1": 34.4346,
"eval_rouge2": 26.202,
"eval_rougeL": 30.8999,
"eval_rougeLsum": 31.212,
"eval_runtime": 139.0571,
"eval_samples_per_second": 2.093,
"eval_steps_per_second": 0.266,
"step": 1640
},
{
"epoch": 6.0,
"eval_gen_len": 10.268041237113403,
"eval_loss": 0.6923775672912598,
"eval_rouge1": 34.3345,
"eval_rouge2": 26.0144,
"eval_rougeL": 30.692,
"eval_rougeLsum": 31.0384,
"eval_runtime": 141.1422,
"eval_samples_per_second": 2.062,
"eval_steps_per_second": 0.262,
"step": 1968
},
{
"epoch": 6.1,
"learning_rate": 1.036402569593148e-05,
"loss": 0.7597,
"step": 2000
},
{
"epoch": 7.0,
"eval_gen_len": 10.31958762886598,
"eval_loss": 0.6812536716461182,
"eval_rouge1": 34.3854,
"eval_rouge2": 26.0495,
"eval_rougeL": 30.8335,
"eval_rougeLsum": 31.1696,
"eval_runtime": 143.6068,
"eval_samples_per_second": 2.026,
"eval_steps_per_second": 0.258,
"step": 2296
},
{
"epoch": 7.62,
"learning_rate": 7.687366167023556e-06,
"loss": 0.7442,
"step": 2500
},
{
"epoch": 8.0,
"eval_gen_len": 10.360824742268042,
"eval_loss": 0.6728662252426147,
"eval_rouge1": 34.3758,
"eval_rouge2": 26.0079,
"eval_rougeL": 30.7863,
"eval_rougeLsum": 31.1239,
"eval_runtime": 142.7676,
"eval_samples_per_second": 2.038,
"eval_steps_per_second": 0.259,
"step": 2624
},
{
"epoch": 9.0,
"eval_gen_len": 10.49828178694158,
"eval_loss": 0.6669635772705078,
"eval_rouge1": 34.2115,
"eval_rouge2": 25.7443,
"eval_rougeL": 30.5369,
"eval_rougeLsum": 30.9282,
"eval_runtime": 145.2485,
"eval_samples_per_second": 2.003,
"eval_steps_per_second": 0.255,
"step": 2952
},
{
"epoch": 9.15,
"learning_rate": 5.010706638115633e-06,
"loss": 0.7252,
"step": 3000
},
{
"epoch": 10.0,
"eval_gen_len": 10.529209621993127,
"eval_loss": 0.6624875664710999,
"eval_rouge1": 34.2518,
"eval_rouge2": 25.7147,
"eval_rougeL": 30.5433,
"eval_rougeLsum": 30.9116,
"eval_runtime": 146.9477,
"eval_samples_per_second": 1.98,
"eval_steps_per_second": 0.252,
"step": 3280
},
{
"epoch": 10.67,
"learning_rate": 2.334047109207709e-06,
"loss": 0.7168,
"step": 3500
},
{
"epoch": 11.0,
"eval_gen_len": 10.618556701030927,
"eval_loss": 0.6601226925849915,
"eval_rouge1": 34.0539,
"eval_rouge2": 25.5073,
"eval_rougeL": 30.329,
"eval_rougeLsum": 30.6828,
"eval_runtime": 146.8209,
"eval_samples_per_second": 1.982,
"eval_steps_per_second": 0.252,
"step": 3608
},
{
"epoch": 12.0,
"eval_gen_len": 10.532646048109966,
"eval_loss": 0.6593531370162964,
"eval_rouge1": 34.2696,
"eval_rouge2": 25.7973,
"eval_rougeL": 30.5609,
"eval_rougeLsum": 30.9651,
"eval_runtime": 145.5529,
"eval_samples_per_second": 1.999,
"eval_steps_per_second": 0.254,
"step": 3936
}
],
"logging_steps": 500,
"max_steps": 3936,
"num_train_epochs": 12,
"save_steps": 500,
"total_flos": 2.949770616687821e+16,
"trial_name": null,
"trial_params": null
}