llama2-alpaca-sft-2epoch / trainer_state.json
zhangchuheng123's picture
First model version
41e3ee8
raw
history blame
5.3 kB
{
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 10.292993630573248,
"eval_steps": 100,
"global_step": 1616,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.64,
"learning_rate": 2e-05,
"loss": 1.1355,
"step": 100
},
{
"epoch": 0.64,
"eval_loss": 1.157366156578064,
"eval_runtime": 30.4555,
"eval_samples_per_second": 65.67,
"eval_steps_per_second": 1.051,
"step": 100
},
{
"epoch": 1.27,
"learning_rate": 2e-05,
"loss": 0.881,
"step": 200
},
{
"epoch": 1.27,
"eval_loss": 1.3262789249420166,
"eval_runtime": 29.1383,
"eval_samples_per_second": 68.638,
"eval_steps_per_second": 1.098,
"step": 200
},
{
"epoch": 1.91,
"learning_rate": 2e-05,
"loss": 0.5496,
"step": 300
},
{
"epoch": 1.91,
"eval_loss": 1.3176809549331665,
"eval_runtime": 28.8909,
"eval_samples_per_second": 69.226,
"eval_steps_per_second": 1.108,
"step": 300
},
{
"epoch": 2.55,
"learning_rate": 2e-05,
"loss": 0.2597,
"step": 400
},
{
"epoch": 2.55,
"eval_loss": 1.5806535482406616,
"eval_runtime": 29.1193,
"eval_samples_per_second": 68.683,
"eval_steps_per_second": 1.099,
"step": 400
},
{
"epoch": 3.18,
"learning_rate": 2e-05,
"loss": 0.1877,
"step": 500
},
{
"epoch": 3.18,
"eval_loss": 1.7526657581329346,
"eval_runtime": 29.1196,
"eval_samples_per_second": 68.682,
"eval_steps_per_second": 1.099,
"step": 500
},
{
"epoch": 3.82,
"learning_rate": 2e-05,
"loss": 0.1158,
"step": 600
},
{
"epoch": 3.82,
"eval_loss": 1.7486767768859863,
"eval_runtime": 29.2706,
"eval_samples_per_second": 68.328,
"eval_steps_per_second": 1.093,
"step": 600
},
{
"epoch": 4.46,
"learning_rate": 2e-05,
"loss": 0.0855,
"step": 700
},
{
"epoch": 4.46,
"eval_loss": 1.873838186264038,
"eval_runtime": 29.2292,
"eval_samples_per_second": 68.425,
"eval_steps_per_second": 1.095,
"step": 700
},
{
"epoch": 5.1,
"learning_rate": 2e-05,
"loss": 0.0645,
"step": 800
},
{
"epoch": 5.1,
"eval_loss": 1.9275007247924805,
"eval_runtime": 29.013,
"eval_samples_per_second": 68.935,
"eval_steps_per_second": 1.103,
"step": 800
},
{
"epoch": 5.73,
"learning_rate": 2e-05,
"loss": 0.0518,
"step": 900
},
{
"epoch": 5.73,
"eval_loss": 1.9070993661880493,
"eval_runtime": 30.5536,
"eval_samples_per_second": 65.459,
"eval_steps_per_second": 1.047,
"step": 900
},
{
"epoch": 6.37,
"learning_rate": 2e-05,
"loss": 0.0464,
"step": 1000
},
{
"epoch": 6.37,
"eval_loss": 1.9601927995681763,
"eval_runtime": 28.9827,
"eval_samples_per_second": 69.007,
"eval_steps_per_second": 1.104,
"step": 1000
},
{
"epoch": 7.01,
"learning_rate": 2e-05,
"loss": 0.0367,
"step": 1100
},
{
"epoch": 7.01,
"eval_loss": 1.973179817199707,
"eval_runtime": 29.0672,
"eval_samples_per_second": 68.806,
"eval_steps_per_second": 1.101,
"step": 1100
},
{
"epoch": 7.64,
"learning_rate": 2e-05,
"loss": 0.0288,
"step": 1200
},
{
"epoch": 7.64,
"eval_loss": 2.0399632453918457,
"eval_runtime": 29.0049,
"eval_samples_per_second": 68.954,
"eval_steps_per_second": 1.103,
"step": 1200
},
{
"epoch": 8.28,
"learning_rate": 2e-05,
"loss": 0.0265,
"step": 1300
},
{
"epoch": 8.28,
"eval_loss": 2.0276734828948975,
"eval_runtime": 28.9115,
"eval_samples_per_second": 69.177,
"eval_steps_per_second": 1.107,
"step": 1300
},
{
"epoch": 8.92,
"learning_rate": 2e-05,
"loss": 0.0287,
"step": 1400
},
{
"epoch": 8.92,
"eval_loss": 2.049071788787842,
"eval_runtime": 29.0231,
"eval_samples_per_second": 68.911,
"eval_steps_per_second": 1.103,
"step": 1400
},
{
"epoch": 9.55,
"learning_rate": 2e-05,
"loss": 0.0195,
"step": 1500
},
{
"epoch": 9.55,
"eval_loss": 2.043515205383301,
"eval_runtime": 29.1891,
"eval_samples_per_second": 68.519,
"eval_steps_per_second": 1.096,
"step": 1500
},
{
"epoch": 10.19,
"learning_rate": 2e-05,
"loss": 0.0215,
"step": 1600
},
{
"epoch": 10.19,
"eval_loss": 2.101804733276367,
"eval_runtime": 28.9478,
"eval_samples_per_second": 69.09,
"eval_steps_per_second": 1.105,
"step": 1600
}
],
"logging_steps": 100,
"max_steps": 1616,
"num_train_epochs": 11,
"save_steps": 808,
"total_flos": 0.0,
"trial_name": null,
"trial_params": null
}