Mistral-7B-v0.1_caselaw / trainer_state.json
{
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 1.9980713596914175,
"eval_steps": 50,
"global_step": 518,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.04,
"learning_rate": 5e-05,
"loss": 1.5247,
"step": 10
},
{
"epoch": 0.08,
"learning_rate": 5e-05,
"loss": 1.4114,
"step": 20
},
{
"epoch": 0.12,
"learning_rate": 5e-05,
"loss": 1.3203,
"step": 30
},
{
"epoch": 0.15,
"learning_rate": 5e-05,
"loss": 1.3566,
"step": 40
},
{
"epoch": 0.19,
"learning_rate": 5e-05,
"loss": 1.309,
"step": 50
},
{
"epoch": 0.19,
"eval_loss": 1.3630590438842773,
"eval_runtime": 26.2031,
"eval_samples_per_second": 33.355,
"eval_steps_per_second": 2.099,
"step": 50
},
{
"epoch": 0.23,
"learning_rate": 5e-05,
"loss": 1.2856,
"step": 60
},
{
"epoch": 0.27,
"learning_rate": 5e-05,
"loss": 1.3524,
"step": 70
},
{
"epoch": 0.31,
"learning_rate": 5e-05,
"loss": 1.317,
"step": 80
},
{
"epoch": 0.35,
"learning_rate": 5e-05,
"loss": 1.324,
"step": 90
},
{
"epoch": 0.39,
"learning_rate": 5e-05,
"loss": 1.2966,
"step": 100
},
{
"epoch": 0.39,
"eval_loss": 1.3339532613754272,
"eval_runtime": 25.949,
"eval_samples_per_second": 33.681,
"eval_steps_per_second": 2.12,
"step": 100
},
{
"epoch": 0.42,
"learning_rate": 5e-05,
"loss": 1.3073,
"step": 110
},
{
"epoch": 0.46,
"learning_rate": 5e-05,
"loss": 1.3162,
"step": 120
},
{
"epoch": 0.5,
"learning_rate": 5e-05,
"loss": 1.3299,
"step": 130
},
{
"epoch": 0.54,
"learning_rate": 5e-05,
"loss": 1.3271,
"step": 140
},
{
"epoch": 0.58,
"learning_rate": 5e-05,
"loss": 1.2914,
"step": 150
},
{
"epoch": 0.58,
"eval_loss": 1.3168174028396606,
"eval_runtime": 26.0065,
"eval_samples_per_second": 33.607,
"eval_steps_per_second": 2.115,
"step": 150
},
{
"epoch": 0.62,
"learning_rate": 5e-05,
"loss": 1.2526,
"step": 160
},
{
"epoch": 0.66,
"learning_rate": 5e-05,
"loss": 1.2924,
"step": 170
},
{
"epoch": 0.69,
"learning_rate": 5e-05,
"loss": 1.3303,
"step": 180
},
{
"epoch": 0.73,
"learning_rate": 5e-05,
"loss": 1.3173,
"step": 190
},
{
"epoch": 0.77,
"learning_rate": 5e-05,
"loss": 1.298,
"step": 200
},
{
"epoch": 0.77,
"eval_loss": 1.303916096687317,
"eval_runtime": 26.2236,
"eval_samples_per_second": 33.329,
"eval_steps_per_second": 2.097,
"step": 200
},
{
"epoch": 0.81,
"learning_rate": 5e-05,
"loss": 1.2402,
"step": 210
},
{
"epoch": 0.85,
"learning_rate": 5e-05,
"loss": 1.2768,
"step": 220
},
{
"epoch": 0.89,
"learning_rate": 5e-05,
"loss": 1.2929,
"step": 230
},
{
"epoch": 0.93,
"learning_rate": 5e-05,
"loss": 1.2744,
"step": 240
},
{
"epoch": 0.96,
"learning_rate": 5e-05,
"loss": 1.2678,
"step": 250
},
{
"epoch": 0.96,
"eval_loss": 1.2991052865982056,
"eval_runtime": 26.0876,
"eval_samples_per_second": 33.502,
"eval_steps_per_second": 2.108,
"step": 250
},
{
"epoch": 1.0,
"learning_rate": 5e-05,
"loss": 1.2506,
"step": 260
},
{
"epoch": 1.04,
"learning_rate": 5e-05,
"loss": 1.1717,
"step": 270
},
{
"epoch": 1.08,
"learning_rate": 5e-05,
"loss": 1.2022,
"step": 280
},
{
"epoch": 1.12,
"learning_rate": 5e-05,
"loss": 1.2237,
"step": 290
},
{
"epoch": 1.16,
"learning_rate": 5e-05,
"loss": 1.216,
"step": 300
},
{
"epoch": 1.16,
"eval_loss": 1.3007503747940063,
"eval_runtime": 28.1383,
"eval_samples_per_second": 31.061,
"eval_steps_per_second": 1.955,
"step": 300
},
{
"epoch": 1.2,
"learning_rate": 5e-05,
"loss": 1.2264,
"step": 310
},
{
"epoch": 1.23,
"learning_rate": 5e-05,
"loss": 1.1461,
"step": 320
},
{
"epoch": 1.27,
"learning_rate": 5e-05,
"loss": 1.1855,
"step": 330
},
{
"epoch": 1.31,
"learning_rate": 5e-05,
"loss": 1.2014,
"step": 340
},
{
"epoch": 1.35,
"learning_rate": 5e-05,
"loss": 1.2467,
"step": 350
},
{
"epoch": 1.35,
"eval_loss": 1.2945101261138916,
"eval_runtime": 25.9821,
"eval_samples_per_second": 33.639,
"eval_steps_per_second": 2.117,
"step": 350
},
{
"epoch": 1.39,
"learning_rate": 5e-05,
"loss": 1.2136,
"step": 360
},
{
"epoch": 1.43,
"learning_rate": 5e-05,
"loss": 1.1727,
"step": 370
},
{
"epoch": 1.47,
"learning_rate": 5e-05,
"loss": 1.192,
"step": 380
},
{
"epoch": 1.5,
"learning_rate": 5e-05,
"loss": 1.1963,
"step": 390
},
{
"epoch": 1.54,
"learning_rate": 5e-05,
"loss": 1.223,
"step": 400
},
{
"epoch": 1.54,
"eval_loss": 1.293986201286316,
"eval_runtime": 26.8556,
"eval_samples_per_second": 32.544,
"eval_steps_per_second": 2.048,
"step": 400
},
{
"epoch": 1.58,
"learning_rate": 5e-05,
"loss": 1.2076,
"step": 410
},
{
"epoch": 1.62,
"learning_rate": 5e-05,
"loss": 1.1373,
"step": 420
},
{
"epoch": 1.66,
"learning_rate": 5e-05,
"loss": 1.224,
"step": 430
},
{
"epoch": 1.7,
"learning_rate": 5e-05,
"loss": 1.1841,
"step": 440
},
{
"epoch": 1.74,
"learning_rate": 5e-05,
"loss": 1.2,
"step": 450
},
{
"epoch": 1.74,
"eval_loss": 1.2924402952194214,
"eval_runtime": 29.7855,
"eval_samples_per_second": 29.343,
"eval_steps_per_second": 1.847,
"step": 450
},
{
"epoch": 1.77,
"learning_rate": 5e-05,
"loss": 1.1896,
"step": 460
},
{
"epoch": 1.81,
"learning_rate": 5e-05,
"loss": 1.1476,
"step": 470
},
{
"epoch": 1.85,
"learning_rate": 5e-05,
"loss": 1.1986,
"step": 480
},
{
"epoch": 1.89,
"learning_rate": 5e-05,
"loss": 1.2185,
"step": 490
},
{
"epoch": 1.93,
"learning_rate": 5e-05,
"loss": 1.2406,
"step": 500
},
{
"epoch": 1.93,
"eval_loss": 1.2884066104888916,
"eval_runtime": 26.2052,
"eval_samples_per_second": 33.352,
"eval_steps_per_second": 2.099,
"step": 500
},
{
"epoch": 1.97,
"learning_rate": 5e-05,
"loss": 1.1962,
"step": 510
},
{
"epoch": 2.0,
"step": 518,
"total_flos": 5.99394721244119e+17,
"train_loss": 1.2557248572124937,
"train_runtime": 2763.6306,
"train_samples_per_second": 12.007,
"train_steps_per_second": 0.187
}
],
"logging_steps": 10,
"max_steps": 518,
"num_input_tokens_seen": 0,
"num_train_epochs": 2,
"save_steps": 100,
"total_flos": 5.99394721244119e+17,
"train_batch_size": 4,
"trial_name": null,
"trial_params": null
}