stablelm-exp / trainer_state.json
TinyPixel's picture
Upload folder using huggingface_hub
c407ef4 verified
raw
history blame
3.68 kB
{
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 0.992,
"eval_steps": 500,
"global_step": 62,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.03,
"learning_rate": 0.0002,
"loss": 1.6044,
"step": 2
},
{
"epoch": 0.06,
"learning_rate": 0.0002,
"loss": 1.5881,
"step": 4
},
{
"epoch": 0.1,
"learning_rate": 0.0002,
"loss": 1.5893,
"step": 6
},
{
"epoch": 0.13,
"learning_rate": 0.0002,
"loss": 1.6713,
"step": 8
},
{
"epoch": 0.16,
"learning_rate": 0.0002,
"loss": 1.6675,
"step": 10
},
{
"epoch": 0.19,
"learning_rate": 0.0002,
"loss": 1.728,
"step": 12
},
{
"epoch": 0.22,
"learning_rate": 0.0002,
"loss": 2.0484,
"step": 14
},
{
"epoch": 0.26,
"learning_rate": 0.0002,
"loss": 1.6691,
"step": 16
},
{
"epoch": 0.29,
"learning_rate": 0.0002,
"loss": 1.2777,
"step": 18
},
{
"epoch": 0.32,
"learning_rate": 0.0002,
"loss": 1.3286,
"step": 20
},
{
"epoch": 0.35,
"learning_rate": 0.0002,
"loss": 1.2907,
"step": 22
},
{
"epoch": 0.38,
"learning_rate": 0.0002,
"loss": 1.2286,
"step": 24
},
{
"epoch": 0.42,
"learning_rate": 0.0002,
"loss": 1.3034,
"step": 26
},
{
"epoch": 0.45,
"learning_rate": 0.0002,
"loss": 1.2253,
"step": 28
},
{
"epoch": 0.48,
"learning_rate": 0.0002,
"loss": 1.398,
"step": 30
},
{
"epoch": 0.51,
"learning_rate": 0.0002,
"loss": 1.1233,
"step": 32
},
{
"epoch": 0.54,
"learning_rate": 0.0002,
"loss": 1.181,
"step": 34
},
{
"epoch": 0.58,
"learning_rate": 0.0002,
"loss": 1.1303,
"step": 36
},
{
"epoch": 0.61,
"learning_rate": 0.0002,
"loss": 1.12,
"step": 38
},
{
"epoch": 0.64,
"learning_rate": 0.0002,
"loss": 1.191,
"step": 40
},
{
"epoch": 0.67,
"learning_rate": 0.0002,
"loss": 1.1562,
"step": 42
},
{
"epoch": 0.7,
"learning_rate": 0.0002,
"loss": 1.1221,
"step": 44
},
{
"epoch": 0.74,
"learning_rate": 0.0002,
"loss": 1.2625,
"step": 46
},
{
"epoch": 0.77,
"learning_rate": 0.0002,
"loss": 1.0936,
"step": 48
},
{
"epoch": 0.8,
"learning_rate": 0.0002,
"loss": 1.048,
"step": 50
},
{
"epoch": 0.83,
"learning_rate": 0.0002,
"loss": 0.9645,
"step": 52
},
{
"epoch": 0.86,
"learning_rate": 0.0002,
"loss": 1.0299,
"step": 54
},
{
"epoch": 0.9,
"learning_rate": 0.0002,
"loss": 1.0774,
"step": 56
},
{
"epoch": 0.93,
"learning_rate": 0.0002,
"loss": 1.1457,
"step": 58
},
{
"epoch": 0.96,
"learning_rate": 0.0002,
"loss": 1.1011,
"step": 60
},
{
"epoch": 0.99,
"learning_rate": 0.0002,
"loss": 1.0221,
"step": 62
}
],
"logging_steps": 2,
"max_steps": 62,
"num_input_tokens_seen": 0,
"num_train_epochs": 1,
"save_steps": 500,
"total_flos": 2179055645245440.0,
"train_batch_size": 1,
"trial_name": null,
"trial_params": null
}