stablelm-exp / trainer_state.json
TinyPixel's picture
Upload folder using huggingface_hub
e62d8f3 verified
raw
history blame
3.68 kB
{
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 0.992,
"eval_steps": 500,
"global_step": 62,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.03,
"learning_rate": 0.0002,
"loss": 1.6287,
"step": 2
},
{
"epoch": 0.06,
"learning_rate": 0.0002,
"loss": 1.6844,
"step": 4
},
{
"epoch": 0.1,
"learning_rate": 0.0002,
"loss": 1.6992,
"step": 6
},
{
"epoch": 0.13,
"learning_rate": 0.0002,
"loss": 1.7574,
"step": 8
},
{
"epoch": 0.16,
"learning_rate": 0.0002,
"loss": 1.7539,
"step": 10
},
{
"epoch": 0.19,
"learning_rate": 0.0002,
"loss": 1.768,
"step": 12
},
{
"epoch": 0.22,
"learning_rate": 0.0002,
"loss": 2.0735,
"step": 14
},
{
"epoch": 0.26,
"learning_rate": 0.0002,
"loss": 1.6763,
"step": 16
},
{
"epoch": 0.29,
"learning_rate": 0.0002,
"loss": 1.2736,
"step": 18
},
{
"epoch": 0.32,
"learning_rate": 0.0002,
"loss": 1.273,
"step": 20
},
{
"epoch": 0.35,
"learning_rate": 0.0002,
"loss": 1.3671,
"step": 22
},
{
"epoch": 0.38,
"learning_rate": 0.0002,
"loss": 1.1594,
"step": 24
},
{
"epoch": 0.42,
"learning_rate": 0.0002,
"loss": 1.2744,
"step": 26
},
{
"epoch": 0.45,
"learning_rate": 0.0002,
"loss": 1.3281,
"step": 28
},
{
"epoch": 0.48,
"learning_rate": 0.0002,
"loss": 1.2365,
"step": 30
},
{
"epoch": 0.51,
"learning_rate": 0.0002,
"loss": 1.1392,
"step": 32
},
{
"epoch": 0.54,
"learning_rate": 0.0002,
"loss": 1.1334,
"step": 34
},
{
"epoch": 0.58,
"learning_rate": 0.0002,
"loss": 1.0542,
"step": 36
},
{
"epoch": 0.61,
"learning_rate": 0.0002,
"loss": 1.1079,
"step": 38
},
{
"epoch": 0.64,
"learning_rate": 0.0002,
"loss": 1.1394,
"step": 40
},
{
"epoch": 0.67,
"learning_rate": 0.0002,
"loss": 1.122,
"step": 42
},
{
"epoch": 0.7,
"learning_rate": 0.0002,
"loss": 1.1356,
"step": 44
},
{
"epoch": 0.74,
"learning_rate": 0.0002,
"loss": 1.2301,
"step": 46
},
{
"epoch": 0.77,
"learning_rate": 0.0002,
"loss": 1.1028,
"step": 48
},
{
"epoch": 0.8,
"learning_rate": 0.0002,
"loss": 1.0714,
"step": 50
},
{
"epoch": 0.83,
"learning_rate": 0.0002,
"loss": 1.0011,
"step": 52
},
{
"epoch": 0.86,
"learning_rate": 0.0002,
"loss": 1.0505,
"step": 54
},
{
"epoch": 0.9,
"learning_rate": 0.0002,
"loss": 1.0133,
"step": 56
},
{
"epoch": 0.93,
"learning_rate": 0.0002,
"loss": 1.0462,
"step": 58
},
{
"epoch": 0.96,
"learning_rate": 0.0002,
"loss": 1.0668,
"step": 60
},
{
"epoch": 0.99,
"learning_rate": 0.0002,
"loss": 1.0093,
"step": 62
}
],
"logging_steps": 2,
"max_steps": 62,
"num_input_tokens_seen": 0,
"num_train_epochs": 1,
"save_steps": 500,
"total_flos": 2204302063927296.0,
"train_batch_size": 1,
"trial_name": null,
"trial_params": null
}