ryan_model314_3 / trainer_state.json
rshrott's picture
🍻 cheers
ff9cd40 verified
{
"best_metric": 0.2750368118286133,
"best_model_checkpoint": "./ryan_model314_3/checkpoint-550",
"epoch": 0.88,
"eval_steps": 50,
"global_step": 550,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.08,
"grad_norm": 1.5017586946487427,
"learning_rate": 0.000192,
"loss": 0.4423,
"step": 50
},
{
"epoch": 0.08,
"eval_loss": 0.33861014246940613,
"eval_na_accuracy": 0.904,
"eval_ordinal_accuracy": 0.4629418472063854,
"eval_ordinal_mae": 0.6577621472191316,
"eval_runtime": 123.3898,
"eval_samples_per_second": 8.104,
"eval_steps_per_second": 1.013,
"step": 50
},
{
"epoch": 0.16,
"grad_norm": 0.8501819372177124,
"learning_rate": 0.00018400000000000003,
"loss": 0.3088,
"step": 100
},
{
"epoch": 0.16,
"eval_loss": 0.3268783390522003,
"eval_na_accuracy": 0.928,
"eval_ordinal_accuracy": 0.5370581527936146,
"eval_ordinal_mae": 0.5969413880658287,
"eval_runtime": 43.4997,
"eval_samples_per_second": 22.989,
"eval_steps_per_second": 2.874,
"step": 100
},
{
"epoch": 0.24,
"grad_norm": 0.5424334406852722,
"learning_rate": 0.00017600000000000002,
"loss": 0.316,
"step": 150
},
{
"epoch": 0.24,
"eval_loss": 0.3395713269710541,
"eval_na_accuracy": 0.902,
"eval_ordinal_accuracy": 0.5142531356898518,
"eval_ordinal_mae": 0.6323422620227872,
"eval_runtime": 43.339,
"eval_samples_per_second": 23.074,
"eval_steps_per_second": 2.884,
"step": 150
},
{
"epoch": 0.32,
"grad_norm": 1.2484453916549683,
"learning_rate": 0.000168,
"loss": 0.2821,
"step": 200
},
{
"epoch": 0.32,
"eval_loss": 0.32339948415756226,
"eval_na_accuracy": 0.927,
"eval_ordinal_accuracy": 0.5131128848346637,
"eval_ordinal_mae": 0.6292874569299393,
"eval_runtime": 42.0004,
"eval_samples_per_second": 23.809,
"eval_steps_per_second": 2.976,
"step": 200
},
{
"epoch": 0.4,
"grad_norm": 1.4807660579681396,
"learning_rate": 0.00016,
"loss": 0.2731,
"step": 250
},
{
"epoch": 0.4,
"eval_loss": 0.3313509225845337,
"eval_na_accuracy": 0.925,
"eval_ordinal_accuracy": 0.508551881413911,
"eval_ordinal_mae": 0.5856009521101041,
"eval_runtime": 55.6564,
"eval_samples_per_second": 17.967,
"eval_steps_per_second": 2.246,
"step": 250
},
{
"epoch": 0.48,
"grad_norm": 1.4179209470748901,
"learning_rate": 0.000152,
"loss": 0.2975,
"step": 300
},
{
"epoch": 0.48,
"eval_loss": 0.3036611080169678,
"eval_na_accuracy": 0.927,
"eval_ordinal_accuracy": 0.5963511972633979,
"eval_ordinal_mae": 0.5690023564742932,
"eval_runtime": 42.7034,
"eval_samples_per_second": 23.417,
"eval_steps_per_second": 2.927,
"step": 300
},
{
"epoch": 0.56,
"grad_norm": 0.7659221887588501,
"learning_rate": 0.000144,
"loss": 0.2609,
"step": 350
},
{
"epoch": 0.56,
"eval_loss": 0.3209022283554077,
"eval_na_accuracy": 0.928,
"eval_ordinal_accuracy": 0.5450399087799316,
"eval_ordinal_mae": 0.5764862077817825,
"eval_runtime": 43.1206,
"eval_samples_per_second": 23.191,
"eval_steps_per_second": 2.899,
"step": 350
},
{
"epoch": 0.64,
"grad_norm": 1.0847711563110352,
"learning_rate": 0.00013600000000000003,
"loss": 0.287,
"step": 400
},
{
"epoch": 0.64,
"eval_loss": 0.29075464606285095,
"eval_na_accuracy": 0.931,
"eval_ordinal_accuracy": 0.5826681870011402,
"eval_ordinal_mae": 0.5458187616535902,
"eval_runtime": 42.3269,
"eval_samples_per_second": 23.626,
"eval_steps_per_second": 2.953,
"step": 400
},
{
"epoch": 0.72,
"grad_norm": 0.9720218181610107,
"learning_rate": 0.00012800000000000002,
"loss": 0.2905,
"step": 450
},
{
"epoch": 0.72,
"eval_loss": 0.30074238777160645,
"eval_na_accuracy": 0.919,
"eval_ordinal_accuracy": 0.5986316989737742,
"eval_ordinal_mae": 0.548372159519042,
"eval_runtime": 76.7524,
"eval_samples_per_second": 13.029,
"eval_steps_per_second": 1.629,
"step": 450
},
{
"epoch": 0.8,
"grad_norm": 0.8414099216461182,
"learning_rate": 0.00012,
"loss": 0.2574,
"step": 500
},
{
"epoch": 0.8,
"eval_loss": 0.28344637155532837,
"eval_na_accuracy": 0.929,
"eval_ordinal_accuracy": 0.6031927023945268,
"eval_ordinal_mae": 0.5363022306512,
"eval_runtime": 42.8484,
"eval_samples_per_second": 23.338,
"eval_steps_per_second": 2.917,
"step": 500
},
{
"epoch": 0.88,
"grad_norm": 0.5895617604255676,
"learning_rate": 0.00011200000000000001,
"loss": 0.2855,
"step": 550
},
{
"epoch": 0.88,
"eval_loss": 0.2750368118286133,
"eval_na_accuracy": 0.931,
"eval_ordinal_accuracy": 0.6271379703534777,
"eval_ordinal_mae": 0.5319093595330124,
"eval_runtime": 42.3171,
"eval_samples_per_second": 23.631,
"eval_steps_per_second": 2.954,
"step": 550
},
{
"epoch": 0.88,
"step": 550,
"total_flos": 6.81953956282368e+17,
"train_loss": 0.3000895881652832,
"train_runtime": 2172.3633,
"train_samples_per_second": 9.207,
"train_steps_per_second": 0.575
}
],
"logging_steps": 50,
"max_steps": 1250,
"num_input_tokens_seen": 0,
"num_train_epochs": 2,
"save_steps": 50,
"total_flos": 6.81953956282368e+17,
"train_batch_size": 16,
"trial_name": null,
"trial_params": null
}