|
{ |
|
"best_metric": 0.2750368118286133, |
|
"best_model_checkpoint": "./ryan_model314_3/checkpoint-550", |
|
"epoch": 0.88, |
|
"eval_steps": 50, |
|
"global_step": 550, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.08, |
|
"grad_norm": 1.5017586946487427, |
|
"learning_rate": 0.000192, |
|
"loss": 0.4423, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"eval_loss": 0.33861014246940613, |
|
"eval_na_accuracy": 0.904, |
|
"eval_ordinal_accuracy": 0.4629418472063854, |
|
"eval_ordinal_mae": 0.6577621472191316, |
|
"eval_runtime": 123.3898, |
|
"eval_samples_per_second": 8.104, |
|
"eval_steps_per_second": 1.013, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 0.16, |
|
"grad_norm": 0.8501819372177124, |
|
"learning_rate": 0.00018400000000000003, |
|
"loss": 0.3088, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.16, |
|
"eval_loss": 0.3268783390522003, |
|
"eval_na_accuracy": 0.928, |
|
"eval_ordinal_accuracy": 0.5370581527936146, |
|
"eval_ordinal_mae": 0.5969413880658287, |
|
"eval_runtime": 43.4997, |
|
"eval_samples_per_second": 22.989, |
|
"eval_steps_per_second": 2.874, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.24, |
|
"grad_norm": 0.5424334406852722, |
|
"learning_rate": 0.00017600000000000002, |
|
"loss": 0.316, |
|
"step": 150 |
|
}, |
|
{ |
|
"epoch": 0.24, |
|
"eval_loss": 0.3395713269710541, |
|
"eval_na_accuracy": 0.902, |
|
"eval_ordinal_accuracy": 0.5142531356898518, |
|
"eval_ordinal_mae": 0.6323422620227872, |
|
"eval_runtime": 43.339, |
|
"eval_samples_per_second": 23.074, |
|
"eval_steps_per_second": 2.884, |
|
"step": 150 |
|
}, |
|
{ |
|
"epoch": 0.32, |
|
"grad_norm": 1.2484453916549683, |
|
"learning_rate": 0.000168, |
|
"loss": 0.2821, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 0.32, |
|
"eval_loss": 0.32339948415756226, |
|
"eval_na_accuracy": 0.927, |
|
"eval_ordinal_accuracy": 0.5131128848346637, |
|
"eval_ordinal_mae": 0.6292874569299393, |
|
"eval_runtime": 42.0004, |
|
"eval_samples_per_second": 23.809, |
|
"eval_steps_per_second": 2.976, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 0.4, |
|
"grad_norm": 1.4807660579681396, |
|
"learning_rate": 0.00016, |
|
"loss": 0.2731, |
|
"step": 250 |
|
}, |
|
{ |
|
"epoch": 0.4, |
|
"eval_loss": 0.3313509225845337, |
|
"eval_na_accuracy": 0.925, |
|
"eval_ordinal_accuracy": 0.508551881413911, |
|
"eval_ordinal_mae": 0.5856009521101041, |
|
"eval_runtime": 55.6564, |
|
"eval_samples_per_second": 17.967, |
|
"eval_steps_per_second": 2.246, |
|
"step": 250 |
|
}, |
|
{ |
|
"epoch": 0.48, |
|
"grad_norm": 1.4179209470748901, |
|
"learning_rate": 0.000152, |
|
"loss": 0.2975, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 0.48, |
|
"eval_loss": 0.3036611080169678, |
|
"eval_na_accuracy": 0.927, |
|
"eval_ordinal_accuracy": 0.5963511972633979, |
|
"eval_ordinal_mae": 0.5690023564742932, |
|
"eval_runtime": 42.7034, |
|
"eval_samples_per_second": 23.417, |
|
"eval_steps_per_second": 2.927, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 0.56, |
|
"grad_norm": 0.7659221887588501, |
|
"learning_rate": 0.000144, |
|
"loss": 0.2609, |
|
"step": 350 |
|
}, |
|
{ |
|
"epoch": 0.56, |
|
"eval_loss": 0.3209022283554077, |
|
"eval_na_accuracy": 0.928, |
|
"eval_ordinal_accuracy": 0.5450399087799316, |
|
"eval_ordinal_mae": 0.5764862077817825, |
|
"eval_runtime": 43.1206, |
|
"eval_samples_per_second": 23.191, |
|
"eval_steps_per_second": 2.899, |
|
"step": 350 |
|
}, |
|
{ |
|
"epoch": 0.64, |
|
"grad_norm": 1.0847711563110352, |
|
"learning_rate": 0.00013600000000000003, |
|
"loss": 0.287, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 0.64, |
|
"eval_loss": 0.29075464606285095, |
|
"eval_na_accuracy": 0.931, |
|
"eval_ordinal_accuracy": 0.5826681870011402, |
|
"eval_ordinal_mae": 0.5458187616535902, |
|
"eval_runtime": 42.3269, |
|
"eval_samples_per_second": 23.626, |
|
"eval_steps_per_second": 2.953, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 0.72, |
|
"grad_norm": 0.9720218181610107, |
|
"learning_rate": 0.00012800000000000002, |
|
"loss": 0.2905, |
|
"step": 450 |
|
}, |
|
{ |
|
"epoch": 0.72, |
|
"eval_loss": 0.30074238777160645, |
|
"eval_na_accuracy": 0.919, |
|
"eval_ordinal_accuracy": 0.5986316989737742, |
|
"eval_ordinal_mae": 0.548372159519042, |
|
"eval_runtime": 76.7524, |
|
"eval_samples_per_second": 13.029, |
|
"eval_steps_per_second": 1.629, |
|
"step": 450 |
|
}, |
|
{ |
|
"epoch": 0.8, |
|
"grad_norm": 0.8414099216461182, |
|
"learning_rate": 0.00012, |
|
"loss": 0.2574, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 0.8, |
|
"eval_loss": 0.28344637155532837, |
|
"eval_na_accuracy": 0.929, |
|
"eval_ordinal_accuracy": 0.6031927023945268, |
|
"eval_ordinal_mae": 0.5363022306512, |
|
"eval_runtime": 42.8484, |
|
"eval_samples_per_second": 23.338, |
|
"eval_steps_per_second": 2.917, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 0.88, |
|
"grad_norm": 0.5895617604255676, |
|
"learning_rate": 0.00011200000000000001, |
|
"loss": 0.2855, |
|
"step": 550 |
|
}, |
|
{ |
|
"epoch": 0.88, |
|
"eval_loss": 0.2750368118286133, |
|
"eval_na_accuracy": 0.931, |
|
"eval_ordinal_accuracy": 0.6271379703534777, |
|
"eval_ordinal_mae": 0.5319093595330124, |
|
"eval_runtime": 42.3171, |
|
"eval_samples_per_second": 23.631, |
|
"eval_steps_per_second": 2.954, |
|
"step": 550 |
|
}, |
|
{ |
|
"epoch": 0.88, |
|
"step": 550, |
|
"total_flos": 6.81953956282368e+17, |
|
"train_loss": 0.3000895881652832, |
|
"train_runtime": 2172.3633, |
|
"train_samples_per_second": 9.207, |
|
"train_steps_per_second": 0.575 |
|
} |
|
], |
|
"logging_steps": 50, |
|
"max_steps": 1250, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 2, |
|
"save_steps": 50, |
|
"total_flos": 6.81953956282368e+17, |
|
"train_batch_size": 16, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|