|
{ |
|
"best_metric": 0.9905983805656433, |
|
"best_model_checkpoint": "th_cl_13epochs_lora_pos_neg/checkpoint-96", |
|
"epoch": 3.0, |
|
"eval_steps": 500, |
|
"global_step": 96, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.3125, |
|
"grad_norm": 28.00575065612793, |
|
"learning_rate": 8.958333333333335e-05, |
|
"loss": 0.4959, |
|
"step": 10 |
|
}, |
|
{ |
|
"epoch": 0.625, |
|
"grad_norm": 26.350143432617188, |
|
"learning_rate": 7.916666666666666e-05, |
|
"loss": 0.6218, |
|
"step": 20 |
|
}, |
|
{ |
|
"epoch": 0.9375, |
|
"grad_norm": 31.33298110961914, |
|
"learning_rate": 6.875e-05, |
|
"loss": 0.7442, |
|
"step": 30 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"eval_accuracy": 0.5111111111111111, |
|
"eval_balanced_accuracy": 0.515, |
|
"eval_loss": 1.0410274267196655, |
|
"eval_runtime": 86.3882, |
|
"eval_samples_per_second": 0.521, |
|
"eval_steps_per_second": 0.069, |
|
"step": 32 |
|
}, |
|
{ |
|
"epoch": 1.25, |
|
"grad_norm": 32.04572677612305, |
|
"learning_rate": 5.833333333333334e-05, |
|
"loss": 0.3933, |
|
"step": 40 |
|
}, |
|
{ |
|
"epoch": 1.5625, |
|
"grad_norm": 27.043235778808594, |
|
"learning_rate": 4.791666666666667e-05, |
|
"loss": 0.3407, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 1.875, |
|
"grad_norm": 20.865083694458008, |
|
"learning_rate": 3.7500000000000003e-05, |
|
"loss": 0.4596, |
|
"step": 60 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"eval_accuracy": 0.5111111111111111, |
|
"eval_balanced_accuracy": 0.5118577075098814, |
|
"eval_loss": 1.0013209581375122, |
|
"eval_runtime": 87.3168, |
|
"eval_samples_per_second": 0.515, |
|
"eval_steps_per_second": 0.069, |
|
"step": 64 |
|
}, |
|
{ |
|
"epoch": 2.1875, |
|
"grad_norm": 7.867964744567871, |
|
"learning_rate": 2.7083333333333332e-05, |
|
"loss": 0.317, |
|
"step": 70 |
|
}, |
|
{ |
|
"epoch": 2.5, |
|
"grad_norm": 21.569629669189453, |
|
"learning_rate": 1.6666666666666667e-05, |
|
"loss": 0.2957, |
|
"step": 80 |
|
}, |
|
{ |
|
"epoch": 2.8125, |
|
"grad_norm": 14.822030067443848, |
|
"learning_rate": 6.25e-06, |
|
"loss": 0.2864, |
|
"step": 90 |
|
}, |
|
{ |
|
"epoch": 3.0, |
|
"eval_accuracy": 0.5111111111111111, |
|
"eval_balanced_accuracy": 0.5118577075098814, |
|
"eval_loss": 0.9905983805656433, |
|
"eval_runtime": 86.9479, |
|
"eval_samples_per_second": 0.518, |
|
"eval_steps_per_second": 0.069, |
|
"step": 96 |
|
}, |
|
{ |
|
"epoch": 3.0, |
|
"step": 96, |
|
"total_flos": 1.6176865332953088e+16, |
|
"train_loss": 0.4266343352695306, |
|
"train_runtime": 4808.092, |
|
"train_samples_per_second": 0.157, |
|
"train_steps_per_second": 0.02 |
|
} |
|
], |
|
"logging_steps": 10, |
|
"max_steps": 96, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 3, |
|
"save_steps": 500, |
|
"total_flos": 1.6176865332953088e+16, |
|
"train_batch_size": 8, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|