|
{ |
|
"best_metric": 0.9320297951582869, |
|
"best_model_checkpoint": "/content/dissertation/scripts/ner/output/checkpoint-4350", |
|
"epoch": 9.988518943742825, |
|
"eval_steps": 500, |
|
"global_step": 4350, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.9988518943742825, |
|
"eval_accuracy": 0.9981441687829654, |
|
"eval_f1": 0.8958333333333334, |
|
"eval_loss": 0.006047643255442381, |
|
"eval_precision": 0.8713656387665198, |
|
"eval_recall": 0.9217148182665424, |
|
"eval_runtime": 14.9983, |
|
"eval_samples_per_second": 463.12, |
|
"eval_steps_per_second": 57.94, |
|
"step": 435 |
|
}, |
|
{ |
|
"epoch": 1.148105625717566, |
|
"grad_norm": 0.025908155366778374, |
|
"learning_rate": 4.4252873563218394e-05, |
|
"loss": 0.0156, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"eval_accuracy": 0.9986604526553735, |
|
"eval_f1": 0.92, |
|
"eval_loss": 0.004434330388903618, |
|
"eval_precision": 0.9182915506035283, |
|
"eval_recall": 0.9217148182665424, |
|
"eval_runtime": 15.157, |
|
"eval_samples_per_second": 458.27, |
|
"eval_steps_per_second": 57.333, |
|
"step": 871 |
|
}, |
|
{ |
|
"epoch": 2.296211251435132, |
|
"grad_norm": 0.015398409217596054, |
|
"learning_rate": 3.850574712643678e-05, |
|
"loss": 0.0038, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 2.9988518943742823, |
|
"eval_accuracy": 0.9986883598917199, |
|
"eval_f1": 0.9181073703366698, |
|
"eval_loss": 0.0040275463834404945, |
|
"eval_precision": 0.8968888888888888, |
|
"eval_recall": 0.940354147250699, |
|
"eval_runtime": 15.1362, |
|
"eval_samples_per_second": 458.9, |
|
"eval_steps_per_second": 57.412, |
|
"step": 1306 |
|
}, |
|
{ |
|
"epoch": 3.444316877152698, |
|
"grad_norm": 0.024467067793011665, |
|
"learning_rate": 3.275862068965517e-05, |
|
"loss": 0.0025, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 4.0, |
|
"eval_accuracy": 0.9985837077554209, |
|
"eval_f1": 0.9215236346948141, |
|
"eval_loss": 0.004492571111768484, |
|
"eval_precision": 0.9077757685352622, |
|
"eval_recall": 0.9356943150046598, |
|
"eval_runtime": 15.2768, |
|
"eval_samples_per_second": 454.675, |
|
"eval_steps_per_second": 56.884, |
|
"step": 1742 |
|
}, |
|
{ |
|
"epoch": 4.592422502870264, |
|
"grad_norm": 0.014663909561932087, |
|
"learning_rate": 2.7011494252873566e-05, |
|
"loss": 0.0016, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 4.998851894374282, |
|
"eval_accuracy": 0.9985906845645076, |
|
"eval_f1": 0.9138576779026217, |
|
"eval_loss": 0.0054086255840957165, |
|
"eval_precision": 0.9181561618062088, |
|
"eval_recall": 0.9095992544268406, |
|
"eval_runtime": 15.0107, |
|
"eval_samples_per_second": 462.737, |
|
"eval_steps_per_second": 57.892, |
|
"step": 2177 |
|
}, |
|
{ |
|
"epoch": 5.7405281285878305, |
|
"grad_norm": 0.002929804613813758, |
|
"learning_rate": 2.1264367816091954e-05, |
|
"loss": 0.0011, |
|
"step": 2500 |
|
}, |
|
{ |
|
"epoch": 6.0, |
|
"eval_accuracy": 0.9986395222281137, |
|
"eval_f1": 0.9202965708989805, |
|
"eval_loss": 0.00529602263122797, |
|
"eval_precision": 0.9152073732718894, |
|
"eval_recall": 0.9254426840633737, |
|
"eval_runtime": 15.2091, |
|
"eval_samples_per_second": 456.701, |
|
"eval_steps_per_second": 57.137, |
|
"step": 2613 |
|
}, |
|
{ |
|
"epoch": 6.888633754305396, |
|
"grad_norm": 0.06796916574239731, |
|
"learning_rate": 1.5517241379310346e-05, |
|
"loss": 0.0009, |
|
"step": 3000 |
|
}, |
|
{ |
|
"epoch": 6.998851894374282, |
|
"eval_accuracy": 0.9986604526553735, |
|
"eval_f1": 0.9314179796107507, |
|
"eval_loss": 0.006041177082806826, |
|
"eval_precision": 0.9262672811059908, |
|
"eval_recall": 0.9366262814538676, |
|
"eval_runtime": 15.0006, |
|
"eval_samples_per_second": 463.048, |
|
"eval_steps_per_second": 57.931, |
|
"step": 3048 |
|
}, |
|
{ |
|
"epoch": 8.0, |
|
"eval_accuracy": 0.9987511511734993, |
|
"eval_f1": 0.9290976058931859, |
|
"eval_loss": 0.005891016684472561, |
|
"eval_precision": 0.9181073703366697, |
|
"eval_recall": 0.940354147250699, |
|
"eval_runtime": 15.2005, |
|
"eval_samples_per_second": 456.957, |
|
"eval_steps_per_second": 57.169, |
|
"step": 3484 |
|
}, |
|
{ |
|
"epoch": 8.036739380022961, |
|
"grad_norm": 0.0015728959115222096, |
|
"learning_rate": 9.770114942528738e-06, |
|
"loss": 0.0005, |
|
"step": 3500 |
|
}, |
|
{ |
|
"epoch": 8.998851894374283, |
|
"eval_accuracy": 0.9987581279825859, |
|
"eval_f1": 0.9279404927940494, |
|
"eval_loss": 0.006726197898387909, |
|
"eval_precision": 0.9257884972170687, |
|
"eval_recall": 0.9301025163094129, |
|
"eval_runtime": 15.4019, |
|
"eval_samples_per_second": 450.984, |
|
"eval_steps_per_second": 56.422, |
|
"step": 3919 |
|
}, |
|
{ |
|
"epoch": 9.184845005740527, |
|
"grad_norm": 0.11025261133909225, |
|
"learning_rate": 4.022988505747127e-06, |
|
"loss": 0.0003, |
|
"step": 4000 |
|
}, |
|
{ |
|
"epoch": 9.988518943742825, |
|
"eval_accuracy": 0.998772081600759, |
|
"eval_f1": 0.9320297951582869, |
|
"eval_loss": 0.007080046460032463, |
|
"eval_precision": 0.9311627906976744, |
|
"eval_recall": 0.9328984156570364, |
|
"eval_runtime": 15.2679, |
|
"eval_samples_per_second": 454.941, |
|
"eval_steps_per_second": 56.917, |
|
"step": 4350 |
|
}, |
|
{ |
|
"epoch": 9.988518943742825, |
|
"step": 4350, |
|
"total_flos": 1.1084127968547612e+16, |
|
"train_loss": 0.003044272955806776, |
|
"train_runtime": 1809.5975, |
|
"train_samples_per_second": 153.852, |
|
"train_steps_per_second": 2.404 |
|
} |
|
], |
|
"logging_steps": 500, |
|
"max_steps": 4350, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 10, |
|
"save_steps": 500, |
|
"stateful_callbacks": { |
|
"TrainerControl": { |
|
"args": { |
|
"should_epoch_stop": false, |
|
"should_evaluate": false, |
|
"should_log": false, |
|
"should_save": true, |
|
"should_training_stop": true |
|
}, |
|
"attributes": {} |
|
} |
|
}, |
|
"total_flos": 1.1084127968547612e+16, |
|
"train_batch_size": 32, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|