|
{ |
|
"best_metric": 0.9622508792497069, |
|
"best_model_checkpoint": "ky-finetuned-skindiseaseicthuawei32/checkpoint-3000", |
|
"epoch": 10.0, |
|
"eval_steps": 500, |
|
"global_step": 3000, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 1.0, |
|
"grad_norm": 135.11822509765625, |
|
"learning_rate": 5e-05, |
|
"loss": 1.3894, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"eval_accuracy": 0.8060961313012895, |
|
"eval_loss": 0.6159797310829163, |
|
"eval_runtime": 59.1496, |
|
"eval_samples_per_second": 72.105, |
|
"eval_steps_per_second": 2.265, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"grad_norm": 114.53544616699219, |
|
"learning_rate": 4.4444444444444447e-05, |
|
"loss": 0.6543, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"eval_accuracy": 0.8635404454865182, |
|
"eval_loss": 0.43777740001678467, |
|
"eval_runtime": 58.9516, |
|
"eval_samples_per_second": 72.348, |
|
"eval_steps_per_second": 2.273, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 3.0, |
|
"grad_norm": 72.00625610351562, |
|
"learning_rate": 3.888888888888889e-05, |
|
"loss": 0.471, |
|
"step": 900 |
|
}, |
|
{ |
|
"epoch": 3.0, |
|
"eval_accuracy": 0.9160609613130128, |
|
"eval_loss": 0.2565539479255676, |
|
"eval_runtime": 59.0158, |
|
"eval_samples_per_second": 72.269, |
|
"eval_steps_per_second": 2.271, |
|
"step": 900 |
|
}, |
|
{ |
|
"epoch": 4.0, |
|
"grad_norm": 76.05452728271484, |
|
"learning_rate": 3.3333333333333335e-05, |
|
"loss": 0.3853, |
|
"step": 1200 |
|
}, |
|
{ |
|
"epoch": 4.0, |
|
"eval_accuracy": 0.9134818288393904, |
|
"eval_loss": 0.2498469352722168, |
|
"eval_runtime": 59.1502, |
|
"eval_samples_per_second": 72.105, |
|
"eval_steps_per_second": 2.265, |
|
"step": 1200 |
|
}, |
|
{ |
|
"epoch": 5.0, |
|
"grad_norm": 110.07782745361328, |
|
"learning_rate": 2.777777777777778e-05, |
|
"loss": 0.3225, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 5.0, |
|
"eval_accuracy": 0.9289566236811254, |
|
"eval_loss": 0.21574878692626953, |
|
"eval_runtime": 59.1951, |
|
"eval_samples_per_second": 72.05, |
|
"eval_steps_per_second": 2.264, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 6.0, |
|
"grad_norm": 54.6344108581543, |
|
"learning_rate": 2.2222222222222223e-05, |
|
"loss": 0.2769, |
|
"step": 1800 |
|
}, |
|
{ |
|
"epoch": 6.0, |
|
"eval_accuracy": 0.9406799531066823, |
|
"eval_loss": 0.1746564507484436, |
|
"eval_runtime": 59.244, |
|
"eval_samples_per_second": 71.99, |
|
"eval_steps_per_second": 2.262, |
|
"step": 1800 |
|
}, |
|
{ |
|
"epoch": 7.0, |
|
"grad_norm": 49.57483673095703, |
|
"learning_rate": 1.6666666666666667e-05, |
|
"loss": 0.2364, |
|
"step": 2100 |
|
}, |
|
{ |
|
"epoch": 7.0, |
|
"eval_accuracy": 0.948651817116061, |
|
"eval_loss": 0.15020643174648285, |
|
"eval_runtime": 59.6495, |
|
"eval_samples_per_second": 71.501, |
|
"eval_steps_per_second": 2.246, |
|
"step": 2100 |
|
}, |
|
{ |
|
"epoch": 8.0, |
|
"grad_norm": 85.14453125, |
|
"learning_rate": 1.1111111111111112e-05, |
|
"loss": 0.2005, |
|
"step": 2400 |
|
}, |
|
{ |
|
"epoch": 8.0, |
|
"eval_accuracy": 0.9547479484173506, |
|
"eval_loss": 0.12820766866207123, |
|
"eval_runtime": 59.7687, |
|
"eval_samples_per_second": 71.358, |
|
"eval_steps_per_second": 2.242, |
|
"step": 2400 |
|
}, |
|
{ |
|
"epoch": 9.0, |
|
"grad_norm": 20.916675567626953, |
|
"learning_rate": 5.555555555555556e-06, |
|
"loss": 0.1737, |
|
"step": 2700 |
|
}, |
|
{ |
|
"epoch": 9.0, |
|
"eval_accuracy": 0.9596717467760844, |
|
"eval_loss": 0.11289093643426895, |
|
"eval_runtime": 59.3074, |
|
"eval_samples_per_second": 71.913, |
|
"eval_steps_per_second": 2.259, |
|
"step": 2700 |
|
}, |
|
{ |
|
"epoch": 10.0, |
|
"grad_norm": 45.390342712402344, |
|
"learning_rate": 0.0, |
|
"loss": 0.1468, |
|
"step": 3000 |
|
}, |
|
{ |
|
"epoch": 10.0, |
|
"eval_accuracy": 0.9622508792497069, |
|
"eval_loss": 0.10576048493385315, |
|
"eval_runtime": 59.8846, |
|
"eval_samples_per_second": 71.22, |
|
"eval_steps_per_second": 2.238, |
|
"step": 3000 |
|
}, |
|
{ |
|
"epoch": 10.0, |
|
"step": 3000, |
|
"total_flos": 3.922659491677471e+19, |
|
"train_loss": 0.4256939633687337, |
|
"train_runtime": 8577.8057, |
|
"train_samples_per_second": 44.749, |
|
"train_steps_per_second": 0.35 |
|
} |
|
], |
|
"logging_steps": 10, |
|
"max_steps": 3000, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 10, |
|
"save_steps": 500, |
|
"stateful_callbacks": { |
|
"TrainerControl": { |
|
"args": { |
|
"should_epoch_stop": false, |
|
"should_evaluate": false, |
|
"should_log": false, |
|
"should_save": true, |
|
"should_training_stop": true |
|
}, |
|
"attributes": {} |
|
} |
|
}, |
|
"total_flos": 3.922659491677471e+19, |
|
"train_batch_size": 32, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|