|
{
|
|
"best_metric": 0.4597996771335602,
|
|
"best_model_checkpoint": "./vit-base-brain-mri\\checkpoint-1440",
|
|
"epoch": 20.0,
|
|
"eval_steps": 500,
|
|
"global_step": 1440,
|
|
"is_hyper_param_search": false,
|
|
"is_local_process_zero": true,
|
|
"is_world_process_zero": true,
|
|
"log_history": [
|
|
{
|
|
"epoch": 1.0,
|
|
"eval_accuracy": 0.6097560975609756,
|
|
"eval_loss": 0.998555600643158,
|
|
"eval_runtime": 4.892,
|
|
"eval_samples_per_second": 117.334,
|
|
"eval_steps_per_second": 14.718,
|
|
"step": 72
|
|
},
|
|
{
|
|
"epoch": 1.3888888888888888,
|
|
"grad_norm": 2.622373342514038,
|
|
"learning_rate": 0.00027916666666666666,
|
|
"loss": 1.098,
|
|
"step": 100
|
|
},
|
|
{
|
|
"epoch": 2.0,
|
|
"eval_accuracy": 0.7003484320557491,
|
|
"eval_loss": 0.8445045948028564,
|
|
"eval_runtime": 4.8929,
|
|
"eval_samples_per_second": 117.312,
|
|
"eval_steps_per_second": 14.715,
|
|
"step": 144
|
|
},
|
|
{
|
|
"epoch": 2.7777777777777777,
|
|
"grad_norm": 4.336460590362549,
|
|
"learning_rate": 0.00025833333333333334,
|
|
"loss": 0.7895,
|
|
"step": 200
|
|
},
|
|
{
|
|
"epoch": 3.0,
|
|
"eval_accuracy": 0.7526132404181185,
|
|
"eval_loss": 0.7317853569984436,
|
|
"eval_runtime": 4.7479,
|
|
"eval_samples_per_second": 120.896,
|
|
"eval_steps_per_second": 15.165,
|
|
"step": 216
|
|
},
|
|
{
|
|
"epoch": 4.0,
|
|
"eval_accuracy": 0.7473867595818815,
|
|
"eval_loss": 0.6842443943023682,
|
|
"eval_runtime": 4.5716,
|
|
"eval_samples_per_second": 125.558,
|
|
"eval_steps_per_second": 15.749,
|
|
"step": 288
|
|
},
|
|
{
|
|
"epoch": 4.166666666666667,
|
|
"grad_norm": 3.367997169494629,
|
|
"learning_rate": 0.00023749999999999997,
|
|
"loss": 0.6629,
|
|
"step": 300
|
|
},
|
|
{
|
|
"epoch": 5.0,
|
|
"eval_accuracy": 0.7857142857142857,
|
|
"eval_loss": 0.6328176856040955,
|
|
"eval_runtime": 4.5619,
|
|
"eval_samples_per_second": 125.825,
|
|
"eval_steps_per_second": 15.783,
|
|
"step": 360
|
|
},
|
|
{
|
|
"epoch": 5.555555555555555,
|
|
"grad_norm": 2.0712759494781494,
|
|
"learning_rate": 0.00021666666666666666,
|
|
"loss": 0.5966,
|
|
"step": 400
|
|
},
|
|
{
|
|
"epoch": 6.0,
|
|
"eval_accuracy": 0.8101045296167247,
|
|
"eval_loss": 0.5956693291664124,
|
|
"eval_runtime": 4.6073,
|
|
"eval_samples_per_second": 124.585,
|
|
"eval_steps_per_second": 15.627,
|
|
"step": 432
|
|
},
|
|
{
|
|
"epoch": 6.944444444444445,
|
|
"grad_norm": 1.491408348083496,
|
|
"learning_rate": 0.00019583333333333331,
|
|
"loss": 0.5546,
|
|
"step": 500
|
|
},
|
|
{
|
|
"epoch": 7.0,
|
|
"eval_accuracy": 0.8118466898954704,
|
|
"eval_loss": 0.5646191835403442,
|
|
"eval_runtime": 4.5982,
|
|
"eval_samples_per_second": 124.83,
|
|
"eval_steps_per_second": 15.658,
|
|
"step": 504
|
|
},
|
|
{
|
|
"epoch": 8.0,
|
|
"eval_accuracy": 0.8048780487804879,
|
|
"eval_loss": 0.5646994709968567,
|
|
"eval_runtime": 4.6362,
|
|
"eval_samples_per_second": 123.809,
|
|
"eval_steps_per_second": 15.53,
|
|
"step": 576
|
|
},
|
|
{
|
|
"epoch": 8.333333333333334,
|
|
"grad_norm": 1.5481159687042236,
|
|
"learning_rate": 0.000175,
|
|
"loss": 0.5113,
|
|
"step": 600
|
|
},
|
|
{
|
|
"epoch": 9.0,
|
|
"eval_accuracy": 0.8275261324041812,
|
|
"eval_loss": 0.5340307354927063,
|
|
"eval_runtime": 4.6122,
|
|
"eval_samples_per_second": 124.453,
|
|
"eval_steps_per_second": 15.611,
|
|
"step": 648
|
|
},
|
|
{
|
|
"epoch": 9.722222222222221,
|
|
"grad_norm": 2.323460817337036,
|
|
"learning_rate": 0.00015416666666666663,
|
|
"loss": 0.4882,
|
|
"step": 700
|
|
},
|
|
{
|
|
"epoch": 10.0,
|
|
"eval_accuracy": 0.8327526132404182,
|
|
"eval_loss": 0.5189912915229797,
|
|
"eval_runtime": 4.6128,
|
|
"eval_samples_per_second": 124.435,
|
|
"eval_steps_per_second": 15.609,
|
|
"step": 720
|
|
},
|
|
{
|
|
"epoch": 11.0,
|
|
"eval_accuracy": 0.8327526132404182,
|
|
"eval_loss": 0.5197045803070068,
|
|
"eval_runtime": 4.6509,
|
|
"eval_samples_per_second": 123.417,
|
|
"eval_steps_per_second": 15.481,
|
|
"step": 792
|
|
},
|
|
{
|
|
"epoch": 11.11111111111111,
|
|
"grad_norm": 2.1213157176971436,
|
|
"learning_rate": 0.0001333333333333333,
|
|
"loss": 0.4789,
|
|
"step": 800
|
|
},
|
|
{
|
|
"epoch": 12.0,
|
|
"eval_accuracy": 0.8257839721254355,
|
|
"eval_loss": 0.5001842975616455,
|
|
"eval_runtime": 4.5564,
|
|
"eval_samples_per_second": 125.977,
|
|
"eval_steps_per_second": 15.802,
|
|
"step": 864
|
|
},
|
|
{
|
|
"epoch": 12.5,
|
|
"grad_norm": 1.9881811141967773,
|
|
"learning_rate": 0.0001125,
|
|
"loss": 0.4582,
|
|
"step": 900
|
|
},
|
|
{
|
|
"epoch": 13.0,
|
|
"eval_accuracy": 0.8310104529616724,
|
|
"eval_loss": 0.4956616163253784,
|
|
"eval_runtime": 4.5548,
|
|
"eval_samples_per_second": 126.02,
|
|
"eval_steps_per_second": 15.807,
|
|
"step": 936
|
|
},
|
|
{
|
|
"epoch": 13.88888888888889,
|
|
"grad_norm": 2.0128438472747803,
|
|
"learning_rate": 9.166666666666667e-05,
|
|
"loss": 0.4426,
|
|
"step": 1000
|
|
},
|
|
{
|
|
"epoch": 14.0,
|
|
"eval_accuracy": 0.8310104529616724,
|
|
"eval_loss": 0.4820682108402252,
|
|
"eval_runtime": 4.6027,
|
|
"eval_samples_per_second": 124.708,
|
|
"eval_steps_per_second": 15.643,
|
|
"step": 1008
|
|
},
|
|
{
|
|
"epoch": 15.0,
|
|
"eval_accuracy": 0.8466898954703833,
|
|
"eval_loss": 0.4706496000289917,
|
|
"eval_runtime": 4.5952,
|
|
"eval_samples_per_second": 124.913,
|
|
"eval_steps_per_second": 15.669,
|
|
"step": 1080
|
|
},
|
|
{
|
|
"epoch": 15.277777777777779,
|
|
"grad_norm": 1.9610830545425415,
|
|
"learning_rate": 7.083333333333332e-05,
|
|
"loss": 0.4328,
|
|
"step": 1100
|
|
},
|
|
{
|
|
"epoch": 16.0,
|
|
"eval_accuracy": 0.8153310104529616,
|
|
"eval_loss": 0.4820646047592163,
|
|
"eval_runtime": 4.6291,
|
|
"eval_samples_per_second": 123.999,
|
|
"eval_steps_per_second": 15.554,
|
|
"step": 1152
|
|
},
|
|
{
|
|
"epoch": 16.666666666666668,
|
|
"grad_norm": 1.850261926651001,
|
|
"learning_rate": 4.9999999999999996e-05,
|
|
"loss": 0.432,
|
|
"step": 1200
|
|
},
|
|
{
|
|
"epoch": 17.0,
|
|
"eval_accuracy": 0.8275261324041812,
|
|
"eval_loss": 0.4991794228553772,
|
|
"eval_runtime": 4.5987,
|
|
"eval_samples_per_second": 124.818,
|
|
"eval_steps_per_second": 15.657,
|
|
"step": 1224
|
|
},
|
|
{
|
|
"epoch": 18.0,
|
|
"eval_accuracy": 0.8344947735191638,
|
|
"eval_loss": 0.4799434542655945,
|
|
"eval_runtime": 4.621,
|
|
"eval_samples_per_second": 124.216,
|
|
"eval_steps_per_second": 15.581,
|
|
"step": 1296
|
|
},
|
|
{
|
|
"epoch": 18.055555555555557,
|
|
"grad_norm": 1.6390336751937866,
|
|
"learning_rate": 2.9166666666666663e-05,
|
|
"loss": 0.4196,
|
|
"step": 1300
|
|
},
|
|
{
|
|
"epoch": 19.0,
|
|
"eval_accuracy": 0.8310104529616724,
|
|
"eval_loss": 0.4837837815284729,
|
|
"eval_runtime": 4.5576,
|
|
"eval_samples_per_second": 125.943,
|
|
"eval_steps_per_second": 15.798,
|
|
"step": 1368
|
|
},
|
|
{
|
|
"epoch": 19.444444444444443,
|
|
"grad_norm": 3.292024612426758,
|
|
"learning_rate": 8.333333333333332e-06,
|
|
"loss": 0.4287,
|
|
"step": 1400
|
|
},
|
|
{
|
|
"epoch": 20.0,
|
|
"eval_accuracy": 0.8658536585365854,
|
|
"eval_loss": 0.4597996771335602,
|
|
"eval_runtime": 4.6098,
|
|
"eval_samples_per_second": 124.517,
|
|
"eval_steps_per_second": 15.619,
|
|
"step": 1440
|
|
},
|
|
{
|
|
"epoch": 20.0,
|
|
"step": 1440,
|
|
"total_flos": 3.558495949305938e+18,
|
|
"train_loss": 0.5529726452297634,
|
|
"train_runtime": 477.7876,
|
|
"train_samples_per_second": 96.11,
|
|
"train_steps_per_second": 3.014
|
|
}
|
|
],
|
|
"logging_steps": 100,
|
|
"max_steps": 1440,
|
|
"num_input_tokens_seen": 0,
|
|
"num_train_epochs": 20,
|
|
"save_steps": 500,
|
|
"stateful_callbacks": {
|
|
"TrainerControl": {
|
|
"args": {
|
|
"should_epoch_stop": false,
|
|
"should_evaluate": false,
|
|
"should_log": false,
|
|
"should_save": true,
|
|
"should_training_stop": true
|
|
},
|
|
"attributes": {}
|
|
}
|
|
},
|
|
"total_flos": 3.558495949305938e+18,
|
|
"train_batch_size": 32,
|
|
"trial_name": null,
|
|
"trial_params": null
|
|
}
|
|
|