{ "best_metric": 0.4597996771335602, "best_model_checkpoint": "./vit-base-brain-mri\\checkpoint-1440", "epoch": 20.0, "eval_steps": 500, "global_step": 1440, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 1.0, "eval_accuracy": 0.6097560975609756, "eval_loss": 0.998555600643158, "eval_runtime": 4.892, "eval_samples_per_second": 117.334, "eval_steps_per_second": 14.718, "step": 72 }, { "epoch": 1.3888888888888888, "grad_norm": 2.622373342514038, "learning_rate": 0.00027916666666666666, "loss": 1.098, "step": 100 }, { "epoch": 2.0, "eval_accuracy": 0.7003484320557491, "eval_loss": 0.8445045948028564, "eval_runtime": 4.8929, "eval_samples_per_second": 117.312, "eval_steps_per_second": 14.715, "step": 144 }, { "epoch": 2.7777777777777777, "grad_norm": 4.336460590362549, "learning_rate": 0.00025833333333333334, "loss": 0.7895, "step": 200 }, { "epoch": 3.0, "eval_accuracy": 0.7526132404181185, "eval_loss": 0.7317853569984436, "eval_runtime": 4.7479, "eval_samples_per_second": 120.896, "eval_steps_per_second": 15.165, "step": 216 }, { "epoch": 4.0, "eval_accuracy": 0.7473867595818815, "eval_loss": 0.6842443943023682, "eval_runtime": 4.5716, "eval_samples_per_second": 125.558, "eval_steps_per_second": 15.749, "step": 288 }, { "epoch": 4.166666666666667, "grad_norm": 3.367997169494629, "learning_rate": 0.00023749999999999997, "loss": 0.6629, "step": 300 }, { "epoch": 5.0, "eval_accuracy": 0.7857142857142857, "eval_loss": 0.6328176856040955, "eval_runtime": 4.5619, "eval_samples_per_second": 125.825, "eval_steps_per_second": 15.783, "step": 360 }, { "epoch": 5.555555555555555, "grad_norm": 2.0712759494781494, "learning_rate": 0.00021666666666666666, "loss": 0.5966, "step": 400 }, { "epoch": 6.0, "eval_accuracy": 0.8101045296167247, "eval_loss": 0.5956693291664124, "eval_runtime": 4.6073, "eval_samples_per_second": 124.585, "eval_steps_per_second": 15.627, "step": 432 }, { "epoch": 6.944444444444445, "grad_norm": 1.491408348083496, "learning_rate": 0.00019583333333333331, "loss": 0.5546, "step": 500 }, { "epoch": 7.0, "eval_accuracy": 0.8118466898954704, "eval_loss": 0.5646191835403442, "eval_runtime": 4.5982, "eval_samples_per_second": 124.83, "eval_steps_per_second": 15.658, "step": 504 }, { "epoch": 8.0, "eval_accuracy": 0.8048780487804879, "eval_loss": 0.5646994709968567, "eval_runtime": 4.6362, "eval_samples_per_second": 123.809, "eval_steps_per_second": 15.53, "step": 576 }, { "epoch": 8.333333333333334, "grad_norm": 1.5481159687042236, "learning_rate": 0.000175, "loss": 0.5113, "step": 600 }, { "epoch": 9.0, "eval_accuracy": 0.8275261324041812, "eval_loss": 0.5340307354927063, "eval_runtime": 4.6122, "eval_samples_per_second": 124.453, "eval_steps_per_second": 15.611, "step": 648 }, { "epoch": 9.722222222222221, "grad_norm": 2.323460817337036, "learning_rate": 0.00015416666666666663, "loss": 0.4882, "step": 700 }, { "epoch": 10.0, "eval_accuracy": 0.8327526132404182, "eval_loss": 0.5189912915229797, "eval_runtime": 4.6128, "eval_samples_per_second": 124.435, "eval_steps_per_second": 15.609, "step": 720 }, { "epoch": 11.0, "eval_accuracy": 0.8327526132404182, "eval_loss": 0.5197045803070068, "eval_runtime": 4.6509, "eval_samples_per_second": 123.417, "eval_steps_per_second": 15.481, "step": 792 }, { "epoch": 11.11111111111111, "grad_norm": 2.1213157176971436, "learning_rate": 0.0001333333333333333, "loss": 0.4789, "step": 800 }, { "epoch": 12.0, "eval_accuracy": 0.8257839721254355, "eval_loss": 0.5001842975616455, "eval_runtime": 4.5564, "eval_samples_per_second": 125.977, "eval_steps_per_second": 15.802, "step": 864 }, { "epoch": 12.5, "grad_norm": 1.9881811141967773, "learning_rate": 0.0001125, "loss": 0.4582, "step": 900 }, { "epoch": 13.0, "eval_accuracy": 0.8310104529616724, "eval_loss": 0.4956616163253784, "eval_runtime": 4.5548, "eval_samples_per_second": 126.02, "eval_steps_per_second": 15.807, "step": 936 }, { "epoch": 13.88888888888889, "grad_norm": 2.0128438472747803, "learning_rate": 9.166666666666667e-05, "loss": 0.4426, "step": 1000 }, { "epoch": 14.0, "eval_accuracy": 0.8310104529616724, "eval_loss": 0.4820682108402252, "eval_runtime": 4.6027, "eval_samples_per_second": 124.708, "eval_steps_per_second": 15.643, "step": 1008 }, { "epoch": 15.0, "eval_accuracy": 0.8466898954703833, "eval_loss": 0.4706496000289917, "eval_runtime": 4.5952, "eval_samples_per_second": 124.913, "eval_steps_per_second": 15.669, "step": 1080 }, { "epoch": 15.277777777777779, "grad_norm": 1.9610830545425415, "learning_rate": 7.083333333333332e-05, "loss": 0.4328, "step": 1100 }, { "epoch": 16.0, "eval_accuracy": 0.8153310104529616, "eval_loss": 0.4820646047592163, "eval_runtime": 4.6291, "eval_samples_per_second": 123.999, "eval_steps_per_second": 15.554, "step": 1152 }, { "epoch": 16.666666666666668, "grad_norm": 1.850261926651001, "learning_rate": 4.9999999999999996e-05, "loss": 0.432, "step": 1200 }, { "epoch": 17.0, "eval_accuracy": 0.8275261324041812, "eval_loss": 0.4991794228553772, "eval_runtime": 4.5987, "eval_samples_per_second": 124.818, "eval_steps_per_second": 15.657, "step": 1224 }, { "epoch": 18.0, "eval_accuracy": 0.8344947735191638, "eval_loss": 0.4799434542655945, "eval_runtime": 4.621, "eval_samples_per_second": 124.216, "eval_steps_per_second": 15.581, "step": 1296 }, { "epoch": 18.055555555555557, "grad_norm": 1.6390336751937866, "learning_rate": 2.9166666666666663e-05, "loss": 0.4196, "step": 1300 }, { "epoch": 19.0, "eval_accuracy": 0.8310104529616724, "eval_loss": 0.4837837815284729, "eval_runtime": 4.5576, "eval_samples_per_second": 125.943, "eval_steps_per_second": 15.798, "step": 1368 }, { "epoch": 19.444444444444443, "grad_norm": 3.292024612426758, "learning_rate": 8.333333333333332e-06, "loss": 0.4287, "step": 1400 }, { "epoch": 20.0, "eval_accuracy": 0.8658536585365854, "eval_loss": 0.4597996771335602, "eval_runtime": 4.6098, "eval_samples_per_second": 124.517, "eval_steps_per_second": 15.619, "step": 1440 }, { "epoch": 20.0, "step": 1440, "total_flos": 3.558495949305938e+18, "train_loss": 0.5529726452297634, "train_runtime": 477.7876, "train_samples_per_second": 96.11, "train_steps_per_second": 3.014 } ], "logging_steps": 100, "max_steps": 1440, "num_input_tokens_seen": 0, "num_train_epochs": 20, "save_steps": 500, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 3.558495949305938e+18, "train_batch_size": 32, "trial_name": null, "trial_params": null }