{ "best_metric": 0.6914285714285714, "best_model_checkpoint": "dinov2-base-finetuned-eurosat/checkpoint-308", "epoch": 30.0, "eval_steps": 500, "global_step": 330, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.91, "grad_norm": 71.39833068847656, "learning_rate": 1.5151515151515153e-05, "loss": 6.646, "step": 10 }, { "epoch": 1.0, "eval_accuracy": 0.004285714285714286, "eval_loss": 6.344563961029053, "eval_runtime": 12.2748, "eval_samples_per_second": 57.027, "eval_steps_per_second": 0.896, "step": 11 }, { "epoch": 1.82, "grad_norm": 36.275108337402344, "learning_rate": 3.0303030303030306e-05, "loss": 6.0586, "step": 20 }, { "epoch": 2.0, "eval_accuracy": 0.037142857142857144, "eval_loss": 5.812839031219482, "eval_runtime": 11.6948, "eval_samples_per_second": 59.856, "eval_steps_per_second": 0.941, "step": 22 }, { "epoch": 2.73, "grad_norm": 78.4278564453125, "learning_rate": 4.545454545454546e-05, "loss": 4.9553, "step": 30 }, { "epoch": 3.0, "eval_accuracy": 0.24285714285714285, "eval_loss": 4.52340030670166, "eval_runtime": 11.5613, "eval_samples_per_second": 60.547, "eval_steps_per_second": 0.951, "step": 33 }, { "epoch": 3.64, "grad_norm": 94.20513153076172, "learning_rate": 4.882154882154882e-05, "loss": 3.2097, "step": 40 }, { "epoch": 4.0, "eval_accuracy": 0.48428571428571426, "eval_loss": 3.1874964237213135, "eval_runtime": 11.6294, "eval_samples_per_second": 60.192, "eval_steps_per_second": 0.946, "step": 44 }, { "epoch": 4.55, "grad_norm": 55.16205596923828, "learning_rate": 4.713804713804714e-05, "loss": 1.6208, "step": 50 }, { "epoch": 5.0, "eval_accuracy": 0.5957142857142858, "eval_loss": 2.3652451038360596, "eval_runtime": 11.6572, "eval_samples_per_second": 60.048, "eval_steps_per_second": 0.944, "step": 55 }, { "epoch": 5.45, "grad_norm": 28.252750396728516, "learning_rate": 4.545454545454546e-05, "loss": 0.7822, "step": 60 }, { "epoch": 6.0, "eval_accuracy": 0.6485714285714286, "eval_loss": 2.007438898086548, "eval_runtime": 11.7326, "eval_samples_per_second": 59.663, "eval_steps_per_second": 0.938, "step": 66 }, { "epoch": 6.36, "grad_norm": 17.972673416137695, "learning_rate": 4.3771043771043774e-05, "loss": 0.3699, "step": 70 }, { "epoch": 7.0, "eval_accuracy": 0.66, "eval_loss": 1.9254851341247559, "eval_runtime": 11.7512, "eval_samples_per_second": 59.569, "eval_steps_per_second": 0.936, "step": 77 }, { "epoch": 7.27, "grad_norm": 21.875259399414062, "learning_rate": 4.208754208754209e-05, "loss": 0.1745, "step": 80 }, { "epoch": 8.0, "eval_accuracy": 0.6557142857142857, "eval_loss": 1.865968942642212, "eval_runtime": 11.609, "eval_samples_per_second": 60.298, "eval_steps_per_second": 0.948, "step": 88 }, { "epoch": 8.18, "grad_norm": 13.34464168548584, "learning_rate": 4.0404040404040405e-05, "loss": 0.1285, "step": 90 }, { "epoch": 9.0, "eval_accuracy": 0.6571428571428571, "eval_loss": 1.8786249160766602, "eval_runtime": 11.6967, "eval_samples_per_second": 59.846, "eval_steps_per_second": 0.94, "step": 99 }, { "epoch": 9.09, "grad_norm": 11.297475814819336, "learning_rate": 3.872053872053872e-05, "loss": 0.1178, "step": 100 }, { "epoch": 10.0, "grad_norm": 16.153575897216797, "learning_rate": 3.7037037037037037e-05, "loss": 0.0883, "step": 110 }, { "epoch": 10.0, "eval_accuracy": 0.6585714285714286, "eval_loss": 1.8617857694625854, "eval_runtime": 11.6432, "eval_samples_per_second": 60.121, "eval_steps_per_second": 0.945, "step": 110 }, { "epoch": 10.91, "grad_norm": 13.22706127166748, "learning_rate": 3.535353535353535e-05, "loss": 0.0721, "step": 120 }, { "epoch": 11.0, "eval_accuracy": 0.6514285714285715, "eval_loss": 1.9431724548339844, "eval_runtime": 12.6039, "eval_samples_per_second": 55.538, "eval_steps_per_second": 0.873, "step": 121 }, { "epoch": 11.82, "grad_norm": 8.195013046264648, "learning_rate": 3.3670033670033675e-05, "loss": 0.0693, "step": 130 }, { "epoch": 12.0, "eval_accuracy": 0.6642857142857143, "eval_loss": 1.873042345046997, "eval_runtime": 11.7524, "eval_samples_per_second": 59.562, "eval_steps_per_second": 0.936, "step": 132 }, { "epoch": 12.73, "grad_norm": 9.13159465789795, "learning_rate": 3.198653198653199e-05, "loss": 0.0901, "step": 140 }, { "epoch": 13.0, "eval_accuracy": 0.6557142857142857, "eval_loss": 1.8676621913909912, "eval_runtime": 11.7011, "eval_samples_per_second": 59.823, "eval_steps_per_second": 0.94, "step": 143 }, { "epoch": 13.64, "grad_norm": 5.170494556427002, "learning_rate": 3.0303030303030306e-05, "loss": 0.0608, "step": 150 }, { "epoch": 14.0, "eval_accuracy": 0.6757142857142857, "eval_loss": 1.846497654914856, "eval_runtime": 12.5066, "eval_samples_per_second": 55.971, "eval_steps_per_second": 0.88, "step": 154 }, { "epoch": 14.55, "grad_norm": 4.774472713470459, "learning_rate": 2.8619528619528618e-05, "loss": 0.0443, "step": 160 }, { "epoch": 15.0, "eval_accuracy": 0.6642857142857143, "eval_loss": 1.8421980142593384, "eval_runtime": 11.808, "eval_samples_per_second": 59.282, "eval_steps_per_second": 0.932, "step": 165 }, { "epoch": 15.45, "grad_norm": 2.623682737350464, "learning_rate": 2.6936026936026937e-05, "loss": 0.0552, "step": 170 }, { "epoch": 16.0, "eval_accuracy": 0.6585714285714286, "eval_loss": 1.9717400074005127, "eval_runtime": 11.7743, "eval_samples_per_second": 59.451, "eval_steps_per_second": 0.934, "step": 176 }, { "epoch": 16.36, "grad_norm": 3.4440066814422607, "learning_rate": 2.5252525252525256e-05, "loss": 0.0416, "step": 180 }, { "epoch": 17.0, "eval_accuracy": 0.6657142857142857, "eval_loss": 1.8076777458190918, "eval_runtime": 11.7226, "eval_samples_per_second": 59.714, "eval_steps_per_second": 0.938, "step": 187 }, { "epoch": 17.27, "grad_norm": 8.230661392211914, "learning_rate": 2.356902356902357e-05, "loss": 0.0366, "step": 190 }, { "epoch": 18.0, "eval_accuracy": 0.6742857142857143, "eval_loss": 1.8198397159576416, "eval_runtime": 11.6594, "eval_samples_per_second": 60.037, "eval_steps_per_second": 0.943, "step": 198 }, { "epoch": 18.18, "grad_norm": 3.6574606895446777, "learning_rate": 2.1885521885521887e-05, "loss": 0.0313, "step": 200 }, { "epoch": 19.0, "eval_accuracy": 0.6757142857142857, "eval_loss": 1.8081269264221191, "eval_runtime": 11.957, "eval_samples_per_second": 58.543, "eval_steps_per_second": 0.92, "step": 209 }, { "epoch": 19.09, "grad_norm": 4.515919208526611, "learning_rate": 2.0202020202020203e-05, "loss": 0.0272, "step": 210 }, { "epoch": 20.0, "grad_norm": 4.542725086212158, "learning_rate": 1.8518518518518518e-05, "loss": 0.0296, "step": 220 }, { "epoch": 20.0, "eval_accuracy": 0.6785714285714286, "eval_loss": 1.776505947113037, "eval_runtime": 11.5903, "eval_samples_per_second": 60.395, "eval_steps_per_second": 0.949, "step": 220 }, { "epoch": 20.91, "grad_norm": 2.6347365379333496, "learning_rate": 1.6835016835016837e-05, "loss": 0.0215, "step": 230 }, { "epoch": 21.0, "eval_accuracy": 0.6828571428571428, "eval_loss": 1.6916331052780151, "eval_runtime": 13.3341, "eval_samples_per_second": 52.497, "eval_steps_per_second": 0.825, "step": 231 }, { "epoch": 21.82, "grad_norm": 0.4444705545902252, "learning_rate": 1.5151515151515153e-05, "loss": 0.0144, "step": 240 }, { "epoch": 22.0, "eval_accuracy": 0.6857142857142857, "eval_loss": 1.7237095832824707, "eval_runtime": 11.4907, "eval_samples_per_second": 60.919, "eval_steps_per_second": 0.957, "step": 242 }, { "epoch": 22.73, "grad_norm": 4.013304710388184, "learning_rate": 1.3468013468013468e-05, "loss": 0.0108, "step": 250 }, { "epoch": 23.0, "eval_accuracy": 0.67, "eval_loss": 1.792176365852356, "eval_runtime": 11.5859, "eval_samples_per_second": 60.418, "eval_steps_per_second": 0.949, "step": 253 }, { "epoch": 23.64, "grad_norm": 0.9613437056541443, "learning_rate": 1.1784511784511786e-05, "loss": 0.0232, "step": 260 }, { "epoch": 24.0, "eval_accuracy": 0.6828571428571428, "eval_loss": 1.7594307661056519, "eval_runtime": 12.8499, "eval_samples_per_second": 54.475, "eval_steps_per_second": 0.856, "step": 264 }, { "epoch": 24.55, "grad_norm": 2.5503318309783936, "learning_rate": 1.0101010101010101e-05, "loss": 0.0129, "step": 270 }, { "epoch": 25.0, "eval_accuracy": 0.6828571428571428, "eval_loss": 1.7361136674880981, "eval_runtime": 11.7158, "eval_samples_per_second": 59.749, "eval_steps_per_second": 0.939, "step": 275 }, { "epoch": 25.45, "grad_norm": 5.675755977630615, "learning_rate": 8.417508417508419e-06, "loss": 0.0093, "step": 280 }, { "epoch": 26.0, "eval_accuracy": 0.6828571428571428, "eval_loss": 1.7426681518554688, "eval_runtime": 12.593, "eval_samples_per_second": 55.586, "eval_steps_per_second": 0.873, "step": 286 }, { "epoch": 26.36, "grad_norm": 2.090123176574707, "learning_rate": 6.734006734006734e-06, "loss": 0.0067, "step": 290 }, { "epoch": 27.0, "eval_accuracy": 0.69, "eval_loss": 1.730440378189087, "eval_runtime": 11.8655, "eval_samples_per_second": 58.995, "eval_steps_per_second": 0.927, "step": 297 }, { "epoch": 27.27, "grad_norm": 0.6074270009994507, "learning_rate": 5.050505050505051e-06, "loss": 0.0013, "step": 300 }, { "epoch": 28.0, "eval_accuracy": 0.6914285714285714, "eval_loss": 1.726584792137146, "eval_runtime": 11.8751, "eval_samples_per_second": 58.947, "eval_steps_per_second": 0.926, "step": 308 }, { "epoch": 28.18, "grad_norm": 0.04077678918838501, "learning_rate": 3.367003367003367e-06, "loss": 0.0031, "step": 310 }, { "epoch": 29.0, "eval_accuracy": 0.69, "eval_loss": 1.7368921041488647, "eval_runtime": 11.7621, "eval_samples_per_second": 59.513, "eval_steps_per_second": 0.935, "step": 319 }, { "epoch": 29.09, "grad_norm": 0.32179221510887146, "learning_rate": 1.6835016835016836e-06, "loss": 0.002, "step": 320 }, { "epoch": 30.0, "grad_norm": 0.05003494769334793, "learning_rate": 0.0, "loss": 0.0019, "step": 330 }, { "epoch": 30.0, "eval_accuracy": 0.69, "eval_loss": 1.7391921281814575, "eval_runtime": 12.8063, "eval_samples_per_second": 54.661, "eval_steps_per_second": 0.859, "step": 330 }, { "epoch": 30.0, "step": 330, "total_flos": 5.2828663104e+18, "train_loss": 0.7520242673994014, "train_runtime": 3943.4049, "train_samples_per_second": 21.301, "train_steps_per_second": 0.084 } ], "logging_steps": 10, "max_steps": 330, "num_input_tokens_seen": 0, "num_train_epochs": 30, "save_steps": 500, "total_flos": 5.2828663104e+18, "train_batch_size": 64, "trial_name": null, "trial_params": null }