{ "best_metric": 0.9026717557251909, "best_model_checkpoint": "deit-base-patch16-224-finetuned-lora-medmnistv2/checkpoint-516", "epoch": 9.898305084745763, "eval_steps": 500, "global_step": 730, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.13559322033898305, "grad_norm": 0.6092599034309387, "learning_rate": 0.004931506849315068, "loss": 0.6182, "step": 10 }, { "epoch": 0.2711864406779661, "grad_norm": 9.652957916259766, "learning_rate": 0.0048630136986301375, "loss": 0.6302, "step": 20 }, { "epoch": 0.4067796610169492, "grad_norm": 1.760549783706665, "learning_rate": 0.004794520547945206, "loss": 0.645, "step": 30 }, { "epoch": 0.5423728813559322, "grad_norm": 0.8188679814338684, "learning_rate": 0.004726027397260274, "loss": 0.4782, "step": 40 }, { "epoch": 0.6779661016949152, "grad_norm": 3.1816465854644775, "learning_rate": 0.004657534246575342, "loss": 0.7114, "step": 50 }, { "epoch": 0.8135593220338984, "grad_norm": 3.094874858856201, "learning_rate": 0.004589041095890411, "loss": 0.5755, "step": 60 }, { "epoch": 0.9491525423728814, "grad_norm": 1.603766918182373, "learning_rate": 0.00452054794520548, "loss": 0.6158, "step": 70 }, { "epoch": 0.9898305084745763, "eval_accuracy": 0.7423664122137404, "eval_f1": 0.42606790799561883, "eval_loss": 0.567380964756012, "eval_precision": 0.3711832061068702, "eval_recall": 0.5, "eval_runtime": 2.2609, "eval_samples_per_second": 231.762, "eval_steps_per_second": 14.596, "step": 73 }, { "epoch": 1.0847457627118644, "grad_norm": 3.2192718982696533, "learning_rate": 0.004452054794520548, "loss": 0.5565, "step": 80 }, { "epoch": 1.2203389830508475, "grad_norm": 1.6243762969970703, "learning_rate": 0.004383561643835616, "loss": 0.5592, "step": 90 }, { "epoch": 1.3559322033898304, "grad_norm": 2.504822015762329, "learning_rate": 0.004315068493150685, "loss": 0.62, "step": 100 }, { "epoch": 1.4915254237288136, "grad_norm": 1.8308496475219727, "learning_rate": 0.0042465753424657535, "loss": 0.5712, "step": 110 }, { "epoch": 1.6271186440677967, "grad_norm": 0.7807316184043884, "learning_rate": 0.004178082191780822, "loss": 0.531, "step": 120 }, { "epoch": 1.7627118644067796, "grad_norm": 2.2005345821380615, "learning_rate": 0.00410958904109589, "loss": 0.5394, "step": 130 }, { "epoch": 1.8983050847457628, "grad_norm": 1.179870843887329, "learning_rate": 0.004041095890410959, "loss": 0.5322, "step": 140 }, { "epoch": 1.993220338983051, "eval_accuracy": 0.7729007633587787, "eval_f1": 0.5396280463945307, "eval_loss": 0.48395079374313354, "eval_precision": 0.8828740157480315, "eval_recall": 0.5592592592592592, "eval_runtime": 2.4443, "eval_samples_per_second": 214.373, "eval_steps_per_second": 13.501, "step": 147 }, { "epoch": 2.0338983050847457, "grad_norm": 0.4185134768486023, "learning_rate": 0.003972602739726027, "loss": 0.5459, "step": 150 }, { "epoch": 2.169491525423729, "grad_norm": 3.1739585399627686, "learning_rate": 0.003904109589041096, "loss": 0.5155, "step": 160 }, { "epoch": 2.305084745762712, "grad_norm": 1.602779507637024, "learning_rate": 0.0038356164383561643, "loss": 0.5648, "step": 170 }, { "epoch": 2.440677966101695, "grad_norm": 0.7795351147651672, "learning_rate": 0.003767123287671233, "loss": 0.4647, "step": 180 }, { "epoch": 2.576271186440678, "grad_norm": 0.20109909772872925, "learning_rate": 0.0036986301369863013, "loss": 0.4323, "step": 190 }, { "epoch": 2.711864406779661, "grad_norm": 3.717724561691284, "learning_rate": 0.00363013698630137, "loss": 0.5199, "step": 200 }, { "epoch": 2.847457627118644, "grad_norm": 1.0907886028289795, "learning_rate": 0.003561643835616438, "loss": 0.4441, "step": 210 }, { "epoch": 2.983050847457627, "grad_norm": 1.0848020315170288, "learning_rate": 0.003493150684931507, "loss": 0.4139, "step": 220 }, { "epoch": 2.9966101694915253, "eval_accuracy": 0.7938931297709924, "eval_f1": 0.6057185854025584, "eval_loss": 0.3726596534252167, "eval_precision": 0.8913480885311871, "eval_recall": 0.6, "eval_runtime": 2.2564, "eval_samples_per_second": 232.227, "eval_steps_per_second": 14.625, "step": 221 }, { "epoch": 3.1186440677966103, "grad_norm": 0.5923116207122803, "learning_rate": 0.003424657534246575, "loss": 0.4562, "step": 230 }, { "epoch": 3.2542372881355934, "grad_norm": 0.6732156872749329, "learning_rate": 0.003356164383561644, "loss": 0.4198, "step": 240 }, { "epoch": 3.389830508474576, "grad_norm": 0.44104114174842834, "learning_rate": 0.003287671232876712, "loss": 0.4247, "step": 250 }, { "epoch": 3.5254237288135593, "grad_norm": 1.1133421659469604, "learning_rate": 0.0032191780821917808, "loss": 0.4094, "step": 260 }, { "epoch": 3.6610169491525424, "grad_norm": 0.5410763621330261, "learning_rate": 0.003150684931506849, "loss": 0.4489, "step": 270 }, { "epoch": 3.7966101694915255, "grad_norm": 1.290757656097412, "learning_rate": 0.003082191780821918, "loss": 0.4028, "step": 280 }, { "epoch": 3.9322033898305087, "grad_norm": 1.2204492092132568, "learning_rate": 0.0030136986301369864, "loss": 0.3979, "step": 290 }, { "epoch": 4.0, "eval_accuracy": 0.7309160305343512, "eval_f1": 0.7167969274053518, "eval_loss": 0.5270416736602783, "eval_precision": 0.7405170401493931, "eval_recall": 0.8139293535180425, "eval_runtime": 2.2872, "eval_samples_per_second": 229.106, "eval_steps_per_second": 14.428, "step": 295 }, { "epoch": 4.067796610169491, "grad_norm": 0.5202679634094238, "learning_rate": 0.002945205479452055, "loss": 0.3858, "step": 300 }, { "epoch": 4.203389830508475, "grad_norm": 0.20694881677627563, "learning_rate": 0.0028767123287671234, "loss": 0.426, "step": 310 }, { "epoch": 4.338983050847458, "grad_norm": 0.9228326678276062, "learning_rate": 0.002808219178082192, "loss": 0.3419, "step": 320 }, { "epoch": 4.47457627118644, "grad_norm": 0.5701133608818054, "learning_rate": 0.0027397260273972603, "loss": 0.4407, "step": 330 }, { "epoch": 4.610169491525424, "grad_norm": 0.5276753306388855, "learning_rate": 0.002671232876712329, "loss": 0.3626, "step": 340 }, { "epoch": 4.745762711864407, "grad_norm": 0.3167065680027008, "learning_rate": 0.0026027397260273972, "loss": 0.4012, "step": 350 }, { "epoch": 4.88135593220339, "grad_norm": 0.9251042008399963, "learning_rate": 0.002534246575342466, "loss": 0.3858, "step": 360 }, { "epoch": 4.989830508474577, "eval_accuracy": 0.8530534351145038, "eval_f1": 0.825253903289374, "eval_loss": 0.30624744296073914, "eval_precision": 0.8072997416020672, "eval_recall": 0.8623345710749311, "eval_runtime": 2.5426, "eval_samples_per_second": 206.091, "eval_steps_per_second": 12.979, "step": 368 }, { "epoch": 5.016949152542373, "grad_norm": 0.33929237723350525, "learning_rate": 0.002465753424657534, "loss": 0.362, "step": 370 }, { "epoch": 5.1525423728813555, "grad_norm": 1.5055538415908813, "learning_rate": 0.002397260273972603, "loss": 0.4396, "step": 380 }, { "epoch": 5.288135593220339, "grad_norm": 0.318695992231369, "learning_rate": 0.002328767123287671, "loss": 0.3909, "step": 390 }, { "epoch": 5.423728813559322, "grad_norm": 1.0506914854049683, "learning_rate": 0.00226027397260274, "loss": 0.3563, "step": 400 }, { "epoch": 5.559322033898305, "grad_norm": 1.1333622932434082, "learning_rate": 0.002191780821917808, "loss": 0.3737, "step": 410 }, { "epoch": 5.694915254237288, "grad_norm": 0.3602863550186157, "learning_rate": 0.0021232876712328768, "loss": 0.4139, "step": 420 }, { "epoch": 5.830508474576272, "grad_norm": 0.9611266851425171, "learning_rate": 0.002054794520547945, "loss": 0.3851, "step": 430 }, { "epoch": 5.966101694915254, "grad_norm": 0.3874049186706543, "learning_rate": 0.0019863013698630137, "loss": 0.3704, "step": 440 }, { "epoch": 5.9932203389830505, "eval_accuracy": 0.8263358778625954, "eval_f1": 0.8056371899647421, "eval_loss": 0.37736260890960693, "eval_precision": 0.793922767883912, "eval_recall": 0.8733599923831286, "eval_runtime": 2.2589, "eval_samples_per_second": 231.974, "eval_steps_per_second": 14.609, "step": 442 }, { "epoch": 6.101694915254237, "grad_norm": 0.6873330473899841, "learning_rate": 0.0019178082191780822, "loss": 0.3314, "step": 450 }, { "epoch": 6.237288135593221, "grad_norm": 0.4017014801502228, "learning_rate": 0.0018493150684931506, "loss": 0.3821, "step": 460 }, { "epoch": 6.372881355932203, "grad_norm": 0.4146214723587036, "learning_rate": 0.001780821917808219, "loss": 0.336, "step": 470 }, { "epoch": 6.508474576271187, "grad_norm": 0.4829295873641968, "learning_rate": 0.0017123287671232876, "loss": 0.333, "step": 480 }, { "epoch": 6.6440677966101696, "grad_norm": 0.31482774019241333, "learning_rate": 0.001643835616438356, "loss": 0.4051, "step": 490 }, { "epoch": 6.779661016949152, "grad_norm": 1.6030710935592651, "learning_rate": 0.0015753424657534245, "loss": 0.4016, "step": 500 }, { "epoch": 6.915254237288136, "grad_norm": 0.41512808203697205, "learning_rate": 0.0015068493150684932, "loss": 0.3345, "step": 510 }, { "epoch": 6.996610169491525, "eval_accuracy": 0.9026717557251909, "eval_f1": 0.8748683083060584, "eval_loss": 0.2403312772512436, "eval_precision": 0.8690915124253373, "eval_recall": 0.8812434542511663, "eval_runtime": 2.2592, "eval_samples_per_second": 231.938, "eval_steps_per_second": 14.607, "step": 516 }, { "epoch": 7.0508474576271185, "grad_norm": 0.654947817325592, "learning_rate": 0.0014383561643835617, "loss": 0.3406, "step": 520 }, { "epoch": 7.186440677966102, "grad_norm": 0.7446631193161011, "learning_rate": 0.0013698630136986301, "loss": 0.3591, "step": 530 }, { "epoch": 7.322033898305085, "grad_norm": 1.3772833347320557, "learning_rate": 0.0013013698630136986, "loss": 0.3704, "step": 540 }, { "epoch": 7.4576271186440675, "grad_norm": 0.2521153688430786, "learning_rate": 0.001232876712328767, "loss": 0.3506, "step": 550 }, { "epoch": 7.593220338983051, "grad_norm": 0.48469170928001404, "learning_rate": 0.0011643835616438356, "loss": 0.336, "step": 560 }, { "epoch": 7.728813559322034, "grad_norm": 0.30622732639312744, "learning_rate": 0.001095890410958904, "loss": 0.3283, "step": 570 }, { "epoch": 7.864406779661017, "grad_norm": 0.34153249859809875, "learning_rate": 0.0010273972602739725, "loss": 0.3413, "step": 580 }, { "epoch": 8.0, "grad_norm": 0.6064002513885498, "learning_rate": 0.0009589041095890411, "loss": 0.3875, "step": 590 }, { "epoch": 8.0, "eval_accuracy": 0.8816793893129771, "eval_f1": 0.8590395529252504, "eval_loss": 0.3021100163459778, "eval_precision": 0.8389118290017004, "eval_recall": 0.8985432733504712, "eval_runtime": 2.6546, "eval_samples_per_second": 197.396, "eval_steps_per_second": 12.431, "step": 590 }, { "epoch": 8.135593220338983, "grad_norm": 0.49543967843055725, "learning_rate": 0.0008904109589041096, "loss": 0.3826, "step": 600 }, { "epoch": 8.271186440677965, "grad_norm": 0.29732996225357056, "learning_rate": 0.000821917808219178, "loss": 0.358, "step": 610 }, { "epoch": 8.40677966101695, "grad_norm": 0.3708488345146179, "learning_rate": 0.0007534246575342466, "loss": 0.3239, "step": 620 }, { "epoch": 8.542372881355933, "grad_norm": 0.37521126866340637, "learning_rate": 0.0006849315068493151, "loss": 0.3074, "step": 630 }, { "epoch": 8.677966101694915, "grad_norm": 0.8718960285186768, "learning_rate": 0.0006164383561643835, "loss": 0.3114, "step": 640 }, { "epoch": 8.813559322033898, "grad_norm": 0.8521252274513245, "learning_rate": 0.000547945205479452, "loss": 0.3076, "step": 650 }, { "epoch": 8.94915254237288, "grad_norm": 0.501818060874939, "learning_rate": 0.00047945205479452054, "loss": 0.3673, "step": 660 }, { "epoch": 8.989830508474576, "eval_accuracy": 0.8969465648854962, "eval_f1": 0.8748938879456707, "eval_loss": 0.28653448820114136, "eval_precision": 0.8557379781648471, "eval_recall": 0.9064076930400837, "eval_runtime": 2.2474, "eval_samples_per_second": 233.156, "eval_steps_per_second": 14.683, "step": 663 }, { "epoch": 9.084745762711865, "grad_norm": 0.47983425855636597, "learning_rate": 0.000410958904109589, "loss": 0.3276, "step": 670 }, { "epoch": 9.220338983050848, "grad_norm": 0.24815304577350616, "learning_rate": 0.00034246575342465754, "loss": 0.2874, "step": 680 }, { "epoch": 9.35593220338983, "grad_norm": 0.5116902589797974, "learning_rate": 0.000273972602739726, "loss": 0.3645, "step": 690 }, { "epoch": 9.491525423728813, "grad_norm": 0.34429872035980225, "learning_rate": 0.0002054794520547945, "loss": 0.3231, "step": 700 }, { "epoch": 9.627118644067796, "grad_norm": 0.31422099471092224, "learning_rate": 0.000136986301369863, "loss": 0.2665, "step": 710 }, { "epoch": 9.76271186440678, "grad_norm": 0.44704005122184753, "learning_rate": 6.84931506849315e-05, "loss": 0.3432, "step": 720 }, { "epoch": 9.898305084745763, "grad_norm": 0.49681413173675537, "learning_rate": 0.0, "loss": 0.3493, "step": 730 }, { "epoch": 9.898305084745763, "eval_accuracy": 0.8740458015267175, "eval_f1": 0.8515453296703297, "eval_loss": 0.3024097681045532, "eval_precision": 0.8314438332137447, "eval_recall": 0.895820241835666, "eval_runtime": 2.2783, "eval_samples_per_second": 229.992, "eval_steps_per_second": 14.484, "step": 730 }, { "epoch": 9.898305084745763, "step": 730, "total_flos": 3.6369520534486057e+18, "train_loss": 0.4224395229391856, "train_runtime": 449.3032, "train_samples_per_second": 104.784, "train_steps_per_second": 1.625 } ], "logging_steps": 10, "max_steps": 730, "num_input_tokens_seen": 0, "num_train_epochs": 10, "save_steps": 500, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 3.6369520534486057e+18, "train_batch_size": 16, "trial_name": null, "trial_params": null }