|
{ |
|
"best_metric": 0.9141630901287554, |
|
"best_model_checkpoint": "beit-base-patch16-224-pt22k-ft22k-finetuned-lora-medmnistv2/checkpoint-63", |
|
"epoch": 9.882352941176471, |
|
"eval_steps": 500, |
|
"global_step": 630, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.16, |
|
"grad_norm": 2.7611124515533447, |
|
"learning_rate": 0.004920634920634921, |
|
"loss": 0.6696, |
|
"step": 10 |
|
}, |
|
{ |
|
"epoch": 0.31, |
|
"grad_norm": 3.1516690254211426, |
|
"learning_rate": 0.004841269841269842, |
|
"loss": 0.5037, |
|
"step": 20 |
|
}, |
|
{ |
|
"epoch": 0.47, |
|
"grad_norm": 1.6632912158966064, |
|
"learning_rate": 0.0047619047619047615, |
|
"loss": 0.5343, |
|
"step": 30 |
|
}, |
|
{ |
|
"epoch": 0.63, |
|
"grad_norm": 15.660941123962402, |
|
"learning_rate": 0.004682539682539683, |
|
"loss": 0.7623, |
|
"step": 40 |
|
}, |
|
{ |
|
"epoch": 0.78, |
|
"grad_norm": 1.5720008611679077, |
|
"learning_rate": 0.004603174603174603, |
|
"loss": 0.5904, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 0.94, |
|
"grad_norm": 3.288884401321411, |
|
"learning_rate": 0.004523809523809524, |
|
"loss": 0.4775, |
|
"step": 60 |
|
}, |
|
{ |
|
"epoch": 0.99, |
|
"eval_accuracy": 0.9141630901287554, |
|
"eval_f1": 0.8903401432261271, |
|
"eval_loss": 0.22644232213497162, |
|
"eval_precision": 0.884993651288677, |
|
"eval_recall": 0.8961752704933064, |
|
"eval_runtime": 12.2128, |
|
"eval_samples_per_second": 95.391, |
|
"eval_steps_per_second": 5.977, |
|
"step": 63 |
|
}, |
|
{ |
|
"epoch": 1.1, |
|
"grad_norm": 2.6683709621429443, |
|
"learning_rate": 0.0044603174603174605, |
|
"loss": 0.4832, |
|
"step": 70 |
|
}, |
|
{ |
|
"epoch": 1.25, |
|
"grad_norm": 1.2820926904678345, |
|
"learning_rate": 0.004380952380952381, |
|
"loss": 0.5064, |
|
"step": 80 |
|
}, |
|
{ |
|
"epoch": 1.41, |
|
"grad_norm": 5.157268047332764, |
|
"learning_rate": 0.004301587301587302, |
|
"loss": 0.4561, |
|
"step": 90 |
|
}, |
|
{ |
|
"epoch": 1.57, |
|
"grad_norm": 0.9834429025650024, |
|
"learning_rate": 0.004222222222222223, |
|
"loss": 0.3981, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 1.73, |
|
"grad_norm": 7.78388786315918, |
|
"learning_rate": 0.0041428571428571434, |
|
"loss": 0.5016, |
|
"step": 110 |
|
}, |
|
{ |
|
"epoch": 1.88, |
|
"grad_norm": 12.203431129455566, |
|
"learning_rate": 0.004063492063492063, |
|
"loss": 0.7117, |
|
"step": 120 |
|
}, |
|
{ |
|
"epoch": 1.99, |
|
"eval_accuracy": 0.7390557939914163, |
|
"eval_f1": 0.4249753208292202, |
|
"eval_loss": 0.4007842242717743, |
|
"eval_precision": 0.36952789699570815, |
|
"eval_recall": 0.5, |
|
"eval_runtime": 12.6185, |
|
"eval_samples_per_second": 92.325, |
|
"eval_steps_per_second": 5.785, |
|
"step": 127 |
|
}, |
|
{ |
|
"epoch": 2.04, |
|
"grad_norm": 9.85489273071289, |
|
"learning_rate": 0.003992063492063492, |
|
"loss": 0.4575, |
|
"step": 130 |
|
}, |
|
{ |
|
"epoch": 2.2, |
|
"grad_norm": 2.926109552383423, |
|
"learning_rate": 0.003912698412698413, |
|
"loss": 0.4562, |
|
"step": 140 |
|
}, |
|
{ |
|
"epoch": 2.35, |
|
"grad_norm": 0.5027350187301636, |
|
"learning_rate": 0.0038333333333333336, |
|
"loss": 0.417, |
|
"step": 150 |
|
}, |
|
{ |
|
"epoch": 2.51, |
|
"grad_norm": 0.8259086608886719, |
|
"learning_rate": 0.003753968253968254, |
|
"loss": 0.3953, |
|
"step": 160 |
|
}, |
|
{ |
|
"epoch": 2.67, |
|
"grad_norm": 2.8119688034057617, |
|
"learning_rate": 0.003674603174603175, |
|
"loss": 0.4175, |
|
"step": 170 |
|
}, |
|
{ |
|
"epoch": 2.82, |
|
"grad_norm": 2.306551218032837, |
|
"learning_rate": 0.0035952380952380954, |
|
"loss": 0.4226, |
|
"step": 180 |
|
}, |
|
{ |
|
"epoch": 2.98, |
|
"grad_norm": 6.00565242767334, |
|
"learning_rate": 0.003515873015873016, |
|
"loss": 0.4115, |
|
"step": 190 |
|
}, |
|
{ |
|
"epoch": 3.0, |
|
"eval_accuracy": 0.8154506437768241, |
|
"eval_f1": 0.7957275475643482, |
|
"eval_loss": 0.43578293919563293, |
|
"eval_precision": 0.7870816708492059, |
|
"eval_recall": 0.8645050125313283, |
|
"eval_runtime": 12.1326, |
|
"eval_samples_per_second": 96.022, |
|
"eval_steps_per_second": 6.017, |
|
"step": 191 |
|
}, |
|
{ |
|
"epoch": 3.14, |
|
"grad_norm": 1.8762693405151367, |
|
"learning_rate": 0.0034365079365079364, |
|
"loss": 0.3876, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 3.29, |
|
"grad_norm": 1.1961586475372314, |
|
"learning_rate": 0.003357142857142857, |
|
"loss": 0.3232, |
|
"step": 210 |
|
}, |
|
{ |
|
"epoch": 3.45, |
|
"grad_norm": 0.5015142560005188, |
|
"learning_rate": 0.003277777777777778, |
|
"loss": 0.4061, |
|
"step": 220 |
|
}, |
|
{ |
|
"epoch": 3.61, |
|
"grad_norm": 2.750140428543091, |
|
"learning_rate": 0.003198412698412698, |
|
"loss": 0.372, |
|
"step": 230 |
|
}, |
|
{ |
|
"epoch": 3.76, |
|
"grad_norm": 2.029989242553711, |
|
"learning_rate": 0.003119047619047619, |
|
"loss": 0.3498, |
|
"step": 240 |
|
}, |
|
{ |
|
"epoch": 3.92, |
|
"grad_norm": 2.366288661956787, |
|
"learning_rate": 0.0030396825396825397, |
|
"loss": 0.3631, |
|
"step": 250 |
|
}, |
|
{ |
|
"epoch": 4.0, |
|
"eval_accuracy": 0.8798283261802575, |
|
"eval_f1": 0.8517509925684618, |
|
"eval_loss": 0.30910125374794006, |
|
"eval_precision": 0.8380756825290672, |
|
"eval_recall": 0.8708184332783178, |
|
"eval_runtime": 12.051, |
|
"eval_samples_per_second": 96.673, |
|
"eval_steps_per_second": 6.058, |
|
"step": 255 |
|
}, |
|
{ |
|
"epoch": 4.08, |
|
"grad_norm": 0.5986310839653015, |
|
"learning_rate": 0.0029603174603174604, |
|
"loss": 0.3413, |
|
"step": 260 |
|
}, |
|
{ |
|
"epoch": 4.24, |
|
"grad_norm": 4.78090763092041, |
|
"learning_rate": 0.0028809523809523807, |
|
"loss": 0.384, |
|
"step": 270 |
|
}, |
|
{ |
|
"epoch": 4.39, |
|
"grad_norm": 3.392075300216675, |
|
"learning_rate": 0.0028015873015873015, |
|
"loss": 0.3579, |
|
"step": 280 |
|
}, |
|
{ |
|
"epoch": 4.55, |
|
"grad_norm": 1.982884407043457, |
|
"learning_rate": 0.0027222222222222222, |
|
"loss": 0.3397, |
|
"step": 290 |
|
}, |
|
{ |
|
"epoch": 4.71, |
|
"grad_norm": 4.884500980377197, |
|
"learning_rate": 0.002642857142857143, |
|
"loss": 0.3609, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 4.86, |
|
"grad_norm": 0.9579557776451111, |
|
"learning_rate": 0.0025634920634920633, |
|
"loss": 0.3794, |
|
"step": 310 |
|
}, |
|
{ |
|
"epoch": 4.99, |
|
"eval_accuracy": 0.8798283261802575, |
|
"eval_f1": 0.8494965377744312, |
|
"eval_loss": 0.2801721394062042, |
|
"eval_precision": 0.8392582443135881, |
|
"eval_recall": 0.8623062992847974, |
|
"eval_runtime": 12.197, |
|
"eval_samples_per_second": 95.516, |
|
"eval_steps_per_second": 5.985, |
|
"step": 318 |
|
}, |
|
{ |
|
"epoch": 5.02, |
|
"grad_norm": 1.1442433595657349, |
|
"learning_rate": 0.002484126984126984, |
|
"loss": 0.3566, |
|
"step": 320 |
|
}, |
|
{ |
|
"epoch": 5.18, |
|
"grad_norm": 4.931128978729248, |
|
"learning_rate": 0.0024047619047619048, |
|
"loss": 0.4874, |
|
"step": 330 |
|
}, |
|
{ |
|
"epoch": 5.33, |
|
"grad_norm": 2.474273443222046, |
|
"learning_rate": 0.0023253968253968255, |
|
"loss": 0.375, |
|
"step": 340 |
|
}, |
|
{ |
|
"epoch": 5.49, |
|
"grad_norm": 2.144062042236328, |
|
"learning_rate": 0.0022460317460317463, |
|
"loss": 0.368, |
|
"step": 350 |
|
}, |
|
{ |
|
"epoch": 5.65, |
|
"grad_norm": 0.6386366486549377, |
|
"learning_rate": 0.002166666666666667, |
|
"loss": 0.3755, |
|
"step": 360 |
|
}, |
|
{ |
|
"epoch": 5.8, |
|
"grad_norm": 3.0398051738739014, |
|
"learning_rate": 0.0020873015873015873, |
|
"loss": 0.3248, |
|
"step": 370 |
|
}, |
|
{ |
|
"epoch": 5.96, |
|
"grad_norm": 2.5777676105499268, |
|
"learning_rate": 0.002007936507936508, |
|
"loss": 0.3713, |
|
"step": 380 |
|
}, |
|
{ |
|
"epoch": 5.99, |
|
"eval_accuracy": 0.8772532188841202, |
|
"eval_f1": 0.8449126373928156, |
|
"eval_loss": 0.2805176377296448, |
|
"eval_precision": 0.8370533441922019, |
|
"eval_recall": 0.8541800385109115, |
|
"eval_runtime": 11.9639, |
|
"eval_samples_per_second": 97.376, |
|
"eval_steps_per_second": 6.102, |
|
"step": 382 |
|
}, |
|
{ |
|
"epoch": 6.12, |
|
"grad_norm": 0.9707914590835571, |
|
"learning_rate": 0.0019285714285714288, |
|
"loss": 0.3227, |
|
"step": 390 |
|
}, |
|
{ |
|
"epoch": 6.27, |
|
"grad_norm": 1.1869500875473022, |
|
"learning_rate": 0.0018492063492063493, |
|
"loss": 0.3271, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 6.43, |
|
"grad_norm": 0.6983945369720459, |
|
"learning_rate": 0.00176984126984127, |
|
"loss": 0.3709, |
|
"step": 410 |
|
}, |
|
{ |
|
"epoch": 6.59, |
|
"grad_norm": 1.19561767578125, |
|
"learning_rate": 0.0016904761904761906, |
|
"loss": 0.3629, |
|
"step": 420 |
|
}, |
|
{ |
|
"epoch": 6.75, |
|
"grad_norm": 1.7224721908569336, |
|
"learning_rate": 0.0016111111111111113, |
|
"loss": 0.3182, |
|
"step": 430 |
|
}, |
|
{ |
|
"epoch": 6.9, |
|
"grad_norm": 2.228806972503662, |
|
"learning_rate": 0.0015317460317460319, |
|
"loss": 0.3953, |
|
"step": 440 |
|
}, |
|
{ |
|
"epoch": 7.0, |
|
"eval_accuracy": 0.8583690987124464, |
|
"eval_f1": 0.836744521418762, |
|
"eval_loss": 0.3396996855735779, |
|
"eval_precision": 0.8185304398119043, |
|
"eval_recall": 0.8871569166819487, |
|
"eval_runtime": 12.0715, |
|
"eval_samples_per_second": 96.508, |
|
"eval_steps_per_second": 6.047, |
|
"step": 446 |
|
}, |
|
{ |
|
"epoch": 7.06, |
|
"grad_norm": 3.1751720905303955, |
|
"learning_rate": 0.0014523809523809526, |
|
"loss": 0.3753, |
|
"step": 450 |
|
}, |
|
{ |
|
"epoch": 7.22, |
|
"grad_norm": 2.4551353454589844, |
|
"learning_rate": 0.0013730158730158731, |
|
"loss": 0.3061, |
|
"step": 460 |
|
}, |
|
{ |
|
"epoch": 7.37, |
|
"grad_norm": 1.707070231437683, |
|
"learning_rate": 0.0012936507936507939, |
|
"loss": 0.3525, |
|
"step": 470 |
|
}, |
|
{ |
|
"epoch": 7.53, |
|
"grad_norm": 0.43897438049316406, |
|
"learning_rate": 0.0012142857142857144, |
|
"loss": 0.3032, |
|
"step": 480 |
|
}, |
|
{ |
|
"epoch": 7.69, |
|
"grad_norm": 2.149365186691284, |
|
"learning_rate": 0.001134920634920635, |
|
"loss": 0.3031, |
|
"step": 490 |
|
}, |
|
{ |
|
"epoch": 7.84, |
|
"grad_norm": 1.5541889667510986, |
|
"learning_rate": 0.0010555555555555557, |
|
"loss": 0.3199, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 8.0, |
|
"grad_norm": 0.5194265842437744, |
|
"learning_rate": 0.0009761904761904762, |
|
"loss": 0.3218, |
|
"step": 510 |
|
}, |
|
{ |
|
"epoch": 8.0, |
|
"eval_accuracy": 0.8669527896995708, |
|
"eval_f1": 0.8448168439144914, |
|
"eval_loss": 0.3072466254234314, |
|
"eval_precision": 0.8257109927719459, |
|
"eval_recall": 0.8897720673635308, |
|
"eval_runtime": 12.0816, |
|
"eval_samples_per_second": 96.428, |
|
"eval_steps_per_second": 6.042, |
|
"step": 510 |
|
}, |
|
{ |
|
"epoch": 8.16, |
|
"grad_norm": 0.4812917709350586, |
|
"learning_rate": 0.0008968253968253968, |
|
"loss": 0.3105, |
|
"step": 520 |
|
}, |
|
{ |
|
"epoch": 8.31, |
|
"grad_norm": 3.871387004852295, |
|
"learning_rate": 0.0008174603174603175, |
|
"loss": 0.3398, |
|
"step": 530 |
|
}, |
|
{ |
|
"epoch": 8.47, |
|
"grad_norm": 0.3649653196334839, |
|
"learning_rate": 0.000746031746031746, |
|
"loss": 0.3169, |
|
"step": 540 |
|
}, |
|
{ |
|
"epoch": 8.63, |
|
"grad_norm": 0.4373646676540375, |
|
"learning_rate": 0.0006666666666666666, |
|
"loss": 0.3019, |
|
"step": 550 |
|
}, |
|
{ |
|
"epoch": 8.78, |
|
"grad_norm": 0.21746236085891724, |
|
"learning_rate": 0.0005873015873015873, |
|
"loss": 0.3463, |
|
"step": 560 |
|
}, |
|
{ |
|
"epoch": 8.94, |
|
"grad_norm": 1.6411595344543457, |
|
"learning_rate": 0.0005079365079365079, |
|
"loss": 0.3219, |
|
"step": 570 |
|
}, |
|
{ |
|
"epoch": 8.99, |
|
"eval_accuracy": 0.896137339055794, |
|
"eval_f1": 0.870789179160911, |
|
"eval_loss": 0.263265997171402, |
|
"eval_precision": 0.8582184517497349, |
|
"eval_recall": 0.887172198789657, |
|
"eval_runtime": 12.1817, |
|
"eval_samples_per_second": 95.635, |
|
"eval_steps_per_second": 5.993, |
|
"step": 573 |
|
}, |
|
{ |
|
"epoch": 9.1, |
|
"grad_norm": 1.573140025138855, |
|
"learning_rate": 0.0004285714285714286, |
|
"loss": 0.3078, |
|
"step": 580 |
|
}, |
|
{ |
|
"epoch": 9.25, |
|
"grad_norm": 0.6549800634384155, |
|
"learning_rate": 0.00034920634920634924, |
|
"loss": 0.2872, |
|
"step": 590 |
|
}, |
|
{ |
|
"epoch": 9.41, |
|
"grad_norm": 0.4417751133441925, |
|
"learning_rate": 0.0002698412698412699, |
|
"loss": 0.3256, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 9.57, |
|
"grad_norm": 3.714104413986206, |
|
"learning_rate": 0.0001904761904761905, |
|
"loss": 0.361, |
|
"step": 610 |
|
}, |
|
{ |
|
"epoch": 9.73, |
|
"grad_norm": 0.5447297096252441, |
|
"learning_rate": 0.00011111111111111112, |
|
"loss": 0.3773, |
|
"step": 620 |
|
}, |
|
{ |
|
"epoch": 9.88, |
|
"grad_norm": 1.347410798072815, |
|
"learning_rate": 3.1746031746031745e-05, |
|
"loss": 0.3049, |
|
"step": 630 |
|
}, |
|
{ |
|
"epoch": 9.88, |
|
"eval_accuracy": 0.8927038626609443, |
|
"eval_f1": 0.8684775000564475, |
|
"eval_loss": 0.27391761541366577, |
|
"eval_precision": 0.8528169402296, |
|
"eval_recall": 0.8912334189131366, |
|
"eval_runtime": 12.284, |
|
"eval_samples_per_second": 94.839, |
|
"eval_steps_per_second": 5.943, |
|
"step": 630 |
|
}, |
|
{ |
|
"epoch": 9.88, |
|
"step": 630, |
|
"total_flos": 3.142570654487126e+18, |
|
"train_loss": 0.3948629246817695, |
|
"train_runtime": 760.2155, |
|
"train_samples_per_second": 53.63, |
|
"train_steps_per_second": 0.829 |
|
} |
|
], |
|
"logging_steps": 10, |
|
"max_steps": 630, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 10, |
|
"save_steps": 500, |
|
"total_flos": 3.142570654487126e+18, |
|
"train_batch_size": 16, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|