{ "best_metric": 0.7956131605184447, "best_model_checkpoint": "deit-base-patch16-224-finetuned-lora-medmnistv2/checkpoint-1090", "epoch": 9.954337899543379, "eval_steps": 500, "global_step": 1090, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.09, "grad_norm": 3.768280267715454, "learning_rate": 0.004954128440366973, "loss": 1.2551, "step": 10 }, { "epoch": 0.18, "grad_norm": 1.6224374771118164, "learning_rate": 0.004908256880733945, "loss": 1.1194, "step": 20 }, { "epoch": 0.27, "grad_norm": 0.8902679085731506, "learning_rate": 0.004862385321100918, "loss": 0.9226, "step": 30 }, { "epoch": 0.37, "grad_norm": 1.336371898651123, "learning_rate": 0.00481651376146789, "loss": 0.9343, "step": 40 }, { "epoch": 0.46, "grad_norm": 1.9396265745162964, "learning_rate": 0.0047706422018348625, "loss": 0.849, "step": 50 }, { "epoch": 0.55, "grad_norm": 1.6278876066207886, "learning_rate": 0.004724770642201835, "loss": 0.8533, "step": 60 }, { "epoch": 0.64, "grad_norm": 1.5454286336898804, "learning_rate": 0.004678899082568808, "loss": 0.8229, "step": 70 }, { "epoch": 0.73, "grad_norm": 1.064020037651062, "learning_rate": 0.00463302752293578, "loss": 0.8622, "step": 80 }, { "epoch": 0.82, "grad_norm": 1.5100964307785034, "learning_rate": 0.0045871559633027525, "loss": 0.8633, "step": 90 }, { "epoch": 0.91, "grad_norm": 1.3673182725906372, "learning_rate": 0.004541284403669725, "loss": 0.9012, "step": 100 }, { "epoch": 1.0, "eval_accuracy": 0.7228315054835494, "eval_f1": 0.34621811450823214, "eval_loss": 0.7629963159561157, "eval_precision": 0.42629140706004787, "eval_recall": 0.32692877938779574, "eval_runtime": 6.2499, "eval_samples_per_second": 160.484, "eval_steps_per_second": 10.08, "step": 109 }, { "epoch": 1.0, "grad_norm": 0.9423499703407288, "learning_rate": 0.004495412844036698, "loss": 0.8922, "step": 110 }, { "epoch": 1.1, "grad_norm": 2.50795841217041, "learning_rate": 0.0044495412844036695, "loss": 0.8791, "step": 120 }, { "epoch": 1.19, "grad_norm": 1.4502021074295044, "learning_rate": 0.004403669724770643, "loss": 0.8973, "step": 130 }, { "epoch": 1.28, "grad_norm": 1.703600287437439, "learning_rate": 0.004357798165137615, "loss": 0.8246, "step": 140 }, { "epoch": 1.37, "grad_norm": 0.7422580122947693, "learning_rate": 0.004311926605504587, "loss": 0.7913, "step": 150 }, { "epoch": 1.46, "grad_norm": 1.146699070930481, "learning_rate": 0.0042660550458715595, "loss": 0.8028, "step": 160 }, { "epoch": 1.55, "grad_norm": 2.6000311374664307, "learning_rate": 0.004220183486238533, "loss": 0.9162, "step": 170 }, { "epoch": 1.64, "grad_norm": 0.8894189596176147, "learning_rate": 0.004174311926605505, "loss": 0.9022, "step": 180 }, { "epoch": 1.74, "grad_norm": 0.9741529226303101, "learning_rate": 0.004128440366972477, "loss": 0.8879, "step": 190 }, { "epoch": 1.83, "grad_norm": 2.898531198501587, "learning_rate": 0.00408256880733945, "loss": 0.7974, "step": 200 }, { "epoch": 1.92, "grad_norm": 1.5416871309280396, "learning_rate": 0.004036697247706422, "loss": 0.7636, "step": 210 }, { "epoch": 2.0, "eval_accuracy": 0.7288135593220338, "eval_f1": 0.3788829143385214, "eval_loss": 0.7212122082710266, "eval_precision": 0.591156357572359, "eval_recall": 0.3630725573348524, "eval_runtime": 5.4131, "eval_samples_per_second": 185.292, "eval_steps_per_second": 11.638, "step": 219 }, { "epoch": 2.01, "grad_norm": 0.9239124059677124, "learning_rate": 0.003990825688073394, "loss": 0.8466, "step": 220 }, { "epoch": 2.1, "grad_norm": 0.9725480675697327, "learning_rate": 0.003944954128440367, "loss": 0.8851, "step": 230 }, { "epoch": 2.19, "grad_norm": 1.1037930250167847, "learning_rate": 0.0038990825688073397, "loss": 0.7814, "step": 240 }, { "epoch": 2.28, "grad_norm": 1.5679117441177368, "learning_rate": 0.0038532110091743124, "loss": 0.7781, "step": 250 }, { "epoch": 2.37, "grad_norm": 0.7649789452552795, "learning_rate": 0.0038073394495412843, "loss": 0.8029, "step": 260 }, { "epoch": 2.47, "grad_norm": 1.4153032302856445, "learning_rate": 0.003761467889908257, "loss": 0.7439, "step": 270 }, { "epoch": 2.56, "grad_norm": 1.177064299583435, "learning_rate": 0.0037155963302752293, "loss": 0.8781, "step": 280 }, { "epoch": 2.65, "grad_norm": 0.9871839284896851, "learning_rate": 0.003669724770642202, "loss": 0.7439, "step": 290 }, { "epoch": 2.74, "grad_norm": 0.7878830432891846, "learning_rate": 0.0036238532110091743, "loss": 0.8001, "step": 300 }, { "epoch": 2.83, "grad_norm": 1.0698702335357666, "learning_rate": 0.003577981651376147, "loss": 0.8568, "step": 310 }, { "epoch": 2.92, "grad_norm": 0.9107087850570679, "learning_rate": 0.0035321100917431194, "loss": 0.7189, "step": 320 }, { "epoch": 3.0, "eval_accuracy": 0.7258225324027916, "eval_f1": 0.393460298491453, "eval_loss": 0.7621892690658569, "eval_precision": 0.4464810481413866, "eval_recall": 0.422982191132074, "eval_runtime": 5.2671, "eval_samples_per_second": 190.428, "eval_steps_per_second": 11.961, "step": 328 }, { "epoch": 3.01, "grad_norm": 1.1653732061386108, "learning_rate": 0.003486238532110092, "loss": 0.7245, "step": 330 }, { "epoch": 3.11, "grad_norm": 0.693824291229248, "learning_rate": 0.0034403669724770644, "loss": 0.7883, "step": 340 }, { "epoch": 3.2, "grad_norm": 0.7583943009376526, "learning_rate": 0.003394495412844037, "loss": 0.7618, "step": 350 }, { "epoch": 3.29, "grad_norm": 1.0829322338104248, "learning_rate": 0.003348623853211009, "loss": 0.7534, "step": 360 }, { "epoch": 3.38, "grad_norm": 1.2232468128204346, "learning_rate": 0.0033027522935779817, "loss": 0.6827, "step": 370 }, { "epoch": 3.47, "grad_norm": 1.0095463991165161, "learning_rate": 0.003256880733944954, "loss": 0.7615, "step": 380 }, { "epoch": 3.56, "grad_norm": 1.059535026550293, "learning_rate": 0.003211009174311927, "loss": 0.8094, "step": 390 }, { "epoch": 3.65, "grad_norm": 0.9983842372894287, "learning_rate": 0.003165137614678899, "loss": 0.7649, "step": 400 }, { "epoch": 3.74, "grad_norm": 0.6007803082466125, "learning_rate": 0.003119266055045872, "loss": 0.7825, "step": 410 }, { "epoch": 3.84, "grad_norm": 1.1800183057785034, "learning_rate": 0.003073394495412844, "loss": 0.7296, "step": 420 }, { "epoch": 3.93, "grad_norm": 1.049116611480713, "learning_rate": 0.003027522935779817, "loss": 0.6904, "step": 430 }, { "epoch": 4.0, "eval_accuracy": 0.7437686939182453, "eval_f1": 0.4115031629499951, "eval_loss": 0.7280949950218201, "eval_precision": 0.48879535404583313, "eval_recall": 0.4484100461852218, "eval_runtime": 5.2319, "eval_samples_per_second": 191.708, "eval_steps_per_second": 12.041, "step": 438 }, { "epoch": 4.02, "grad_norm": 0.9003276824951172, "learning_rate": 0.002981651376146789, "loss": 0.6584, "step": 440 }, { "epoch": 4.11, "grad_norm": 1.3586938381195068, "learning_rate": 0.002935779816513762, "loss": 0.7175, "step": 450 }, { "epoch": 4.2, "grad_norm": 0.5924133062362671, "learning_rate": 0.0028899082568807338, "loss": 0.67, "step": 460 }, { "epoch": 4.29, "grad_norm": 0.9187817573547363, "learning_rate": 0.0028440366972477065, "loss": 0.697, "step": 470 }, { "epoch": 4.38, "grad_norm": 0.7464513778686523, "learning_rate": 0.002798165137614679, "loss": 0.6691, "step": 480 }, { "epoch": 4.47, "grad_norm": 0.6532416343688965, "learning_rate": 0.0027522935779816515, "loss": 0.7217, "step": 490 }, { "epoch": 4.57, "grad_norm": 0.7422506809234619, "learning_rate": 0.002706422018348624, "loss": 0.7619, "step": 500 }, { "epoch": 4.66, "grad_norm": 0.8745353817939758, "learning_rate": 0.0026605504587155966, "loss": 0.7264, "step": 510 }, { "epoch": 4.75, "grad_norm": 0.7916358113288879, "learning_rate": 0.002614678899082569, "loss": 0.7592, "step": 520 }, { "epoch": 4.84, "grad_norm": 1.0833995342254639, "learning_rate": 0.0025688073394495416, "loss": 0.7263, "step": 530 }, { "epoch": 4.93, "grad_norm": 1.0176126956939697, "learning_rate": 0.0025229357798165135, "loss": 0.7658, "step": 540 }, { "epoch": 5.0, "eval_accuracy": 0.7397806580259222, "eval_f1": 0.3752685826577452, "eval_loss": 0.7215412259101868, "eval_precision": 0.4855492779862528, "eval_recall": 0.42520631993699903, "eval_runtime": 5.5429, "eval_samples_per_second": 180.953, "eval_steps_per_second": 11.366, "step": 547 }, { "epoch": 5.02, "grad_norm": 1.45496666431427, "learning_rate": 0.0024770642201834866, "loss": 0.7024, "step": 550 }, { "epoch": 5.11, "grad_norm": 0.8751833438873291, "learning_rate": 0.002431192660550459, "loss": 0.741, "step": 560 }, { "epoch": 5.21, "grad_norm": 0.7062868475914001, "learning_rate": 0.0023853211009174312, "loss": 0.6733, "step": 570 }, { "epoch": 5.3, "grad_norm": 0.9199981689453125, "learning_rate": 0.002339449541284404, "loss": 0.6988, "step": 580 }, { "epoch": 5.39, "grad_norm": 0.8544884324073792, "learning_rate": 0.0022935779816513763, "loss": 0.7073, "step": 590 }, { "epoch": 5.48, "grad_norm": 1.0523695945739746, "learning_rate": 0.002247706422018349, "loss": 0.6822, "step": 600 }, { "epoch": 5.57, "grad_norm": 0.866678774356842, "learning_rate": 0.0022018348623853213, "loss": 0.6784, "step": 610 }, { "epoch": 5.66, "grad_norm": 0.6989203095436096, "learning_rate": 0.0021559633027522936, "loss": 0.6796, "step": 620 }, { "epoch": 5.75, "grad_norm": 0.4832663834095001, "learning_rate": 0.0021100917431192663, "loss": 0.6472, "step": 630 }, { "epoch": 5.84, "grad_norm": 1.1230007410049438, "learning_rate": 0.0020642201834862386, "loss": 0.6693, "step": 640 }, { "epoch": 5.94, "grad_norm": 0.6341413259506226, "learning_rate": 0.002018348623853211, "loss": 0.6363, "step": 650 }, { "epoch": 6.0, "eval_accuracy": 0.7676969092721835, "eval_f1": 0.5121263559819323, "eval_loss": 0.6328557133674622, "eval_precision": 0.6350277126119822, "eval_recall": 0.49284577902844884, "eval_runtime": 5.4702, "eval_samples_per_second": 183.357, "eval_steps_per_second": 11.517, "step": 657 }, { "epoch": 6.03, "grad_norm": 0.7254393696784973, "learning_rate": 0.0019724770642201837, "loss": 0.6398, "step": 660 }, { "epoch": 6.12, "grad_norm": 0.6462991237640381, "learning_rate": 0.0019266055045871562, "loss": 0.6703, "step": 670 }, { "epoch": 6.21, "grad_norm": 0.7446946501731873, "learning_rate": 0.0018807339449541285, "loss": 0.6184, "step": 680 }, { "epoch": 6.3, "grad_norm": 0.5196910500526428, "learning_rate": 0.001834862385321101, "loss": 0.6308, "step": 690 }, { "epoch": 6.39, "grad_norm": 1.019028902053833, "learning_rate": 0.0017889908256880735, "loss": 0.664, "step": 700 }, { "epoch": 6.48, "grad_norm": 1.3352869749069214, "learning_rate": 0.001743119266055046, "loss": 0.638, "step": 710 }, { "epoch": 6.58, "grad_norm": 0.8105664253234863, "learning_rate": 0.0016972477064220186, "loss": 0.5801, "step": 720 }, { "epoch": 6.67, "grad_norm": 1.0205323696136475, "learning_rate": 0.0016513761467889909, "loss": 0.6923, "step": 730 }, { "epoch": 6.76, "grad_norm": 0.48183074593544006, "learning_rate": 0.0016055045871559634, "loss": 0.6583, "step": 740 }, { "epoch": 6.85, "grad_norm": 0.7392696738243103, "learning_rate": 0.001559633027522936, "loss": 0.6784, "step": 750 }, { "epoch": 6.94, "grad_norm": 0.7750418782234192, "learning_rate": 0.0015137614678899084, "loss": 0.6299, "step": 760 }, { "epoch": 7.0, "eval_accuracy": 0.7716849451645065, "eval_f1": 0.5713473581488847, "eval_loss": 0.6116508841514587, "eval_precision": 0.5962341473849692, "eval_recall": 0.5781145727281558, "eval_runtime": 5.4791, "eval_samples_per_second": 183.06, "eval_steps_per_second": 11.498, "step": 766 }, { "epoch": 7.03, "grad_norm": 1.6914126873016357, "learning_rate": 0.001467889908256881, "loss": 0.7058, "step": 770 }, { "epoch": 7.12, "grad_norm": 1.1266179084777832, "learning_rate": 0.0014220183486238532, "loss": 0.5969, "step": 780 }, { "epoch": 7.21, "grad_norm": 0.8198122978210449, "learning_rate": 0.0013761467889908258, "loss": 0.6296, "step": 790 }, { "epoch": 7.31, "grad_norm": 0.8668613433837891, "learning_rate": 0.0013302752293577983, "loss": 0.6779, "step": 800 }, { "epoch": 7.4, "grad_norm": 0.757351279258728, "learning_rate": 0.0012844036697247708, "loss": 0.6589, "step": 810 }, { "epoch": 7.49, "grad_norm": 1.231268286705017, "learning_rate": 0.0012385321100917433, "loss": 0.5909, "step": 820 }, { "epoch": 7.58, "grad_norm": 0.6386240720748901, "learning_rate": 0.0011926605504587156, "loss": 0.6166, "step": 830 }, { "epoch": 7.67, "grad_norm": 0.7646284103393555, "learning_rate": 0.0011467889908256881, "loss": 0.5815, "step": 840 }, { "epoch": 7.76, "grad_norm": 1.1417447328567505, "learning_rate": 0.0011009174311926607, "loss": 0.5825, "step": 850 }, { "epoch": 7.85, "grad_norm": 0.8213087916374207, "learning_rate": 0.0010550458715596332, "loss": 0.652, "step": 860 }, { "epoch": 7.95, "grad_norm": 0.8747499585151672, "learning_rate": 0.0010091743119266055, "loss": 0.6011, "step": 870 }, { "epoch": 8.0, "eval_accuracy": 0.7796610169491526, "eval_f1": 0.5902452724888688, "eval_loss": 0.5919255018234253, "eval_precision": 0.6162323714867404, "eval_recall": 0.575685794889542, "eval_runtime": 5.3294, "eval_samples_per_second": 188.202, "eval_steps_per_second": 11.821, "step": 876 }, { "epoch": 8.04, "grad_norm": 0.9114809632301331, "learning_rate": 0.0009633027522935781, "loss": 0.6365, "step": 880 }, { "epoch": 8.13, "grad_norm": 0.7782912254333496, "learning_rate": 0.0009174311926605505, "loss": 0.5558, "step": 890 }, { "epoch": 8.22, "grad_norm": 1.2907453775405884, "learning_rate": 0.000871559633027523, "loss": 0.5756, "step": 900 }, { "epoch": 8.31, "grad_norm": 1.1318057775497437, "learning_rate": 0.0008256880733944954, "loss": 0.6574, "step": 910 }, { "epoch": 8.4, "grad_norm": 0.5756754875183105, "learning_rate": 0.000779816513761468, "loss": 0.5494, "step": 920 }, { "epoch": 8.49, "grad_norm": 0.7023201584815979, "learning_rate": 0.0007339449541284405, "loss": 0.6101, "step": 930 }, { "epoch": 8.58, "grad_norm": 0.9913588166236877, "learning_rate": 0.0006880733944954129, "loss": 0.6034, "step": 940 }, { "epoch": 8.68, "grad_norm": 0.982480525970459, "learning_rate": 0.0006422018348623854, "loss": 0.5981, "step": 950 }, { "epoch": 8.77, "grad_norm": 1.1374191045761108, "learning_rate": 0.0005963302752293578, "loss": 0.6134, "step": 960 }, { "epoch": 8.86, "grad_norm": 0.7602503299713135, "learning_rate": 0.0005504587155963303, "loss": 0.5561, "step": 970 }, { "epoch": 8.95, "grad_norm": 0.8967719674110413, "learning_rate": 0.0005045871559633027, "loss": 0.6043, "step": 980 }, { "epoch": 9.0, "eval_accuracy": 0.7946161515453639, "eval_f1": 0.5983005873522603, "eval_loss": 0.5475942492485046, "eval_precision": 0.6295002331514908, "eval_recall": 0.5812943349243114, "eval_runtime": 5.2749, "eval_samples_per_second": 190.145, "eval_steps_per_second": 11.943, "step": 985 }, { "epoch": 9.04, "grad_norm": 0.9292692542076111, "learning_rate": 0.00045871559633027525, "loss": 0.5415, "step": 990 }, { "epoch": 9.13, "grad_norm": 0.8102772235870361, "learning_rate": 0.0004128440366972477, "loss": 0.5611, "step": 1000 }, { "epoch": 9.22, "grad_norm": 1.1832762956619263, "learning_rate": 0.00036697247706422024, "loss": 0.5876, "step": 1010 }, { "epoch": 9.32, "grad_norm": 1.1258927583694458, "learning_rate": 0.0003211009174311927, "loss": 0.4946, "step": 1020 }, { "epoch": 9.41, "grad_norm": 0.8979453444480896, "learning_rate": 0.00027522935779816516, "loss": 0.5668, "step": 1030 }, { "epoch": 9.5, "grad_norm": 0.9506226778030396, "learning_rate": 0.00022935779816513763, "loss": 0.5384, "step": 1040 }, { "epoch": 9.59, "grad_norm": 1.1283026933670044, "learning_rate": 0.00018348623853211012, "loss": 0.5772, "step": 1050 }, { "epoch": 9.68, "grad_norm": 0.9244889616966248, "learning_rate": 0.00013761467889908258, "loss": 0.5662, "step": 1060 }, { "epoch": 9.77, "grad_norm": 0.8923238515853882, "learning_rate": 9.174311926605506e-05, "loss": 0.5921, "step": 1070 }, { "epoch": 9.86, "grad_norm": 1.1653807163238525, "learning_rate": 4.587155963302753e-05, "loss": 0.5846, "step": 1080 }, { "epoch": 9.95, "grad_norm": 0.8477634191513062, "learning_rate": 0.0, "loss": 0.5671, "step": 1090 }, { "epoch": 9.95, "eval_accuracy": 0.7956131605184447, "eval_f1": 0.5997799904348683, "eval_loss": 0.5543830990791321, "eval_precision": 0.627341257239154, "eval_recall": 0.5810367281093276, "eval_runtime": 5.4717, "eval_samples_per_second": 183.308, "eval_steps_per_second": 11.514, "step": 1090 }, { "epoch": 9.95, "step": 1090, "total_flos": 5.442882169274339e+18, "train_loss": 0.7150778630457887, "train_runtime": 818.9501, "train_samples_per_second": 85.561, "train_steps_per_second": 1.331 } ], "logging_steps": 10, "max_steps": 1090, "num_input_tokens_seen": 0, "num_train_epochs": 10, "save_steps": 500, "total_flos": 5.442882169274339e+18, "train_batch_size": 16, "trial_name": null, "trial_params": null }