{ "best_metric": 0.981892523364486, "best_model_checkpoint": "deit-base-patch16-224-finetuned-lora-medmnistv2/checkpoint-1870", "epoch": 10.0, "eval_steps": 500, "global_step": 1870, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.05, "grad_norm": 3.587554693222046, "learning_rate": 0.004973262032085562, "loss": 1.6152, "step": 10 }, { "epoch": 0.11, "grad_norm": 2.7896249294281006, "learning_rate": 0.004946524064171123, "loss": 0.9293, "step": 20 }, { "epoch": 0.16, "grad_norm": 2.900453805923462, "learning_rate": 0.004919786096256685, "loss": 0.7466, "step": 30 }, { "epoch": 0.21, "grad_norm": 1.1440342664718628, "learning_rate": 0.004893048128342246, "loss": 0.7922, "step": 40 }, { "epoch": 0.27, "grad_norm": 2.2305636405944824, "learning_rate": 0.004866310160427808, "loss": 0.7212, "step": 50 }, { "epoch": 0.32, "grad_norm": 1.2825617790222168, "learning_rate": 0.004839572192513369, "loss": 0.6434, "step": 60 }, { "epoch": 0.37, "grad_norm": 1.828630805015564, "learning_rate": 0.004812834224598931, "loss": 0.7316, "step": 70 }, { "epoch": 0.43, "grad_norm": 2.789102792739868, "learning_rate": 0.004786096256684492, "loss": 0.6947, "step": 80 }, { "epoch": 0.48, "grad_norm": 1.9813400506973267, "learning_rate": 0.004759358288770054, "loss": 0.6042, "step": 90 }, { "epoch": 0.53, "grad_norm": 2.0925464630126953, "learning_rate": 0.004732620320855615, "loss": 0.5651, "step": 100 }, { "epoch": 0.59, "grad_norm": 1.5666005611419678, "learning_rate": 0.004705882352941177, "loss": 0.6427, "step": 110 }, { "epoch": 0.64, "grad_norm": 2.313793897628784, "learning_rate": 0.004679144385026738, "loss": 0.5964, "step": 120 }, { "epoch": 0.7, "grad_norm": 1.3041290044784546, "learning_rate": 0.0046524064171123, "loss": 0.5604, "step": 130 }, { "epoch": 0.75, "grad_norm": 1.781190276145935, "learning_rate": 0.0046256684491978615, "loss": 0.5539, "step": 140 }, { "epoch": 0.8, "grad_norm": 2.0298523902893066, "learning_rate": 0.004598930481283423, "loss": 0.5521, "step": 150 }, { "epoch": 0.86, "grad_norm": 1.687642216682434, "learning_rate": 0.004572192513368984, "loss": 0.6069, "step": 160 }, { "epoch": 0.91, "grad_norm": 1.5597479343414307, "learning_rate": 0.004545454545454545, "loss": 0.5216, "step": 170 }, { "epoch": 0.96, "grad_norm": 1.9967581033706665, "learning_rate": 0.004518716577540107, "loss": 0.4839, "step": 180 }, { "epoch": 1.0, "eval_accuracy": 0.8977803738317757, "eval_f1": 0.8762697412468743, "eval_loss": 0.2824207842350006, "eval_precision": 0.90567947974312, "eval_recall": 0.8661886777564033, "eval_runtime": 9.095, "eval_samples_per_second": 188.235, "eval_steps_per_second": 11.765, "step": 187 }, { "epoch": 1.02, "grad_norm": 1.7654634714126587, "learning_rate": 0.004491978609625669, "loss": 0.403, "step": 190 }, { "epoch": 1.07, "grad_norm": 1.4351764917373657, "learning_rate": 0.00446524064171123, "loss": 0.5531, "step": 200 }, { "epoch": 1.12, "grad_norm": 1.3419188261032104, "learning_rate": 0.004438502673796791, "loss": 0.4626, "step": 210 }, { "epoch": 1.18, "grad_norm": 1.2612953186035156, "learning_rate": 0.004411764705882353, "loss": 0.4752, "step": 220 }, { "epoch": 1.23, "grad_norm": 0.9036948680877686, "learning_rate": 0.004385026737967914, "loss": 0.5154, "step": 230 }, { "epoch": 1.28, "grad_norm": 1.5349265336990356, "learning_rate": 0.004358288770053476, "loss": 0.4288, "step": 240 }, { "epoch": 1.34, "grad_norm": 2.2249972820281982, "learning_rate": 0.0043315508021390375, "loss": 0.4383, "step": 250 }, { "epoch": 1.39, "grad_norm": 2.19148325920105, "learning_rate": 0.004304812834224599, "loss": 0.5313, "step": 260 }, { "epoch": 1.44, "grad_norm": 1.2363046407699585, "learning_rate": 0.0042780748663101605, "loss": 0.4048, "step": 270 }, { "epoch": 1.5, "grad_norm": 2.1985669136047363, "learning_rate": 0.004251336898395722, "loss": 0.5779, "step": 280 }, { "epoch": 1.55, "grad_norm": 1.6020399332046509, "learning_rate": 0.004224598930481284, "loss": 0.5005, "step": 290 }, { "epoch": 1.6, "grad_norm": 1.5544497966766357, "learning_rate": 0.004197860962566845, "loss": 0.4702, "step": 300 }, { "epoch": 1.66, "grad_norm": 1.830074429512024, "learning_rate": 0.004171122994652407, "loss": 0.4712, "step": 310 }, { "epoch": 1.71, "grad_norm": 1.7887426614761353, "learning_rate": 0.004144385026737968, "loss": 0.4806, "step": 320 }, { "epoch": 1.76, "grad_norm": 0.9905050992965698, "learning_rate": 0.00411764705882353, "loss": 0.4353, "step": 330 }, { "epoch": 1.82, "grad_norm": 3.7927136421203613, "learning_rate": 0.004090909090909091, "loss": 0.4194, "step": 340 }, { "epoch": 1.87, "grad_norm": 0.877655565738678, "learning_rate": 0.004064171122994653, "loss": 0.4592, "step": 350 }, { "epoch": 1.93, "grad_norm": 2.024024248123169, "learning_rate": 0.004037433155080214, "loss": 0.4472, "step": 360 }, { "epoch": 1.98, "grad_norm": 1.5308970212936401, "learning_rate": 0.004010695187165776, "loss": 0.4762, "step": 370 }, { "epoch": 2.0, "eval_accuracy": 0.9281542056074766, "eval_f1": 0.9185809735494163, "eval_loss": 0.2145574986934662, "eval_precision": 0.9245598328831828, "eval_recall": 0.9160674573942845, "eval_runtime": 9.0756, "eval_samples_per_second": 188.637, "eval_steps_per_second": 11.79, "step": 374 }, { "epoch": 2.03, "grad_norm": 1.3481825590133667, "learning_rate": 0.0039839572192513365, "loss": 0.4369, "step": 380 }, { "epoch": 2.09, "grad_norm": 1.5832984447479248, "learning_rate": 0.003957219251336899, "loss": 0.4218, "step": 390 }, { "epoch": 2.14, "grad_norm": 1.2749643325805664, "learning_rate": 0.00393048128342246, "loss": 0.4345, "step": 400 }, { "epoch": 2.19, "grad_norm": 1.725550651550293, "learning_rate": 0.0039037433155080215, "loss": 0.4145, "step": 410 }, { "epoch": 2.25, "grad_norm": 1.1786770820617676, "learning_rate": 0.003877005347593583, "loss": 0.4393, "step": 420 }, { "epoch": 2.3, "grad_norm": 1.432307243347168, "learning_rate": 0.003850267379679144, "loss": 0.3709, "step": 430 }, { "epoch": 2.35, "grad_norm": 2.1418895721435547, "learning_rate": 0.0038235294117647057, "loss": 0.4356, "step": 440 }, { "epoch": 2.41, "grad_norm": 2.0751211643218994, "learning_rate": 0.0037967914438502676, "loss": 0.4003, "step": 450 }, { "epoch": 2.46, "grad_norm": 1.2488188743591309, "learning_rate": 0.003770053475935829, "loss": 0.5263, "step": 460 }, { "epoch": 2.51, "grad_norm": 1.2528233528137207, "learning_rate": 0.0037433155080213902, "loss": 0.4224, "step": 470 }, { "epoch": 2.57, "grad_norm": 1.3320534229278564, "learning_rate": 0.0037165775401069518, "loss": 0.3868, "step": 480 }, { "epoch": 2.62, "grad_norm": 1.1116540431976318, "learning_rate": 0.0036898395721925133, "loss": 0.428, "step": 490 }, { "epoch": 2.67, "grad_norm": 1.698747992515564, "learning_rate": 0.0036631016042780753, "loss": 0.4034, "step": 500 }, { "epoch": 2.73, "grad_norm": 0.9850418567657471, "learning_rate": 0.0036363636363636364, "loss": 0.3433, "step": 510 }, { "epoch": 2.78, "grad_norm": 1.1448285579681396, "learning_rate": 0.003609625668449198, "loss": 0.4379, "step": 520 }, { "epoch": 2.83, "grad_norm": 1.8283530473709106, "learning_rate": 0.0035828877005347594, "loss": 0.3691, "step": 530 }, { "epoch": 2.89, "grad_norm": 1.0462627410888672, "learning_rate": 0.0035561497326203205, "loss": 0.3956, "step": 540 }, { "epoch": 2.94, "grad_norm": 2.427431583404541, "learning_rate": 0.0035294117647058825, "loss": 0.3877, "step": 550 }, { "epoch": 2.99, "grad_norm": 0.9325568079948425, "learning_rate": 0.003502673796791444, "loss": 0.3445, "step": 560 }, { "epoch": 3.0, "eval_accuracy": 0.923481308411215, "eval_f1": 0.9167621051952115, "eval_loss": 0.21345409750938416, "eval_precision": 0.9243706129462914, "eval_recall": 0.9158821384305301, "eval_runtime": 9.0543, "eval_samples_per_second": 189.081, "eval_steps_per_second": 11.818, "step": 561 }, { "epoch": 3.05, "grad_norm": 1.159042239189148, "learning_rate": 0.0034759358288770055, "loss": 0.3665, "step": 570 }, { "epoch": 3.1, "grad_norm": 1.1578980684280396, "learning_rate": 0.0034491978609625666, "loss": 0.3696, "step": 580 }, { "epoch": 3.16, "grad_norm": 1.49112069606781, "learning_rate": 0.003422459893048128, "loss": 0.4138, "step": 590 }, { "epoch": 3.21, "grad_norm": 1.3102260828018188, "learning_rate": 0.00339572192513369, "loss": 0.4534, "step": 600 }, { "epoch": 3.26, "grad_norm": 1.571768045425415, "learning_rate": 0.0033689839572192517, "loss": 0.3879, "step": 610 }, { "epoch": 3.32, "grad_norm": 1.4697015285491943, "learning_rate": 0.0033422459893048127, "loss": 0.3276, "step": 620 }, { "epoch": 3.37, "grad_norm": 1.3779832124710083, "learning_rate": 0.0033155080213903743, "loss": 0.4291, "step": 630 }, { "epoch": 3.42, "grad_norm": 1.576749563217163, "learning_rate": 0.003288770053475936, "loss": 0.3085, "step": 640 }, { "epoch": 3.48, "grad_norm": 1.58197820186615, "learning_rate": 0.0032620320855614978, "loss": 0.4367, "step": 650 }, { "epoch": 3.53, "grad_norm": 0.894018292427063, "learning_rate": 0.003235294117647059, "loss": 0.3267, "step": 660 }, { "epoch": 3.58, "grad_norm": 1.0838289260864258, "learning_rate": 0.0032085561497326204, "loss": 0.4036, "step": 670 }, { "epoch": 3.64, "grad_norm": 0.9624530076980591, "learning_rate": 0.003181818181818182, "loss": 0.3892, "step": 680 }, { "epoch": 3.69, "grad_norm": 1.1710550785064697, "learning_rate": 0.003155080213903743, "loss": 0.3569, "step": 690 }, { "epoch": 3.74, "grad_norm": 1.3219703435897827, "learning_rate": 0.0031283422459893045, "loss": 0.3127, "step": 700 }, { "epoch": 3.8, "grad_norm": 1.4994938373565674, "learning_rate": 0.0031016042780748665, "loss": 0.3152, "step": 710 }, { "epoch": 3.85, "grad_norm": 2.016817331314087, "learning_rate": 0.003074866310160428, "loss": 0.3041, "step": 720 }, { "epoch": 3.9, "grad_norm": 1.1754976511001587, "learning_rate": 0.003048128342245989, "loss": 0.361, "step": 730 }, { "epoch": 3.96, "grad_norm": 1.263210654258728, "learning_rate": 0.0030213903743315507, "loss": 0.2963, "step": 740 }, { "epoch": 4.0, "eval_accuracy": 0.9415887850467289, "eval_f1": 0.934561225992784, "eval_loss": 0.16466087102890015, "eval_precision": 0.9322896664365989, "eval_recall": 0.9426571976128333, "eval_runtime": 9.0577, "eval_samples_per_second": 189.01, "eval_steps_per_second": 11.813, "step": 748 }, { "epoch": 4.01, "grad_norm": 1.6111160516738892, "learning_rate": 0.002994652406417112, "loss": 0.3683, "step": 750 }, { "epoch": 4.06, "grad_norm": 1.29823637008667, "learning_rate": 0.002967914438502674, "loss": 0.354, "step": 760 }, { "epoch": 4.12, "grad_norm": 0.9260041117668152, "learning_rate": 0.0029411764705882353, "loss": 0.3839, "step": 770 }, { "epoch": 4.17, "grad_norm": 1.0567574501037598, "learning_rate": 0.0029144385026737968, "loss": 0.3461, "step": 780 }, { "epoch": 4.22, "grad_norm": 0.9306647777557373, "learning_rate": 0.0028877005347593583, "loss": 0.344, "step": 790 }, { "epoch": 4.28, "grad_norm": 0.9743908643722534, "learning_rate": 0.0028609625668449194, "loss": 0.2981, "step": 800 }, { "epoch": 4.33, "grad_norm": 0.7713109254837036, "learning_rate": 0.0028342245989304814, "loss": 0.3017, "step": 810 }, { "epoch": 4.39, "grad_norm": 1.3103464841842651, "learning_rate": 0.002807486631016043, "loss": 0.3411, "step": 820 }, { "epoch": 4.44, "grad_norm": 0.7277514934539795, "learning_rate": 0.0027807486631016044, "loss": 0.2546, "step": 830 }, { "epoch": 4.49, "grad_norm": 0.9697772264480591, "learning_rate": 0.0027540106951871655, "loss": 0.339, "step": 840 }, { "epoch": 4.55, "grad_norm": 0.885215163230896, "learning_rate": 0.002727272727272727, "loss": 0.2738, "step": 850 }, { "epoch": 4.6, "grad_norm": 0.8923905491828918, "learning_rate": 0.002700534759358289, "loss": 0.2996, "step": 860 }, { "epoch": 4.65, "grad_norm": 1.0155311822891235, "learning_rate": 0.0026737967914438505, "loss": 0.3621, "step": 870 }, { "epoch": 4.71, "grad_norm": 0.6661949157714844, "learning_rate": 0.0026470588235294116, "loss": 0.2636, "step": 880 }, { "epoch": 4.76, "grad_norm": 1.1502758264541626, "learning_rate": 0.002620320855614973, "loss": 0.3281, "step": 890 }, { "epoch": 4.81, "grad_norm": 0.9213528037071228, "learning_rate": 0.0025935828877005347, "loss": 0.3139, "step": 900 }, { "epoch": 4.87, "grad_norm": 0.9923821687698364, "learning_rate": 0.0025668449197860967, "loss": 0.2954, "step": 910 }, { "epoch": 4.92, "grad_norm": 0.8214481472969055, "learning_rate": 0.0025401069518716578, "loss": 0.3311, "step": 920 }, { "epoch": 4.97, "grad_norm": 1.2385145425796509, "learning_rate": 0.0025133689839572193, "loss": 0.3328, "step": 930 }, { "epoch": 5.0, "eval_accuracy": 0.9386682242990654, "eval_f1": 0.9316281329929348, "eval_loss": 0.17617273330688477, "eval_precision": 0.9322615361668551, "eval_recall": 0.9371856011394022, "eval_runtime": 9.0288, "eval_samples_per_second": 189.616, "eval_steps_per_second": 11.851, "step": 935 }, { "epoch": 5.03, "grad_norm": 0.7902320623397827, "learning_rate": 0.002486631016042781, "loss": 0.257, "step": 940 }, { "epoch": 5.08, "grad_norm": 1.2443559169769287, "learning_rate": 0.0024598930481283423, "loss": 0.314, "step": 950 }, { "epoch": 5.13, "grad_norm": 0.753470778465271, "learning_rate": 0.002433155080213904, "loss": 0.2539, "step": 960 }, { "epoch": 5.19, "grad_norm": 0.9723307490348816, "learning_rate": 0.0024064171122994654, "loss": 0.2922, "step": 970 }, { "epoch": 5.24, "grad_norm": 1.0655038356781006, "learning_rate": 0.002379679144385027, "loss": 0.2791, "step": 980 }, { "epoch": 5.29, "grad_norm": 0.8649442791938782, "learning_rate": 0.0023529411764705885, "loss": 0.298, "step": 990 }, { "epoch": 5.35, "grad_norm": 0.9552505612373352, "learning_rate": 0.00232620320855615, "loss": 0.2861, "step": 1000 }, { "epoch": 5.4, "grad_norm": 0.8958008885383606, "learning_rate": 0.0022994652406417115, "loss": 0.3015, "step": 1010 }, { "epoch": 5.45, "grad_norm": 1.3321876525878906, "learning_rate": 0.0022727272727272726, "loss": 0.3289, "step": 1020 }, { "epoch": 5.51, "grad_norm": 0.8562110066413879, "learning_rate": 0.0022459893048128346, "loss": 0.3088, "step": 1030 }, { "epoch": 5.56, "grad_norm": 1.5215970277786255, "learning_rate": 0.0022192513368983957, "loss": 0.2998, "step": 1040 }, { "epoch": 5.61, "grad_norm": 0.8548377752304077, "learning_rate": 0.002192513368983957, "loss": 0.2401, "step": 1050 }, { "epoch": 5.67, "grad_norm": 0.6302920579910278, "learning_rate": 0.0021657754010695187, "loss": 0.2492, "step": 1060 }, { "epoch": 5.72, "grad_norm": 0.4174397587776184, "learning_rate": 0.0021390374331550803, "loss": 0.2714, "step": 1070 }, { "epoch": 5.78, "grad_norm": 1.0450794696807861, "learning_rate": 0.002112299465240642, "loss": 0.2855, "step": 1080 }, { "epoch": 5.83, "grad_norm": 1.0483543872833252, "learning_rate": 0.0020855614973262033, "loss": 0.2228, "step": 1090 }, { "epoch": 5.88, "grad_norm": 0.9359253644943237, "learning_rate": 0.002058823529411765, "loss": 0.2784, "step": 1100 }, { "epoch": 5.94, "grad_norm": 0.8934263586997986, "learning_rate": 0.0020320855614973264, "loss": 0.2171, "step": 1110 }, { "epoch": 5.99, "grad_norm": 1.1475499868392944, "learning_rate": 0.002005347593582888, "loss": 0.3138, "step": 1120 }, { "epoch": 6.0, "eval_accuracy": 0.9439252336448598, "eval_f1": 0.9426316231525895, "eval_loss": 0.148016095161438, "eval_precision": 0.942094087936167, "eval_recall": 0.9482439417517554, "eval_runtime": 9.0541, "eval_samples_per_second": 189.085, "eval_steps_per_second": 11.818, "step": 1122 }, { "epoch": 6.04, "grad_norm": 1.3191113471984863, "learning_rate": 0.0019786096256684494, "loss": 0.3146, "step": 1130 }, { "epoch": 6.1, "grad_norm": 0.8933680653572083, "learning_rate": 0.0019518716577540108, "loss": 0.2226, "step": 1140 }, { "epoch": 6.15, "grad_norm": 0.6708208918571472, "learning_rate": 0.001925133689839572, "loss": 0.2108, "step": 1150 }, { "epoch": 6.2, "grad_norm": 0.971500039100647, "learning_rate": 0.0018983957219251338, "loss": 0.2795, "step": 1160 }, { "epoch": 6.26, "grad_norm": 1.081484317779541, "learning_rate": 0.0018716577540106951, "loss": 0.2544, "step": 1170 }, { "epoch": 6.31, "grad_norm": 0.9211081266403198, "learning_rate": 0.0018449197860962567, "loss": 0.2595, "step": 1180 }, { "epoch": 6.36, "grad_norm": 0.7927699685096741, "learning_rate": 0.0018181818181818182, "loss": 0.2558, "step": 1190 }, { "epoch": 6.42, "grad_norm": 0.6074767708778381, "learning_rate": 0.0017914438502673797, "loss": 0.1803, "step": 1200 }, { "epoch": 6.47, "grad_norm": 0.878953754901886, "learning_rate": 0.0017647058823529412, "loss": 0.2395, "step": 1210 }, { "epoch": 6.52, "grad_norm": 0.6277757883071899, "learning_rate": 0.0017379679144385028, "loss": 0.188, "step": 1220 }, { "epoch": 6.58, "grad_norm": 0.7370597124099731, "learning_rate": 0.001711229946524064, "loss": 0.192, "step": 1230 }, { "epoch": 6.63, "grad_norm": 1.1328575611114502, "learning_rate": 0.0016844919786096258, "loss": 0.2774, "step": 1240 }, { "epoch": 6.68, "grad_norm": 0.7046247124671936, "learning_rate": 0.0016577540106951871, "loss": 0.2984, "step": 1250 }, { "epoch": 6.74, "grad_norm": 0.9354172945022583, "learning_rate": 0.0016310160427807489, "loss": 0.2253, "step": 1260 }, { "epoch": 6.79, "grad_norm": 0.9354103803634644, "learning_rate": 0.0016042780748663102, "loss": 0.2233, "step": 1270 }, { "epoch": 6.84, "grad_norm": 1.4302254915237427, "learning_rate": 0.0015775401069518715, "loss": 0.2252, "step": 1280 }, { "epoch": 6.9, "grad_norm": 0.879564642906189, "learning_rate": 0.0015508021390374333, "loss": 0.2222, "step": 1290 }, { "epoch": 6.95, "grad_norm": 0.8771746158599854, "learning_rate": 0.0015240641711229946, "loss": 0.2489, "step": 1300 }, { "epoch": 7.0, "eval_accuracy": 0.9620327102803738, "eval_f1": 0.9562861087932668, "eval_loss": 0.11337984353303909, "eval_precision": 0.9535730521530188, "eval_recall": 0.9609340740213044, "eval_runtime": 9.0379, "eval_samples_per_second": 189.424, "eval_steps_per_second": 11.839, "step": 1309 }, { "epoch": 7.01, "grad_norm": 1.0121431350708008, "learning_rate": 0.001497326203208556, "loss": 0.2303, "step": 1310 }, { "epoch": 7.06, "grad_norm": 1.2874377965927124, "learning_rate": 0.0014705882352941176, "loss": 0.2637, "step": 1320 }, { "epoch": 7.11, "grad_norm": 0.838621199131012, "learning_rate": 0.0014438502673796792, "loss": 0.1681, "step": 1330 }, { "epoch": 7.17, "grad_norm": 0.7134861946105957, "learning_rate": 0.0014171122994652407, "loss": 0.2441, "step": 1340 }, { "epoch": 7.22, "grad_norm": 1.5448678731918335, "learning_rate": 0.0013903743315508022, "loss": 0.2167, "step": 1350 }, { "epoch": 7.27, "grad_norm": 0.8171320557594299, "learning_rate": 0.0013636363636363635, "loss": 0.201, "step": 1360 }, { "epoch": 7.33, "grad_norm": 0.9818800091743469, "learning_rate": 0.0013368983957219253, "loss": 0.1853, "step": 1370 }, { "epoch": 7.38, "grad_norm": 0.7382510900497437, "learning_rate": 0.0013101604278074866, "loss": 0.2012, "step": 1380 }, { "epoch": 7.43, "grad_norm": 0.9468443393707275, "learning_rate": 0.0012834224598930483, "loss": 0.1579, "step": 1390 }, { "epoch": 7.49, "grad_norm": 0.6972719430923462, "learning_rate": 0.0012566844919786096, "loss": 0.2077, "step": 1400 }, { "epoch": 7.54, "grad_norm": 0.8953404426574707, "learning_rate": 0.0012299465240641712, "loss": 0.2617, "step": 1410 }, { "epoch": 7.59, "grad_norm": 0.9293211102485657, "learning_rate": 0.0012032085561497327, "loss": 0.2337, "step": 1420 }, { "epoch": 7.65, "grad_norm": 0.6336411237716675, "learning_rate": 0.0011764705882352942, "loss": 0.1855, "step": 1430 }, { "epoch": 7.7, "grad_norm": 0.6554710268974304, "learning_rate": 0.0011497326203208558, "loss": 0.2039, "step": 1440 }, { "epoch": 7.75, "grad_norm": 0.725406289100647, "learning_rate": 0.0011229946524064173, "loss": 0.1857, "step": 1450 }, { "epoch": 7.81, "grad_norm": 0.8667876720428467, "learning_rate": 0.0010962566844919786, "loss": 0.1764, "step": 1460 }, { "epoch": 7.86, "grad_norm": 1.1264532804489136, "learning_rate": 0.0010695187165775401, "loss": 0.2717, "step": 1470 }, { "epoch": 7.91, "grad_norm": 1.0109556913375854, "learning_rate": 0.0010427807486631017, "loss": 0.2283, "step": 1480 }, { "epoch": 7.97, "grad_norm": 0.6521192193031311, "learning_rate": 0.0010160427807486632, "loss": 0.193, "step": 1490 }, { "epoch": 8.0, "eval_accuracy": 0.9637850467289719, "eval_f1": 0.9615837557171594, "eval_loss": 0.10203568637371063, "eval_precision": 0.9665540248425233, "eval_recall": 0.9580773424505189, "eval_runtime": 9.0516, "eval_samples_per_second": 189.138, "eval_steps_per_second": 11.821, "step": 1496 }, { "epoch": 8.02, "grad_norm": 0.7159696221351624, "learning_rate": 0.0009893048128342247, "loss": 0.2214, "step": 1500 }, { "epoch": 8.07, "grad_norm": 0.8673921823501587, "learning_rate": 0.000962566844919786, "loss": 0.1605, "step": 1510 }, { "epoch": 8.13, "grad_norm": 0.8337764143943787, "learning_rate": 0.0009358288770053476, "loss": 0.1704, "step": 1520 }, { "epoch": 8.18, "grad_norm": 0.5427992343902588, "learning_rate": 0.0009090909090909091, "loss": 0.1598, "step": 1530 }, { "epoch": 8.24, "grad_norm": 0.7265269160270691, "learning_rate": 0.0008823529411764706, "loss": 0.1931, "step": 1540 }, { "epoch": 8.29, "grad_norm": 0.8919804692268372, "learning_rate": 0.000855614973262032, "loss": 0.2435, "step": 1550 }, { "epoch": 8.34, "grad_norm": 0.6623568534851074, "learning_rate": 0.0008288770053475936, "loss": 0.1521, "step": 1560 }, { "epoch": 8.4, "grad_norm": 0.7755681872367859, "learning_rate": 0.0008021390374331551, "loss": 0.2308, "step": 1570 }, { "epoch": 8.45, "grad_norm": 0.9359970092773438, "learning_rate": 0.0007754010695187166, "loss": 0.2103, "step": 1580 }, { "epoch": 8.5, "grad_norm": 0.6170040965080261, "learning_rate": 0.000748663101604278, "loss": 0.1663, "step": 1590 }, { "epoch": 8.56, "grad_norm": 0.6797509789466858, "learning_rate": 0.0007219251336898396, "loss": 0.1701, "step": 1600 }, { "epoch": 8.61, "grad_norm": 1.0805569887161255, "learning_rate": 0.0006951871657754011, "loss": 0.1833, "step": 1610 }, { "epoch": 8.66, "grad_norm": 0.6939735412597656, "learning_rate": 0.0006684491978609626, "loss": 0.1746, "step": 1620 }, { "epoch": 8.72, "grad_norm": 0.6343138813972473, "learning_rate": 0.0006417112299465242, "loss": 0.1686, "step": 1630 }, { "epoch": 8.77, "grad_norm": 0.9811675548553467, "learning_rate": 0.0006149732620320856, "loss": 0.1746, "step": 1640 }, { "epoch": 8.82, "grad_norm": 0.573733389377594, "learning_rate": 0.0005882352941176471, "loss": 0.1846, "step": 1650 }, { "epoch": 8.88, "grad_norm": 0.8457497358322144, "learning_rate": 0.0005614973262032086, "loss": 0.2011, "step": 1660 }, { "epoch": 8.93, "grad_norm": 0.8137268424034119, "learning_rate": 0.0005347593582887701, "loss": 0.1835, "step": 1670 }, { "epoch": 8.98, "grad_norm": 0.8221555352210999, "learning_rate": 0.0005080213903743316, "loss": 0.1973, "step": 1680 }, { "epoch": 9.0, "eval_accuracy": 0.9748831775700935, "eval_f1": 0.9743246307049778, "eval_loss": 0.07539471238851547, "eval_precision": 0.9732729615486463, "eval_recall": 0.9761282658354655, "eval_runtime": 9.0508, "eval_samples_per_second": 189.155, "eval_steps_per_second": 11.822, "step": 1683 }, { "epoch": 9.04, "grad_norm": 0.5439435839653015, "learning_rate": 0.000481283422459893, "loss": 0.1444, "step": 1690 }, { "epoch": 9.09, "grad_norm": 0.6913427710533142, "learning_rate": 0.00045454545454545455, "loss": 0.1809, "step": 1700 }, { "epoch": 9.14, "grad_norm": 0.7303802371025085, "learning_rate": 0.000427807486631016, "loss": 0.1396, "step": 1710 }, { "epoch": 9.2, "grad_norm": 0.523857057094574, "learning_rate": 0.00040106951871657755, "loss": 0.174, "step": 1720 }, { "epoch": 9.25, "grad_norm": 0.6848942041397095, "learning_rate": 0.000374331550802139, "loss": 0.1923, "step": 1730 }, { "epoch": 9.3, "grad_norm": 0.5525270104408264, "learning_rate": 0.00034759358288770055, "loss": 0.1438, "step": 1740 }, { "epoch": 9.36, "grad_norm": 0.802334725856781, "learning_rate": 0.0003208556149732621, "loss": 0.1733, "step": 1750 }, { "epoch": 9.41, "grad_norm": 0.6787207722663879, "learning_rate": 0.00029411764705882356, "loss": 0.1637, "step": 1760 }, { "epoch": 9.47, "grad_norm": 0.8829286098480225, "learning_rate": 0.00026737967914438503, "loss": 0.1651, "step": 1770 }, { "epoch": 9.52, "grad_norm": 0.6158820986747742, "learning_rate": 0.0002406417112299465, "loss": 0.1969, "step": 1780 }, { "epoch": 9.57, "grad_norm": 0.6652956604957581, "learning_rate": 0.000213903743315508, "loss": 0.1296, "step": 1790 }, { "epoch": 9.63, "grad_norm": 0.4137950539588928, "learning_rate": 0.0001871657754010695, "loss": 0.141, "step": 1800 }, { "epoch": 9.68, "grad_norm": 0.6831213235855103, "learning_rate": 0.00016042780748663104, "loss": 0.1739, "step": 1810 }, { "epoch": 9.73, "grad_norm": 0.6022053956985474, "learning_rate": 0.00013368983957219252, "loss": 0.1215, "step": 1820 }, { "epoch": 9.79, "grad_norm": 0.5832372307777405, "learning_rate": 0.000106951871657754, "loss": 0.1921, "step": 1830 }, { "epoch": 9.84, "grad_norm": 0.5527146458625793, "learning_rate": 8.021390374331552e-05, "loss": 0.1405, "step": 1840 }, { "epoch": 9.89, "grad_norm": 0.8569141030311584, "learning_rate": 5.3475935828877e-05, "loss": 0.1398, "step": 1850 }, { "epoch": 9.95, "grad_norm": 0.37496665120124817, "learning_rate": 2.67379679144385e-05, "loss": 0.1351, "step": 1860 }, { "epoch": 10.0, "grad_norm": 1.0200737714767456, "learning_rate": 0.0, "loss": 0.1711, "step": 1870 }, { "epoch": 10.0, "eval_accuracy": 0.981892523364486, "eval_f1": 0.9824644070672106, "eval_loss": 0.05325188860297203, "eval_precision": 0.982613511226295, "eval_recall": 0.9823689555008165, "eval_runtime": 9.0633, "eval_samples_per_second": 188.895, "eval_steps_per_second": 11.806, "step": 1870 }, { "epoch": 10.0, "step": 1870, "total_flos": 9.332136680499118e+18, "train_loss": 0.3326003640093268, "train_runtime": 1377.1458, "train_samples_per_second": 86.839, "train_steps_per_second": 1.358 } ], "logging_steps": 10, "max_steps": 1870, "num_input_tokens_seen": 0, "num_train_epochs": 10, "save_steps": 500, "total_flos": 9.332136680499118e+18, "train_batch_size": 16, "trial_name": null, "trial_params": null }