{ "epoch": 1.8559999999999999, "global_step": 5800, "max_steps": 7500, "logging_steps": 5, "eval_steps": 200, "save_steps": 0, "train_batch_size": 32, "num_train_epochs": 3, "num_input_tokens_seen": 0, "total_flos": 3.06356169950208e+16, "log_history": [ { "loss": 3.0335, "grad_norm": 82.1195297241211, "learning_rate": 4.000000000000001e-07, "epoch": 0.0016, "step": 5 }, { "loss": 2.9672, "grad_norm": 88.84027862548828, "learning_rate": 8.000000000000002e-07, "epoch": 0.0032, "step": 10 }, { "loss": 2.9167, "grad_norm": 110.60800170898438, "learning_rate": 1.2000000000000002e-06, "epoch": 0.0048, "step": 15 }, { "loss": 2.7866, "grad_norm": 60.17360305786133, "learning_rate": 1.6000000000000004e-06, "epoch": 0.0064, "step": 20 }, { "loss": 2.5336, "grad_norm": 156.402587890625, "learning_rate": 2.0000000000000003e-06, "epoch": 0.008, "step": 25 }, { "loss": 2.3262, "grad_norm": 59.95594024658203, "learning_rate": 2.4000000000000003e-06, "epoch": 0.0096, "step": 30 }, { "loss": 2.0617, "grad_norm": 44.70839309692383, "learning_rate": 2.8000000000000007e-06, "epoch": 0.0112, "step": 35 }, { "loss": 1.8197, "grad_norm": 77.0301284790039, "learning_rate": 3.2000000000000007e-06, "epoch": 0.0128, "step": 40 }, { "loss": 1.5242, "grad_norm": 56.06800842285156, "learning_rate": 3.6000000000000003e-06, "epoch": 0.0144, "step": 45 }, { "loss": 1.2706, "grad_norm": 72.418701171875, "learning_rate": 4.000000000000001e-06, "epoch": 0.016, "step": 50 }, { "loss": 1.0677, "grad_norm": 48.07682800292969, "learning_rate": 4.4e-06, "epoch": 0.0176, "step": 55 }, { "loss": 0.938, "grad_norm": 25.559019088745117, "learning_rate": 4.800000000000001e-06, "epoch": 0.0192, "step": 60 }, { "loss": 0.8818, "grad_norm": 19.105140686035156, "learning_rate": 5.200000000000001e-06, "epoch": 0.0208, "step": 65 }, { "loss": 0.8552, "grad_norm": 32.9610481262207, "learning_rate": 5.6000000000000014e-06, "epoch": 0.0224, "step": 70 }, { "loss": 0.7681, "grad_norm": 31.54001808166504, "learning_rate": 6.000000000000001e-06, "epoch": 0.024, "step": 75 }, { "loss": 0.8045, "grad_norm": 35.64654541015625, "learning_rate": 6.400000000000001e-06, "epoch": 0.0256, "step": 80 }, { "loss": 0.7089, "grad_norm": 23.311580657958984, "learning_rate": 6.800000000000001e-06, "epoch": 0.0272, "step": 85 }, { "loss": 0.6545, "grad_norm": 20.769006729125977, "learning_rate": 7.2000000000000005e-06, "epoch": 0.0288, "step": 90 }, { "loss": 0.6601, "grad_norm": 17.908187866210938, "learning_rate": 7.600000000000002e-06, "epoch": 0.0304, "step": 95 }, { "loss": 0.6967, "grad_norm": 17.458377838134766, "learning_rate": 8.000000000000001e-06, "epoch": 0.032, "step": 100 }, { "loss": 0.6193, "grad_norm": 17.900911331176758, "learning_rate": 8.400000000000003e-06, "epoch": 0.0336, "step": 105 }, { "loss": 0.6067, "grad_norm": 13.517078399658203, "learning_rate": 8.8e-06, "epoch": 0.0352, "step": 110 }, { "loss": 0.5767, "grad_norm": 28.88074493408203, "learning_rate": 9.2e-06, "epoch": 0.0368, "step": 115 }, { "loss": 0.6834, "grad_norm": 24.771915435791016, "learning_rate": 9.600000000000001e-06, "epoch": 0.0384, "step": 120 }, { "loss": 0.5724, "grad_norm": 10.78698444366455, "learning_rate": 1e-05, "epoch": 0.04, "step": 125 }, { "loss": 0.5498, "grad_norm": 20.9072208404541, "learning_rate": 1.0400000000000002e-05, "epoch": 0.0416, "step": 130 }, { "loss": 0.5447, "grad_norm": 7.22981071472168, "learning_rate": 1.0800000000000002e-05, "epoch": 0.0432, "step": 135 }, { "loss": 0.6207, "grad_norm": 8.354753494262695, "learning_rate": 1.1200000000000003e-05, "epoch": 0.0448, "step": 140 }, { "loss": 0.5466, "grad_norm": 7.524026870727539, "learning_rate": 1.16e-05, "epoch": 0.0464, "step": 145 }, { "loss": 0.6084, "grad_norm": 7.417405128479004, "learning_rate": 1.2000000000000002e-05, "epoch": 0.048, "step": 150 }, { "loss": 0.5675, "grad_norm": 6.576486110687256, "learning_rate": 1.2400000000000002e-05, "epoch": 0.0496, "step": 155 }, { "loss": 0.5289, "grad_norm": 8.842827796936035, "learning_rate": 1.2800000000000003e-05, "epoch": 0.0512, "step": 160 }, { "loss": 0.5603, "grad_norm": 12.270004272460938, "learning_rate": 1.3200000000000002e-05, "epoch": 0.0528, "step": 165 }, { "loss": 0.5231, "grad_norm": 5.759673118591309, "learning_rate": 1.3600000000000002e-05, "epoch": 0.0544, "step": 170 }, { "loss": 0.5021, "grad_norm": 4.171690464019775, "learning_rate": 1.4000000000000001e-05, "epoch": 0.056, "step": 175 }, { "loss": 0.5394, "grad_norm": 11.56941032409668, "learning_rate": 1.4400000000000001e-05, "epoch": 0.0576, "step": 180 }, { "loss": 0.4872, "grad_norm": 7.358865261077881, "learning_rate": 1.4800000000000002e-05, "epoch": 0.0592, "step": 185 }, { "loss": 0.5303, "grad_norm": 7.893204212188721, "learning_rate": 1.5200000000000004e-05, "epoch": 0.0608, "step": 190 }, { "loss": 0.4441, "grad_norm": 4.525728225708008, "learning_rate": 1.5600000000000003e-05, "epoch": 0.0624, "step": 195 }, { "loss": 0.5216, "grad_norm": 4.942392349243164, "learning_rate": 1.6000000000000003e-05, "epoch": 0.064, "step": 200 }, { "eval_loss": 0.5007714033126831, "eval_f1": 0.0022661462923328716, "eval_recall": 0.0012200081333875558, "eval_accuracy": 0.8862360978386841, "eval_precision": 0.015901060070671377, "eval_classification_report": { "LOC": { "precision": 0.0, "recall": 0.0, "f1-score": 0.0, "support": 1087 }, "ORG": { "precision": 0.003105590062111801, "recall": 0.0005903187721369539, "f1-score": 0.000992063492063492, "support": 1694 }, "PER": { "precision": 0.03347280334728033, "recall": 0.002331681725444477, "f1-score": 0.004359673024523161, "support": 3431 }, "PRD": { "precision": 0.0, "recall": 0.0, "f1-score": 0.0, "support": 1165 }, "micro avg": { "precision": 0.015901060070671377, "recall": 0.0012200081333875558, "f1-score": 0.0022661462923328716, "support": 7377 }, "macro avg": { "precision": 0.009144598352348033, "recall": 0.0007305001243953578, "f1-score": 0.0013379341291466632, "support": 7377 }, "weighted avg": { "precision": 0.01628115193842161, "recall": 0.0012200081333875558, "f1-score": 0.002255468849490921, "support": 7377 } }, "eval_runtime": 3.8576, "eval_samples_per_second": 1061.791, "eval_steps_per_second": 8.295, "epoch": 0.064, "step": 200 }, { "loss": 0.4796, "grad_norm": 3.559159278869629, "learning_rate": 1.6400000000000002e-05, "epoch": 0.0656, "step": 205 }, { "loss": 0.4561, "grad_norm": 3.7857613563537598, "learning_rate": 1.6800000000000005e-05, "epoch": 0.0672, "step": 210 }, { "loss": 0.4535, "grad_norm": 3.0681214332580566, "learning_rate": 1.7200000000000005e-05, "epoch": 0.0688, "step": 215 }, { "loss": 0.4537, "grad_norm": 4.983517646789551, "learning_rate": 1.76e-05, "epoch": 0.0704, "step": 220 }, { "loss": 0.4503, "grad_norm": 3.5269930362701416, "learning_rate": 1.8e-05, "epoch": 0.072, "step": 225 }, { "loss": 0.4357, "grad_norm": 3.2933030128479004, "learning_rate": 1.84e-05, "epoch": 0.0736, "step": 230 }, { "loss": 0.4252, "grad_norm": 3.176693916320801, "learning_rate": 1.8800000000000003e-05, "epoch": 0.0752, "step": 235 }, { "loss": 0.4522, "grad_norm": 3.697317123413086, "learning_rate": 1.9200000000000003e-05, "epoch": 0.0768, "step": 240 }, { "loss": 0.4162, "grad_norm": 4.488443851470947, "learning_rate": 1.9600000000000002e-05, "epoch": 0.0784, "step": 245 }, { "loss": 0.4556, "grad_norm": 3.9123892784118652, "learning_rate": 2e-05, "epoch": 0.08, "step": 250 }, { "loss": 0.4117, "grad_norm": 4.08515739440918, "learning_rate": 2.0400000000000005e-05, "epoch": 0.0816, "step": 255 }, { "loss": 0.4113, "grad_norm": 3.2308430671691895, "learning_rate": 2.0800000000000004e-05, "epoch": 0.0832, "step": 260 }, { "loss": 0.3694, "grad_norm": 2.1715378761291504, "learning_rate": 2.1200000000000004e-05, "epoch": 0.0848, "step": 265 }, { "loss": 0.403, "grad_norm": 3.7848703861236572, "learning_rate": 2.1600000000000003e-05, "epoch": 0.0864, "step": 270 }, { "loss": 0.3607, "grad_norm": 2.9827466011047363, "learning_rate": 2.2000000000000003e-05, "epoch": 0.088, "step": 275 }, { "loss": 0.3437, "grad_norm": 2.2484512329101562, "learning_rate": 2.2400000000000006e-05, "epoch": 0.0896, "step": 280 }, { "loss": 0.3513, "grad_norm": 2.7186050415039062, "learning_rate": 2.2800000000000002e-05, "epoch": 0.0912, "step": 285 }, { "loss": 0.3589, "grad_norm": 2.6509628295898438, "learning_rate": 2.32e-05, "epoch": 0.0928, "step": 290 }, { "loss": 0.3473, "grad_norm": 2.3571269512176514, "learning_rate": 2.36e-05, "epoch": 0.0944, "step": 295 }, { "loss": 0.4279, "grad_norm": 2.94352126121521, "learning_rate": 2.4000000000000004e-05, "epoch": 0.096, "step": 300 }, { "loss": 0.3918, "grad_norm": 2.3764500617980957, "learning_rate": 2.4400000000000004e-05, "epoch": 0.0976, "step": 305 }, { "loss": 0.362, "grad_norm": 2.1101410388946533, "learning_rate": 2.4800000000000003e-05, "epoch": 0.0992, "step": 310 }, { "loss": 0.3324, "grad_norm": 2.452136754989624, "learning_rate": 2.5200000000000003e-05, "epoch": 0.1008, "step": 315 }, { "loss": 0.3169, "grad_norm": 1.9149426221847534, "learning_rate": 2.5600000000000006e-05, "epoch": 0.1024, "step": 320 }, { "loss": 0.3404, "grad_norm": 2.372434377670288, "learning_rate": 2.6000000000000005e-05, "epoch": 0.104, "step": 325 }, { "loss": 0.3117, "grad_norm": 1.7959245443344116, "learning_rate": 2.6400000000000005e-05, "epoch": 0.1056, "step": 330 }, { "loss": 0.3027, "grad_norm": 1.7316709756851196, "learning_rate": 2.6800000000000004e-05, "epoch": 0.1072, "step": 335 }, { "loss": 0.301, "grad_norm": 2.2444357872009277, "learning_rate": 2.7200000000000004e-05, "epoch": 0.1088, "step": 340 }, { "loss": 0.2899, "grad_norm": 1.5384297370910645, "learning_rate": 2.7600000000000003e-05, "epoch": 0.1104, "step": 345 }, { "loss": 0.3112, "grad_norm": 1.859355092048645, "learning_rate": 2.8000000000000003e-05, "epoch": 0.112, "step": 350 }, { "loss": 0.2846, "grad_norm": 2.486531972885132, "learning_rate": 2.8400000000000003e-05, "epoch": 0.1136, "step": 355 }, { "loss": 0.3202, "grad_norm": 2.8879830837249756, "learning_rate": 2.8800000000000002e-05, "epoch": 0.1152, "step": 360 }, { "loss": 0.2481, "grad_norm": 2.2178752422332764, "learning_rate": 2.9200000000000005e-05, "epoch": 0.1168, "step": 365 }, { "loss": 0.2938, "grad_norm": 2.410358428955078, "learning_rate": 2.9600000000000005e-05, "epoch": 0.1184, "step": 370 }, { "loss": 0.2902, "grad_norm": 1.543535828590393, "learning_rate": 3.0000000000000004e-05, "epoch": 0.12, "step": 375 }, { "loss": 0.2613, "grad_norm": 1.521278738975525, "learning_rate": 3.0400000000000007e-05, "epoch": 0.1216, "step": 380 }, { "loss": 0.2707, "grad_norm": 2.278775691986084, "learning_rate": 3.08e-05, "epoch": 0.1232, "step": 385 }, { "loss": 0.2622, "grad_norm": 1.917235016822815, "learning_rate": 3.1200000000000006e-05, "epoch": 0.1248, "step": 390 }, { "loss": 0.2673, "grad_norm": 1.2902324199676514, "learning_rate": 3.16e-05, "epoch": 0.1264, "step": 395 }, { "loss": 0.2756, "grad_norm": 2.1004207134246826, "learning_rate": 3.2000000000000005e-05, "epoch": 0.128, "step": 400 }, { "eval_loss": 0.27002671360969543, "eval_f1": 0.29212205445700934, "eval_recall": 0.3134065338213366, "eval_accuracy": 0.9114887257792327, "eval_precision": 0.2735447231424515, "eval_classification_report": { "LOC": { "precision": 0.11131276467029642, "recall": 0.16927322907083717, "f1-score": 0.1343065693430657, "support": 1087 }, "ORG": { "precision": 0.06920077972709551, "recall": 0.08382526564344746, "f1-score": 0.07581420181526961, "support": 1694 }, "PER": { "precision": 0.42450203469693726, "recall": 0.5776741474788691, "f1-score": 0.4893827160493827, "support": 3431 }, "PRD": { "precision": 0.05128205128205128, "recall": 0.0034334763948497852, "f1-score": 0.006436041834271922, "support": 1165 }, "micro avg": { "precision": 0.2735447231424515, "recall": 0.3134065338213366, "f1-score": 0.29212205445700934, "support": 7377 }, "macro avg": { "precision": 0.16407440759409514, "recall": 0.2085515296470009, "f1-score": 0.1764848822604975, "support": 7377 }, "weighted avg": { "precision": 0.237824748114829, "recall": 0.3134065338213366, "f1-score": 0.265824940525056, "support": 7377 } }, "eval_runtime": 3.4221, "eval_samples_per_second": 1196.94, "eval_steps_per_second": 9.351, "epoch": 0.128, "step": 400 }, { "loss": 0.2717, "grad_norm": 1.4402852058410645, "learning_rate": 3.240000000000001e-05, "epoch": 0.1296, "step": 405 }, { "loss": 0.2445, "grad_norm": 2.1268718242645264, "learning_rate": 3.2800000000000004e-05, "epoch": 0.1312, "step": 410 }, { "loss": 0.2465, "grad_norm": 1.4069234132766724, "learning_rate": 3.320000000000001e-05, "epoch": 0.1328, "step": 415 }, { "loss": 0.2439, "grad_norm": 1.5428396463394165, "learning_rate": 3.360000000000001e-05, "epoch": 0.1344, "step": 420 }, { "loss": 0.2552, "grad_norm": 2.9032771587371826, "learning_rate": 3.4000000000000007e-05, "epoch": 0.136, "step": 425 }, { "loss": 0.2185, "grad_norm": 1.7477030754089355, "learning_rate": 3.440000000000001e-05, "epoch": 0.1376, "step": 430 }, { "loss": 0.2394, "grad_norm": 1.3946317434310913, "learning_rate": 3.4800000000000006e-05, "epoch": 0.1392, "step": 435 }, { "loss": 0.2218, "grad_norm": 1.473497986793518, "learning_rate": 3.52e-05, "epoch": 0.1408, "step": 440 }, { "loss": 0.2246, "grad_norm": 1.4988517761230469, "learning_rate": 3.5600000000000005e-05, "epoch": 0.1424, "step": 445 }, { "loss": 0.2038, "grad_norm": 1.7913917303085327, "learning_rate": 3.6e-05, "epoch": 0.144, "step": 450 }, { "loss": 0.2115, "grad_norm": 1.8941395282745361, "learning_rate": 3.6400000000000004e-05, "epoch": 0.1456, "step": 455 }, { "loss": 0.252, "grad_norm": 1.61392343044281, "learning_rate": 3.68e-05, "epoch": 0.1472, "step": 460 }, { "loss": 0.219, "grad_norm": 1.232804298400879, "learning_rate": 3.72e-05, "epoch": 0.1488, "step": 465 }, { "loss": 0.1944, "grad_norm": 2.0817365646362305, "learning_rate": 3.7600000000000006e-05, "epoch": 0.1504, "step": 470 }, { "loss": 0.2257, "grad_norm": 2.192542791366577, "learning_rate": 3.8e-05, "epoch": 0.152, "step": 475 }, { "loss": 0.2242, "grad_norm": 1.763069987297058, "learning_rate": 3.8400000000000005e-05, "epoch": 0.1536, "step": 480 }, { "loss": 0.2093, "grad_norm": 2.2302606105804443, "learning_rate": 3.88e-05, "epoch": 0.1552, "step": 485 }, { "loss": 0.2213, "grad_norm": 1.955357551574707, "learning_rate": 3.9200000000000004e-05, "epoch": 0.1568, "step": 490 }, { "loss": 0.2077, "grad_norm": 3.6477394104003906, "learning_rate": 3.960000000000001e-05, "epoch": 0.1584, "step": 495 }, { "loss": 0.173, "grad_norm": 1.1776589155197144, "learning_rate": 4e-05, "epoch": 0.16, "step": 500 }, { "loss": 0.1862, "grad_norm": 1.275420069694519, "learning_rate": 4.0400000000000006e-05, "epoch": 0.1616, "step": 505 }, { "loss": 0.1849, "grad_norm": 1.7181892395019531, "learning_rate": 4.080000000000001e-05, "epoch": 0.1632, "step": 510 }, { "loss": 0.1714, "grad_norm": 1.5669482946395874, "learning_rate": 4.1200000000000005e-05, "epoch": 0.1648, "step": 515 }, { "loss": 0.2113, "grad_norm": 1.2558255195617676, "learning_rate": 4.160000000000001e-05, "epoch": 0.1664, "step": 520 }, { "loss": 0.1976, "grad_norm": 1.6378982067108154, "learning_rate": 4.2000000000000004e-05, "epoch": 0.168, "step": 525 }, { "loss": 0.1882, "grad_norm": 2.029606342315674, "learning_rate": 4.240000000000001e-05, "epoch": 0.1696, "step": 530 }, { "loss": 0.1987, "grad_norm": 1.5086201429367065, "learning_rate": 4.280000000000001e-05, "epoch": 0.1712, "step": 535 }, { "loss": 0.171, "grad_norm": 1.99561607837677, "learning_rate": 4.3200000000000007e-05, "epoch": 0.1728, "step": 540 }, { "loss": 0.1596, "grad_norm": 1.666466236114502, "learning_rate": 4.360000000000001e-05, "epoch": 0.1744, "step": 545 }, { "loss": 0.1622, "grad_norm": 1.0952867269515991, "learning_rate": 4.4000000000000006e-05, "epoch": 0.176, "step": 550 }, { "loss": 0.169, "grad_norm": 1.599013090133667, "learning_rate": 4.440000000000001e-05, "epoch": 0.1776, "step": 555 }, { "loss": 0.1681, "grad_norm": 1.2811219692230225, "learning_rate": 4.480000000000001e-05, "epoch": 0.1792, "step": 560 }, { "loss": 0.1445, "grad_norm": 1.592172384262085, "learning_rate": 4.52e-05, "epoch": 0.1808, "step": 565 }, { "loss": 0.167, "grad_norm": 1.1934109926223755, "learning_rate": 4.5600000000000004e-05, "epoch": 0.1824, "step": 570 }, { "loss": 0.1733, "grad_norm": 1.2368823289871216, "learning_rate": 4.600000000000001e-05, "epoch": 0.184, "step": 575 }, { "loss": 0.1596, "grad_norm": 2.820197582244873, "learning_rate": 4.64e-05, "epoch": 0.1856, "step": 580 }, { "loss": 0.1854, "grad_norm": 1.342007040977478, "learning_rate": 4.6800000000000006e-05, "epoch": 0.1872, "step": 585 }, { "loss": 0.1602, "grad_norm": 1.2319730520248413, "learning_rate": 4.72e-05, "epoch": 0.1888, "step": 590 }, { "loss": 0.1338, "grad_norm": 1.2222471237182617, "learning_rate": 4.7600000000000005e-05, "epoch": 0.1904, "step": 595 }, { "loss": 0.1854, "grad_norm": 1.2033894062042236, "learning_rate": 4.800000000000001e-05, "epoch": 0.192, "step": 600 }, { "eval_loss": 0.15923890471458435, "eval_f1": 0.5325047801147227, "eval_recall": 0.6040395824861055, "eval_accuracy": 0.9432070024195353, "eval_precision": 0.4761192435089219, "eval_classification_report": { "LOC": { "precision": 0.41457068516912404, "recall": 0.43974241030358785, "f1-score": 0.4267857142857143, "support": 1087 }, "ORG": { "precision": 0.31581092094539526, "recall": 0.4574970484061393, "f1-score": 0.37367405978784957, "support": 1694 }, "PER": { "precision": 0.6563876651982379, "recall": 0.8251238705916643, "f1-score": 0.7311466942148761, "support": 3431 }, "PRD": { "precision": 0.2585128561501042, "recall": 0.31931330472103003, "f1-score": 0.2857142857142857, "support": 1165 }, "micro avg": { "precision": 0.4761192435089219, "recall": 0.6040395824861055, "f1-score": 0.5325047801147227, "support": 7377 }, "macro avg": { "precision": 0.4113205318657154, "recall": 0.5104191585056054, "f1-score": 0.4543301885006814, "support": 7377 }, "weighted avg": { "precision": 0.47971473384443036, "recall": 0.6040395824861055, "f1-score": 0.5338676127718003, "support": 7377 } }, "eval_runtime": 3.8858, "eval_samples_per_second": 1054.088, "eval_steps_per_second": 8.235, "epoch": 0.192, "step": 600 }, { "loss": 0.1566, "grad_norm": 1.4292947053909302, "learning_rate": 4.8400000000000004e-05, "epoch": 0.1936, "step": 605 }, { "loss": 0.156, "grad_norm": 0.9665194153785706, "learning_rate": 4.880000000000001e-05, "epoch": 0.1952, "step": 610 }, { "loss": 0.1584, "grad_norm": 1.1934261322021484, "learning_rate": 4.92e-05, "epoch": 0.1968, "step": 615 }, { "loss": 0.1279, "grad_norm": 1.227059006690979, "learning_rate": 4.9600000000000006e-05, "epoch": 0.1984, "step": 620 }, { "loss": 0.1616, "grad_norm": 1.2745181322097778, "learning_rate": 5.000000000000001e-05, "epoch": 0.2, "step": 625 }, { "loss": 0.1567, "grad_norm": 1.7061660289764404, "learning_rate": 5.0400000000000005e-05, "epoch": 0.2016, "step": 630 }, { "loss": 0.157, "grad_norm": 0.8803685903549194, "learning_rate": 5.080000000000001e-05, "epoch": 0.2032, "step": 635 }, { "loss": 0.1376, "grad_norm": 1.2192692756652832, "learning_rate": 5.120000000000001e-05, "epoch": 0.2048, "step": 640 }, { "loss": 0.1394, "grad_norm": 0.8894755244255066, "learning_rate": 5.160000000000001e-05, "epoch": 0.2064, "step": 645 }, { "loss": 0.15, "grad_norm": 0.8442983031272888, "learning_rate": 5.200000000000001e-05, "epoch": 0.208, "step": 650 }, { "loss": 0.1455, "grad_norm": 1.2069193124771118, "learning_rate": 5.2400000000000007e-05, "epoch": 0.2096, "step": 655 }, { "loss": 0.1546, "grad_norm": 1.2397536039352417, "learning_rate": 5.280000000000001e-05, "epoch": 0.2112, "step": 660 }, { "loss": 0.1393, "grad_norm": 1.5896267890930176, "learning_rate": 5.320000000000001e-05, "epoch": 0.2128, "step": 665 }, { "loss": 0.1323, "grad_norm": 1.0088090896606445, "learning_rate": 5.360000000000001e-05, "epoch": 0.2144, "step": 670 }, { "loss": 0.1339, "grad_norm": 1.080549955368042, "learning_rate": 5.400000000000001e-05, "epoch": 0.216, "step": 675 }, { "loss": 0.1334, "grad_norm": 0.8886431455612183, "learning_rate": 5.440000000000001e-05, "epoch": 0.2176, "step": 680 }, { "loss": 0.1255, "grad_norm": 0.9754141569137573, "learning_rate": 5.480000000000001e-05, "epoch": 0.2192, "step": 685 }, { "loss": 0.154, "grad_norm": 0.9398236274719238, "learning_rate": 5.520000000000001e-05, "epoch": 0.2208, "step": 690 }, { "loss": 0.1257, "grad_norm": 0.9330877661705017, "learning_rate": 5.56e-05, "epoch": 0.2224, "step": 695 }, { "loss": 0.1395, "grad_norm": 1.8738404512405396, "learning_rate": 5.6000000000000006e-05, "epoch": 0.224, "step": 700 }, { "loss": 0.1229, "grad_norm": 1.1091705560684204, "learning_rate": 5.64e-05, "epoch": 0.2256, "step": 705 }, { "loss": 0.1275, "grad_norm": 0.9525001049041748, "learning_rate": 5.6800000000000005e-05, "epoch": 0.2272, "step": 710 }, { "loss": 0.1106, "grad_norm": 1.2660268545150757, "learning_rate": 5.720000000000001e-05, "epoch": 0.2288, "step": 715 }, { "loss": 0.1273, "grad_norm": 1.359167218208313, "learning_rate": 5.7600000000000004e-05, "epoch": 0.2304, "step": 720 }, { "loss": 0.1406, "grad_norm": 1.3178445100784302, "learning_rate": 5.800000000000001e-05, "epoch": 0.232, "step": 725 }, { "loss": 0.1541, "grad_norm": 0.7848720550537109, "learning_rate": 5.840000000000001e-05, "epoch": 0.2336, "step": 730 }, { "loss": 0.1283, "grad_norm": 0.7774125933647156, "learning_rate": 5.8800000000000006e-05, "epoch": 0.2352, "step": 735 }, { "loss": 0.1314, "grad_norm": 0.9177035689353943, "learning_rate": 5.920000000000001e-05, "epoch": 0.2368, "step": 740 }, { "loss": 0.1281, "grad_norm": 0.9019742012023926, "learning_rate": 5.9600000000000005e-05, "epoch": 0.2384, "step": 745 }, { "loss": 0.1332, "grad_norm": 1.124169111251831, "learning_rate": 6.000000000000001e-05, "epoch": 0.24, "step": 750 }, { "loss": 0.1415, "grad_norm": 1.3589638471603394, "learning_rate": 5.999991876872472e-05, "epoch": 0.2416, "step": 755 }, { "loss": 0.1192, "grad_norm": 1.818366527557373, "learning_rate": 5.999967507533877e-05, "epoch": 0.2432, "step": 760 }, { "loss": 0.1337, "grad_norm": 1.0883005857467651, "learning_rate": 5.999926892116185e-05, "epoch": 0.2448, "step": 765 }, { "loss": 0.1717, "grad_norm": 1.3792660236358643, "learning_rate": 5.999870030839346e-05, "epoch": 0.2464, "step": 770 }, { "loss": 0.1483, "grad_norm": 1.2378206253051758, "learning_rate": 5.999796924011288e-05, "epoch": 0.248, "step": 775 }, { "loss": 0.1187, "grad_norm": 0.7132590413093567, "learning_rate": 5.9997075720279136e-05, "epoch": 0.2496, "step": 780 }, { "loss": 0.1281, "grad_norm": 1.3595064878463745, "learning_rate": 5.999601975373102e-05, "epoch": 0.2512, "step": 785 }, { "loss": 0.117, "grad_norm": 0.921572744846344, "learning_rate": 5.999480134618704e-05, "epoch": 0.2528, "step": 790 }, { "loss": 0.1489, "grad_norm": 1.514236330986023, "learning_rate": 5.9993420504245377e-05, "epoch": 0.2544, "step": 795 }, { "loss": 0.1174, "grad_norm": 0.826785683631897, "learning_rate": 5.999187723538386e-05, "epoch": 0.256, "step": 800 }, { "eval_loss": 0.12730863690376282, "eval_f1": 0.6133234090063266, "eval_recall": 0.6701911346075641, "eval_accuracy": 0.9535662728991725, "eval_precision": 0.5653516295025729, "eval_classification_report": { "LOC": { "precision": 0.4779116465863454, "recall": 0.547378104875805, "f1-score": 0.5102915951972555, "support": 1087 }, "ORG": { "precision": 0.44224422442244227, "recall": 0.47461629279811096, "f1-score": 0.4578587699316628, "support": 1694 }, "PER": { "precision": 0.7306626354245402, "recall": 0.8452346254736228, "f1-score": 0.7837837837837839, "support": 3431 }, "PRD": { "precision": 0.37653239929947463, "recall": 0.5536480686695279, "f1-score": 0.448227936066713, "support": 1165 }, "micro avg": { "precision": 0.5653516295025729, "recall": 0.6701911346075641, "f1-score": 0.6133234090063266, "support": 7377 }, "macro avg": { "precision": 0.5068377264332006, "recall": 0.6052192729542666, "f1-score": 0.5500405212448538, "support": 7377 }, "weighted avg": { "precision": 0.5712641213686404, "recall": 0.6701911346075641, "f1-score": 0.6156496445605988, "support": 7377 } }, "eval_runtime": 3.7589, "eval_samples_per_second": 1089.687, "eval_steps_per_second": 8.513, "epoch": 0.256, "step": 800 }, { "loss": 0.1119, "grad_norm": 0.6694440841674805, "learning_rate": 5.999017154795994e-05, "epoch": 0.2576, "step": 805 }, { "loss": 0.1261, "grad_norm": 0.6659302711486816, "learning_rate": 5.9988303451210634e-05, "epoch": 0.2592, "step": 810 }, { "loss": 0.1338, "grad_norm": 1.003527045249939, "learning_rate": 5.998627295525247e-05, "epoch": 0.2608, "step": 815 }, { "loss": 0.1429, "grad_norm": 0.664521336555481, "learning_rate": 5.9984080071081425e-05, "epoch": 0.2624, "step": 820 }, { "loss": 0.1308, "grad_norm": 0.9482252597808838, "learning_rate": 5.998172481057288e-05, "epoch": 0.264, "step": 825 }, { "loss": 0.1416, "grad_norm": 1.411017656326294, "learning_rate": 5.997920718648156e-05, "epoch": 0.2656, "step": 830 }, { "loss": 0.1461, "grad_norm": 2.197968006134033, "learning_rate": 5.997652721244146e-05, "epoch": 0.2672, "step": 835 }, { "loss": 0.1297, "grad_norm": 0.6464182138442993, "learning_rate": 5.997368490296576e-05, "epoch": 0.2688, "step": 840 }, { "loss": 0.1163, "grad_norm": 0.8266535997390747, "learning_rate": 5.997068027344674e-05, "epoch": 0.2704, "step": 845 }, { "loss": 0.1371, "grad_norm": 1.1215314865112305, "learning_rate": 5.996751334015575e-05, "epoch": 0.272, "step": 850 }, { "loss": 0.1322, "grad_norm": 0.917567253112793, "learning_rate": 5.996418412024303e-05, "epoch": 0.2736, "step": 855 }, { "loss": 0.1251, "grad_norm": 0.6738415360450745, "learning_rate": 5.996069263173772e-05, "epoch": 0.2752, "step": 860 }, { "loss": 0.1179, "grad_norm": 0.9891319274902344, "learning_rate": 5.995703889354768e-05, "epoch": 0.2768, "step": 865 }, { "loss": 0.1156, "grad_norm": 0.7964264750480652, "learning_rate": 5.995322292545944e-05, "epoch": 0.2784, "step": 870 }, { "loss": 0.1053, "grad_norm": 1.0691487789154053, "learning_rate": 5.9949244748138055e-05, "epoch": 0.28, "step": 875 }, { "loss": 0.1047, "grad_norm": 0.872525691986084, "learning_rate": 5.994510438312702e-05, "epoch": 0.2816, "step": 880 }, { "loss": 0.1225, "grad_norm": 0.7620688676834106, "learning_rate": 5.994080185284816e-05, "epoch": 0.2832, "step": 885 }, { "loss": 0.1043, "grad_norm": 0.4866650402545929, "learning_rate": 5.993633718060145e-05, "epoch": 0.2848, "step": 890 }, { "loss": 0.1298, "grad_norm": 0.9189175367355347, "learning_rate": 5.9931710390564976e-05, "epoch": 0.2864, "step": 895 }, { "loss": 0.1193, "grad_norm": 0.8462486863136292, "learning_rate": 5.9926921507794735e-05, "epoch": 0.288, "step": 900 }, { "loss": 0.1143, "grad_norm": 0.5728514194488525, "learning_rate": 5.992197055822453e-05, "epoch": 0.2896, "step": 905 }, { "loss": 0.1161, "grad_norm": 0.7136925458908081, "learning_rate": 5.991685756866584e-05, "epoch": 0.2912, "step": 910 }, { "loss": 0.1215, "grad_norm": 0.6829357743263245, "learning_rate": 5.9911582566807616e-05, "epoch": 0.2928, "step": 915 }, { "loss": 0.12, "grad_norm": 0.790256679058075, "learning_rate": 5.990614558121622e-05, "epoch": 0.2944, "step": 920 }, { "loss": 0.1252, "grad_norm": 0.8256840109825134, "learning_rate": 5.990054664133519e-05, "epoch": 0.296, "step": 925 }, { "loss": 0.1174, "grad_norm": 0.8501859903335571, "learning_rate": 5.989478577748514e-05, "epoch": 0.2976, "step": 930 }, { "loss": 0.1161, "grad_norm": 0.7125261425971985, "learning_rate": 5.9888863020863554e-05, "epoch": 0.2992, "step": 935 }, { "loss": 0.1267, "grad_norm": 0.6811211705207825, "learning_rate": 5.988277840354463e-05, "epoch": 0.3008, "step": 940 }, { "loss": 0.1484, "grad_norm": 1.973705768585205, "learning_rate": 5.987653195847911e-05, "epoch": 0.3024, "step": 945 }, { "loss": 0.1467, "grad_norm": 0.8643979430198669, "learning_rate": 5.987012371949414e-05, "epoch": 0.304, "step": 950 }, { "loss": 0.1143, "grad_norm": 0.4681348502635956, "learning_rate": 5.9863553721292985e-05, "epoch": 0.3056, "step": 955 }, { "loss": 0.1086, "grad_norm": 0.5381385087966919, "learning_rate": 5.985682199945494e-05, "epoch": 0.3072, "step": 960 }, { "loss": 0.1347, "grad_norm": 1.4859362840652466, "learning_rate": 5.9849928590435105e-05, "epoch": 0.3088, "step": 965 }, { "loss": 0.1151, "grad_norm": 0.7835789322853088, "learning_rate": 5.984287353156416e-05, "epoch": 0.3104, "step": 970 }, { "loss": 0.1143, "grad_norm": 0.8606787323951721, "learning_rate": 5.9835656861048205e-05, "epoch": 0.312, "step": 975 }, { "loss": 0.1171, "grad_norm": 1.2827361822128296, "learning_rate": 5.982827861796854e-05, "epoch": 0.3136, "step": 980 }, { "loss": 0.1178, "grad_norm": 0.798117458820343, "learning_rate": 5.982073884228142e-05, "epoch": 0.3152, "step": 985 }, { "loss": 0.1227, "grad_norm": 1.068694829940796, "learning_rate": 5.98130375748179e-05, "epoch": 0.3168, "step": 990 }, { "loss": 0.1122, "grad_norm": 0.7587924599647522, "learning_rate": 5.9805174857283554e-05, "epoch": 0.3184, "step": 995 }, { "loss": 0.1177, "grad_norm": 0.7272264957427979, "learning_rate": 5.9797150732258296e-05, "epoch": 0.32, "step": 1000 }, { "eval_loss": 0.11752571165561676, "eval_f1": 0.657496561210454, "eval_recall": 0.7127558628168632, "eval_accuracy": 0.9572362401642844, "eval_precision": 0.6101891609608913, "eval_classification_report": { "LOC": { "precision": 0.5187250996015936, "recall": 0.5988960441582337, "f1-score": 0.555935098206661, "support": 1087 }, "ORG": { "precision": 0.5026595744680851, "recall": 0.5578512396694215, "f1-score": 0.5288192501398993, "support": 1694 }, "PER": { "precision": 0.7437407952871871, "recall": 0.8831244535120956, "f1-score": 0.8074616922051966, "support": 3431 }, "PRD": { "precision": 0.44886363636363635, "recall": 0.542489270386266, "f1-score": 0.491255343956471, "support": 1165 }, "micro avg": { "precision": 0.6101891609608913, "recall": 0.7127558628168632, "f1-score": 0.657496561210454, "support": 7377 }, "macro avg": { "precision": 0.5534972764301255, "recall": 0.6455902519315042, "f1-score": 0.595867846127057, "support": 7377 }, "weighted avg": { "precision": 0.608656677160071, "recall": 0.7127558628168632, "f1-score": 0.6564775387221021, "support": 7377 } }, "eval_runtime": 3.6233, "eval_samples_per_second": 1130.471, "eval_steps_per_second": 8.832, "epoch": 0.32, "step": 1000 }, { "loss": 0.0926, "grad_norm": 0.8890002369880676, "learning_rate": 5.978896524319612e-05, "epoch": 0.3216, "step": 1005 }, { "loss": 0.1116, "grad_norm": 0.9991381168365479, "learning_rate": 5.9780618434424866e-05, "epoch": 0.3232, "step": 1010 }, { "loss": 0.1297, "grad_norm": 1.2445658445358276, "learning_rate": 5.9772110351145996e-05, "epoch": 0.3248, "step": 1015 }, { "loss": 0.1125, "grad_norm": 0.6929383277893066, "learning_rate": 5.976344103943435e-05, "epoch": 0.3264, "step": 1020 }, { "loss": 0.127, "grad_norm": 0.8095528483390808, "learning_rate": 5.975461054623785e-05, "epoch": 0.328, "step": 1025 }, { "loss": 0.1074, "grad_norm": 0.9184337258338928, "learning_rate": 5.9745618919377356e-05, "epoch": 0.3296, "step": 1030 }, { "loss": 0.1389, "grad_norm": 1.1083935499191284, "learning_rate": 5.973646620754626e-05, "epoch": 0.3312, "step": 1035 }, { "loss": 0.1257, "grad_norm": 0.7311134934425354, "learning_rate": 5.9727152460310326e-05, "epoch": 0.3328, "step": 1040 }, { "loss": 0.115, "grad_norm": 0.7103496193885803, "learning_rate": 5.9717677728107403e-05, "epoch": 0.3344, "step": 1045 }, { "loss": 0.1231, "grad_norm": 0.9989029765129089, "learning_rate": 5.9708042062247116e-05, "epoch": 0.336, "step": 1050 }, { "loss": 0.1232, "grad_norm": 0.9020649790763855, "learning_rate": 5.9698245514910644e-05, "epoch": 0.3376, "step": 1055 }, { "loss": 0.1202, "grad_norm": 0.8601577877998352, "learning_rate": 5.9688288139150365e-05, "epoch": 0.3392, "step": 1060 }, { "loss": 0.13, "grad_norm": 0.8116300106048584, "learning_rate": 5.9678169988889664e-05, "epoch": 0.3408, "step": 1065 }, { "loss": 0.1266, "grad_norm": 0.6542564630508423, "learning_rate": 5.966789111892253e-05, "epoch": 0.3424, "step": 1070 }, { "loss": 0.1195, "grad_norm": 1.078215479850769, "learning_rate": 5.965745158491336e-05, "epoch": 0.344, "step": 1075 }, { "loss": 0.1035, "grad_norm": 0.6880220770835876, "learning_rate": 5.9646851443396593e-05, "epoch": 0.3456, "step": 1080 }, { "loss": 0.1221, "grad_norm": 0.7223946452140808, "learning_rate": 5.963609075177644e-05, "epoch": 0.3472, "step": 1085 }, { "loss": 0.1103, "grad_norm": 0.9818997383117676, "learning_rate": 5.962516956832652e-05, "epoch": 0.3488, "step": 1090 }, { "loss": 0.1142, "grad_norm": 0.5985134243965149, "learning_rate": 5.961408795218965e-05, "epoch": 0.3504, "step": 1095 }, { "loss": 0.1284, "grad_norm": 0.953656017780304, "learning_rate": 5.960284596337739e-05, "epoch": 0.352, "step": 1100 }, { "loss": 0.1056, "grad_norm": 0.6047983169555664, "learning_rate": 5.959144366276981e-05, "epoch": 0.3536, "step": 1105 }, { "loss": 0.1119, "grad_norm": 1.577720046043396, "learning_rate": 5.9579881112115154e-05, "epoch": 0.3552, "step": 1110 }, { "loss": 0.113, "grad_norm": 0.6355241537094116, "learning_rate": 5.956815837402947e-05, "epoch": 0.3568, "step": 1115 }, { "loss": 0.0965, "grad_norm": 0.99614417552948, "learning_rate": 5.955627551199628e-05, "epoch": 0.3584, "step": 1120 }, { "loss": 0.1014, "grad_norm": 0.9973353147506714, "learning_rate": 5.954423259036625e-05, "epoch": 0.36, "step": 1125 }, { "loss": 0.1151, "grad_norm": 0.6505391597747803, "learning_rate": 5.953202967435684e-05, "epoch": 0.3616, "step": 1130 }, { "loss": 0.1111, "grad_norm": 0.6866720914840698, "learning_rate": 5.951966683005197e-05, "epoch": 0.3632, "step": 1135 }, { "loss": 0.1028, "grad_norm": 0.813484251499176, "learning_rate": 5.9507144124401586e-05, "epoch": 0.3648, "step": 1140 }, { "loss": 0.1041, "grad_norm": 0.9011476635932922, "learning_rate": 5.949446162522139e-05, "epoch": 0.3664, "step": 1145 }, { "loss": 0.1293, "grad_norm": 0.7367134094238281, "learning_rate": 5.948161940119241e-05, "epoch": 0.368, "step": 1150 }, { "loss": 0.1078, "grad_norm": 0.647478461265564, "learning_rate": 5.946861752186067e-05, "epoch": 0.3696, "step": 1155 }, { "loss": 0.1238, "grad_norm": 0.9823537468910217, "learning_rate": 5.9455456057636784e-05, "epoch": 0.3712, "step": 1160 }, { "loss": 0.1154, "grad_norm": 0.8274264931678772, "learning_rate": 5.944213507979559e-05, "epoch": 0.3728, "step": 1165 }, { "loss": 0.0965, "grad_norm": 0.6561154127120972, "learning_rate": 5.942865466047575e-05, "epoch": 0.3744, "step": 1170 }, { "loss": 0.1215, "grad_norm": 0.6931183338165283, "learning_rate": 5.941501487267939e-05, "epoch": 0.376, "step": 1175 }, { "loss": 0.1033, "grad_norm": 0.6052055358886719, "learning_rate": 5.940121579027165e-05, "epoch": 0.3776, "step": 1180 }, { "loss": 0.101, "grad_norm": 1.0075409412384033, "learning_rate": 5.9387257487980325e-05, "epoch": 0.3792, "step": 1185 }, { "loss": 0.0924, "grad_norm": 0.7277015447616577, "learning_rate": 5.937314004139549e-05, "epoch": 0.3808, "step": 1190 }, { "loss": 0.1093, "grad_norm": 0.9426122903823853, "learning_rate": 5.935886352696901e-05, "epoch": 0.3824, "step": 1195 }, { "loss": 0.0892, "grad_norm": 1.3698885440826416, "learning_rate": 5.934442802201418e-05, "epoch": 0.384, "step": 1200 }, { "eval_loss": 0.11468067765235901, "eval_f1": 0.6577290270160472, "eval_recall": 0.6583977226514843, "eval_accuracy": 0.9584663399955269, "eval_precision": 0.6570616883116883, "eval_classification_report": { "LOC": { "precision": 0.5429718875502008, "recall": 0.6218951241950322, "f1-score": 0.5797598627787308, "support": 1087 }, "ORG": { "precision": 0.5111695137976346, "recall": 0.4592680047225502, "f1-score": 0.48383084577114427, "support": 1694 }, "PER": { "precision": 0.8116755543081673, "recall": 0.8429029437481784, "f1-score": 0.8269945667715185, "support": 3431 }, "PRD": { "precision": 0.4811676082862524, "recall": 0.4386266094420601, "f1-score": 0.4589133363268972, "support": 1165 }, "micro avg": { "precision": 0.6570616883116883, "recall": 0.6583977226514843, "f1-score": 0.6577290270160472, "support": 7377 }, "macro avg": { "precision": 0.5867461409855638, "recall": 0.5906731705269552, "f1-score": 0.5873746529120727, "support": 7377 }, "weighted avg": { "precision": 0.6508812103328003, "recall": 0.6583977226514843, "f1-score": 0.6536343796923836, "support": 7377 } }, "eval_runtime": 3.683, "eval_samples_per_second": 1112.123, "eval_steps_per_second": 8.688, "epoch": 0.384, "step": 1200 }, { "loss": 0.1067, "grad_norm": 0.8485783934593201, "learning_rate": 5.93298336047053e-05, "epoch": 0.3856, "step": 1205 }, { "loss": 0.1144, "grad_norm": 1.2266898155212402, "learning_rate": 5.9315080354077244e-05, "epoch": 0.3872, "step": 1210 }, { "loss": 0.087, "grad_norm": 0.9171196818351746, "learning_rate": 5.930016835002505e-05, "epoch": 0.3888, "step": 1215 }, { "loss": 0.1226, "grad_norm": 1.2662955522537231, "learning_rate": 5.928509767330343e-05, "epoch": 0.3904, "step": 1220 }, { "loss": 0.1279, "grad_norm": 0.6647626757621765, "learning_rate": 5.9269868405526435e-05, "epoch": 0.392, "step": 1225 }, { "loss": 0.0955, "grad_norm": 0.7059332728385925, "learning_rate": 5.9254480629166903e-05, "epoch": 0.3936, "step": 1230 }, { "loss": 0.1169, "grad_norm": 0.7493918538093567, "learning_rate": 5.923893442755609e-05, "epoch": 0.3952, "step": 1235 }, { "loss": 0.1168, "grad_norm": 1.0150423049926758, "learning_rate": 5.922322988488316e-05, "epoch": 0.3968, "step": 1240 }, { "loss": 0.1055, "grad_norm": 0.6017731428146362, "learning_rate": 5.920736708619482e-05, "epoch": 0.3984, "step": 1245 }, { "loss": 0.1118, "grad_norm": 0.932864248752594, "learning_rate": 5.9191346117394724e-05, "epoch": 0.4, "step": 1250 }, { "loss": 0.1071, "grad_norm": 0.6678168773651123, "learning_rate": 5.917516706524314e-05, "epoch": 0.4016, "step": 1255 }, { "loss": 0.1207, "grad_norm": 0.5044338703155518, "learning_rate": 5.91588300173564e-05, "epoch": 0.4032, "step": 1260 }, { "loss": 0.1123, "grad_norm": 0.6335092782974243, "learning_rate": 5.9142335062206446e-05, "epoch": 0.4048, "step": 1265 }, { "loss": 0.1206, "grad_norm": 0.9701820611953735, "learning_rate": 5.9125682289120364e-05, "epoch": 0.4064, "step": 1270 }, { "loss": 0.1149, "grad_norm": 1.1878705024719238, "learning_rate": 5.9108871788279896e-05, "epoch": 0.408, "step": 1275 }, { "loss": 0.1156, "grad_norm": 0.8083585500717163, "learning_rate": 5.909190365072094e-05, "epoch": 0.4096, "step": 1280 }, { "loss": 0.1228, "grad_norm": 0.6746086478233337, "learning_rate": 5.9074777968333035e-05, "epoch": 0.4112, "step": 1285 }, { "loss": 0.1125, "grad_norm": 0.6668514013290405, "learning_rate": 5.905749483385894e-05, "epoch": 0.4128, "step": 1290 }, { "loss": 0.107, "grad_norm": 0.6310532093048096, "learning_rate": 5.9040054340894046e-05, "epoch": 0.4144, "step": 1295 }, { "loss": 0.1246, "grad_norm": 0.7736676335334778, "learning_rate": 5.9022456583885925e-05, "epoch": 0.416, "step": 1300 }, { "loss": 0.0928, "grad_norm": 0.914233922958374, "learning_rate": 5.9004701658133796e-05, "epoch": 0.4176, "step": 1305 }, { "loss": 0.1037, "grad_norm": 0.7098410129547119, "learning_rate": 5.898678965978799e-05, "epoch": 0.4192, "step": 1310 }, { "loss": 0.1316, "grad_norm": 0.9904256463050842, "learning_rate": 5.8968720685849494e-05, "epoch": 0.4208, "step": 1315 }, { "loss": 0.0997, "grad_norm": 0.5907948017120361, "learning_rate": 5.895049483416935e-05, "epoch": 0.4224, "step": 1320 }, { "loss": 0.0935, "grad_norm": 0.8248777985572815, "learning_rate": 5.893211220344818e-05, "epoch": 0.424, "step": 1325 }, { "loss": 0.1045, "grad_norm": 0.7989441752433777, "learning_rate": 5.89135728932356e-05, "epoch": 0.4256, "step": 1330 }, { "loss": 0.105, "grad_norm": 0.7386398911476135, "learning_rate": 5.889487700392975e-05, "epoch": 0.4272, "step": 1335 }, { "loss": 0.1173, "grad_norm": 0.7704159617424011, "learning_rate": 5.8876024636776685e-05, "epoch": 0.4288, "step": 1340 }, { "loss": 0.1085, "grad_norm": 0.6569073796272278, "learning_rate": 5.885701589386987e-05, "epoch": 0.4304, "step": 1345 }, { "loss": 0.1217, "grad_norm": 0.7931858897209167, "learning_rate": 5.8837850878149574e-05, "epoch": 0.432, "step": 1350 }, { "loss": 0.111, "grad_norm": 0.6820420026779175, "learning_rate": 5.88185296934024e-05, "epoch": 0.4336, "step": 1355 }, { "loss": 0.1107, "grad_norm": 0.7383225560188293, "learning_rate": 5.8799052444260635e-05, "epoch": 0.4352, "step": 1360 }, { "loss": 0.1011, "grad_norm": 0.7148371338844299, "learning_rate": 5.877941923620173e-05, "epoch": 0.4368, "step": 1365 }, { "loss": 0.1153, "grad_norm": 0.5324385762214661, "learning_rate": 5.8759630175547735e-05, "epoch": 0.4384, "step": 1370 }, { "loss": 0.1041, "grad_norm": 0.608962893486023, "learning_rate": 5.8739685369464674e-05, "epoch": 0.44, "step": 1375 }, { "loss": 0.1121, "grad_norm": 0.6661380529403687, "learning_rate": 5.871958492596202e-05, "epoch": 0.4416, "step": 1380 }, { "loss": 0.0928, "grad_norm": 0.61454176902771, "learning_rate": 5.8699328953892095e-05, "epoch": 0.4432, "step": 1385 }, { "loss": 0.1145, "grad_norm": 0.9084603786468506, "learning_rate": 5.867891756294945e-05, "epoch": 0.4448, "step": 1390 }, { "loss": 0.1423, "grad_norm": 1.144399881362915, "learning_rate": 5.865835086367031e-05, "epoch": 0.4464, "step": 1395 }, { "loss": 0.1203, "grad_norm": 1.0306140184402466, "learning_rate": 5.863762896743197e-05, "epoch": 0.448, "step": 1400 }, { "eval_loss": 0.1112460047006607, "eval_f1": 0.6803706203595754, "eval_recall": 0.7515250101667345, "eval_accuracy": 0.9585375027956814, "eval_precision": 0.62152466367713, "eval_classification_report": { "LOC": { "precision": 0.5157967032967034, "recall": 0.6908923643054278, "f1-score": 0.5906409752261109, "support": 1087 }, "ORG": { "precision": 0.5248015873015873, "recall": 0.6245572609208973, "f1-score": 0.5703504043126684, "support": 1694 }, "PER": { "precision": 0.7939685081398452, "recall": 0.8670941416496648, "f1-score": 0.828921705210365, "support": 3431 }, "PRD": { "precision": 0.4467960023515579, "recall": 0.6523605150214592, "f1-score": 0.5303558967201675, "support": 1165 }, "micro avg": { "precision": 0.62152466367713, "recall": 0.7515250101667345, "f1-score": 0.6803706203595754, "support": 7377 }, "macro avg": { "precision": 0.5703407002724235, "recall": 0.7087260704743622, "f1-score": 0.6300672453673279, "support": 7377 }, "weighted avg": { "precision": 0.6363437982296027, "recall": 0.7515250101667345, "f1-score": 0.6872841690703809, "support": 7377 } }, "eval_runtime": 3.812, "eval_samples_per_second": 1074.514, "eval_steps_per_second": 8.395, "epoch": 0.448, "step": 1400 }, { "loss": 0.0945, "grad_norm": 0.6809028387069702, "learning_rate": 5.8616751986452135e-05, "epoch": 0.4496, "step": 1405 }, { "loss": 0.0834, "grad_norm": 0.6054275631904602, "learning_rate": 5.8595720033788415e-05, "epoch": 0.4512, "step": 1410 }, { "loss": 0.1081, "grad_norm": 0.8710251450538635, "learning_rate": 5.857453322333763e-05, "epoch": 0.4528, "step": 1415 }, { "loss": 0.1049, "grad_norm": 0.586397111415863, "learning_rate": 5.8553191669835216e-05, "epoch": 0.4544, "step": 1420 }, { "loss": 0.1132, "grad_norm": 0.8299476504325867, "learning_rate": 5.853169548885462e-05, "epoch": 0.456, "step": 1425 }, { "loss": 0.1112, "grad_norm": 1.1174827814102173, "learning_rate": 5.8510044796806646e-05, "epoch": 0.4576, "step": 1430 }, { "loss": 0.0898, "grad_norm": 0.7946940064430237, "learning_rate": 5.848823971093885e-05, "epoch": 0.4592, "step": 1435 }, { "loss": 0.0875, "grad_norm": 0.5518187880516052, "learning_rate": 5.8466280349334905e-05, "epoch": 0.4608, "step": 1440 }, { "loss": 0.1128, "grad_norm": 0.5996854901313782, "learning_rate": 5.844416683091393e-05, "epoch": 0.4624, "step": 1445 }, { "loss": 0.0954, "grad_norm": 0.6962219476699829, "learning_rate": 5.842189927542988e-05, "epoch": 0.464, "step": 1450 }, { "loss": 0.1285, "grad_norm": 1.5462192296981812, "learning_rate": 5.83994778034709e-05, "epoch": 0.4656, "step": 1455 }, { "loss": 0.1079, "grad_norm": 0.8902407288551331, "learning_rate": 5.837690253645862e-05, "epoch": 0.4672, "step": 1460 }, { "loss": 0.1083, "grad_norm": 0.6925358772277832, "learning_rate": 5.835417359664756e-05, "epoch": 0.4688, "step": 1465 }, { "loss": 0.1096, "grad_norm": 0.541042149066925, "learning_rate": 5.833129110712444e-05, "epoch": 0.4704, "step": 1470 }, { "loss": 0.1089, "grad_norm": 1.4327906370162964, "learning_rate": 5.8308255191807517e-05, "epoch": 0.472, "step": 1475 }, { "loss": 0.1066, "grad_norm": 0.6732336282730103, "learning_rate": 5.8285065975445906e-05, "epoch": 0.4736, "step": 1480 }, { "loss": 0.1076, "grad_norm": 0.5755553841590881, "learning_rate": 5.826172358361891e-05, "epoch": 0.4752, "step": 1485 }, { "loss": 0.1104, "grad_norm": 0.9912682175636292, "learning_rate": 5.8238228142735356e-05, "epoch": 0.4768, "step": 1490 }, { "loss": 0.1076, "grad_norm": 0.6135424375534058, "learning_rate": 5.8214579780032884e-05, "epoch": 0.4784, "step": 1495 }, { "loss": 0.0871, "grad_norm": 0.49041664600372314, "learning_rate": 5.819077862357726e-05, "epoch": 0.48, "step": 1500 }, { "loss": 0.1087, "grad_norm": 0.8772937655448914, "learning_rate": 5.816682480226171e-05, "epoch": 0.4816, "step": 1505 }, { "loss": 0.1096, "grad_norm": 1.8205904960632324, "learning_rate": 5.814271844580621e-05, "epoch": 0.4832, "step": 1510 }, { "loss": 0.0921, "grad_norm": 0.7511752247810364, "learning_rate": 5.811845968475675e-05, "epoch": 0.4848, "step": 1515 }, { "loss": 0.1165, "grad_norm": 1.1483489274978638, "learning_rate": 5.809404865048468e-05, "epoch": 0.4864, "step": 1520 }, { "loss": 0.1163, "grad_norm": 0.6257110834121704, "learning_rate": 5.8069485475185954e-05, "epoch": 0.488, "step": 1525 }, { "loss": 0.1206, "grad_norm": 0.965168297290802, "learning_rate": 5.804477029188044e-05, "epoch": 0.4896, "step": 1530 }, { "loss": 0.1156, "grad_norm": 0.9124221205711365, "learning_rate": 5.80199032344112e-05, "epoch": 0.4912, "step": 1535 }, { "loss": 0.1081, "grad_norm": 0.7719221711158752, "learning_rate": 5.799488443744377e-05, "epoch": 0.4928, "step": 1540 }, { "loss": 0.1084, "grad_norm": 0.5909450650215149, "learning_rate": 5.796971403646537e-05, "epoch": 0.4944, "step": 1545 }, { "loss": 0.1064, "grad_norm": 0.7535513639450073, "learning_rate": 5.794439216778428e-05, "epoch": 0.496, "step": 1550 }, { "loss": 0.1017, "grad_norm": 0.9198781251907349, "learning_rate": 5.791891896852899e-05, "epoch": 0.4976, "step": 1555 }, { "loss": 0.1068, "grad_norm": 0.7925691604614258, "learning_rate": 5.789329457664755e-05, "epoch": 0.4992, "step": 1560 }, { "loss": 0.1124, "grad_norm": 0.9029912948608398, "learning_rate": 5.786751913090675e-05, "epoch": 0.5008, "step": 1565 }, { "loss": 0.1161, "grad_norm": 0.9168642163276672, "learning_rate": 5.784159277089141e-05, "epoch": 0.5024, "step": 1570 }, { "loss": 0.0946, "grad_norm": 0.7418901920318604, "learning_rate": 5.781551563700363e-05, "epoch": 0.504, "step": 1575 }, { "loss": 0.1127, "grad_norm": 0.7722365856170654, "learning_rate": 5.7789287870461984e-05, "epoch": 0.5056, "step": 1580 }, { "loss": 0.1071, "grad_norm": 0.7764033079147339, "learning_rate": 5.776290961330081e-05, "epoch": 0.5072, "step": 1585 }, { "loss": 0.0835, "grad_norm": 0.719040036201477, "learning_rate": 5.7736381008369404e-05, "epoch": 0.5088, "step": 1590 }, { "loss": 0.091, "grad_norm": 1.0980381965637207, "learning_rate": 5.770970219933125e-05, "epoch": 0.5104, "step": 1595 }, { "loss": 0.1056, "grad_norm": 0.7829711437225342, "learning_rate": 5.7682873330663274e-05, "epoch": 0.512, "step": 1600 }, { "eval_loss": 0.10453741252422333, "eval_f1": 0.6926321339132726, "eval_recall": 0.7123491934390673, "eval_accuracy": 0.9623294634324868, "eval_precision": 0.6739771707066821, "eval_classification_report": { "LOC": { "precision": 0.592433361994841, "recall": 0.6338546458141674, "f1-score": 0.6124444444444445, "support": 1087 }, "ORG": { "precision": 0.5840543881334982, "recall": 0.5578512396694215, "f1-score": 0.5706521739130435, "support": 1694 }, "PER": { "precision": 0.769249692496925, "recall": 0.9113960944331099, "f1-score": 0.8343116328708644, "support": 3431 }, "PRD": { "precision": 0.519453207150368, "recall": 0.4240343347639485, "f1-score": 0.4669187145557656, "support": 1165 }, "micro avg": { "precision": 0.6739771707066821, "recall": 0.7123491934390673, "f1-score": 0.6926321339132726, "support": 7377 }, "macro avg": { "precision": 0.616297662443908, "recall": 0.6317840786701617, "f1-score": 0.6210817414460295, "support": 7377 }, "weighted avg": { "precision": 0.6612202628810717, "recall": 0.7123491934390673, "f1-score": 0.6830548201921119, "support": 7377 } }, "eval_runtime": 5.9981, "eval_samples_per_second": 682.881, "eval_steps_per_second": 5.335, "epoch": 0.512, "step": 1600 }, { "loss": 0.1161, "grad_norm": 0.5395638942718506, "learning_rate": 5.765589454765502e-05, "epoch": 0.5136, "step": 1605 }, { "loss": 0.0786, "grad_norm": 0.6352102756500244, "learning_rate": 5.762876599640789e-05, "epoch": 0.5152, "step": 1610 }, { "loss": 0.0962, "grad_norm": 0.5065721273422241, "learning_rate": 5.760148782383432e-05, "epoch": 0.5168, "step": 1615 }, { "loss": 0.11, "grad_norm": 0.9539219737052917, "learning_rate": 5.757406017765704e-05, "epoch": 0.5184, "step": 1620 }, { "loss": 0.119, "grad_norm": 0.8298090696334839, "learning_rate": 5.754648320640823e-05, "epoch": 0.52, "step": 1625 }, { "loss": 0.0935, "grad_norm": 0.6447076201438904, "learning_rate": 5.7518757059428724e-05, "epoch": 0.5216, "step": 1630 }, { "loss": 0.1017, "grad_norm": 0.8737111687660217, "learning_rate": 5.7490881886867196e-05, "epoch": 0.5232, "step": 1635 }, { "loss": 0.1004, "grad_norm": 0.8524758815765381, "learning_rate": 5.746285783967938e-05, "epoch": 0.5248, "step": 1640 }, { "loss": 0.105, "grad_norm": 0.7059099078178406, "learning_rate": 5.743468506962722e-05, "epoch": 0.5264, "step": 1645 }, { "loss": 0.0888, "grad_norm": 0.7066783905029297, "learning_rate": 5.740636372927803e-05, "epoch": 0.528, "step": 1650 }, { "loss": 0.1006, "grad_norm": 0.9814224243164062, "learning_rate": 5.737789397200374e-05, "epoch": 0.5296, "step": 1655 }, { "loss": 0.0962, "grad_norm": 0.7629753351211548, "learning_rate": 5.7349275951979985e-05, "epoch": 0.5312, "step": 1660 }, { "loss": 0.1063, "grad_norm": 0.5984035134315491, "learning_rate": 5.732050982418532e-05, "epoch": 0.5328, "step": 1665 }, { "loss": 0.1125, "grad_norm": 0.9269580245018005, "learning_rate": 5.7291595744400355e-05, "epoch": 0.5344, "step": 1670 }, { "loss": 0.0846, "grad_norm": 0.5846156477928162, "learning_rate": 5.726253386920694e-05, "epoch": 0.536, "step": 1675 }, { "loss": 0.096, "grad_norm": 0.7241097092628479, "learning_rate": 5.7233324355987265e-05, "epoch": 0.5376, "step": 1680 }, { "loss": 0.0961, "grad_norm": 0.8890935182571411, "learning_rate": 5.720396736292309e-05, "epoch": 0.5392, "step": 1685 }, { "loss": 0.108, "grad_norm": 0.7181938290596008, "learning_rate": 5.71744630489948e-05, "epoch": 0.5408, "step": 1690 }, { "loss": 0.11, "grad_norm": 0.5960686802864075, "learning_rate": 5.714481157398059e-05, "epoch": 0.5424, "step": 1695 }, { "loss": 0.1294, "grad_norm": 0.8022229075431824, "learning_rate": 5.711501309845563e-05, "epoch": 0.544, "step": 1700 }, { "loss": 0.0969, "grad_norm": 0.575072169303894, "learning_rate": 5.70850677837911e-05, "epoch": 0.5456, "step": 1705 }, { "loss": 0.0717, "grad_norm": 0.40081197023391724, "learning_rate": 5.7054975792153425e-05, "epoch": 0.5472, "step": 1710 }, { "loss": 0.1063, "grad_norm": 0.5622066259384155, "learning_rate": 5.702473728650332e-05, "epoch": 0.5488, "step": 1715 }, { "loss": 0.0983, "grad_norm": 1.041090965270996, "learning_rate": 5.699435243059495e-05, "epoch": 0.5504, "step": 1720 }, { "loss": 0.0963, "grad_norm": 0.625645637512207, "learning_rate": 5.696382138897502e-05, "epoch": 0.552, "step": 1725 }, { "loss": 0.1024, "grad_norm": 0.7116280794143677, "learning_rate": 5.693314432698189e-05, "epoch": 0.5536, "step": 1730 }, { "loss": 0.1133, "grad_norm": 1.3275892734527588, "learning_rate": 5.690232141074469e-05, "epoch": 0.5552, "step": 1735 }, { "loss": 0.0945, "grad_norm": 0.586405336856842, "learning_rate": 5.687135280718239e-05, "epoch": 0.5568, "step": 1740 }, { "loss": 0.1309, "grad_norm": 0.7126042246818542, "learning_rate": 5.684023868400296e-05, "epoch": 0.5584, "step": 1745 }, { "loss": 0.0976, "grad_norm": 0.656852662563324, "learning_rate": 5.680897920970237e-05, "epoch": 0.56, "step": 1750 }, { "loss": 0.0886, "grad_norm": 0.5556890368461609, "learning_rate": 5.677757455356378e-05, "epoch": 0.5616, "step": 1755 }, { "loss": 0.1045, "grad_norm": 0.6136338710784912, "learning_rate": 5.6746024885656505e-05, "epoch": 0.5632, "step": 1760 }, { "loss": 0.0893, "grad_norm": 0.7007787227630615, "learning_rate": 5.671433037683521e-05, "epoch": 0.5648, "step": 1765 }, { "loss": 0.1045, "grad_norm": 0.6161232590675354, "learning_rate": 5.6682491198738934e-05, "epoch": 0.5664, "step": 1770 }, { "loss": 0.101, "grad_norm": 0.5233799815177917, "learning_rate": 5.665050752379013e-05, "epoch": 0.568, "step": 1775 }, { "loss": 0.0831, "grad_norm": 0.5158640146255493, "learning_rate": 5.661837952519379e-05, "epoch": 0.5696, "step": 1780 }, { "loss": 0.0919, "grad_norm": 0.6936194896697998, "learning_rate": 5.658610737693645e-05, "epoch": 0.5712, "step": 1785 }, { "loss": 0.0983, "grad_norm": 1.045814871788025, "learning_rate": 5.6553691253785304e-05, "epoch": 0.5728, "step": 1790 }, { "loss": 0.1087, "grad_norm": 0.9404318332672119, "learning_rate": 5.652113133128723e-05, "epoch": 0.5744, "step": 1795 }, { "loss": 0.0894, "grad_norm": 0.772411048412323, "learning_rate": 5.648842778576782e-05, "epoch": 0.576, "step": 1800 }, { "eval_loss": 0.10257084667682648, "eval_f1": 0.7074263360464383, "eval_recall": 0.7599295106411821, "eval_accuracy": 0.9627259418904907, "eval_precision": 0.6617091595845137, "eval_classification_report": { "LOC": { "precision": 0.5838011226944667, "recall": 0.6697332106715731, "f1-score": 0.62382176520994, "support": 1087 }, "ORG": { "precision": 0.5784581738655002, "recall": 0.6245572609208973, "f1-score": 0.6006244677831393, "support": 1694 }, "PER": { "precision": 0.800780234070221, "recall": 0.897406004080443, "f1-score": 0.8463441451346894, "support": 3431 }, "PRD": { "precision": 0.47775628626692457, "recall": 0.6360515021459228, "f1-score": 0.5456553755522827, "support": 1165 }, "micro avg": { "precision": 0.6617091595845137, "recall": 0.7599295106411821, "f1-score": 0.7074263360464383, "support": 7377 }, "macro avg": { "precision": 0.6101989542242782, "recall": 0.706936994454709, "f1-score": 0.6541114384200128, "support": 7377 }, "weighted avg": { "precision": 0.6667429881378526, "recall": 0.7599295106411821, "f1-score": 0.7096444871469935, "support": 7377 } }, "eval_runtime": 3.6865, "eval_samples_per_second": 1111.095, "eval_steps_per_second": 8.68, "epoch": 0.576, "step": 1800 }, { "loss": 0.1092, "grad_norm": 0.7905903458595276, "learning_rate": 5.645558079433045e-05, "epoch": 0.5776, "step": 1805 }, { "loss": 0.0955, "grad_norm": 0.3658745288848877, "learning_rate": 5.6422590534855326e-05, "epoch": 0.5792, "step": 1810 }, { "loss": 0.0974, "grad_norm": 0.6396922469139099, "learning_rate": 5.63894571859985e-05, "epoch": 0.5808, "step": 1815 }, { "loss": 0.1054, "grad_norm": 0.749363124370575, "learning_rate": 5.6356180927190934e-05, "epoch": 0.5824, "step": 1820 }, { "loss": 0.0974, "grad_norm": 0.7177557349205017, "learning_rate": 5.632276193863746e-05, "epoch": 0.584, "step": 1825 }, { "loss": 0.0907, "grad_norm": 0.42948251962661743, "learning_rate": 5.628920040131591e-05, "epoch": 0.5856, "step": 1830 }, { "loss": 0.0963, "grad_norm": 0.6956673264503479, "learning_rate": 5.625549649697605e-05, "epoch": 0.5872, "step": 1835 }, { "loss": 0.0965, "grad_norm": 0.7138074636459351, "learning_rate": 5.622165040813859e-05, "epoch": 0.5888, "step": 1840 }, { "loss": 0.0986, "grad_norm": 0.6690980195999146, "learning_rate": 5.61876623180943e-05, "epoch": 0.5904, "step": 1845 }, { "loss": 0.0962, "grad_norm": 0.6943672299385071, "learning_rate": 5.615353241090288e-05, "epoch": 0.592, "step": 1850 }, { "loss": 0.096, "grad_norm": 0.6613456606864929, "learning_rate": 5.6119260871392064e-05, "epoch": 0.5936, "step": 1855 }, { "loss": 0.0862, "grad_norm": 0.47192469239234924, "learning_rate": 5.6084847885156576e-05, "epoch": 0.5952, "step": 1860 }, { "loss": 0.0945, "grad_norm": 0.5989574193954468, "learning_rate": 5.605029363855714e-05, "epoch": 0.5968, "step": 1865 }, { "loss": 0.1105, "grad_norm": 0.8669204711914062, "learning_rate": 5.6015598318719444e-05, "epoch": 0.5984, "step": 1870 }, { "loss": 0.0959, "grad_norm": 0.5692254304885864, "learning_rate": 5.598076211353317e-05, "epoch": 0.6, "step": 1875 }, { "loss": 0.0978, "grad_norm": 0.6159011125564575, "learning_rate": 5.5945785211650936e-05, "epoch": 0.6016, "step": 1880 }, { "loss": 0.1147, "grad_norm": 0.6536481976509094, "learning_rate": 5.59106678024873e-05, "epoch": 0.6032, "step": 1885 }, { "loss": 0.1125, "grad_norm": 0.7205601334571838, "learning_rate": 5.587541007621773e-05, "epoch": 0.6048, "step": 1890 }, { "loss": 0.0925, "grad_norm": 0.6726199984550476, "learning_rate": 5.584001222377756e-05, "epoch": 0.6064, "step": 1895 }, { "loss": 0.1393, "grad_norm": 0.48568856716156006, "learning_rate": 5.5804474436860976e-05, "epoch": 0.608, "step": 1900 }, { "loss": 0.1231, "grad_norm": 0.7120784521102905, "learning_rate": 5.576879690791994e-05, "epoch": 0.6096, "step": 1905 }, { "loss": 0.107, "grad_norm": 0.552306056022644, "learning_rate": 5.5732979830163215e-05, "epoch": 0.6112, "step": 1910 }, { "loss": 0.1015, "grad_norm": 0.6177356839179993, "learning_rate": 5.569702339755526e-05, "epoch": 0.6128, "step": 1915 }, { "loss": 0.0834, "grad_norm": 0.691417396068573, "learning_rate": 5.566092780481521e-05, "epoch": 0.6144, "step": 1920 }, { "loss": 0.0922, "grad_norm": 0.5560222864151001, "learning_rate": 5.562469324741577e-05, "epoch": 0.616, "step": 1925 }, { "loss": 0.081, "grad_norm": 0.5803418755531311, "learning_rate": 5.558831992158226e-05, "epoch": 0.6176, "step": 1930 }, { "loss": 0.09, "grad_norm": 0.8699193596839905, "learning_rate": 5.5551808024291435e-05, "epoch": 0.6192, "step": 1935 }, { "loss": 0.0833, "grad_norm": 0.805458128452301, "learning_rate": 5.55151577532705e-05, "epoch": 0.6208, "step": 1940 }, { "loss": 0.1205, "grad_norm": 0.8025895953178406, "learning_rate": 5.547836930699601e-05, "epoch": 0.6224, "step": 1945 }, { "loss": 0.1003, "grad_norm": 0.9410426616668701, "learning_rate": 5.544144288469278e-05, "epoch": 0.624, "step": 1950 }, { "loss": 0.1003, "grad_norm": 0.9818751811981201, "learning_rate": 5.540437868633286e-05, "epoch": 0.6256, "step": 1955 }, { "loss": 0.0921, "grad_norm": 0.8371868133544922, "learning_rate": 5.536717691263436e-05, "epoch": 0.6272, "step": 1960 }, { "loss": 0.0891, "grad_norm": 0.6195716261863708, "learning_rate": 5.532983776506046e-05, "epoch": 0.6288, "step": 1965 }, { "loss": 0.1085, "grad_norm": 0.5943077802658081, "learning_rate": 5.529236144581827e-05, "epoch": 0.6304, "step": 1970 }, { "loss": 0.1081, "grad_norm": 0.9380521774291992, "learning_rate": 5.525474815785773e-05, "epoch": 0.632, "step": 1975 }, { "loss": 0.0929, "grad_norm": 1.0477690696716309, "learning_rate": 5.521699810487054e-05, "epoch": 0.6336, "step": 1980 }, { "loss": 0.0998, "grad_norm": 1.3080973625183105, "learning_rate": 5.5179111491289015e-05, "epoch": 0.6352, "step": 1985 }, { "loss": 0.098, "grad_norm": 0.735542356967926, "learning_rate": 5.514108852228503e-05, "epoch": 0.6368, "step": 1990 }, { "loss": 0.095, "grad_norm": 0.8670830726623535, "learning_rate": 5.510292940376886e-05, "epoch": 0.6384, "step": 1995 }, { "loss": 0.0906, "grad_norm": 0.7203187346458435, "learning_rate": 5.50646343423881e-05, "epoch": 0.64, "step": 2000 }, { "eval_loss": 0.09931854158639908, "eval_f1": 0.7046049188906333, "eval_recall": 0.7301070896028196, "eval_accuracy": 0.9636917227497306, "eval_precision": 0.6808241688787764, "eval_classification_report": { "LOC": { "precision": 0.6022727272727273, "recall": 0.6826126954921803, "f1-score": 0.6399310047434239, "support": 1087 }, "ORG": { "precision": 0.5795180722891566, "recall": 0.5678866587957497, "f1-score": 0.5736434108527133, "support": 1694 }, "PER": { "precision": 0.8105797488645472, "recall": 0.8842902943748179, "f1-score": 0.8458321717312518, "support": 3431 }, "PRD": { "precision": 0.5078369905956113, "recall": 0.5562231759656653, "f1-score": 0.5309299467431381, "support": 1165 }, "micro avg": { "precision": 0.6808241688787764, "recall": 0.7301070896028196, "f1-score": 0.7046049188906333, "support": 7377 }, "macro avg": { "precision": 0.6250518847555107, "recall": 0.6727532061571033, "f1-score": 0.6475841335176318, "support": 7377 }, "weighted avg": { "precision": 0.6790163049208939, "recall": 0.7301070896028196, "f1-score": 0.7032588463204933, "support": 7377 } }, "eval_runtime": 3.8188, "eval_samples_per_second": 1072.585, "eval_steps_per_second": 8.38, "epoch": 0.64, "step": 2000 }, { "loss": 0.0884, "grad_norm": 0.7014639973640442, "learning_rate": 5.502620354552653e-05, "epoch": 0.6416, "step": 2005 }, { "loss": 0.0861, "grad_norm": 0.7875646352767944, "learning_rate": 5.4987637221302993e-05, "epoch": 0.6432, "step": 2010 }, { "loss": 0.0977, "grad_norm": 0.5600938200950623, "learning_rate": 5.494893557857026e-05, "epoch": 0.6448, "step": 2015 }, { "loss": 0.1035, "grad_norm": 0.722987174987793, "learning_rate": 5.491009882691393e-05, "epoch": 0.6464, "step": 2020 }, { "loss": 0.1214, "grad_norm": 0.7116439342498779, "learning_rate": 5.487112717665125e-05, "epoch": 0.648, "step": 2025 }, { "loss": 0.1119, "grad_norm": 0.8725836277008057, "learning_rate": 5.4832020838830024e-05, "epoch": 0.6496, "step": 2030 }, { "loss": 0.0891, "grad_norm": 0.5460137724876404, "learning_rate": 5.479278002522742e-05, "epoch": 0.6512, "step": 2035 }, { "loss": 0.0903, "grad_norm": 0.6276223659515381, "learning_rate": 5.4753404948348856e-05, "epoch": 0.6528, "step": 2040 }, { "loss": 0.1119, "grad_norm": 1.2063217163085938, "learning_rate": 5.471389582142686e-05, "epoch": 0.6544, "step": 2045 }, { "loss": 0.0994, "grad_norm": 0.49461960792541504, "learning_rate": 5.467425285841988e-05, "epoch": 0.656, "step": 2050 }, { "loss": 0.0901, "grad_norm": 0.7451221346855164, "learning_rate": 5.463447627401113e-05, "epoch": 0.6576, "step": 2055 }, { "loss": 0.0896, "grad_norm": 0.7661594748497009, "learning_rate": 5.459456628360747e-05, "epoch": 0.6592, "step": 2060 }, { "loss": 0.0859, "grad_norm": 0.9997418522834778, "learning_rate": 5.4554523103338186e-05, "epoch": 0.6608, "step": 2065 }, { "loss": 0.1032, "grad_norm": 0.9749106168746948, "learning_rate": 5.451434695005386e-05, "epoch": 0.6624, "step": 2070 }, { "loss": 0.0972, "grad_norm": 0.575941801071167, "learning_rate": 5.447403804132517e-05, "epoch": 0.664, "step": 2075 }, { "loss": 0.1023, "grad_norm": 0.6454254388809204, "learning_rate": 5.4433596595441715e-05, "epoch": 0.6656, "step": 2080 }, { "loss": 0.1107, "grad_norm": 0.43858006596565247, "learning_rate": 5.439302283141083e-05, "epoch": 0.6672, "step": 2085 }, { "loss": 0.087, "grad_norm": 0.6018679738044739, "learning_rate": 5.435231696895645e-05, "epoch": 0.6688, "step": 2090 }, { "loss": 0.1104, "grad_norm": 0.6940935850143433, "learning_rate": 5.431147922851784e-05, "epoch": 0.6704, "step": 2095 }, { "loss": 0.1089, "grad_norm": 0.6142142415046692, "learning_rate": 5.4270509831248434e-05, "epoch": 0.672, "step": 2100 }, { "loss": 0.0891, "grad_norm": 0.7066136598587036, "learning_rate": 5.422940899901467e-05, "epoch": 0.6736, "step": 2105 }, { "loss": 0.1133, "grad_norm": 0.5308381915092468, "learning_rate": 5.4188176954394744e-05, "epoch": 0.6752, "step": 2110 }, { "loss": 0.1193, "grad_norm": 0.7384476661682129, "learning_rate": 5.414681392067743e-05, "epoch": 0.6768, "step": 2115 }, { "loss": 0.0984, "grad_norm": 0.7968347072601318, "learning_rate": 5.410532012186087e-05, "epoch": 0.6784, "step": 2120 }, { "loss": 0.092, "grad_norm": 0.4553911089897156, "learning_rate": 5.406369578265132e-05, "epoch": 0.68, "step": 2125 }, { "loss": 0.1015, "grad_norm": 0.5899618864059448, "learning_rate": 5.402194112846201e-05, "epoch": 0.6816, "step": 2130 }, { "loss": 0.1058, "grad_norm": 3.818732500076294, "learning_rate": 5.398005638541186e-05, "epoch": 0.6832, "step": 2135 }, { "loss": 0.0974, "grad_norm": 0.7274853587150574, "learning_rate": 5.393804178032426e-05, "epoch": 0.6848, "step": 2140 }, { "loss": 0.0951, "grad_norm": 0.5004416108131409, "learning_rate": 5.3895897540725896e-05, "epoch": 0.6864, "step": 2145 }, { "loss": 0.1065, "grad_norm": 0.8699393272399902, "learning_rate": 5.3853623894845435e-05, "epoch": 0.688, "step": 2150 }, { "loss": 0.0946, "grad_norm": 0.6873959302902222, "learning_rate": 5.381122107161238e-05, "epoch": 0.6896, "step": 2155 }, { "loss": 0.1027, "grad_norm": 0.35972872376441956, "learning_rate": 5.376868930065573e-05, "epoch": 0.6912, "step": 2160 }, { "loss": 0.0948, "grad_norm": 0.5679711699485779, "learning_rate": 5.372602881230283e-05, "epoch": 0.6928, "step": 2165 }, { "loss": 0.0981, "grad_norm": 0.5284885764122009, "learning_rate": 5.368323983757808e-05, "epoch": 0.6944, "step": 2170 }, { "loss": 0.0839, "grad_norm": 0.6792832612991333, "learning_rate": 5.364032260820166e-05, "epoch": 0.696, "step": 2175 }, { "loss": 0.0983, "grad_norm": 0.6458384394645691, "learning_rate": 5.359727735658835e-05, "epoch": 0.6976, "step": 2180 }, { "loss": 0.082, "grad_norm": 1.0629314184188843, "learning_rate": 5.3554104315846166e-05, "epoch": 0.6992, "step": 2185 }, { "loss": 0.1091, "grad_norm": 1.215697169303894, "learning_rate": 5.35108037197752e-05, "epoch": 0.7008, "step": 2190 }, { "loss": 0.1025, "grad_norm": 0.6554715633392334, "learning_rate": 5.346737580286629e-05, "epoch": 0.7024, "step": 2195 }, { "loss": 0.0837, "grad_norm": 0.663650393486023, "learning_rate": 5.3423820800299785e-05, "epoch": 0.704, "step": 2200 }, { "eval_loss": 0.09860281646251678, "eval_f1": 0.7022683084899546, "eval_recall": 0.7344448962993086, "eval_accuracy": 0.9638035500071163, "eval_precision": 0.6727927480442071, "eval_classification_report": { "LOC": { "precision": 0.5957095709570958, "recall": 0.6642134314627415, "f1-score": 0.628099173553719, "support": 1087 }, "ORG": { "precision": 0.5663481953290871, "recall": 0.6298701298701299, "f1-score": 0.5964225824482953, "support": 1694 }, "PER": { "precision": 0.8130193905817175, "recall": 0.8554357330224425, "f1-score": 0.833688396534583, "support": 3431 }, "PRD": { "precision": 0.5152190051967335, "recall": 0.5957081545064378, "f1-score": 0.5525477707006369, "support": 1165 }, "micro avg": { "precision": 0.6727927480442071, "recall": 0.7344448962993086, "f1-score": 0.7022683084899546, "support": 7377 }, "macro avg": { "precision": 0.6225740405161584, "recall": 0.686306862215438, "f1-score": 0.6526894808093086, "support": 7377 }, "weighted avg": { "precision": 0.6773254462054905, "recall": 0.7344448962993086, "f1-score": 0.7045122268803987, "support": 7377 } }, "eval_runtime": 3.6823, "eval_samples_per_second": 1112.358, "eval_steps_per_second": 8.69, "epoch": 0.704, "step": 2200 }, { "loss": 0.0903, "grad_norm": 0.5369718074798584, "learning_rate": 5.3380138947944236e-05, "epoch": 0.7056, "step": 2205 }, { "loss": 0.0738, "grad_norm": 0.43987390398979187, "learning_rate": 5.3336330482355145e-05, "epoch": 0.7072, "step": 2210 }, { "loss": 0.0907, "grad_norm": 1.1239206790924072, "learning_rate": 5.329239564077367e-05, "epoch": 0.7088, "step": 2215 }, { "loss": 0.0815, "grad_norm": 0.9082643985748291, "learning_rate": 5.32483346611254e-05, "epoch": 0.7104, "step": 2220 }, { "loss": 0.0918, "grad_norm": 0.5776572823524475, "learning_rate": 5.320414778201892e-05, "epoch": 0.712, "step": 2225 }, { "loss": 0.0969, "grad_norm": 0.9061143398284912, "learning_rate": 5.315983524274471e-05, "epoch": 0.7136, "step": 2230 }, { "loss": 0.1108, "grad_norm": 0.5887501835823059, "learning_rate": 5.3115397283273684e-05, "epoch": 0.7152, "step": 2235 }, { "loss": 0.1029, "grad_norm": 0.5370180606842041, "learning_rate": 5.3070834144255986e-05, "epoch": 0.7168, "step": 2240 }, { "loss": 0.0911, "grad_norm": 0.542801022529602, "learning_rate": 5.302614606701967e-05, "epoch": 0.7184, "step": 2245 }, { "loss": 0.1043, "grad_norm": 1.0472865104675293, "learning_rate": 5.2981333293569345e-05, "epoch": 0.72, "step": 2250 }, { "loss": 0.0789, "grad_norm": 0.4142518937587738, "learning_rate": 5.2936396066584954e-05, "epoch": 0.7216, "step": 2255 }, { "loss": 0.1072, "grad_norm": 0.7248318195343018, "learning_rate": 5.289133462942036e-05, "epoch": 0.7232, "step": 2260 }, { "loss": 0.1076, "grad_norm": 1.2336457967758179, "learning_rate": 5.284614922610211e-05, "epoch": 0.7248, "step": 2265 }, { "loss": 0.0997, "grad_norm": 0.61026930809021, "learning_rate": 5.280084010132806e-05, "epoch": 0.7264, "step": 2270 }, { "loss": 0.091, "grad_norm": 0.8275012969970703, "learning_rate": 5.2755407500466075e-05, "epoch": 0.728, "step": 2275 }, { "loss": 0.0757, "grad_norm": 0.5474427342414856, "learning_rate": 5.27098516695527e-05, "epoch": 0.7296, "step": 2280 }, { "loss": 0.1077, "grad_norm": 0.6705024838447571, "learning_rate": 5.2664172855291816e-05, "epoch": 0.7312, "step": 2285 }, { "loss": 0.1038, "grad_norm": 0.6384997367858887, "learning_rate": 5.261837130505331e-05, "epoch": 0.7328, "step": 2290 }, { "loss": 0.091, "grad_norm": 0.40548214316368103, "learning_rate": 5.257244726687174e-05, "epoch": 0.7344, "step": 2295 }, { "loss": 0.1128, "grad_norm": 0.7016564011573792, "learning_rate": 5.252640098944499e-05, "epoch": 0.736, "step": 2300 }, { "loss": 0.1071, "grad_norm": 0.6650475263595581, "learning_rate": 5.248023272213289e-05, "epoch": 0.7376, "step": 2305 }, { "loss": 0.1048, "grad_norm": 0.5852968692779541, "learning_rate": 5.243394271495597e-05, "epoch": 0.7392, "step": 2310 }, { "loss": 0.1211, "grad_norm": 0.7575483322143555, "learning_rate": 5.238753121859394e-05, "epoch": 0.7408, "step": 2315 }, { "loss": 0.1046, "grad_norm": 0.7423969507217407, "learning_rate": 5.234099848438449e-05, "epoch": 0.7424, "step": 2320 }, { "loss": 0.0764, "grad_norm": 0.557420015335083, "learning_rate": 5.229434476432183e-05, "epoch": 0.744, "step": 2325 }, { "loss": 0.1037, "grad_norm": 0.822180986404419, "learning_rate": 5.2247570311055397e-05, "epoch": 0.7456, "step": 2330 }, { "loss": 0.0918, "grad_norm": 0.7512887120246887, "learning_rate": 5.2200675377888396e-05, "epoch": 0.7472, "step": 2335 }, { "loss": 0.0959, "grad_norm": 1.4606983661651611, "learning_rate": 5.215366021877653e-05, "epoch": 0.7488, "step": 2340 }, { "loss": 0.0926, "grad_norm": 1.504852294921875, "learning_rate": 5.2106525088326524e-05, "epoch": 0.7504, "step": 2345 }, { "loss": 0.0815, "grad_norm": 0.5610271692276001, "learning_rate": 5.205927024179487e-05, "epoch": 0.752, "step": 2350 }, { "loss": 0.1091, "grad_norm": 0.6768802404403687, "learning_rate": 5.201189593508629e-05, "epoch": 0.7536, "step": 2355 }, { "loss": 0.097, "grad_norm": 0.8454651236534119, "learning_rate": 5.196440242475252e-05, "epoch": 0.7552, "step": 2360 }, { "loss": 0.0922, "grad_norm": 0.5901356935501099, "learning_rate": 5.1916789967990744e-05, "epoch": 0.7568, "step": 2365 }, { "loss": 0.0706, "grad_norm": 0.5649783611297607, "learning_rate": 5.1869058822642355e-05, "epoch": 0.7584, "step": 2370 }, { "loss": 0.0883, "grad_norm": 0.5137647986412048, "learning_rate": 5.182120924719147e-05, "epoch": 0.76, "step": 2375 }, { "loss": 0.1048, "grad_norm": 0.968090832233429, "learning_rate": 5.177324150076356e-05, "epoch": 0.7616, "step": 2380 }, { "loss": 0.1016, "grad_norm": 0.6791149377822876, "learning_rate": 5.172515584312404e-05, "epoch": 0.7632, "step": 2385 }, { "loss": 0.1009, "grad_norm": 1.1339752674102783, "learning_rate": 5.1676952534676845e-05, "epoch": 0.7648, "step": 2390 }, { "loss": 0.0942, "grad_norm": 0.9203306436538696, "learning_rate": 5.1628631836463095e-05, "epoch": 0.7664, "step": 2395 }, { "loss": 0.0986, "grad_norm": 0.5185569524765015, "learning_rate": 5.158019401015954e-05, "epoch": 0.768, "step": 2400 }, { "eval_loss": 0.09681051969528198, "eval_f1": 0.713878011548417, "eval_recall": 0.7290226379286973, "eval_accuracy": 0.9647540816948946, "eval_precision": 0.6993498049414825, "eval_classification_report": { "LOC": { "precision": 0.6095652173913043, "recall": 0.6448942042318307, "f1-score": 0.6267322306660706, "support": 1087 }, "ORG": { "precision": 0.6134858518964479, "recall": 0.601534828807556, "f1-score": 0.6074515648286141, "support": 1694 }, "PER": { "precision": 0.8060446780551905, "recall": 0.8939084814922763, "f1-score": 0.847705914870094, "support": 3431 }, "PRD": { "precision": 0.5502793296089385, "recall": 0.5072961373390558, "f1-score": 0.5279142474318892, "support": 1165 }, "micro avg": { "precision": 0.6993498049414825, "recall": 0.7290226379286973, "f1-score": 0.713878011548417, "support": 7377 }, "macro avg": { "precision": 0.6448437692379704, "recall": 0.6619084129676798, "f1-score": 0.6524509894491669, "support": 7377 }, "weighted avg": { "precision": 0.6924843613689443, "recall": 0.7290226379286973, "f1-score": 0.7094726823547696, "support": 7377 } }, "eval_runtime": 3.444, "eval_samples_per_second": 1189.313, "eval_steps_per_second": 9.292, "epoch": 0.768, "step": 2400 }, { "loss": 0.0873, "grad_norm": 0.7869986295700073, "learning_rate": 5.1531639318077304e-05, "epoch": 0.7696, "step": 2405 }, { "loss": 0.1051, "grad_norm": 0.8877466917037964, "learning_rate": 5.148296802316035e-05, "epoch": 0.7712, "step": 2410 }, { "loss": 0.0922, "grad_norm": 1.0546032190322876, "learning_rate": 5.143418038898411e-05, "epoch": 0.7728, "step": 2415 }, { "loss": 0.0911, "grad_norm": 0.5526928901672363, "learning_rate": 5.1385276679754015e-05, "epoch": 0.7744, "step": 2420 }, { "loss": 0.0959, "grad_norm": 0.4351060390472412, "learning_rate": 5.133625716030412e-05, "epoch": 0.776, "step": 2425 }, { "loss": 0.1083, "grad_norm": 0.7338590621948242, "learning_rate": 5.1287122096095636e-05, "epoch": 0.7776, "step": 2430 }, { "loss": 0.1199, "grad_norm": 0.8085363507270813, "learning_rate": 5.123787175321548e-05, "epoch": 0.7792, "step": 2435 }, { "loss": 0.0959, "grad_norm": 0.7397745251655579, "learning_rate": 5.1188506398374865e-05, "epoch": 0.7808, "step": 2440 }, { "loss": 0.1009, "grad_norm": 0.4954250454902649, "learning_rate": 5.113902629890785e-05, "epoch": 0.7824, "step": 2445 }, { "loss": 0.0783, "grad_norm": 0.4568309187889099, "learning_rate": 5.1089431722769864e-05, "epoch": 0.784, "step": 2450 }, { "loss": 0.081, "grad_norm": 0.44635599851608276, "learning_rate": 5.1039722938536275e-05, "epoch": 0.7856, "step": 2455 }, { "loss": 0.1089, "grad_norm": 0.5480934381484985, "learning_rate": 5.098990021540097e-05, "epoch": 0.7872, "step": 2460 }, { "loss": 0.0899, "grad_norm": 0.4443829357624054, "learning_rate": 5.093996382317482e-05, "epoch": 0.7888, "step": 2465 }, { "loss": 0.1203, "grad_norm": 0.8199405074119568, "learning_rate": 5.088991403228429e-05, "epoch": 0.7904, "step": 2470 }, { "loss": 0.0983, "grad_norm": 0.6323763728141785, "learning_rate": 5.083975111376992e-05, "epoch": 0.792, "step": 2475 }, { "loss": 0.1045, "grad_norm": 0.5096043944358826, "learning_rate": 5.078947533928492e-05, "epoch": 0.7936, "step": 2480 }, { "loss": 0.0895, "grad_norm": 0.6827158331871033, "learning_rate": 5.0739086981093636e-05, "epoch": 0.7952, "step": 2485 }, { "loss": 0.0863, "grad_norm": 0.6818947792053223, "learning_rate": 5.06885863120701e-05, "epoch": 0.7968, "step": 2490 }, { "loss": 0.1094, "grad_norm": 0.8366793394088745, "learning_rate": 5.063797360569656e-05, "epoch": 0.7984, "step": 2495 }, { "loss": 0.0874, "grad_norm": 0.4549383223056793, "learning_rate": 5.0587249136062016e-05, "epoch": 0.8, "step": 2500 }, { "loss": 0.0854, "grad_norm": 0.5576324462890625, "learning_rate": 5.053641317786067e-05, "epoch": 0.8016, "step": 2505 }, { "loss": 0.0814, "grad_norm": 0.724626362323761, "learning_rate": 5.0485466006390515e-05, "epoch": 0.8032, "step": 2510 }, { "loss": 0.1005, "grad_norm": 0.6099131107330322, "learning_rate": 5.0434407897551787e-05, "epoch": 0.8048, "step": 2515 }, { "loss": 0.098, "grad_norm": 0.9018473625183105, "learning_rate": 5.0383239127845504e-05, "epoch": 0.8064, "step": 2520 }, { "loss": 0.1065, "grad_norm": 0.6420243978500366, "learning_rate": 5.0331959974371984e-05, "epoch": 0.808, "step": 2525 }, { "loss": 0.0954, "grad_norm": 0.6162922978401184, "learning_rate": 5.0280570714829264e-05, "epoch": 0.8096, "step": 2530 }, { "loss": 0.0817, "grad_norm": 0.8812652230262756, "learning_rate": 5.0229071627511705e-05, "epoch": 0.8112, "step": 2535 }, { "loss": 0.0875, "grad_norm": 0.7500573992729187, "learning_rate": 5.0177462991308416e-05, "epoch": 0.8128, "step": 2540 }, { "loss": 0.0951, "grad_norm": 0.519886314868927, "learning_rate": 5.012574508570173e-05, "epoch": 0.8144, "step": 2545 }, { "loss": 0.0822, "grad_norm": 0.7063540816307068, "learning_rate": 5.0073918190765756e-05, "epoch": 0.816, "step": 2550 }, { "loss": 0.0927, "grad_norm": 0.9035806655883789, "learning_rate": 5.00219825871648e-05, "epoch": 0.8176, "step": 2555 }, { "loss": 0.098, "grad_norm": 0.5797522068023682, "learning_rate": 4.996993855615191e-05, "epoch": 0.8192, "step": 2560 }, { "loss": 0.1031, "grad_norm": 1.1202480792999268, "learning_rate": 4.9917786379567254e-05, "epoch": 0.8208, "step": 2565 }, { "loss": 0.1159, "grad_norm": 0.47208496928215027, "learning_rate": 4.986552633983671e-05, "epoch": 0.8224, "step": 2570 }, { "loss": 0.1242, "grad_norm": 0.5779678225517273, "learning_rate": 4.981315871997024e-05, "epoch": 0.824, "step": 2575 }, { "loss": 0.0937, "grad_norm": 0.8530116677284241, "learning_rate": 4.976068380356042e-05, "epoch": 0.8256, "step": 2580 }, { "loss": 0.0937, "grad_norm": 0.6860007643699646, "learning_rate": 4.970810187478089e-05, "epoch": 0.8272, "step": 2585 }, { "loss": 0.0893, "grad_norm": 0.7348502278327942, "learning_rate": 4.965541321838476e-05, "epoch": 0.8288, "step": 2590 }, { "loss": 0.096, "grad_norm": 0.6729636192321777, "learning_rate": 4.960261811970317e-05, "epoch": 0.8304, "step": 2595 }, { "loss": 0.1025, "grad_norm": 0.6256001591682434, "learning_rate": 4.9549716864643665e-05, "epoch": 0.832, "step": 2600 }, { "eval_loss": 0.09482306987047195, "eval_f1": 0.7177655082819098, "eval_recall": 0.7489494374406941, "eval_accuracy": 0.9648252444950491, "eval_precision": 0.6890745821900723, "eval_classification_report": { "LOC": { "precision": 0.5987411487018096, "recall": 0.7000919963201472, "f1-score": 0.6454622561492791, "support": 1087 }, "ORG": { "precision": 0.5953031130529766, "recall": 0.6434474616292798, "f1-score": 0.6184397163120567, "support": 1694 }, "PER": { "precision": 0.8077129084092126, "recall": 0.8790440104925678, "f1-score": 0.8418702023726448, "support": 3431 }, "PRD": { "precision": 0.55668358714044, "recall": 0.5648068669527897, "f1-score": 0.5607158074137196, "support": 1165 }, "micro avg": { "precision": 0.6890745821900723, "recall": 0.7489494374406941, "f1-score": 0.7177655082819098, "support": 7377 }, "macro avg": { "precision": 0.6396101893261097, "recall": 0.696847583848696, "f1-score": 0.666621995561925, "support": 7377 }, "weighted avg": { "precision": 0.6885013514872211, "recall": 0.7489494374406941, "f1-score": 0.7172217611284287, "support": 7377 } }, "eval_runtime": 3.651, "eval_samples_per_second": 1121.881, "eval_steps_per_second": 8.765, "epoch": 0.832, "step": 2600 }, { "loss": 0.0974, "grad_norm": 0.38491159677505493, "learning_rate": 4.9496709739688666e-05, "epoch": 0.8336, "step": 2605 }, { "loss": 0.1029, "grad_norm": 0.9257701635360718, "learning_rate": 4.9443597031893935e-05, "epoch": 0.8352, "step": 2610 }, { "loss": 0.1013, "grad_norm": 0.8571967482566833, "learning_rate": 4.9390379028887e-05, "epoch": 0.8368, "step": 2615 }, { "loss": 0.0786, "grad_norm": 0.6227191686630249, "learning_rate": 4.933705601886562e-05, "epoch": 0.8384, "step": 2620 }, { "loss": 0.1032, "grad_norm": 0.6985015869140625, "learning_rate": 4.928362829059619e-05, "epoch": 0.84, "step": 2625 }, { "loss": 0.0992, "grad_norm": 0.4339228868484497, "learning_rate": 4.923009613341221e-05, "epoch": 0.8416, "step": 2630 }, { "loss": 0.0836, "grad_norm": 0.5130759477615356, "learning_rate": 4.917645983721272e-05, "epoch": 0.8432, "step": 2635 }, { "loss": 0.092, "grad_norm": 0.5941975116729736, "learning_rate": 4.91227196924607e-05, "epoch": 0.8448, "step": 2640 }, { "loss": 0.0732, "grad_norm": 0.8467056155204773, "learning_rate": 4.9068875990181514e-05, "epoch": 0.8464, "step": 2645 }, { "loss": 0.1048, "grad_norm": 0.47442761063575745, "learning_rate": 4.901492902196131e-05, "epoch": 0.848, "step": 2650 }, { "loss": 0.0899, "grad_norm": 0.6312716007232666, "learning_rate": 4.8960879079945534e-05, "epoch": 0.8496, "step": 2655 }, { "loss": 0.0837, "grad_norm": 0.47962021827697754, "learning_rate": 4.8906726456837206e-05, "epoch": 0.8512, "step": 2660 }, { "loss": 0.0962, "grad_norm": 0.4580094516277313, "learning_rate": 4.8852471445895444e-05, "epoch": 0.8528, "step": 2665 }, { "loss": 0.0885, "grad_norm": 0.5966576337814331, "learning_rate": 4.8798114340933825e-05, "epoch": 0.8544, "step": 2670 }, { "loss": 0.094, "grad_norm": 0.46646204590797424, "learning_rate": 4.874365543631881e-05, "epoch": 0.856, "step": 2675 }, { "loss": 0.0875, "grad_norm": 0.3875463008880615, "learning_rate": 4.868909502696817e-05, "epoch": 0.8576, "step": 2680 }, { "loss": 0.0818, "grad_norm": 0.5771622657775879, "learning_rate": 4.863443340834932e-05, "epoch": 0.8592, "step": 2685 }, { "loss": 0.0978, "grad_norm": 0.8952041864395142, "learning_rate": 4.857967087647781e-05, "epoch": 0.8608, "step": 2690 }, { "loss": 0.0771, "grad_norm": 0.7994149923324585, "learning_rate": 4.852480772791565e-05, "epoch": 0.8624, "step": 2695 }, { "loss": 0.0819, "grad_norm": 0.5862005352973938, "learning_rate": 4.846984425976975e-05, "epoch": 0.864, "step": 2700 }, { "loss": 0.079, "grad_norm": 0.5511375069618225, "learning_rate": 4.841478076969029e-05, "epoch": 0.8656, "step": 2705 }, { "loss": 0.0838, "grad_norm": 0.5442830920219421, "learning_rate": 4.835961755586909e-05, "epoch": 0.8672, "step": 2710 }, { "loss": 0.0868, "grad_norm": 0.3928110897541046, "learning_rate": 4.8304354917038044e-05, "epoch": 0.8688, "step": 2715 }, { "loss": 0.0925, "grad_norm": 0.8274597525596619, "learning_rate": 4.8248993152467434e-05, "epoch": 0.8704, "step": 2720 }, { "loss": 0.0775, "grad_norm": 0.7462033629417419, "learning_rate": 4.8193532561964415e-05, "epoch": 0.872, "step": 2725 }, { "loss": 0.0761, "grad_norm": 0.6517231464385986, "learning_rate": 4.813797344587126e-05, "epoch": 0.8736, "step": 2730 }, { "loss": 0.0969, "grad_norm": 0.5700922608375549, "learning_rate": 4.808231610506382e-05, "epoch": 0.8752, "step": 2735 }, { "loss": 0.106, "grad_norm": 0.9333721995353699, "learning_rate": 4.8026560840949905e-05, "epoch": 0.8768, "step": 2740 }, { "loss": 0.0877, "grad_norm": 0.6266512274742126, "learning_rate": 4.797070795546758e-05, "epoch": 0.8784, "step": 2745 }, { "loss": 0.1176, "grad_norm": 0.8793296217918396, "learning_rate": 4.791475775108359e-05, "epoch": 0.88, "step": 2750 }, { "loss": 0.0958, "grad_norm": 0.562859833240509, "learning_rate": 4.7858710530791696e-05, "epoch": 0.8816, "step": 2755 }, { "loss": 0.0935, "grad_norm": 0.4126324951648712, "learning_rate": 4.7802566598111054e-05, "epoch": 0.8832, "step": 2760 }, { "loss": 0.0908, "grad_norm": 0.7474780082702637, "learning_rate": 4.774632625708453e-05, "epoch": 0.8848, "step": 2765 }, { "loss": 0.0995, "grad_norm": 0.7474005222320557, "learning_rate": 4.768998981227711e-05, "epoch": 0.8864, "step": 2770 }, { "loss": 0.0857, "grad_norm": 0.49958786368370056, "learning_rate": 4.76335575687742e-05, "epoch": 0.888, "step": 2775 }, { "loss": 0.0854, "grad_norm": 0.7211307287216187, "learning_rate": 4.757702983218002e-05, "epoch": 0.8896, "step": 2780 }, { "loss": 0.0867, "grad_norm": 0.5871773362159729, "learning_rate": 4.752040690861592e-05, "epoch": 0.8912, "step": 2785 }, { "loss": 0.0837, "grad_norm": 0.6633477807044983, "learning_rate": 4.746368910471869e-05, "epoch": 0.8928, "step": 2790 }, { "loss": 0.0948, "grad_norm": 1.1398066282272339, "learning_rate": 4.740687672763899e-05, "epoch": 0.8944, "step": 2795 }, { "loss": 0.0829, "grad_norm": 0.4543677568435669, "learning_rate": 4.73499700850396e-05, "epoch": 0.896, "step": 2800 }, { "eval_loss": 0.09350425004959106, "eval_f1": 0.7209928426029286, "eval_recall": 0.7442049613664091, "eval_accuracy": 0.9651861415529757, "eval_precision": 0.6991849210392257, "eval_classification_report": { "LOC": { "precision": 0.6028309741881765, "recall": 0.6660533578656854, "f1-score": 0.6328671328671329, "support": 1087 }, "ORG": { "precision": 0.6379533678756477, "recall": 0.5814639905548996, "f1-score": 0.6084002470660901, "support": 1694 }, "PER": { "precision": 0.823878984332793, "recall": 0.8889536578257068, "f1-score": 0.855180148605075, "support": 3431 }, "PRD": { "precision": 0.5202846975088968, "recall": 0.6274678111587982, "f1-score": 0.5688715953307393, "support": 1165 }, "micro avg": { "precision": 0.6991849210392257, "recall": 0.7442049613664091, "f1-score": 0.7209928426029286, "support": 7377 }, "macro avg": { "precision": 0.6462370059763785, "recall": 0.6909847043512725, "f1-score": 0.6663297809672593, "support": 7377 }, "weighted avg": { "precision": 0.7006683939226749, "recall": 0.7442049613664091, "f1-score": 0.7205388491772879, "support": 7377 } }, "eval_runtime": 2.4003, "eval_samples_per_second": 1706.445, "eval_steps_per_second": 13.332, "epoch": 0.896, "step": 2800 }, { "loss": 0.0901, "grad_norm": 0.5064451098442078, "learning_rate": 4.729296948509381e-05, "epoch": 0.8976, "step": 2805 }, { "loss": 0.1046, "grad_norm": 0.5272597670555115, "learning_rate": 4.7235875236483696e-05, "epoch": 0.8992, "step": 2810 }, { "loss": 0.0958, "grad_norm": 0.48673927783966064, "learning_rate": 4.717868764839851e-05, "epoch": 0.9008, "step": 2815 }, { "loss": 0.0883, "grad_norm": 0.4359210729598999, "learning_rate": 4.712140703053297e-05, "epoch": 0.9024, "step": 2820 }, { "loss": 0.0782, "grad_norm": 0.43518325686454773, "learning_rate": 4.706403369308556e-05, "epoch": 0.904, "step": 2825 }, { "loss": 0.0884, "grad_norm": 0.7398062944412231, "learning_rate": 4.7006567946756936e-05, "epoch": 0.9056, "step": 2830 }, { "loss": 0.0791, "grad_norm": 0.5512197017669678, "learning_rate": 4.694901010274814e-05, "epoch": 0.9072, "step": 2835 }, { "loss": 0.0809, "grad_norm": 0.8878910541534424, "learning_rate": 4.6891360472758985e-05, "epoch": 0.9088, "step": 2840 }, { "loss": 0.0974, "grad_norm": 0.7676135897636414, "learning_rate": 4.683361936898634e-05, "epoch": 0.9104, "step": 2845 }, { "loss": 0.0895, "grad_norm": 0.5737549066543579, "learning_rate": 4.677578710412241e-05, "epoch": 0.912, "step": 2850 }, { "loss": 0.0907, "grad_norm": 0.6615152359008789, "learning_rate": 4.671786399135314e-05, "epoch": 0.9136, "step": 2855 }, { "loss": 0.1035, "grad_norm": 0.843995213508606, "learning_rate": 4.6659850344356386e-05, "epoch": 0.9152, "step": 2860 }, { "loss": 0.0783, "grad_norm": 0.7226997017860413, "learning_rate": 4.660174647730033e-05, "epoch": 0.9168, "step": 2865 }, { "loss": 0.0912, "grad_norm": 0.6751953363418579, "learning_rate": 4.6543552704841724e-05, "epoch": 0.9184, "step": 2870 }, { "loss": 0.0686, "grad_norm": 0.5173461437225342, "learning_rate": 4.6485269342124185e-05, "epoch": 0.92, "step": 2875 }, { "loss": 0.0759, "grad_norm": 0.528901994228363, "learning_rate": 4.642689670477651e-05, "epoch": 0.9216, "step": 2880 }, { "loss": 0.0891, "grad_norm": 0.4600706398487091, "learning_rate": 4.636843510891096e-05, "epoch": 0.9232, "step": 2885 }, { "loss": 0.0952, "grad_norm": 0.6179909706115723, "learning_rate": 4.6309884871121505e-05, "epoch": 0.9248, "step": 2890 }, { "loss": 0.1022, "grad_norm": 0.47484421730041504, "learning_rate": 4.6251246308482204e-05, "epoch": 0.9264, "step": 2895 }, { "loss": 0.0884, "grad_norm": 0.6625020503997803, "learning_rate": 4.6192519738545395e-05, "epoch": 0.928, "step": 2900 }, { "loss": 0.0917, "grad_norm": 0.8607995510101318, "learning_rate": 4.613370547934002e-05, "epoch": 0.9296, "step": 2905 }, { "loss": 0.0992, "grad_norm": 0.41931450366973877, "learning_rate": 4.60748038493699e-05, "epoch": 0.9312, "step": 2910 }, { "loss": 0.0739, "grad_norm": 0.5591437220573425, "learning_rate": 4.601581516761201e-05, "epoch": 0.9328, "step": 2915 }, { "loss": 0.081, "grad_norm": 0.36853671073913574, "learning_rate": 4.5956739753514736e-05, "epoch": 0.9344, "step": 2920 }, { "loss": 0.0768, "grad_norm": 0.5792878270149231, "learning_rate": 4.5897577926996154e-05, "epoch": 0.936, "step": 2925 }, { "loss": 0.1044, "grad_norm": 0.5675480365753174, "learning_rate": 4.5838330008442316e-05, "epoch": 0.9376, "step": 2930 }, { "loss": 0.084, "grad_norm": 0.5832204818725586, "learning_rate": 4.577899631870547e-05, "epoch": 0.9392, "step": 2935 }, { "loss": 0.0833, "grad_norm": 0.7235661745071411, "learning_rate": 4.5719577179102385e-05, "epoch": 0.9408, "step": 2940 }, { "loss": 0.0857, "grad_norm": 0.9319849014282227, "learning_rate": 4.566007291141255e-05, "epoch": 0.9424, "step": 2945 }, { "loss": 0.089, "grad_norm": 0.6958876848220825, "learning_rate": 4.5600483837876465e-05, "epoch": 0.944, "step": 2950 }, { "loss": 0.0952, "grad_norm": 0.727777361869812, "learning_rate": 4.5540810281193915e-05, "epoch": 0.9456, "step": 2955 }, { "loss": 0.0886, "grad_norm": 0.648483395576477, "learning_rate": 4.5481052564522146e-05, "epoch": 0.9472, "step": 2960 }, { "loss": 0.0917, "grad_norm": 1.0112786293029785, "learning_rate": 4.542121101147422e-05, "epoch": 0.9488, "step": 2965 }, { "loss": 0.0963, "grad_norm": 1.67804753780365, "learning_rate": 4.536128594611715e-05, "epoch": 0.9504, "step": 2970 }, { "loss": 0.087, "grad_norm": 0.41798049211502075, "learning_rate": 4.530127769297027e-05, "epoch": 0.952, "step": 2975 }, { "loss": 0.1174, "grad_norm": 0.5905815362930298, "learning_rate": 4.524118657700334e-05, "epoch": 0.9536, "step": 2980 }, { "loss": 0.0827, "grad_norm": 0.5243927240371704, "learning_rate": 4.518101292363491e-05, "epoch": 0.9552, "step": 2985 }, { "loss": 0.1047, "grad_norm": 0.9380344748497009, "learning_rate": 4.5120757058730506e-05, "epoch": 0.9568, "step": 2990 }, { "loss": 0.0795, "grad_norm": 0.6107513904571533, "learning_rate": 4.506041930860081e-05, "epoch": 0.9584, "step": 2995 }, { "loss": 0.092, "grad_norm": 0.5400716662406921, "learning_rate": 4.500000000000001e-05, "epoch": 0.96, "step": 3000 }, { "eval_loss": 0.09253348410129547, "eval_f1": 0.7278679937139865, "eval_recall": 0.7534228005964484, "eval_accuracy": 0.9665077364129883, "eval_precision": 0.7039898670044332, "eval_classification_report": { "LOC": { "precision": 0.6031353135313532, "recall": 0.672493100275989, "f1-score": 0.6359286646367986, "support": 1087 }, "ORG": { "precision": 0.6112558624283481, "recall": 0.692443919716647, "f1-score": 0.649321893163576, "support": 1694 }, "PER": { "precision": 0.8205882352941176, "recall": 0.8944914019236374, "f1-score": 0.8559475665876447, "support": 3431 }, "PRD": { "precision": 0.5712890625, "recall": 0.5021459227467812, "f1-score": 0.5344906349931475, "support": 1165 }, "micro avg": { "precision": 0.7039898670044332, "recall": 0.7534228005964484, "f1-score": 0.7278679937139865, "support": 7377 }, "macro avg": { "precision": 0.6515671184384547, "recall": 0.6903935861657636, "f1-score": 0.6689221898452917, "support": 7377 }, "weighted avg": { "precision": 0.701106887605913, "recall": 0.7534228005964484, "f1-score": 0.7253142790034599, "support": 7377 } }, "eval_runtime": 2.5426, "eval_samples_per_second": 1610.92, "eval_steps_per_second": 12.585, "epoch": 0.96, "step": 3000 }, { "loss": 0.084, "grad_norm": 0.5997390747070312, "learning_rate": 4.493949946012391e-05, "epoch": 0.9616, "step": 3005 }, { "loss": 0.0916, "grad_norm": 0.9988031983375549, "learning_rate": 4.4878918016608274e-05, "epoch": 0.9632, "step": 3010 }, { "loss": 0.0801, "grad_norm": 0.5336042642593384, "learning_rate": 4.481825599752694e-05, "epoch": 0.9648, "step": 3015 }, { "loss": 0.099, "grad_norm": 1.0593531131744385, "learning_rate": 4.475751373139012e-05, "epoch": 0.9664, "step": 3020 }, { "loss": 0.0705, "grad_norm": 0.69476318359375, "learning_rate": 4.469669154714262e-05, "epoch": 0.968, "step": 3025 }, { "loss": 0.0781, "grad_norm": 0.528763473033905, "learning_rate": 4.463578977416199e-05, "epoch": 0.9696, "step": 3030 }, { "loss": 0.0931, "grad_norm": 0.8860145211219788, "learning_rate": 4.4574808742256816e-05, "epoch": 0.9712, "step": 3035 }, { "loss": 0.1104, "grad_norm": 0.8176819086074829, "learning_rate": 4.45137487816649e-05, "epoch": 0.9728, "step": 3040 }, { "loss": 0.0961, "grad_norm": 0.6515324711799622, "learning_rate": 4.4452610223051465e-05, "epoch": 0.9744, "step": 3045 }, { "loss": 0.0676, "grad_norm": 0.4852517545223236, "learning_rate": 4.4391393397507396e-05, "epoch": 0.976, "step": 3050 }, { "loss": 0.0848, "grad_norm": 0.6544936299324036, "learning_rate": 4.43300986365474e-05, "epoch": 0.9776, "step": 3055 }, { "loss": 0.0826, "grad_norm": 0.6334974765777588, "learning_rate": 4.426872627210827e-05, "epoch": 0.9792, "step": 3060 }, { "loss": 0.0891, "grad_norm": 0.7229584455490112, "learning_rate": 4.4207276636547e-05, "epoch": 0.9808, "step": 3065 }, { "loss": 0.1213, "grad_norm": 0.6871221661567688, "learning_rate": 4.414575006263912e-05, "epoch": 0.9824, "step": 3070 }, { "loss": 0.0867, "grad_norm": 0.4800964295864105, "learning_rate": 4.4084146883576726e-05, "epoch": 0.984, "step": 3075 }, { "loss": 0.081, "grad_norm": 0.7497280836105347, "learning_rate": 4.402246743296683e-05, "epoch": 0.9856, "step": 3080 }, { "loss": 0.0876, "grad_norm": 0.6592546105384827, "learning_rate": 4.396071204482945e-05, "epoch": 0.9872, "step": 3085 }, { "loss": 0.0811, "grad_norm": 0.5773298144340515, "learning_rate": 4.389888105359586e-05, "epoch": 0.9888, "step": 3090 }, { "loss": 0.0933, "grad_norm": 0.7535294890403748, "learning_rate": 4.3836974794106727e-05, "epoch": 0.9904, "step": 3095 }, { "loss": 0.0935, "grad_norm": 0.5805786848068237, "learning_rate": 4.3774993601610356e-05, "epoch": 0.992, "step": 3100 }, { "loss": 0.0894, "grad_norm": 0.5155940651893616, "learning_rate": 4.3712937811760836e-05, "epoch": 0.9936, "step": 3105 }, { "loss": 0.1202, "grad_norm": 0.547626793384552, "learning_rate": 4.365080776061622e-05, "epoch": 0.9952, "step": 3110 }, { "loss": 0.0873, "grad_norm": 0.6334134340286255, "learning_rate": 4.358860378463673e-05, "epoch": 0.9968, "step": 3115 }, { "loss": 0.076, "grad_norm": 0.5997979640960693, "learning_rate": 4.352632622068293e-05, "epoch": 0.9984, "step": 3120 }, { "loss": 0.0963, "grad_norm": 1.8701601028442383, "learning_rate": 4.3463975406013874e-05, "epoch": 1.0, "step": 3125 }, { "loss": 0.07, "grad_norm": 0.5357418656349182, "learning_rate": 4.340155167828531e-05, "epoch": 1.0016, "step": 3130 }, { "loss": 0.0504, "grad_norm": 0.3770025968551636, "learning_rate": 4.333905537554783e-05, "epoch": 1.0032, "step": 3135 }, { "loss": 0.0505, "grad_norm": 0.3558103144168854, "learning_rate": 4.3276486836245074e-05, "epoch": 1.0048, "step": 3140 }, { "loss": 0.0571, "grad_norm": 0.413334459066391, "learning_rate": 4.3213846399211855e-05, "epoch": 1.0064, "step": 3145 }, { "loss": 0.0645, "grad_norm": 0.5588265061378479, "learning_rate": 4.315113440367233e-05, "epoch": 1.008, "step": 3150 }, { "loss": 0.0703, "grad_norm": 0.8101525902748108, "learning_rate": 4.3088351189238204e-05, "epoch": 1.0096, "step": 3155 }, { "loss": 0.0628, "grad_norm": 0.6702431440353394, "learning_rate": 4.3025497095906845e-05, "epoch": 1.0112, "step": 3160 }, { "loss": 0.057, "grad_norm": 0.587557852268219, "learning_rate": 4.296257246405948e-05, "epoch": 1.0128, "step": 3165 }, { "loss": 0.0637, "grad_norm": 0.5873764157295227, "learning_rate": 4.289957763445929e-05, "epoch": 1.0144, "step": 3170 }, { "loss": 0.0697, "grad_norm": 0.4973818361759186, "learning_rate": 4.283651294824963e-05, "epoch": 1.016, "step": 3175 }, { "loss": 0.0709, "grad_norm": 0.44891709089279175, "learning_rate": 4.277337874695219e-05, "epoch": 1.0176, "step": 3180 }, { "loss": 0.0586, "grad_norm": 0.5509355068206787, "learning_rate": 4.271017537246505e-05, "epoch": 1.0192, "step": 3185 }, { "loss": 0.0649, "grad_norm": 0.43009933829307556, "learning_rate": 4.264690316706095e-05, "epoch": 1.0208, "step": 3190 }, { "loss": 0.0563, "grad_norm": 0.540193498134613, "learning_rate": 4.258356247338532e-05, "epoch": 1.0224, "step": 3195 }, { "loss": 0.0649, "grad_norm": 0.4838225841522217, "learning_rate": 4.252015363445454e-05, "epoch": 1.024, "step": 3200 }, { "eval_loss": 0.09394045919179916, "eval_f1": 0.7384374183433497, "eval_recall": 0.7661651077673851, "eval_accuracy": 0.9670821218713783, "eval_precision": 0.7126465767242466, "eval_classification_report": { "LOC": { "precision": 0.6444444444444445, "recall": 0.6936522539098436, "f1-score": 0.6681435533894551, "support": 1087 }, "ORG": { "precision": 0.6631773399014779, "recall": 0.6357733175914995, "f1-score": 0.6491862567811935, "support": 1694 }, "PER": { "precision": 0.8137100994243851, "recall": 0.9064412707665404, "f1-score": 0.8575761753757065, "support": 3431 }, "PRD": { "precision": 0.5406844106463878, "recall": 0.6103004291845494, "f1-score": 0.5733870967741935, "support": 1165 }, "micro avg": { "precision": 0.7126465767242466, "recall": 0.7661651077673851, "f1-score": 0.7384374183433497, "support": 7377 }, "macro avg": { "precision": 0.6655040736041737, "recall": 0.7115418178631082, "f1-score": 0.6870732705801371, "support": 7377 }, "weighted avg": { "precision": 0.7110844807418084, "recall": 0.7661651077673851, "f1-score": 0.7369301053243411, "support": 7377 } }, "eval_runtime": 2.6071, "eval_samples_per_second": 1571.099, "eval_steps_per_second": 12.274, "epoch": 1.024, "step": 3200 }, { "loss": 0.0591, "grad_norm": 0.6289095878601074, "learning_rate": 4.2456676993654e-05, "epoch": 1.0256, "step": 3205 }, { "loss": 0.0632, "grad_norm": 0.8218206167221069, "learning_rate": 4.2393132894736264e-05, "epoch": 1.0272, "step": 3210 }, { "loss": 0.0807, "grad_norm": 0.532882571220398, "learning_rate": 4.23295216818192e-05, "epoch": 1.0288, "step": 3215 }, { "loss": 0.067, "grad_norm": 0.6556366086006165, "learning_rate": 4.226584369938414e-05, "epoch": 1.0304, "step": 3220 }, { "loss": 0.0701, "grad_norm": 0.7512959837913513, "learning_rate": 4.2202099292274015e-05, "epoch": 1.032, "step": 3225 }, { "loss": 0.0575, "grad_norm": 0.3516683578491211, "learning_rate": 4.213828880569144e-05, "epoch": 1.0336, "step": 3230 }, { "loss": 0.0671, "grad_norm": 0.44332972168922424, "learning_rate": 4.2074412585196894e-05, "epoch": 1.0352, "step": 3235 }, { "loss": 0.0712, "grad_norm": 0.886711597442627, "learning_rate": 4.2010470976706866e-05, "epoch": 1.0368, "step": 3240 }, { "loss": 0.0678, "grad_norm": 0.41689151525497437, "learning_rate": 4.194646432649186e-05, "epoch": 1.0384, "step": 3245 }, { "loss": 0.0746, "grad_norm": 0.3851936161518097, "learning_rate": 4.188239298117471e-05, "epoch": 1.04, "step": 3250 }, { "loss": 0.0616, "grad_norm": 0.5334251523017883, "learning_rate": 4.181825728772854e-05, "epoch": 1.0416, "step": 3255 }, { "loss": 0.0685, "grad_norm": 0.5539014935493469, "learning_rate": 4.175405759347496e-05, "epoch": 1.0432, "step": 3260 }, { "loss": 0.0607, "grad_norm": 0.4429740011692047, "learning_rate": 4.168979424608217e-05, "epoch": 1.0448, "step": 3265 }, { "loss": 0.0654, "grad_norm": 0.41293808817863464, "learning_rate": 4.1625467593563094e-05, "epoch": 1.0464, "step": 3270 }, { "loss": 0.0683, "grad_norm": 0.5350791215896606, "learning_rate": 4.1561077984273455e-05, "epoch": 1.048, "step": 3275 }, { "loss": 0.0551, "grad_norm": 0.4682389795780182, "learning_rate": 4.1496625766909926e-05, "epoch": 1.0496, "step": 3280 }, { "loss": 0.0542, "grad_norm": 0.596297025680542, "learning_rate": 4.1432111290508234e-05, "epoch": 1.0512, "step": 3285 }, { "loss": 0.0473, "grad_norm": 0.41230592131614685, "learning_rate": 4.136753490444125e-05, "epoch": 1.0528, "step": 3290 }, { "loss": 0.0554, "grad_norm": 0.669712483882904, "learning_rate": 4.130289695841712e-05, "epoch": 1.0544, "step": 3295 }, { "loss": 0.0523, "grad_norm": 0.3948027789592743, "learning_rate": 4.1238197802477374e-05, "epoch": 1.056, "step": 3300 }, { "loss": 0.0829, "grad_norm": 0.8203285336494446, "learning_rate": 4.117343778699499e-05, "epoch": 1.0576, "step": 3305 }, { "loss": 0.0731, "grad_norm": 0.5141724944114685, "learning_rate": 4.110861726267256e-05, "epoch": 1.0592, "step": 3310 }, { "loss": 0.0634, "grad_norm": 0.3414155840873718, "learning_rate": 4.104373658054035e-05, "epoch": 1.0608, "step": 3315 }, { "loss": 0.0694, "grad_norm": 0.6490858793258667, "learning_rate": 4.097879609195437e-05, "epoch": 1.0624, "step": 3320 }, { "loss": 0.0503, "grad_norm": 0.3713358938694, "learning_rate": 4.091379614859455e-05, "epoch": 1.064, "step": 3325 }, { "loss": 0.0453, "grad_norm": 0.41878265142440796, "learning_rate": 4.084873710246277e-05, "epoch": 1.0656, "step": 3330 }, { "loss": 0.0729, "grad_norm": 0.3237791657447815, "learning_rate": 4.0783619305881004e-05, "epoch": 1.0672, "step": 3335 }, { "loss": 0.0608, "grad_norm": 0.42332518100738525, "learning_rate": 4.0718443111489327e-05, "epoch": 1.0688, "step": 3340 }, { "loss": 0.0617, "grad_norm": 0.6328282952308655, "learning_rate": 4.065320887224412e-05, "epoch": 1.0704, "step": 3345 }, { "loss": 0.0636, "grad_norm": 0.6683381199836731, "learning_rate": 4.058791694141607e-05, "epoch": 1.072, "step": 3350 }, { "loss": 0.0554, "grad_norm": 0.7020971179008484, "learning_rate": 4.0522567672588315e-05, "epoch": 1.0735999999999999, "step": 3355 }, { "loss": 0.0568, "grad_norm": 0.4012830853462219, "learning_rate": 4.045716141965446e-05, "epoch": 1.0752, "step": 3360 }, { "loss": 0.0656, "grad_norm": 0.5052561163902283, "learning_rate": 4.039169853681675e-05, "epoch": 1.0768, "step": 3365 }, { "loss": 0.0489, "grad_norm": 0.21015505492687225, "learning_rate": 4.032617937858407e-05, "epoch": 1.0784, "step": 3370 }, { "loss": 0.0601, "grad_norm": 0.9062200784683228, "learning_rate": 4.026060429977007e-05, "epoch": 1.08, "step": 3375 }, { "loss": 0.0536, "grad_norm": 0.2977677285671234, "learning_rate": 4.019497365549125e-05, "epoch": 1.0816, "step": 3380 }, { "loss": 0.0506, "grad_norm": 0.5506483912467957, "learning_rate": 4.0129287801164975e-05, "epoch": 1.0832, "step": 3385 }, { "loss": 0.0553, "grad_norm": 0.7066329121589661, "learning_rate": 4.006354709250766e-05, "epoch": 1.0848, "step": 3390 }, { "loss": 0.0626, "grad_norm": 0.7892416715621948, "learning_rate": 3.9997751885532734e-05, "epoch": 1.0864, "step": 3395 }, { "loss": 0.063, "grad_norm": 0.5779737830162048, "learning_rate": 3.9931902536548746e-05, "epoch": 1.088, "step": 3400 }, { "eval_loss": 0.09469541907310486, "eval_f1": 0.7393389841645313, "eval_recall": 0.7626406398264878, "eval_accuracy": 0.9666449789561434, "eval_precision": 0.7174190257587351, "eval_classification_report": { "LOC": { "precision": 0.6206322795341098, "recall": 0.6862925482980681, "f1-score": 0.6518130187854958, "support": 1087 }, "ORG": { "precision": 0.6391018619934282, "recall": 0.6889020070838253, "f1-score": 0.6630681818181818, "support": 1694 }, "PER": { "precision": 0.830306316074817, "recall": 0.8927426406295541, "f1-score": 0.8603932584269661, "support": 3431 }, "PRD": { "precision": 0.5777777777777777, "recall": 0.5579399141630901, "f1-score": 0.5676855895196506, "support": 1165 }, "micro avg": { "precision": 0.7174190257587351, "recall": 0.7626406398264878, "f1-score": 0.7393389841645313, "support": 7377 }, "macro avg": { "precision": 0.6669545588450332, "recall": 0.7064692775436343, "f1-score": 0.6857400121375736, "support": 7377 }, "weighted avg": { "precision": 0.7156239560301279, "recall": 0.7626406398264878, "f1-score": 0.7381213545985018, "support": 7377 } }, "eval_runtime": 2.463, "eval_samples_per_second": 1662.995, "eval_steps_per_second": 12.992, "epoch": 1.088, "step": 3400 }, { "loss": 0.058, "grad_norm": 0.4889095425605774, "learning_rate": 3.9865999402157506e-05, "epoch": 1.0896, "step": 3405 }, { "loss": 0.0494, "grad_norm": 0.38131508231163025, "learning_rate": 3.9800042839252035e-05, "epoch": 1.0912, "step": 3410 }, { "loss": 0.0842, "grad_norm": 0.4849714934825897, "learning_rate": 3.9734033205014716e-05, "epoch": 1.0928, "step": 3415 }, { "loss": 0.0598, "grad_norm": 0.4843025207519531, "learning_rate": 3.966797085691534e-05, "epoch": 1.0944, "step": 3420 }, { "loss": 0.0565, "grad_norm": 0.46093568205833435, "learning_rate": 3.960185615270915e-05, "epoch": 1.096, "step": 3425 }, { "loss": 0.0628, "grad_norm": 0.6947360038757324, "learning_rate": 3.953568945043494e-05, "epoch": 1.0976, "step": 3430 }, { "loss": 0.0626, "grad_norm": 0.7914568185806274, "learning_rate": 3.946947110841308e-05, "epoch": 1.0992, "step": 3435 }, { "loss": 0.0583, "grad_norm": 0.4247816205024719, "learning_rate": 3.940320148524359e-05, "epoch": 1.1008, "step": 3440 }, { "loss": 0.0604, "grad_norm": 0.7833236455917358, "learning_rate": 3.933688093980421e-05, "epoch": 1.1024, "step": 3445 }, { "loss": 0.0508, "grad_norm": 0.5074185729026794, "learning_rate": 3.927050983124843e-05, "epoch": 1.104, "step": 3450 }, { "loss": 0.0753, "grad_norm": 0.9859195947647095, "learning_rate": 3.9204088519003575e-05, "epoch": 1.1056, "step": 3455 }, { "loss": 0.0616, "grad_norm": 0.32169005274772644, "learning_rate": 3.913761736276884e-05, "epoch": 1.1072, "step": 3460 }, { "loss": 0.0533, "grad_norm": 0.5442489981651306, "learning_rate": 3.9071096722513346e-05, "epoch": 1.1088, "step": 3465 }, { "loss": 0.0612, "grad_norm": 0.7056108117103577, "learning_rate": 3.900452695847418e-05, "epoch": 1.1104, "step": 3470 }, { "loss": 0.0516, "grad_norm": 0.6202899217605591, "learning_rate": 3.893790843115446e-05, "epoch": 1.112, "step": 3475 }, { "loss": 0.0726, "grad_norm": 0.4670408368110657, "learning_rate": 3.887124150132141e-05, "epoch": 1.1136, "step": 3480 }, { "loss": 0.0634, "grad_norm": 0.4039972126483917, "learning_rate": 3.880452653000432e-05, "epoch": 1.1152, "step": 3485 }, { "loss": 0.052, "grad_norm": 0.35446104407310486, "learning_rate": 3.873776387849267e-05, "epoch": 1.1168, "step": 3490 }, { "loss": 0.0615, "grad_norm": 0.629490852355957, "learning_rate": 3.867095390833415e-05, "epoch": 1.1184, "step": 3495 }, { "loss": 0.0765, "grad_norm": 1.0972533226013184, "learning_rate": 3.860409698133271e-05, "epoch": 1.12, "step": 3500 }, { "loss": 0.0603, "grad_norm": 0.7486947178840637, "learning_rate": 3.8537193459546574e-05, "epoch": 1.1216, "step": 3505 }, { "loss": 0.0695, "grad_norm": 0.3977409899234772, "learning_rate": 3.84702437052863e-05, "epoch": 1.1232, "step": 3510 }, { "loss": 0.0632, "grad_norm": 0.5205050110816956, "learning_rate": 3.840324808111282e-05, "epoch": 1.1248, "step": 3515 }, { "loss": 0.0545, "grad_norm": 0.580403745174408, "learning_rate": 3.833620694983545e-05, "epoch": 1.1264, "step": 3520 }, { "loss": 0.0606, "grad_norm": 0.4265666604042053, "learning_rate": 3.826912067450998e-05, "epoch": 1.1280000000000001, "step": 3525 }, { "loss": 0.0466, "grad_norm": 0.5802576541900635, "learning_rate": 3.820198961843665e-05, "epoch": 1.1296, "step": 3530 }, { "loss": 0.0564, "grad_norm": 0.6011948585510254, "learning_rate": 3.813481414515822e-05, "epoch": 1.1312, "step": 3535 }, { "loss": 0.0707, "grad_norm": 1.2971011400222778, "learning_rate": 3.8067594618457974e-05, "epoch": 1.1328, "step": 3540 }, { "loss": 0.0496, "grad_norm": 0.35327309370040894, "learning_rate": 3.8000331402357774e-05, "epoch": 1.1344, "step": 3545 }, { "loss": 0.0575, "grad_norm": 0.889660120010376, "learning_rate": 3.793302486111608e-05, "epoch": 1.1360000000000001, "step": 3550 }, { "loss": 0.0657, "grad_norm": 0.5159348845481873, "learning_rate": 3.7865675359225945e-05, "epoch": 1.1376, "step": 3555 }, { "loss": 0.0638, "grad_norm": 0.40126362442970276, "learning_rate": 3.779828326141313e-05, "epoch": 1.1392, "step": 3560 }, { "loss": 0.0511, "grad_norm": 0.3955281674861908, "learning_rate": 3.773084893263402e-05, "epoch": 1.1408, "step": 3565 }, { "loss": 0.0573, "grad_norm": 0.5016000270843506, "learning_rate": 3.766337273807372e-05, "epoch": 1.1424, "step": 3570 }, { "loss": 0.0587, "grad_norm": 0.7836259007453918, "learning_rate": 3.7595855043144064e-05, "epoch": 1.144, "step": 3575 }, { "loss": 0.1032, "grad_norm": 0.5860452055931091, "learning_rate": 3.752829621348158e-05, "epoch": 1.1456, "step": 3580 }, { "loss": 0.0689, "grad_norm": 0.7412546277046204, "learning_rate": 3.7460696614945656e-05, "epoch": 1.1472, "step": 3585 }, { "loss": 0.0575, "grad_norm": 0.5969299077987671, "learning_rate": 3.739305661361634e-05, "epoch": 1.1488, "step": 3590 }, { "loss": 0.058, "grad_norm": 0.46106481552124023, "learning_rate": 3.732537657579257e-05, "epoch": 1.1504, "step": 3595 }, { "loss": 0.0648, "grad_norm": 0.5862342119216919, "learning_rate": 3.7257656867990034e-05, "epoch": 1.152, "step": 3600 }, { "eval_loss": 0.09219186007976532, "eval_f1": 0.740968184490756, "eval_recall": 0.7687406804934255, "eval_accuracy": 0.9671177032714556, "eval_precision": 0.7151324085750316, "eval_classification_report": { "LOC": { "precision": 0.6577815993121238, "recall": 0.703771849126035, "f1-score": 0.6799999999999999, "support": 1087 }, "ORG": { "precision": 0.6240875912408759, "recall": 0.7066115702479339, "f1-score": 0.6627906976744186, "support": 1694 }, "PER": { "precision": 0.8296357162421254, "recall": 0.8828329932964151, "f1-score": 0.8554080768144592, "support": 3431 }, "PRD": { "precision": 0.5676126878130217, "recall": 0.5836909871244635, "f1-score": 0.5755395683453237, "support": 1165 }, "micro avg": { "precision": 0.7151324085750316, "recall": 0.7687406804934255, "f1-score": 0.740968184490756, "support": 7377 }, "macro avg": { "precision": 0.6697793986520366, "recall": 0.7192268499487118, "f1-score": 0.6934345857085504, "support": 7377 }, "weighted avg": { "precision": 0.7157329404559067, "recall": 0.7687406804934255, "f1-score": 0.7411327301793652, "support": 7377 } }, "eval_runtime": 3.7833, "eval_samples_per_second": 1082.641, "eval_steps_per_second": 8.458, "epoch": 1.152, "step": 3600 }, { "loss": 0.0528, "grad_norm": 1.2004528045654297, "learning_rate": 3.7189897856939316e-05, "epoch": 1.1536, "step": 3605 }, { "loss": 0.0495, "grad_norm": 0.4554869830608368, "learning_rate": 3.7122099909583795e-05, "epoch": 1.1552, "step": 3610 }, { "loss": 0.0526, "grad_norm": 0.7351579070091248, "learning_rate": 3.70542633930777e-05, "epoch": 1.1568, "step": 3615 }, { "loss": 0.0561, "grad_norm": 0.4895160496234894, "learning_rate": 3.698638867478418e-05, "epoch": 1.1584, "step": 3620 }, { "loss": 0.0611, "grad_norm": 0.5927324891090393, "learning_rate": 3.691847612227321e-05, "epoch": 1.16, "step": 3625 }, { "loss": 0.0626, "grad_norm": 0.6944194436073303, "learning_rate": 3.6850526103319676e-05, "epoch": 1.1616, "step": 3630 }, { "loss": 0.0565, "grad_norm": 0.7273026704788208, "learning_rate": 3.678253898590136e-05, "epoch": 1.1632, "step": 3635 }, { "loss": 0.0573, "grad_norm": 0.9152206778526306, "learning_rate": 3.671451513819695e-05, "epoch": 1.1648, "step": 3640 }, { "loss": 0.0563, "grad_norm": 0.6791650056838989, "learning_rate": 3.664645492858403e-05, "epoch": 1.1663999999999999, "step": 3645 }, { "loss": 0.0588, "grad_norm": 0.5220665335655212, "learning_rate": 3.65783587256371e-05, "epoch": 1.168, "step": 3650 }, { "loss": 0.0681, "grad_norm": 0.6691823601722717, "learning_rate": 3.6510226898125615e-05, "epoch": 1.1696, "step": 3655 }, { "loss": 0.0513, "grad_norm": 0.5333041548728943, "learning_rate": 3.6442059815011906e-05, "epoch": 1.1712, "step": 3660 }, { "loss": 0.0642, "grad_norm": 0.7830275893211365, "learning_rate": 3.6373857845449236e-05, "epoch": 1.1728, "step": 3665 }, { "loss": 0.0614, "grad_norm": 1.0705488920211792, "learning_rate": 3.630562135877982e-05, "epoch": 1.1743999999999999, "step": 3670 }, { "loss": 0.0598, "grad_norm": 0.4356139898300171, "learning_rate": 3.623735072453278e-05, "epoch": 1.176, "step": 3675 }, { "loss": 0.0727, "grad_norm": 0.7246953845024109, "learning_rate": 3.6169046312422165e-05, "epoch": 1.1776, "step": 3680 }, { "loss": 0.0611, "grad_norm": 0.5549226999282837, "learning_rate": 3.610070849234492e-05, "epoch": 1.1792, "step": 3685 }, { "loss": 0.0599, "grad_norm": 0.7923194766044617, "learning_rate": 3.603233763437895e-05, "epoch": 1.1808, "step": 3690 }, { "loss": 0.0552, "grad_norm": 0.4860561788082123, "learning_rate": 3.596393410878104e-05, "epoch": 1.1824, "step": 3695 }, { "loss": 0.051, "grad_norm": 0.6419265270233154, "learning_rate": 3.589549828598491e-05, "epoch": 1.184, "step": 3700 }, { "loss": 0.0585, "grad_norm": 0.818865954875946, "learning_rate": 3.582703053659916e-05, "epoch": 1.1856, "step": 3705 }, { "loss": 0.0619, "grad_norm": 0.8771647810935974, "learning_rate": 3.5758531231405306e-05, "epoch": 1.1872, "step": 3710 }, { "loss": 0.0474, "grad_norm": 0.5207450985908508, "learning_rate": 3.569000074135575e-05, "epoch": 1.1888, "step": 3715 }, { "loss": 0.0558, "grad_norm": 1.0908665657043457, "learning_rate": 3.562143943757175e-05, "epoch": 1.1904, "step": 3720 }, { "loss": 0.0566, "grad_norm": 0.42771023511886597, "learning_rate": 3.555284769134144e-05, "epoch": 1.192, "step": 3725 }, { "loss": 0.0574, "grad_norm": 0.6894050240516663, "learning_rate": 3.548422587411785e-05, "epoch": 1.1936, "step": 3730 }, { "loss": 0.0459, "grad_norm": 0.4959530830383301, "learning_rate": 3.541557435751681e-05, "epoch": 1.1952, "step": 3735 }, { "loss": 0.0733, "grad_norm": 1.0264801979064941, "learning_rate": 3.5346893513315e-05, "epoch": 1.1968, "step": 3740 }, { "loss": 0.0688, "grad_norm": 0.8723161220550537, "learning_rate": 3.5278183713447934e-05, "epoch": 1.1984, "step": 3745 }, { "loss": 0.07, "grad_norm": 0.6290266513824463, "learning_rate": 3.520944533000792e-05, "epoch": 1.2, "step": 3750 }, { "loss": 0.0636, "grad_norm": 1.2199335098266602, "learning_rate": 3.514067873524206e-05, "epoch": 1.2016, "step": 3755 }, { "loss": 0.0656, "grad_norm": 0.6711154580116272, "learning_rate": 3.507188430155022e-05, "epoch": 1.2032, "step": 3760 }, { "loss": 0.062, "grad_norm": 0.5187415480613708, "learning_rate": 3.500306240148307e-05, "epoch": 1.2048, "step": 3765 }, { "loss": 0.059, "grad_norm": 0.4930712580680847, "learning_rate": 3.493421340773997e-05, "epoch": 1.2064, "step": 3770 }, { "loss": 0.0475, "grad_norm": 0.8040748238563538, "learning_rate": 3.4865337693167035e-05, "epoch": 1.208, "step": 3775 }, { "loss": 0.0645, "grad_norm": 0.6265009641647339, "learning_rate": 3.479643563075505e-05, "epoch": 1.2096, "step": 3780 }, { "loss": 0.0537, "grad_norm": 0.43460115790367126, "learning_rate": 3.4727507593637534e-05, "epoch": 1.2112, "step": 3785 }, { "loss": 0.055, "grad_norm": 0.38511723279953003, "learning_rate": 3.4658553955088634e-05, "epoch": 1.2128, "step": 3790 }, { "loss": 0.061, "grad_norm": 0.4122491776943207, "learning_rate": 3.4589575088521147e-05, "epoch": 1.2144, "step": 3795 }, { "loss": 0.0547, "grad_norm": 0.8003705739974976, "learning_rate": 3.4520571367484496e-05, "epoch": 1.216, "step": 3800 }, { "eval_loss": 0.0925210639834404, "eval_f1": 0.7470116905293577, "eval_recall": 0.7709095838416701, "eval_accuracy": 0.9675446800723827, "eval_precision": 0.7245508982035929, "eval_classification_report": { "LOC": { "precision": 0.6577586206896552, "recall": 0.7019319227230911, "f1-score": 0.6791277258566978, "support": 1087 }, "ORG": { "precision": 0.6244791666666667, "recall": 0.7077922077922078, "f1-score": 0.6635307138904261, "support": 1694 }, "PER": { "precision": 0.8395573997233748, "recall": 0.8845817545904984, "f1-score": 0.8614816917399943, "support": 3431 }, "PRD": { "precision": 0.5979202772963604, "recall": 0.592274678111588, "f1-score": 0.5950840879689522, "support": 1165 }, "micro avg": { "precision": 0.7245508982035929, "recall": 0.7709095838416701, "f1-score": 0.7470116905293577, "support": 7377 }, "macro avg": { "precision": 0.6799288660940143, "recall": 0.7216451408043463, "f1-score": 0.6998060548640175, "support": 7377 }, "weighted avg": { "precision": 0.7252202644061471, "recall": 0.7709095838416701, "f1-score": 0.7470854702698064, "support": 7377 } }, "eval_runtime": 3.1334, "eval_samples_per_second": 1307.214, "eval_steps_per_second": 10.213, "epoch": 1.216, "step": 3800 }, { "loss": 0.0644, "grad_norm": 0.6768618226051331, "learning_rate": 3.44515431656627e-05, "epoch": 1.2176, "step": 3805 }, { "loss": 0.0664, "grad_norm": 0.5372012257575989, "learning_rate": 3.438249085687236e-05, "epoch": 1.2192, "step": 3810 }, { "loss": 0.0638, "grad_norm": 0.5765817165374756, "learning_rate": 3.431341481506059e-05, "epoch": 1.2208, "step": 3815 }, { "loss": 0.0675, "grad_norm": 1.0842068195343018, "learning_rate": 3.4244315414303074e-05, "epoch": 1.2224, "step": 3820 }, { "loss": 0.0485, "grad_norm": 0.9630246758460999, "learning_rate": 3.4175193028801965e-05, "epoch": 1.224, "step": 3825 }, { "loss": 0.0656, "grad_norm": 0.5278171896934509, "learning_rate": 3.410604803288391e-05, "epoch": 1.2256, "step": 3830 }, { "loss": 0.06, "grad_norm": 0.8352569937705994, "learning_rate": 3.403688080099799e-05, "epoch": 1.2272, "step": 3835 }, { "loss": 0.0623, "grad_norm": 0.9273226857185364, "learning_rate": 3.396769170771368e-05, "epoch": 1.2288000000000001, "step": 3840 }, { "loss": 0.07, "grad_norm": 0.5983508825302124, "learning_rate": 3.389848112771889e-05, "epoch": 1.2304, "step": 3845 }, { "loss": 0.06, "grad_norm": 0.8319526314735413, "learning_rate": 3.382924943581785e-05, "epoch": 1.232, "step": 3850 }, { "loss": 0.052, "grad_norm": 0.29965996742248535, "learning_rate": 3.375999700692913e-05, "epoch": 1.2336, "step": 3855 }, { "loss": 0.0643, "grad_norm": 0.3567400276660919, "learning_rate": 3.3690724216083626e-05, "epoch": 1.2352, "step": 3860 }, { "loss": 0.0677, "grad_norm": 0.7236108779907227, "learning_rate": 3.3621431438422466e-05, "epoch": 1.2368000000000001, "step": 3865 }, { "loss": 0.0503, "grad_norm": 0.7198706269264221, "learning_rate": 3.355211904919504e-05, "epoch": 1.2384, "step": 3870 }, { "loss": 0.0525, "grad_norm": 0.45932328701019287, "learning_rate": 3.348278742375691e-05, "epoch": 1.24, "step": 3875 }, { "loss": 0.0577, "grad_norm": 0.8261347413063049, "learning_rate": 3.341343693756785e-05, "epoch": 1.2416, "step": 3880 }, { "loss": 0.0543, "grad_norm": 0.621379017829895, "learning_rate": 3.334406796618976e-05, "epoch": 1.2432, "step": 3885 }, { "loss": 0.0625, "grad_norm": 0.6212461590766907, "learning_rate": 3.3274680885284644e-05, "epoch": 1.2448, "step": 3890 }, { "loss": 0.0671, "grad_norm": 1.1812677383422852, "learning_rate": 3.320527607061256e-05, "epoch": 1.2464, "step": 3895 }, { "loss": 0.0699, "grad_norm": 0.5274093151092529, "learning_rate": 3.313585389802961e-05, "epoch": 1.248, "step": 3900 }, { "loss": 0.0786, "grad_norm": 0.6966798305511475, "learning_rate": 3.306641474348592e-05, "epoch": 1.2496, "step": 3905 }, { "loss": 0.0648, "grad_norm": 0.5683120489120483, "learning_rate": 3.2996958983023546e-05, "epoch": 1.2511999999999999, "step": 3910 }, { "loss": 0.0646, "grad_norm": 1.0045043230056763, "learning_rate": 3.292748699277449e-05, "epoch": 1.2528000000000001, "step": 3915 }, { "loss": 0.0493, "grad_norm": 0.6333140134811401, "learning_rate": 3.285799914895865e-05, "epoch": 1.2544, "step": 3920 }, { "loss": 0.0557, "grad_norm": 0.43484431505203247, "learning_rate": 3.2788495827881775e-05, "epoch": 1.256, "step": 3925 }, { "loss": 0.0565, "grad_norm": 0.45735877752304077, "learning_rate": 3.271897740593341e-05, "epoch": 1.2576, "step": 3930 }, { "loss": 0.0573, "grad_norm": 0.6612702012062073, "learning_rate": 3.264944425958491e-05, "epoch": 1.2591999999999999, "step": 3935 }, { "loss": 0.0474, "grad_norm": 0.698167085647583, "learning_rate": 3.257989676538733e-05, "epoch": 1.2608, "step": 3940 }, { "loss": 0.0501, "grad_norm": 0.588047444820404, "learning_rate": 3.2510335299969476e-05, "epoch": 1.2624, "step": 3945 }, { "loss": 0.0727, "grad_norm": 0.593565821647644, "learning_rate": 3.2440760240035754e-05, "epoch": 1.264, "step": 3950 }, { "loss": 0.0725, "grad_norm": 0.3422766923904419, "learning_rate": 3.237117196236424e-05, "epoch": 1.2656, "step": 3955 }, { "loss": 0.0744, "grad_norm": 1.4410278797149658, "learning_rate": 3.2301570843804566e-05, "epoch": 1.2671999999999999, "step": 3960 }, { "loss": 0.0584, "grad_norm": 1.2807468175888062, "learning_rate": 3.223195726127591e-05, "epoch": 1.2688, "step": 3965 }, { "loss": 0.0567, "grad_norm": 0.6221219897270203, "learning_rate": 3.2162331591764925e-05, "epoch": 1.2704, "step": 3970 }, { "loss": 0.0491, "grad_norm": 0.41640806198120117, "learning_rate": 3.209269421232376e-05, "epoch": 1.272, "step": 3975 }, { "loss": 0.0631, "grad_norm": 0.40749090909957886, "learning_rate": 3.202304550006796e-05, "epoch": 1.2736, "step": 3980 }, { "loss": 0.0709, "grad_norm": 0.595072329044342, "learning_rate": 3.195338583217443e-05, "epoch": 1.2752, "step": 3985 }, { "loss": 0.0638, "grad_norm": 0.5588378310203552, "learning_rate": 3.188371558587941e-05, "epoch": 1.2768, "step": 3990 }, { "loss": 0.0603, "grad_norm": 0.8484724760055542, "learning_rate": 3.1814035138476436e-05, "epoch": 1.2784, "step": 3995 }, { "loss": 0.0444, "grad_norm": 0.6286981701850891, "learning_rate": 3.174434486731428e-05, "epoch": 1.28, "step": 4000 }, { "eval_loss": 0.09196597337722778, "eval_f1": 0.7458823529411764, "eval_recall": 0.7734851565677104, "eval_accuracy": 0.9673006933289958, "eval_precision": 0.7201817493373722, "eval_classification_report": { "LOC": { "precision": 0.6460251046025105, "recall": 0.7102115915363385, "f1-score": 0.6765994741454865, "support": 1087 }, "ORG": { "precision": 0.6503210741389376, "recall": 0.6576151121605667, "f1-score": 0.6539477546228353, "support": 1694 }, "PER": { "precision": 0.8288770053475936, "recall": 0.9035266686097347, "f1-score": 0.8645935016036814, "support": 3431 }, "PRD": { "precision": 0.5647058823529412, "recall": 0.6180257510729614, "f1-score": 0.5901639344262295, "support": 1165 }, "micro avg": { "precision": 0.7201817493373722, "recall": 0.7734851565677104, "f1-score": 0.7458823529411764, "support": 7377 }, "macro avg": { "precision": 0.6724822666104957, "recall": 0.7223447808449004, "f1-score": 0.6963261661995581, "support": 7377 }, "weighted avg": { "precision": 0.7192127621774514, "recall": 0.7734851565677104, "f1-score": 0.7451826504454406, "support": 7377 } }, "eval_runtime": 3.7142, "eval_samples_per_second": 1102.805, "eval_steps_per_second": 8.616, "epoch": 1.28, "step": 4000 }, { "loss": 0.054, "grad_norm": 0.5575366616249084, "learning_rate": 3.167464514979492e-05, "epoch": 1.2816, "step": 4005 }, { "loss": 0.0493, "grad_norm": 0.533478319644928, "learning_rate": 3.160493636337148e-05, "epoch": 1.2832, "step": 4010 }, { "loss": 0.0608, "grad_norm": 0.4764353334903717, "learning_rate": 3.153521888554621e-05, "epoch": 1.2848, "step": 4015 }, { "loss": 0.0594, "grad_norm": 0.9175963401794434, "learning_rate": 3.146549309386841e-05, "epoch": 1.2864, "step": 4020 }, { "loss": 0.0495, "grad_norm": 0.9631668925285339, "learning_rate": 3.139575936593241e-05, "epoch": 1.288, "step": 4025 }, { "loss": 0.0687, "grad_norm": 0.8266165852546692, "learning_rate": 3.132601807937552e-05, "epoch": 1.2896, "step": 4030 }, { "loss": 0.0579, "grad_norm": 1.2651548385620117, "learning_rate": 3.125626961187599e-05, "epoch": 1.2912, "step": 4035 }, { "loss": 0.0529, "grad_norm": 0.6622834801673889, "learning_rate": 3.1186514341150965e-05, "epoch": 1.2928, "step": 4040 }, { "loss": 0.0835, "grad_norm": 0.7092466950416565, "learning_rate": 3.111675264495438e-05, "epoch": 1.2944, "step": 4045 }, { "loss": 0.0692, "grad_norm": 0.7468956708908081, "learning_rate": 3.104698490107504e-05, "epoch": 1.296, "step": 4050 }, { "loss": 0.0514, "grad_norm": 0.8713874220848083, "learning_rate": 3.097721148733444e-05, "epoch": 1.2976, "step": 4055 }, { "loss": 0.0579, "grad_norm": 0.2624255120754242, "learning_rate": 3.090743278158483e-05, "epoch": 1.2992, "step": 4060 }, { "loss": 0.092, "grad_norm": 0.7738727331161499, "learning_rate": 3.083764916170707e-05, "epoch": 1.3008, "step": 4065 }, { "loss": 0.0551, "grad_norm": 0.33460375666618347, "learning_rate": 3.0767861005608667e-05, "epoch": 1.3024, "step": 4070 }, { "loss": 0.0689, "grad_norm": 1.136635661125183, "learning_rate": 3.069806869122169e-05, "epoch": 1.304, "step": 4075 }, { "loss": 0.057, "grad_norm": 0.41338932514190674, "learning_rate": 3.0628272596500716e-05, "epoch": 1.3056, "step": 4080 }, { "loss": 0.0536, "grad_norm": 0.3877381682395935, "learning_rate": 3.0558473099420785e-05, "epoch": 1.3072, "step": 4085 }, { "loss": 0.05, "grad_norm": 0.5449187159538269, "learning_rate": 3.0488670577975377e-05, "epoch": 1.3088, "step": 4090 }, { "loss": 0.0703, "grad_norm": 0.6798962950706482, "learning_rate": 3.041886541017437e-05, "epoch": 1.3104, "step": 4095 }, { "loss": 0.0638, "grad_norm": 1.000394344329834, "learning_rate": 3.034905797404192e-05, "epoch": 1.312, "step": 4100 }, { "loss": 0.0687, "grad_norm": 0.6879838705062866, "learning_rate": 3.027924864761451e-05, "epoch": 1.3136, "step": 4105 }, { "loss": 0.0512, "grad_norm": 0.7496501803398132, "learning_rate": 3.020943780893885e-05, "epoch": 1.3152, "step": 4110 }, { "loss": 0.0637, "grad_norm": 0.5175178050994873, "learning_rate": 3.0139625836069844e-05, "epoch": 1.3168, "step": 4115 }, { "loss": 0.0652, "grad_norm": 0.4603586494922638, "learning_rate": 3.006981310706852e-05, "epoch": 1.3184, "step": 4120 }, { "loss": 0.0493, "grad_norm": 0.6498084664344788, "learning_rate": 3.0000000000000004e-05, "epoch": 1.32, "step": 4125 }, { "loss": 0.067, "grad_norm": 0.5029028058052063, "learning_rate": 2.99301868929315e-05, "epoch": 1.3216, "step": 4130 }, { "loss": 0.0642, "grad_norm": 0.8726014494895935, "learning_rate": 2.9860374163930168e-05, "epoch": 1.3232, "step": 4135 }, { "loss": 0.0528, "grad_norm": 0.6043103337287903, "learning_rate": 2.9790562191061158e-05, "epoch": 1.3248, "step": 4140 }, { "loss": 0.0666, "grad_norm": 0.6896954774856567, "learning_rate": 2.9720751352385504e-05, "epoch": 1.3264, "step": 4145 }, { "loss": 0.0589, "grad_norm": 0.6057456731796265, "learning_rate": 2.96509420259581e-05, "epoch": 1.328, "step": 4150 }, { "loss": 0.0596, "grad_norm": 0.7103518843650818, "learning_rate": 2.9581134589825647e-05, "epoch": 1.3296000000000001, "step": 4155 }, { "loss": 0.0774, "grad_norm": 0.3551633954048157, "learning_rate": 2.951132942202463e-05, "epoch": 1.3312, "step": 4160 }, { "loss": 0.0532, "grad_norm": 0.6072956323623657, "learning_rate": 2.944152690057923e-05, "epoch": 1.3328, "step": 4165 }, { "loss": 0.0629, "grad_norm": 0.5620079636573792, "learning_rate": 2.93717274034993e-05, "epoch": 1.3344, "step": 4170 }, { "loss": 0.0592, "grad_norm": 0.944299042224884, "learning_rate": 2.930193130877832e-05, "epoch": 1.336, "step": 4175 }, { "loss": 0.0557, "grad_norm": 0.7111073136329651, "learning_rate": 2.9232138994391342e-05, "epoch": 1.3376000000000001, "step": 4180 }, { "loss": 0.0585, "grad_norm": 0.36760005354881287, "learning_rate": 2.9162350838292943e-05, "epoch": 1.3392, "step": 4185 }, { "loss": 0.0592, "grad_norm": 0.5795009732246399, "learning_rate": 2.9092567218415184e-05, "epoch": 1.3408, "step": 4190 }, { "loss": 0.0427, "grad_norm": 0.7939180135726929, "learning_rate": 2.9022788512665573e-05, "epoch": 1.3424, "step": 4195 }, { "loss": 0.0599, "grad_norm": 0.8299146890640259, "learning_rate": 2.8953015098924984e-05, "epoch": 1.3439999999999999, "step": 4200 }, { "eval_loss": 0.09328293800354004, "eval_f1": 0.7473286421683607, "eval_recall": 0.7774162938864037, "eval_accuracy": 0.9671736169001485, "eval_precision": 0.719483126332957, "eval_classification_report": { "LOC": { "precision": 0.6297739672642245, "recall": 0.7433302667893285, "f1-score": 0.6818565400843881, "support": 1087 }, "ORG": { "precision": 0.6680647094068305, "recall": 0.6582054309327037, "f1-score": 0.663098424026167, "support": 1694 }, "PER": { "precision": 0.8276510067114093, "recall": 0.8985718449431652, "f1-score": 0.8616545556176635, "support": 3431 }, "PRD": { "precision": 0.5633693972179289, "recall": 0.6257510729613734, "f1-score": 0.5929239528263522, "support": 1165 }, "micro avg": { "precision": 0.719483126332957, "recall": 0.7774162938864037, "f1-score": 0.7473286421683607, "support": 7377 }, "macro avg": { "precision": 0.6722147701500983, "recall": 0.7314646539066427, "f1-score": 0.6998833681386427, "support": 7377 }, "weighted avg": { "precision": 0.7201114100497649, "recall": 0.7774162938864037, "f1-score": 0.7471275552038715, "support": 7377 } }, "eval_runtime": 3.54, "eval_samples_per_second": 1157.071, "eval_steps_per_second": 9.04, "epoch": 1.3439999999999999, "step": 4200 }, { "loss": 0.0717, "grad_norm": 0.7720775604248047, "learning_rate": 2.8883247355045627e-05, "epoch": 1.3456000000000001, "step": 4205 }, { "loss": 0.0647, "grad_norm": 1.1658906936645508, "learning_rate": 2.881348565884905e-05, "epoch": 1.3472, "step": 4210 }, { "loss": 0.0513, "grad_norm": 0.7927054762840271, "learning_rate": 2.874373038812402e-05, "epoch": 1.3488, "step": 4215 }, { "loss": 0.0447, "grad_norm": 0.36519894003868103, "learning_rate": 2.8673981920624483e-05, "epoch": 1.3504, "step": 4220 }, { "loss": 0.0815, "grad_norm": 0.4403943419456482, "learning_rate": 2.8604240634067603e-05, "epoch": 1.3519999999999999, "step": 4225 }, { "loss": 0.0542, "grad_norm": 0.4859263598918915, "learning_rate": 2.8534506906131604e-05, "epoch": 1.3536000000000001, "step": 4230 }, { "loss": 0.0581, "grad_norm": 0.6051929593086243, "learning_rate": 2.8464781114453813e-05, "epoch": 1.3552, "step": 4235 }, { "loss": 0.0677, "grad_norm": 0.43061649799346924, "learning_rate": 2.8395063636628527e-05, "epoch": 1.3568, "step": 4240 }, { "loss": 0.059, "grad_norm": 0.6342320442199707, "learning_rate": 2.8325354850205088e-05, "epoch": 1.3584, "step": 4245 }, { "loss": 0.0549, "grad_norm": 0.4992300868034363, "learning_rate": 2.825565513268573e-05, "epoch": 1.3599999999999999, "step": 4250 }, { "loss": 0.0562, "grad_norm": 0.7390466332435608, "learning_rate": 2.8185964861523573e-05, "epoch": 1.3616, "step": 4255 }, { "loss": 0.0727, "grad_norm": 1.0254685878753662, "learning_rate": 2.81162844141206e-05, "epoch": 1.3632, "step": 4260 }, { "loss": 0.0653, "grad_norm": 0.37108129262924194, "learning_rate": 2.8046614167825587e-05, "epoch": 1.3648, "step": 4265 }, { "loss": 0.0539, "grad_norm": 0.5602735280990601, "learning_rate": 2.7976954499932053e-05, "epoch": 1.3664, "step": 4270 }, { "loss": 0.0704, "grad_norm": 0.5932009816169739, "learning_rate": 2.7907305787676246e-05, "epoch": 1.3679999999999999, "step": 4275 }, { "loss": 0.0417, "grad_norm": 0.35224395990371704, "learning_rate": 2.7837668408235083e-05, "epoch": 1.3696, "step": 4280 }, { "loss": 0.0708, "grad_norm": 0.6071919202804565, "learning_rate": 2.776804273872411e-05, "epoch": 1.3712, "step": 4285 }, { "loss": 0.0689, "grad_norm": 0.597146213054657, "learning_rate": 2.7698429156195445e-05, "epoch": 1.3728, "step": 4290 }, { "loss": 0.0695, "grad_norm": 1.0103567838668823, "learning_rate": 2.762882803763577e-05, "epoch": 1.3744, "step": 4295 }, { "loss": 0.0611, "grad_norm": 0.6905353665351868, "learning_rate": 2.7559239759964257e-05, "epoch": 1.376, "step": 4300 }, { "loss": 0.0743, "grad_norm": 1.4044324159622192, "learning_rate": 2.7489664700030546e-05, "epoch": 1.3776, "step": 4305 }, { "loss": 0.068, "grad_norm": 2.9490318298339844, "learning_rate": 2.7420103234612677e-05, "epoch": 1.3792, "step": 4310 }, { "loss": 0.0594, "grad_norm": 0.49620071053504944, "learning_rate": 2.7350555740415107e-05, "epoch": 1.3808, "step": 4315 }, { "loss": 0.08, "grad_norm": 0.44799941778182983, "learning_rate": 2.7281022594066604e-05, "epoch": 1.3824, "step": 4320 }, { "loss": 0.068, "grad_norm": 0.6641474962234497, "learning_rate": 2.721150417211824e-05, "epoch": 1.384, "step": 4325 }, { "loss": 0.054, "grad_norm": 0.4016796946525574, "learning_rate": 2.714200085104136e-05, "epoch": 1.3856, "step": 4330 }, { "loss": 0.0671, "grad_norm": 0.617506206035614, "learning_rate": 2.7072513007225524e-05, "epoch": 1.3872, "step": 4335 }, { "loss": 0.0642, "grad_norm": 0.6914502382278442, "learning_rate": 2.7003041016976476e-05, "epoch": 1.3888, "step": 4340 }, { "loss": 0.0516, "grad_norm": 0.3531930148601532, "learning_rate": 2.6933585256514097e-05, "epoch": 1.3904, "step": 4345 }, { "loss": 0.074, "grad_norm": 0.7457120418548584, "learning_rate": 2.6864146101970405e-05, "epoch": 1.392, "step": 4350 }, { "loss": 0.0702, "grad_norm": 0.7747828960418701, "learning_rate": 2.679472392938746e-05, "epoch": 1.3936, "step": 4355 }, { "loss": 0.0584, "grad_norm": 0.5167232155799866, "learning_rate": 2.672531911471536e-05, "epoch": 1.3952, "step": 4360 }, { "loss": 0.0567, "grad_norm": 0.66608726978302, "learning_rate": 2.6655932033810243e-05, "epoch": 1.3968, "step": 4365 }, { "loss": 0.0545, "grad_norm": 0.5147083401679993, "learning_rate": 2.6586563062432155e-05, "epoch": 1.3984, "step": 4370 }, { "loss": 0.0726, "grad_norm": 1.0069822072982788, "learning_rate": 2.6517212576243094e-05, "epoch": 1.4, "step": 4375 }, { "loss": 0.0673, "grad_norm": 0.5472681522369385, "learning_rate": 2.6447880950804973e-05, "epoch": 1.4016, "step": 4380 }, { "loss": 0.0618, "grad_norm": 0.40183454751968384, "learning_rate": 2.637856856157755e-05, "epoch": 1.4032, "step": 4385 }, { "loss": 0.0585, "grad_norm": 0.6724686622619629, "learning_rate": 2.630927578391639e-05, "epoch": 1.4048, "step": 4390 }, { "loss": 0.0614, "grad_norm": 0.4009018838405609, "learning_rate": 2.6240002993070872e-05, "epoch": 1.4064, "step": 4395 }, { "loss": 0.0572, "grad_norm": 0.42729130387306213, "learning_rate": 2.6170750564182165e-05, "epoch": 1.408, "step": 4400 }, { "eval_loss": 0.09126374870538712, "eval_f1": 0.7465940054495913, "eval_recall": 0.7799918666124441, "eval_accuracy": 0.9676005937010755, "eval_precision": 0.7159387831280328, "eval_classification_report": { "LOC": { "precision": 0.6418032786885246, "recall": 0.7203311867525299, "f1-score": 0.6788036410923277, "support": 1087 }, "ORG": { "precision": 0.6404371584699453, "recall": 0.69185360094451, "f1-score": 0.6651532349602723, "support": 1694 }, "PER": { "precision": 0.8339261285909713, "recall": 0.8883707373943457, "f1-score": 0.8602878916172736, "support": 3431 }, "PRD": { "precision": 0.5638138138138138, "recall": 0.6446351931330472, "f1-score": 0.6015218261914297, "support": 1165 }, "micro avg": { "precision": 0.7159387831280328, "recall": 0.7799918666124441, "f1-score": 0.7465940054495913, "support": 7377 }, "macro avg": { "precision": 0.6699950948908138, "recall": 0.7362976795561083, "f1-score": 0.7014416484653259, "support": 7377 }, "weighted avg": { "precision": 0.7185284466139664, "recall": 0.7799918666124441, "f1-score": 0.7478717393983927, "support": 7377 } }, "eval_runtime": 3.6574, "eval_samples_per_second": 1119.934, "eval_steps_per_second": 8.749, "epoch": 1.408, "step": 4400 }, { "loss": 0.0491, "grad_norm": 0.5767428874969482, "learning_rate": 2.6101518872281136e-05, "epoch": 1.4096, "step": 4405 }, { "loss": 0.0631, "grad_norm": 0.8151782155036926, "learning_rate": 2.6032308292286327e-05, "epoch": 1.4112, "step": 4410 }, { "loss": 0.0637, "grad_norm": 0.6059090495109558, "learning_rate": 2.5963119199002023e-05, "epoch": 1.4128, "step": 4415 }, { "loss": 0.0558, "grad_norm": 0.8576734066009521, "learning_rate": 2.58939519671161e-05, "epoch": 1.4144, "step": 4420 }, { "loss": 0.0822, "grad_norm": 0.6731064915657043, "learning_rate": 2.5824806971198047e-05, "epoch": 1.416, "step": 4425 }, { "loss": 0.0501, "grad_norm": 0.5025277137756348, "learning_rate": 2.5755684585696935e-05, "epoch": 1.4176, "step": 4430 }, { "loss": 0.0573, "grad_norm": 0.5587552189826965, "learning_rate": 2.568658518493942e-05, "epoch": 1.4192, "step": 4435 }, { "loss": 0.0588, "grad_norm": 0.909436047077179, "learning_rate": 2.5617509143127658e-05, "epoch": 1.4208, "step": 4440 }, { "loss": 0.0714, "grad_norm": 0.8166884183883667, "learning_rate": 2.5548456834337303e-05, "epoch": 1.4224, "step": 4445 }, { "loss": 0.0526, "grad_norm": 0.6847903728485107, "learning_rate": 2.547942863251551e-05, "epoch": 1.424, "step": 4450 }, { "loss": 0.0602, "grad_norm": 0.4967225193977356, "learning_rate": 2.5410424911478865e-05, "epoch": 1.4256, "step": 4455 }, { "loss": 0.0605, "grad_norm": 0.6421734094619751, "learning_rate": 2.534144604491138e-05, "epoch": 1.4272, "step": 4460 }, { "loss": 0.0575, "grad_norm": 0.6059199571609497, "learning_rate": 2.5272492406362475e-05, "epoch": 1.4288, "step": 4465 }, { "loss": 0.0682, "grad_norm": 1.0718897581100464, "learning_rate": 2.520356436924496e-05, "epoch": 1.4304000000000001, "step": 4470 }, { "loss": 0.0453, "grad_norm": 0.5459166169166565, "learning_rate": 2.5134662306832984e-05, "epoch": 1.432, "step": 4475 }, { "loss": 0.0466, "grad_norm": 0.47302019596099854, "learning_rate": 2.5065786592260034e-05, "epoch": 1.4336, "step": 4480 }, { "loss": 0.0682, "grad_norm": 0.8037794232368469, "learning_rate": 2.499693759851694e-05, "epoch": 1.4352, "step": 4485 }, { "loss": 0.0745, "grad_norm": 1.0462980270385742, "learning_rate": 2.4928115698449782e-05, "epoch": 1.4368, "step": 4490 }, { "loss": 0.0675, "grad_norm": 0.6248451471328735, "learning_rate": 2.485932126475796e-05, "epoch": 1.4384000000000001, "step": 4495 }, { "loss": 0.067, "grad_norm": 0.5556265711784363, "learning_rate": 2.4790554669992094e-05, "epoch": 1.44, "step": 4500 }, { "loss": 0.0575, "grad_norm": 0.6949689984321594, "learning_rate": 2.472181628655208e-05, "epoch": 1.4416, "step": 4505 }, { "loss": 0.0827, "grad_norm": 0.5264003872871399, "learning_rate": 2.4653106486685015e-05, "epoch": 1.4432, "step": 4510 }, { "loss": 0.0497, "grad_norm": 0.624577522277832, "learning_rate": 2.4584425642483208e-05, "epoch": 1.4447999999999999, "step": 4515 }, { "loss": 0.0455, "grad_norm": 0.5404865741729736, "learning_rate": 2.4515774125882166e-05, "epoch": 1.4464000000000001, "step": 4520 }, { "loss": 0.0596, "grad_norm": 0.39943668246269226, "learning_rate": 2.444715230865857e-05, "epoch": 1.448, "step": 4525 }, { "loss": 0.0582, "grad_norm": 0.5870445370674133, "learning_rate": 2.437856056242826e-05, "epoch": 1.4496, "step": 4530 }, { "loss": 0.0663, "grad_norm": 0.7443811893463135, "learning_rate": 2.4309999258644265e-05, "epoch": 1.4512, "step": 4535 }, { "loss": 0.0492, "grad_norm": 0.3346763551235199, "learning_rate": 2.42414687685947e-05, "epoch": 1.4527999999999999, "step": 4540 }, { "loss": 0.0652, "grad_norm": 0.7657554745674133, "learning_rate": 2.417296946340085e-05, "epoch": 1.4544000000000001, "step": 4545 }, { "loss": 0.0683, "grad_norm": 0.5719790458679199, "learning_rate": 2.4104501714015095e-05, "epoch": 1.456, "step": 4550 }, { "loss": 0.0501, "grad_norm": 0.3786943256855011, "learning_rate": 2.4036065891218976e-05, "epoch": 1.4576, "step": 4555 }, { "loss": 0.0548, "grad_norm": 0.5226039886474609, "learning_rate": 2.396766236562107e-05, "epoch": 1.4592, "step": 4560 }, { "loss": 0.056, "grad_norm": 0.4600558280944824, "learning_rate": 2.3899291507655084e-05, "epoch": 1.4607999999999999, "step": 4565 }, { "loss": 0.0545, "grad_norm": 0.5823372602462769, "learning_rate": 2.3830953687577847e-05, "epoch": 1.4624, "step": 4570 }, { "loss": 0.0543, "grad_norm": 0.4289751648902893, "learning_rate": 2.376264927546723e-05, "epoch": 1.464, "step": 4575 }, { "loss": 0.0563, "grad_norm": 0.5520490407943726, "learning_rate": 2.369437864122019e-05, "epoch": 1.4656, "step": 4580 }, { "loss": 0.0557, "grad_norm": 0.6033503413200378, "learning_rate": 2.3626142154550765e-05, "epoch": 1.4672, "step": 4585 }, { "loss": 0.0703, "grad_norm": 0.7329562902450562, "learning_rate": 2.3557940184988106e-05, "epoch": 1.4687999999999999, "step": 4590 }, { "loss": 0.0554, "grad_norm": 0.6088122725486755, "learning_rate": 2.3489773101874404e-05, "epoch": 1.4704, "step": 4595 }, { "loss": 0.0503, "grad_norm": 0.5583781599998474, "learning_rate": 2.34216412743629e-05, "epoch": 1.472, "step": 4600 }, { "eval_loss": 0.09182178974151611, "eval_f1": 0.7479813562660014, "eval_recall": 0.7722651484343229, "eval_accuracy": 0.9674786003293822, "eval_precision": 0.7251782077393075, "eval_classification_report": { "LOC": { "precision": 0.6660746003552398, "recall": 0.6899724011039559, "f1-score": 0.6778129236330773, "support": 1087 }, "ORG": { "precision": 0.6569060773480663, "recall": 0.7018890200708382, "f1-score": 0.6786529680365296, "support": 1694 }, "PER": { "precision": 0.8354465270121278, "recall": 0.8834159137277762, "f1-score": 0.8587618642867261, "support": 3431 }, "PRD": { "precision": 0.5626934984520123, "recall": 0.6240343347639485, "f1-score": 0.5917785917785917, "support": 1165 }, "micro avg": { "precision": 0.7251782077393075, "recall": 0.7722651484343229, "f1-score": 0.7479813562660014, "support": 7377 }, "macro avg": { "precision": 0.6802801757918616, "recall": 0.7248279174166298, "f1-score": 0.7017515869337312, "support": 7377 }, "weighted avg": { "precision": 0.7264168287229192, "recall": 0.7722651484343229, "f1-score": 0.7485773067144982, "support": 7377 } }, "eval_runtime": 2.632, "eval_samples_per_second": 1556.212, "eval_steps_per_second": 12.158, "epoch": 1.472, "step": 4600 }, { "loss": 0.058, "grad_norm": 0.7265459299087524, "learning_rate": 2.3353545071415986e-05, "epoch": 1.4736, "step": 4605 }, { "loss": 0.0569, "grad_norm": 0.6447118520736694, "learning_rate": 2.3285484861803063e-05, "epoch": 1.4752, "step": 4610 }, { "loss": 0.0613, "grad_norm": 0.7986389994621277, "learning_rate": 2.3217461014098656e-05, "epoch": 1.4768, "step": 4615 }, { "loss": 0.0554, "grad_norm": 0.8035600185394287, "learning_rate": 2.314947389668033e-05, "epoch": 1.4784, "step": 4620 }, { "loss": 0.0457, "grad_norm": 0.583641767501831, "learning_rate": 2.3081523877726798e-05, "epoch": 1.48, "step": 4625 }, { "loss": 0.0452, "grad_norm": 0.5306426286697388, "learning_rate": 2.301361132521583e-05, "epoch": 1.4816, "step": 4630 }, { "loss": 0.0798, "grad_norm": 0.598943829536438, "learning_rate": 2.2945736606922303e-05, "epoch": 1.4832, "step": 4635 }, { "loss": 0.0565, "grad_norm": 0.6511633992195129, "learning_rate": 2.287790009041622e-05, "epoch": 1.4848, "step": 4640 }, { "loss": 0.0489, "grad_norm": 0.5468058586120605, "learning_rate": 2.2810102143060696e-05, "epoch": 1.4864, "step": 4645 }, { "loss": 0.0567, "grad_norm": 0.5945963859558105, "learning_rate": 2.2742343132009975e-05, "epoch": 1.488, "step": 4650 }, { "loss": 0.0714, "grad_norm": 0.7722645998001099, "learning_rate": 2.2674623424207447e-05, "epoch": 1.4896, "step": 4655 }, { "loss": 0.0621, "grad_norm": 0.7465737462043762, "learning_rate": 2.2606943386383674e-05, "epoch": 1.4912, "step": 4660 }, { "loss": 0.0592, "grad_norm": 0.4908687472343445, "learning_rate": 2.2539303385054365e-05, "epoch": 1.4928, "step": 4665 }, { "loss": 0.0591, "grad_norm": 0.4045424163341522, "learning_rate": 2.2471703786518415e-05, "epoch": 1.4944, "step": 4670 }, { "loss": 0.0592, "grad_norm": 0.5765438675880432, "learning_rate": 2.240414495685595e-05, "epoch": 1.496, "step": 4675 }, { "loss": 0.0604, "grad_norm": 1.1051089763641357, "learning_rate": 2.233662726192629e-05, "epoch": 1.4976, "step": 4680 }, { "loss": 0.0597, "grad_norm": 0.36962801218032837, "learning_rate": 2.226915106736599e-05, "epoch": 1.4992, "step": 4685 }, { "loss": 0.0569, "grad_norm": 0.4730798006057739, "learning_rate": 2.2201716738586884e-05, "epoch": 1.5008, "step": 4690 }, { "loss": 0.0658, "grad_norm": 0.8506127595901489, "learning_rate": 2.2134324640774067e-05, "epoch": 1.5024, "step": 4695 }, { "loss": 0.0568, "grad_norm": 0.333552747964859, "learning_rate": 2.2066975138883946e-05, "epoch": 1.504, "step": 4700 }, { "loss": 0.0477, "grad_norm": 0.6479647755622864, "learning_rate": 2.1999668597642234e-05, "epoch": 1.5056, "step": 4705 }, { "loss": 0.0557, "grad_norm": 0.5841954350471497, "learning_rate": 2.1932405381542034e-05, "epoch": 1.5072, "step": 4710 }, { "loss": 0.0755, "grad_norm": 0.7870712876319885, "learning_rate": 2.186518585484179e-05, "epoch": 1.5088, "step": 4715 }, { "loss": 0.0595, "grad_norm": 0.914937436580658, "learning_rate": 2.179801038156335e-05, "epoch": 1.5104, "step": 4720 }, { "loss": 0.0596, "grad_norm": 0.640117347240448, "learning_rate": 2.173087932549003e-05, "epoch": 1.512, "step": 4725 }, { "loss": 0.0673, "grad_norm": 0.7457531690597534, "learning_rate": 2.1663793050164566e-05, "epoch": 1.5135999999999998, "step": 4730 }, { "loss": 0.0682, "grad_norm": 0.43587005138397217, "learning_rate": 2.1596751918887202e-05, "epoch": 1.5152, "step": 4735 }, { "loss": 0.0441, "grad_norm": 0.6509613394737244, "learning_rate": 2.1529756294713704e-05, "epoch": 1.5168, "step": 4740 }, { "loss": 0.055, "grad_norm": 0.7873203754425049, "learning_rate": 2.1462806540453438e-05, "epoch": 1.5184, "step": 4745 }, { "loss": 0.0564, "grad_norm": 0.2910894751548767, "learning_rate": 2.1395903018667303e-05, "epoch": 1.52, "step": 4750 }, { "loss": 0.0547, "grad_norm": 0.6000471711158752, "learning_rate": 2.132904609166585e-05, "epoch": 1.5215999999999998, "step": 4755 }, { "loss": 0.0571, "grad_norm": 0.5183636546134949, "learning_rate": 2.126223612150734e-05, "epoch": 1.5232, "step": 4760 }, { "loss": 0.0692, "grad_norm": 0.6496992707252502, "learning_rate": 2.1195473469995696e-05, "epoch": 1.5248, "step": 4765 }, { "loss": 0.0546, "grad_norm": 0.48704588413238525, "learning_rate": 2.1128758498678615e-05, "epoch": 1.5264, "step": 4770 }, { "loss": 0.0462, "grad_norm": 0.42258185148239136, "learning_rate": 2.106209156884554e-05, "epoch": 1.528, "step": 4775 }, { "loss": 0.0474, "grad_norm": 0.37046363949775696, "learning_rate": 2.0995473041525833e-05, "epoch": 1.5295999999999998, "step": 4780 }, { "loss": 0.0503, "grad_norm": 0.7661680579185486, "learning_rate": 2.092890327748667e-05, "epoch": 1.5312000000000001, "step": 4785 }, { "loss": 0.066, "grad_norm": 0.6843987703323364, "learning_rate": 2.0862382637231166e-05, "epoch": 1.5328, "step": 4790 }, { "loss": 0.0623, "grad_norm": 0.9721200466156006, "learning_rate": 2.0795911480996433e-05, "epoch": 1.5344, "step": 4795 }, { "loss": 0.0582, "grad_norm": 0.4940122961997986, "learning_rate": 2.0729490168751584e-05, "epoch": 1.536, "step": 4800 }, { "eval_loss": 0.09071080386638641, "eval_f1": 0.7473286421683607, "eval_recall": 0.7774162938864037, "eval_accuracy": 0.9677835837586157, "eval_precision": 0.719483126332957, "eval_classification_report": { "LOC": { "precision": 0.6368899917287014, "recall": 0.7083716651333947, "f1-score": 0.6707317073170731, "support": 1087 }, "ORG": { "precision": 0.6440306681270537, "recall": 0.6942148760330579, "f1-score": 0.6681818181818182, "support": 1694 }, "PER": { "precision": 0.8421197144426139, "recall": 0.8939084814922763, "f1-score": 0.8672416230736604, "support": 3431 }, "PRD": { "precision": 0.5579598145285936, "recall": 0.6197424892703862, "f1-score": 0.5872305815372103, "support": 1165 }, "micro avg": { "precision": 0.719483126332957, "recall": 0.7774162938864037, "f1-score": 0.7473286421683607, "support": 7377 }, "macro avg": { "precision": 0.6702500472067405, "recall": 0.7290593779822787, "f1-score": 0.6983464325274404, "support": 7377 }, "weighted avg": { "precision": 0.7215159681435199, "recall": 0.7774162938864037, "f1-score": 0.748355022652872, "support": 7377 } }, "eval_runtime": 2.3708, "eval_samples_per_second": 1727.668, "eval_steps_per_second": 13.497, "epoch": 1.536, "step": 4800 }, { "loss": 0.0667, "grad_norm": 0.6013182401657104, "learning_rate": 2.0663119060195807e-05, "epoch": 1.5375999999999999, "step": 4805 }, { "loss": 0.052, "grad_norm": 0.619006335735321, "learning_rate": 2.0596798514756417e-05, "epoch": 1.5392000000000001, "step": 4810 }, { "loss": 0.0528, "grad_norm": 0.6325690746307373, "learning_rate": 2.0530528891586934e-05, "epoch": 1.5408, "step": 4815 }, { "loss": 0.043, "grad_norm": 0.5661787986755371, "learning_rate": 2.046431054956507e-05, "epoch": 1.5424, "step": 4820 }, { "loss": 0.0706, "grad_norm": 1.6266577243804932, "learning_rate": 2.0398143847290853e-05, "epoch": 1.544, "step": 4825 }, { "loss": 0.0682, "grad_norm": 0.5183210372924805, "learning_rate": 2.0332029143084675e-05, "epoch": 1.5455999999999999, "step": 4830 }, { "loss": 0.0745, "grad_norm": 0.7144554853439331, "learning_rate": 2.02659667949853e-05, "epoch": 1.5472000000000001, "step": 4835 }, { "loss": 0.0663, "grad_norm": 0.4844765067100525, "learning_rate": 2.0199957160747977e-05, "epoch": 1.5488, "step": 4840 }, { "loss": 0.0636, "grad_norm": 0.7972126603126526, "learning_rate": 2.0134000597842506e-05, "epoch": 1.5504, "step": 4845 }, { "loss": 0.0672, "grad_norm": 0.5802795886993408, "learning_rate": 2.0068097463451265e-05, "epoch": 1.552, "step": 4850 }, { "loss": 0.0477, "grad_norm": 0.5143976807594299, "learning_rate": 2.0002248114467288e-05, "epoch": 1.5535999999999999, "step": 4855 }, { "loss": 0.0571, "grad_norm": 0.4149232804775238, "learning_rate": 1.9936452907492357e-05, "epoch": 1.5552000000000001, "step": 4860 }, { "loss": 0.0586, "grad_norm": 0.520021378993988, "learning_rate": 1.987071219883504e-05, "epoch": 1.5568, "step": 4865 }, { "loss": 0.0645, "grad_norm": 0.8288071155548096, "learning_rate": 1.9805026344508773e-05, "epoch": 1.5584, "step": 4870 }, { "loss": 0.0668, "grad_norm": 0.494091272354126, "learning_rate": 1.9739395700229943e-05, "epoch": 1.56, "step": 4875 }, { "loss": 0.0634, "grad_norm": 0.6778819561004639, "learning_rate": 1.9673820621415946e-05, "epoch": 1.5615999999999999, "step": 4880 }, { "loss": 0.072, "grad_norm": 0.4304638206958771, "learning_rate": 1.9608301463183263e-05, "epoch": 1.5632000000000001, "step": 4885 }, { "loss": 0.0628, "grad_norm": 0.6170662045478821, "learning_rate": 1.954283858034555e-05, "epoch": 1.5648, "step": 4890 }, { "loss": 0.0734, "grad_norm": 0.5407703518867493, "learning_rate": 1.94774323274117e-05, "epoch": 1.5664, "step": 4895 }, { "loss": 0.0606, "grad_norm": 0.5799199938774109, "learning_rate": 1.941208305858394e-05, "epoch": 1.568, "step": 4900 }, { "loss": 0.0643, "grad_norm": 0.6992772817611694, "learning_rate": 1.9346791127755897e-05, "epoch": 1.5695999999999999, "step": 4905 }, { "loss": 0.0516, "grad_norm": 0.5506798624992371, "learning_rate": 1.9281556888510682e-05, "epoch": 1.5712000000000002, "step": 4910 }, { "loss": 0.0539, "grad_norm": 0.5714758634567261, "learning_rate": 1.921638069411901e-05, "epoch": 1.5728, "step": 4915 }, { "loss": 0.057, "grad_norm": 0.6541491150856018, "learning_rate": 1.915126289753724e-05, "epoch": 1.5744, "step": 4920 }, { "loss": 0.0606, "grad_norm": 0.37992826104164124, "learning_rate": 1.908620385140547e-05, "epoch": 1.576, "step": 4925 }, { "loss": 0.0645, "grad_norm": 1.114528775215149, "learning_rate": 1.9021203908045637e-05, "epoch": 1.5776, "step": 4930 }, { "loss": 0.0645, "grad_norm": 0.3801097571849823, "learning_rate": 1.8956263419459666e-05, "epoch": 1.5792000000000002, "step": 4935 }, { "loss": 0.0535, "grad_norm": 1.211111307144165, "learning_rate": 1.8891382737327453e-05, "epoch": 1.5808, "step": 4940 }, { "loss": 0.0616, "grad_norm": 0.4274652600288391, "learning_rate": 1.8826562213005015e-05, "epoch": 1.5824, "step": 4945 }, { "loss": 0.0587, "grad_norm": 1.0418193340301514, "learning_rate": 1.876180219752264e-05, "epoch": 1.584, "step": 4950 }, { "loss": 0.0556, "grad_norm": 0.8618273735046387, "learning_rate": 1.8697103041582896e-05, "epoch": 1.5856, "step": 4955 }, { "loss": 0.0548, "grad_norm": 0.8356909155845642, "learning_rate": 1.8632465095558773e-05, "epoch": 1.5872000000000002, "step": 4960 }, { "loss": 0.0605, "grad_norm": 0.6360939741134644, "learning_rate": 1.856788870949178e-05, "epoch": 1.5888, "step": 4965 }, { "loss": 0.0559, "grad_norm": 0.3836934268474579, "learning_rate": 1.8503374233090083e-05, "epoch": 1.5904, "step": 4970 }, { "loss": 0.0533, "grad_norm": 0.7138825058937073, "learning_rate": 1.8438922015726557e-05, "epoch": 1.592, "step": 4975 }, { "loss": 0.0405, "grad_norm": 0.30180883407592773, "learning_rate": 1.8374532406436914e-05, "epoch": 1.5936, "step": 4980 }, { "loss": 0.063, "grad_norm": 0.7802498936653137, "learning_rate": 1.8310205753917836e-05, "epoch": 1.5952, "step": 4985 }, { "loss": 0.0588, "grad_norm": 0.5062848329544067, "learning_rate": 1.824594240652505e-05, "epoch": 1.5968, "step": 4990 }, { "loss": 0.0732, "grad_norm": 0.9171712398529053, "learning_rate": 1.818174271227147e-05, "epoch": 1.5984, "step": 4995 }, { "loss": 0.0471, "grad_norm": 0.766470193862915, "learning_rate": 1.8117607018825297e-05, "epoch": 1.6, "step": 5000 }, { "eval_loss": 0.08992542326450348, "eval_f1": 0.7501472031403337, "eval_recall": 0.7771451809678731, "eval_accuracy": 0.967966573816156, "eval_precision": 0.7249620637329287, "eval_classification_report": { "LOC": { "precision": 0.6383154417836499, "recall": 0.7111315547378105, "f1-score": 0.6727589208006962, "support": 1087 }, "ORG": { "precision": 0.6534988713318285, "recall": 0.6835891381345927, "f1-score": 0.6682054241200232, "support": 1694 }, "PER": { "precision": 0.8354020507285483, "recall": 0.9023608277470125, "f1-score": 0.8675914249684743, "support": 3431 }, "PRD": { "precision": 0.579163248564397, "recall": 0.6060085836909871, "f1-score": 0.5922818791946308, "support": 1165 }, "micro avg": { "precision": 0.7249620637329287, "recall": 0.7771451809678731, "f1-score": 0.7501472031403337, "support": 7377 }, "macro avg": { "precision": 0.6765949031021059, "recall": 0.7257725260776007, "f1-score": 0.7002094122709561, "support": 7377 }, "weighted avg": { "precision": 0.7241243857777032, "recall": 0.7771451809678731, "f1-score": 0.7496195612983946, "support": 7377 } }, "eval_runtime": 2.389, "eval_samples_per_second": 1714.559, "eval_steps_per_second": 13.395, "epoch": 1.6, "step": 5000 }, { "loss": 0.0651, "grad_norm": 0.7612237930297852, "learning_rate": 1.805353567350815e-05, "epoch": 1.6016, "step": 5005 }, { "loss": 0.0617, "grad_norm": 0.5750963687896729, "learning_rate": 1.798952902329316e-05, "epoch": 1.6032, "step": 5010 }, { "loss": 0.0831, "grad_norm": 0.7880896329879761, "learning_rate": 1.7925587414803108e-05, "epoch": 1.6048, "step": 5015 }, { "loss": 0.0546, "grad_norm": 0.7208923101425171, "learning_rate": 1.786171119430857e-05, "epoch": 1.6064, "step": 5020 }, { "loss": 0.0496, "grad_norm": 0.579612672328949, "learning_rate": 1.7797900707726e-05, "epoch": 1.608, "step": 5025 }, { "loss": 0.0561, "grad_norm": 0.587574303150177, "learning_rate": 1.7734156300615866e-05, "epoch": 1.6096, "step": 5030 }, { "loss": 0.0483, "grad_norm": 0.7044540643692017, "learning_rate": 1.7670478318180814e-05, "epoch": 1.6112, "step": 5035 }, { "loss": 0.0594, "grad_norm": 0.506405770778656, "learning_rate": 1.760686710526375e-05, "epoch": 1.6128, "step": 5040 }, { "loss": 0.0611, "grad_norm": 0.9228460788726807, "learning_rate": 1.7543323006346013e-05, "epoch": 1.6143999999999998, "step": 5045 }, { "loss": 0.0764, "grad_norm": 0.585800051689148, "learning_rate": 1.747984636554547e-05, "epoch": 1.616, "step": 5050 }, { "loss": 0.0492, "grad_norm": 0.5618112683296204, "learning_rate": 1.7416437526614687e-05, "epoch": 1.6176, "step": 5055 }, { "loss": 0.0738, "grad_norm": 0.6069150567054749, "learning_rate": 1.7353096832939075e-05, "epoch": 1.6192, "step": 5060 }, { "loss": 0.0547, "grad_norm": 0.34684813022613525, "learning_rate": 1.728982462753496e-05, "epoch": 1.6208, "step": 5065 }, { "loss": 0.0504, "grad_norm": 0.5947714447975159, "learning_rate": 1.7226621253047823e-05, "epoch": 1.6223999999999998, "step": 5070 }, { "loss": 0.0538, "grad_norm": 0.45219752192497253, "learning_rate": 1.7163487051750383e-05, "epoch": 1.624, "step": 5075 }, { "loss": 0.0572, "grad_norm": 0.8056257963180542, "learning_rate": 1.7100422365540728e-05, "epoch": 1.6256, "step": 5080 }, { "loss": 0.0599, "grad_norm": 0.5458232760429382, "learning_rate": 1.703742753594053e-05, "epoch": 1.6272, "step": 5085 }, { "loss": 0.0565, "grad_norm": 0.737158477306366, "learning_rate": 1.697450290409316e-05, "epoch": 1.6288, "step": 5090 }, { "loss": 0.0636, "grad_norm": 0.5676600337028503, "learning_rate": 1.69116488107618e-05, "epoch": 1.6303999999999998, "step": 5095 }, { "loss": 0.0588, "grad_norm": 1.1098604202270508, "learning_rate": 1.6848865596327676e-05, "epoch": 1.6320000000000001, "step": 5100 }, { "loss": 0.0773, "grad_norm": 0.5632255673408508, "learning_rate": 1.6786153600788164e-05, "epoch": 1.6336, "step": 5105 }, { "loss": 0.0695, "grad_norm": 0.5433540940284729, "learning_rate": 1.6723513163754934e-05, "epoch": 1.6352, "step": 5110 }, { "loss": 0.0544, "grad_norm": 0.6632165312767029, "learning_rate": 1.6660944624452186e-05, "epoch": 1.6368, "step": 5115 }, { "loss": 0.0535, "grad_norm": 0.4192460775375366, "learning_rate": 1.659844832171471e-05, "epoch": 1.6383999999999999, "step": 5120 }, { "loss": 0.0632, "grad_norm": 0.6999644041061401, "learning_rate": 1.6536024593986138e-05, "epoch": 1.6400000000000001, "step": 5125 }, { "loss": 0.0442, "grad_norm": 0.4887322783470154, "learning_rate": 1.6473673779317085e-05, "epoch": 1.6416, "step": 5130 }, { "loss": 0.0579, "grad_norm": 0.6644463539123535, "learning_rate": 1.641139621536328e-05, "epoch": 1.6432, "step": 5135 }, { "loss": 0.0586, "grad_norm": 0.5576483011245728, "learning_rate": 1.634919223938379e-05, "epoch": 1.6448, "step": 5140 }, { "loss": 0.0533, "grad_norm": 0.5672375559806824, "learning_rate": 1.6287062188239186e-05, "epoch": 1.6463999999999999, "step": 5145 }, { "loss": 0.0689, "grad_norm": 0.7527726292610168, "learning_rate": 1.6225006398389646e-05, "epoch": 1.6480000000000001, "step": 5150 }, { "loss": 0.0569, "grad_norm": 0.5524255037307739, "learning_rate": 1.616302520589328e-05, "epoch": 1.6496, "step": 5155 }, { "loss": 0.0539, "grad_norm": 0.7990140914916992, "learning_rate": 1.6101118946404156e-05, "epoch": 1.6512, "step": 5160 }, { "loss": 0.0735, "grad_norm": 0.5248995423316956, "learning_rate": 1.6039287955170556e-05, "epoch": 1.6528, "step": 5165 }, { "loss": 0.0703, "grad_norm": 0.8196117281913757, "learning_rate": 1.5977532567033174e-05, "epoch": 1.6543999999999999, "step": 5170 }, { "loss": 0.053, "grad_norm": 0.5602748394012451, "learning_rate": 1.5915853116423285e-05, "epoch": 1.6560000000000001, "step": 5175 }, { "loss": 0.0512, "grad_norm": 0.5342245101928711, "learning_rate": 1.5854249937360895e-05, "epoch": 1.6576, "step": 5180 }, { "loss": 0.0745, "grad_norm": 0.44414862990379333, "learning_rate": 1.5792723363453003e-05, "epoch": 1.6592, "step": 5185 }, { "loss": 0.0553, "grad_norm": 0.5646689534187317, "learning_rate": 1.573127372789174e-05, "epoch": 1.6608, "step": 5190 }, { "loss": 0.0591, "grad_norm": 0.5511468052864075, "learning_rate": 1.5669901363452612e-05, "epoch": 1.6623999999999999, "step": 5195 }, { "loss": 0.0738, "grad_norm": 0.5547064542770386, "learning_rate": 1.5608606602492626e-05, "epoch": 1.6640000000000001, "step": 5200 }, { "eval_loss": 0.09016244113445282, "eval_f1": 0.7520103761348897, "eval_recall": 0.7859563508201166, "eval_accuracy": 0.9679411585303865, "eval_precision": 0.7208752952878279, "eval_classification_report": { "LOC": { "precision": 0.6305882352941177, "recall": 0.7396504139834407, "f1-score": 0.68077900084674, "support": 1087 }, "ORG": { "precision": 0.6509695290858726, "recall": 0.6936245572609209, "f1-score": 0.6716204629894255, "support": 1694 }, "PER": { "precision": 0.8265009256810367, "recall": 0.9108131740017488, "f1-score": 0.8666112035496394, "support": 3431 }, "PRD": { "precision": 0.5871404399323181, "recall": 0.5957081545064378, "f1-score": 0.5913932680017043, "support": 1165 }, "micro avg": { "precision": 0.7208752952878279, "recall": 0.7859563508201166, "f1-score": 0.7520103761348897, "support": 7377 }, "macro avg": { "precision": 0.6737997824983362, "recall": 0.7349490749381371, "f1-score": 0.7026009838468774, "support": 7377 }, "weighted avg": { "precision": 0.7195248858030311, "recall": 0.7859563508201166, "f1-score": 0.7509892957605112, "support": 7377 } }, "eval_runtime": 2.6399, "eval_samples_per_second": 1551.565, "eval_steps_per_second": 12.122, "epoch": 1.6640000000000001, "step": 5200 }, { "loss": 0.0604, "grad_norm": 0.7207816243171692, "learning_rate": 1.554738977694854e-05, "epoch": 1.6656, "step": 5205 }, { "loss": 0.0652, "grad_norm": 0.742792010307312, "learning_rate": 1.5486251218335116e-05, "epoch": 1.6672, "step": 5210 }, { "loss": 0.0632, "grad_norm": 0.40902987122535706, "learning_rate": 1.5425191257743206e-05, "epoch": 1.6688, "step": 5215 }, { "loss": 0.0548, "grad_norm": 0.42884084582328796, "learning_rate": 1.5364210225838017e-05, "epoch": 1.6703999999999999, "step": 5220 }, { "loss": 0.059, "grad_norm": 0.5977588891983032, "learning_rate": 1.5303308452857395e-05, "epoch": 1.6720000000000002, "step": 5225 }, { "loss": 0.0663, "grad_norm": 0.7355903387069702, "learning_rate": 1.5242486268609884e-05, "epoch": 1.6736, "step": 5230 }, { "loss": 0.0592, "grad_norm": 0.6354838013648987, "learning_rate": 1.5181744002473078e-05, "epoch": 1.6752, "step": 5235 }, { "loss": 0.0555, "grad_norm": 0.40993958711624146, "learning_rate": 1.5121081983391741e-05, "epoch": 1.6768, "step": 5240 }, { "loss": 0.0632, "grad_norm": 1.3255341053009033, "learning_rate": 1.5060500539876096e-05, "epoch": 1.6784, "step": 5245 }, { "loss": 0.0518, "grad_norm": 0.5360626578330994, "learning_rate": 1.5000000000000009e-05, "epoch": 1.6800000000000002, "step": 5250 }, { "loss": 0.0585, "grad_norm": 0.6329696774482727, "learning_rate": 1.4939580691399195e-05, "epoch": 1.6816, "step": 5255 }, { "loss": 0.0533, "grad_norm": 0.6233601570129395, "learning_rate": 1.4879242941269501e-05, "epoch": 1.6832, "step": 5260 }, { "loss": 0.0528, "grad_norm": 0.4295250177383423, "learning_rate": 1.4818987076365093e-05, "epoch": 1.6848, "step": 5265 }, { "loss": 0.0459, "grad_norm": 0.44546204805374146, "learning_rate": 1.4758813422996669e-05, "epoch": 1.6864, "step": 5270 }, { "loss": 0.0514, "grad_norm": 0.6285777688026428, "learning_rate": 1.4698722307029744e-05, "epoch": 1.688, "step": 5275 }, { "loss": 0.0547, "grad_norm": 1.315048336982727, "learning_rate": 1.463871405388286e-05, "epoch": 1.6896, "step": 5280 }, { "loss": 0.0595, "grad_norm": 1.3447016477584839, "learning_rate": 1.4578788988525795e-05, "epoch": 1.6912, "step": 5285 }, { "loss": 0.0593, "grad_norm": 0.6958591938018799, "learning_rate": 1.4518947435477857e-05, "epoch": 1.6928, "step": 5290 }, { "loss": 0.0616, "grad_norm": 0.6774111986160278, "learning_rate": 1.44591897188061e-05, "epoch": 1.6944, "step": 5295 }, { "loss": 0.0562, "grad_norm": 0.6731957793235779, "learning_rate": 1.4399516162123541e-05, "epoch": 1.696, "step": 5300 }, { "loss": 0.0547, "grad_norm": 0.5012421607971191, "learning_rate": 1.4339927088587461e-05, "epoch": 1.6976, "step": 5305 }, { "loss": 0.0516, "grad_norm": 0.8082280158996582, "learning_rate": 1.4280422820897632e-05, "epoch": 1.6992, "step": 5310 }, { "loss": 0.0548, "grad_norm": 0.4952707290649414, "learning_rate": 1.422100368129454e-05, "epoch": 1.7008, "step": 5315 }, { "loss": 0.0462, "grad_norm": 0.45786386728286743, "learning_rate": 1.4161669991557707e-05, "epoch": 1.7024, "step": 5320 }, { "loss": 0.0606, "grad_norm": 0.44391462206840515, "learning_rate": 1.4102422073003858e-05, "epoch": 1.704, "step": 5325 }, { "loss": 0.0454, "grad_norm": 0.5133768916130066, "learning_rate": 1.4043260246485272e-05, "epoch": 1.7056, "step": 5330 }, { "loss": 0.0545, "grad_norm": 1.3031102418899536, "learning_rate": 1.3984184832388002e-05, "epoch": 1.7072, "step": 5335 }, { "loss": 0.0464, "grad_norm": 0.7489848732948303, "learning_rate": 1.3925196150630096e-05, "epoch": 1.7088, "step": 5340 }, { "loss": 0.062, "grad_norm": 0.9719669222831726, "learning_rate": 1.3866294520659987e-05, "epoch": 1.7104, "step": 5345 }, { "loss": 0.0396, "grad_norm": 0.4147973358631134, "learning_rate": 1.3807480261454625e-05, "epoch": 1.712, "step": 5350 }, { "loss": 0.0699, "grad_norm": 0.7659831643104553, "learning_rate": 1.374875369151781e-05, "epoch": 1.7136, "step": 5355 }, { "loss": 0.054, "grad_norm": 0.6687998175621033, "learning_rate": 1.3690115128878503e-05, "epoch": 1.7151999999999998, "step": 5360 }, { "loss": 0.0492, "grad_norm": 0.4139270484447479, "learning_rate": 1.3631564891089062e-05, "epoch": 1.7168, "step": 5365 }, { "loss": 0.0559, "grad_norm": 0.8909143805503845, "learning_rate": 1.3573103295223497e-05, "epoch": 1.7184, "step": 5370 }, { "loss": 0.0584, "grad_norm": 0.5491580367088318, "learning_rate": 1.3514730657875821e-05, "epoch": 1.72, "step": 5375 }, { "loss": 0.065, "grad_norm": 0.6440926790237427, "learning_rate": 1.345644729515828e-05, "epoch": 1.7216, "step": 5380 }, { "loss": 0.0606, "grad_norm": 0.7107974290847778, "learning_rate": 1.3398253522699681e-05, "epoch": 1.7231999999999998, "step": 5385 }, { "loss": 0.0697, "grad_norm": 0.6807038187980652, "learning_rate": 1.3340149655643639e-05, "epoch": 1.7248, "step": 5390 }, { "loss": 0.0586, "grad_norm": 0.7593914866447449, "learning_rate": 1.3282136008646873e-05, "epoch": 1.7264, "step": 5395 }, { "loss": 0.063, "grad_norm": 0.410159707069397, "learning_rate": 1.3224212895877601e-05, "epoch": 1.728, "step": 5400 }, { "eval_loss": 0.09004965424537659, "eval_f1": 0.7501796094311279, "eval_recall": 0.778500745560526, "eval_accuracy": 0.968159729988004, "eval_precision": 0.7238467355684396, "eval_classification_report": { "LOC": { "precision": 0.6470588235294118, "recall": 0.6982520699172033, "f1-score": 0.6716814159292036, "support": 1087 }, "ORG": { "precision": 0.6537180910099889, "recall": 0.6953955135773318, "f1-score": 0.6739130434782609, "support": 1694 }, "PER": { "precision": 0.8385515927035121, "recall": 0.8976974642961236, "f1-score": 0.8671171171171171, "support": 3431 }, "PRD": { "precision": 0.5645412130637636, "recall": 0.623175965665236, "f1-score": 0.5924112607099142, "support": 1165 }, "micro avg": { "precision": 0.7238467355684396, "recall": 0.778500745560526, "f1-score": 0.7501796094311279, "support": 7377 }, "macro avg": { "precision": 0.675967430076669, "recall": 0.7286302533639737, "f1-score": 0.701280709308624, "support": 7377 }, "weighted avg": { "precision": 0.7246187359539684, "recall": 0.778500745560526, "f1-score": 0.7505712813234509, "support": 7377 } }, "eval_runtime": 2.6917, "eval_samples_per_second": 1521.735, "eval_steps_per_second": 11.889, "epoch": 1.728, "step": 5400 }, { "loss": 0.0536, "grad_norm": 2.2031803131103516, "learning_rate": 1.3166380631013676e-05, "epoch": 1.7296, "step": 5405 }, { "loss": 0.0522, "grad_norm": 0.6857964396476746, "learning_rate": 1.3108639527241015e-05, "epoch": 1.7311999999999999, "step": 5410 }, { "loss": 0.0604, "grad_norm": 0.513286828994751, "learning_rate": 1.3050989897251865e-05, "epoch": 1.7328000000000001, "step": 5415 }, { "loss": 0.0583, "grad_norm": 0.8243392109870911, "learning_rate": 1.2993432053243072e-05, "epoch": 1.7344, "step": 5420 }, { "loss": 0.042, "grad_norm": 2.6605751514434814, "learning_rate": 1.2935966306914455e-05, "epoch": 1.736, "step": 5425 }, { "loss": 0.0543, "grad_norm": 0.9517694115638733, "learning_rate": 1.2878592969467054e-05, "epoch": 1.7376, "step": 5430 }, { "loss": 0.06, "grad_norm": 0.7506289482116699, "learning_rate": 1.2821312351601503e-05, "epoch": 1.7391999999999999, "step": 5435 }, { "loss": 0.0631, "grad_norm": 1.1797980070114136, "learning_rate": 1.2764124763516323e-05, "epoch": 1.7408000000000001, "step": 5440 }, { "loss": 0.0539, "grad_norm": 0.8965333700180054, "learning_rate": 1.2707030514906207e-05, "epoch": 1.7424, "step": 5445 }, { "loss": 0.044, "grad_norm": 0.4877997934818268, "learning_rate": 1.2650029914960406e-05, "epoch": 1.744, "step": 5450 }, { "loss": 0.0577, "grad_norm": 0.8100159764289856, "learning_rate": 1.2593123272361026e-05, "epoch": 1.7456, "step": 5455 }, { "loss": 0.0563, "grad_norm": 0.4702410101890564, "learning_rate": 1.2536310895281322e-05, "epoch": 1.7471999999999999, "step": 5460 }, { "loss": 0.0488, "grad_norm": 0.7850738763809204, "learning_rate": 1.2479593091384093e-05, "epoch": 1.7488000000000001, "step": 5465 }, { "loss": 0.0662, "grad_norm": 0.7510388493537903, "learning_rate": 1.2422970167819992e-05, "epoch": 1.7504, "step": 5470 }, { "loss": 0.0611, "grad_norm": 0.5892331600189209, "learning_rate": 1.236644243122581e-05, "epoch": 1.752, "step": 5475 }, { "loss": 0.0726, "grad_norm": 0.5672028660774231, "learning_rate": 1.2310010187722904e-05, "epoch": 1.7536, "step": 5480 }, { "loss": 0.0594, "grad_norm": 0.8108931183815002, "learning_rate": 1.2253673742915488e-05, "epoch": 1.7551999999999999, "step": 5485 }, { "loss": 0.0439, "grad_norm": 0.5195989012718201, "learning_rate": 1.2197433401888961e-05, "epoch": 1.7568000000000001, "step": 5490 }, { "loss": 0.0513, "grad_norm": 1.0256688594818115, "learning_rate": 1.2141289469208307e-05, "epoch": 1.7584, "step": 5495 }, { "loss": 0.0541, "grad_norm": 0.6065859198570251, "learning_rate": 1.2085242248916423e-05, "epoch": 1.76, "step": 5500 }, { "loss": 0.0635, "grad_norm": 0.5748668909072876, "learning_rate": 1.202929204453243e-05, "epoch": 1.7616, "step": 5505 }, { "loss": 0.0461, "grad_norm": 0.5684790015220642, "learning_rate": 1.197343915905011e-05, "epoch": 1.7631999999999999, "step": 5510 }, { "loss": 0.0613, "grad_norm": 0.8853943943977356, "learning_rate": 1.191768389493619e-05, "epoch": 1.7648000000000001, "step": 5515 }, { "loss": 0.0585, "grad_norm": 0.5165310502052307, "learning_rate": 1.1862026554128756e-05, "epoch": 1.7664, "step": 5520 }, { "loss": 0.0656, "grad_norm": 0.8656335473060608, "learning_rate": 1.1806467438035609e-05, "epoch": 1.768, "step": 5525 }, { "loss": 0.073, "grad_norm": 0.5189085602760315, "learning_rate": 1.1751006847532564e-05, "epoch": 1.7696, "step": 5530 }, { "loss": 0.0637, "grad_norm": 0.7718567252159119, "learning_rate": 1.1695645082961973e-05, "epoch": 1.7711999999999999, "step": 5535 }, { "loss": 0.0625, "grad_norm": 1.012103796005249, "learning_rate": 1.1640382444130927e-05, "epoch": 1.7728000000000002, "step": 5540 }, { "loss": 0.0421, "grad_norm": 0.5390584468841553, "learning_rate": 1.1585219230309725e-05, "epoch": 1.7744, "step": 5545 }, { "loss": 0.0572, "grad_norm": 0.5569730401039124, "learning_rate": 1.1530155740230253e-05, "epoch": 1.776, "step": 5550 }, { "loss": 0.0675, "grad_norm": 0.6209940314292908, "learning_rate": 1.1475192272084353e-05, "epoch": 1.7776, "step": 5555 }, { "loss": 0.0581, "grad_norm": 0.640369176864624, "learning_rate": 1.1420329123522206e-05, "epoch": 1.7792, "step": 5560 }, { "loss": 0.0537, "grad_norm": 0.7028957009315491, "learning_rate": 1.1365566591650693e-05, "epoch": 1.7808000000000002, "step": 5565 }, { "loss": 0.0506, "grad_norm": 0.5617539286613464, "learning_rate": 1.131090497303184e-05, "epoch": 1.7824, "step": 5570 }, { "loss": 0.0493, "grad_norm": 0.6098236441612244, "learning_rate": 1.1256344563681202e-05, "epoch": 1.784, "step": 5575 }, { "loss": 0.0636, "grad_norm": 0.7526241540908813, "learning_rate": 1.1201885659066196e-05, "epoch": 1.7856, "step": 5580 }, { "loss": 0.0607, "grad_norm": 0.5181493759155273, "learning_rate": 1.1147528554104564e-05, "epoch": 1.7872, "step": 5585 }, { "loss": 0.063, "grad_norm": 0.7684550881385803, "learning_rate": 1.1093273543162807e-05, "epoch": 1.7888, "step": 5590 }, { "loss": 0.0518, "grad_norm": 0.45765841007232666, "learning_rate": 1.1039120920054474e-05, "epoch": 1.7904, "step": 5595 }, { "loss": 0.0474, "grad_norm": 0.4420490264892578, "learning_rate": 1.098507097803869e-05, "epoch": 1.792, "step": 5600 }, { "eval_loss": 0.0899905115365982, "eval_f1": 0.7504558478770513, "eval_recall": 0.7810763182865663, "eval_accuracy": 0.9682359758453124, "eval_precision": 0.7221456322847475, "eval_classification_report": { "LOC": { "precision": 0.6540120793787748, "recall": 0.6973321067157314, "f1-score": 0.6749777382012467, "support": 1087 }, "ORG": { "precision": 0.6482796286182414, "recall": 0.7007083825265643, "f1-score": 0.6734751773049644, "support": 1694 }, "PER": { "precision": 0.8355868036776636, "recall": 0.9006120664529291, "f1-score": 0.8668817505961565, "support": 3431 }, "PRD": { "precision": 0.5631293570875291, "recall": 0.6240343347639485, "f1-score": 0.5920195439739414, "support": 1165 }, "micro avg": { "precision": 0.7221456322847475, "recall": 0.7810763182865663, "f1-score": 0.7504558478770513, "support": 7377 }, "macro avg": { "precision": 0.6752519671905521, "recall": 0.7306717226147933, "f1-score": 0.7018385525190773, "support": 7377 }, "weighted avg": { "precision": 0.7227925776859243, "recall": 0.7810763182865663, "f1-score": 0.7507851168231557, "support": 7377 } }, "eval_runtime": 2.5096, "eval_samples_per_second": 1632.1, "eval_steps_per_second": 12.751, "epoch": 1.792, "step": 5600 }, { "loss": 0.061, "grad_norm": 0.64898282289505, "learning_rate": 1.0931124009818508e-05, "epoch": 1.7936, "step": 5605 }, { "loss": 0.049, "grad_norm": 0.35064372420310974, "learning_rate": 1.087728030753931e-05, "epoch": 1.7952, "step": 5610 }, { "loss": 0.0559, "grad_norm": 0.7362012267112732, "learning_rate": 1.0823540162787281e-05, "epoch": 1.7968, "step": 5615 }, { "loss": 0.0487, "grad_norm": 0.7440046668052673, "learning_rate": 1.0769903866587797e-05, "epoch": 1.7984, "step": 5620 }, { "loss": 0.0621, "grad_norm": 0.9273952841758728, "learning_rate": 1.071637170940382e-05, "epoch": 1.8, "step": 5625 }, { "loss": 0.0487, "grad_norm": 0.39958876371383667, "learning_rate": 1.06629439811344e-05, "epoch": 1.8016, "step": 5630 }, { "loss": 0.0486, "grad_norm": 0.5148971676826477, "learning_rate": 1.0609620971113012e-05, "epoch": 1.8032, "step": 5635 }, { "loss": 0.0641, "grad_norm": 0.5023757815361023, "learning_rate": 1.0556402968106075e-05, "epoch": 1.8048, "step": 5640 }, { "loss": 0.0599, "grad_norm": 0.6529368758201599, "learning_rate": 1.050329026031135e-05, "epoch": 1.8064, "step": 5645 }, { "loss": 0.0704, "grad_norm": 0.7101647257804871, "learning_rate": 1.0450283135356347e-05, "epoch": 1.808, "step": 5650 }, { "loss": 0.0688, "grad_norm": 0.8530227541923523, "learning_rate": 1.0397381880296837e-05, "epoch": 1.8096, "step": 5655 }, { "loss": 0.0585, "grad_norm": 0.6301195621490479, "learning_rate": 1.0344586781615255e-05, "epoch": 1.8112, "step": 5660 }, { "loss": 0.0619, "grad_norm": 0.5728119015693665, "learning_rate": 1.0291898125219129e-05, "epoch": 1.8128, "step": 5665 }, { "loss": 0.0493, "grad_norm": 0.5901598930358887, "learning_rate": 1.0239316196439584e-05, "epoch": 1.8144, "step": 5670 }, { "loss": 0.067, "grad_norm": 0.6017441153526306, "learning_rate": 1.0186841280029774e-05, "epoch": 1.8159999999999998, "step": 5675 }, { "loss": 0.0594, "grad_norm": 1.1164287328720093, "learning_rate": 1.0134473660163305e-05, "epoch": 1.8176, "step": 5680 }, { "loss": 0.0671, "grad_norm": 0.46879228949546814, "learning_rate": 1.0082213620432754e-05, "epoch": 1.8192, "step": 5685 }, { "loss": 0.0468, "grad_norm": 0.597293496131897, "learning_rate": 1.003006144384811e-05, "epoch": 1.8208, "step": 5690 }, { "loss": 0.0527, "grad_norm": 1.1491529941558838, "learning_rate": 9.978017412835205e-06, "epoch": 1.8224, "step": 5695 }, { "loss": 0.0607, "grad_norm": 0.4876425266265869, "learning_rate": 9.926081809234264e-06, "epoch": 1.8239999999999998, "step": 5700 }, { "loss": 0.0679, "grad_norm": 1.8595439195632935, "learning_rate": 9.874254914298277e-06, "epoch": 1.8256000000000001, "step": 5705 }, { "loss": 0.0521, "grad_norm": 0.34880518913269043, "learning_rate": 9.8225370086916e-06, "epoch": 1.8272, "step": 5710 }, { "loss": 0.0429, "grad_norm": 0.32612618803977966, "learning_rate": 9.770928372488308e-06, "epoch": 1.8288, "step": 5715 }, { "loss": 0.0477, "grad_norm": 0.43178775906562805, "learning_rate": 9.719429285170738e-06, "epoch": 1.8304, "step": 5720 }, { "loss": 0.0621, "grad_norm": 0.6590454578399658, "learning_rate": 9.668040025628035e-06, "epoch": 1.8319999999999999, "step": 5725 }, { "loss": 0.052, "grad_norm": 0.6318001747131348, "learning_rate": 9.616760872154513e-06, "epoch": 1.8336000000000001, "step": 5730 }, { "loss": 0.0551, "grad_norm": 0.592621922492981, "learning_rate": 9.565592102448235e-06, "epoch": 1.8352, "step": 5735 }, { "loss": 0.0525, "grad_norm": 0.5423243641853333, "learning_rate": 9.514533993609504e-06, "epoch": 1.8368, "step": 5740 }, { "loss": 0.0702, "grad_norm": 1.3273900747299194, "learning_rate": 9.463586822139339e-06, "epoch": 1.8384, "step": 5745 }, { "loss": 0.0812, "grad_norm": 0.6305306553840637, "learning_rate": 9.412750863937996e-06, "epoch": 1.8399999999999999, "step": 5750 }, { "loss": 0.0605, "grad_norm": 0.8204154968261719, "learning_rate": 9.362026394303443e-06, "epoch": 1.8416000000000001, "step": 5755 }, { "loss": 0.0567, "grad_norm": 0.6160128116607666, "learning_rate": 9.311413687929906e-06, "epoch": 1.8432, "step": 5760 }, { "loss": 0.0497, "grad_norm": 0.6393466591835022, "learning_rate": 9.260913018906375e-06, "epoch": 1.8448, "step": 5765 }, { "loss": 0.0564, "grad_norm": 0.5231694579124451, "learning_rate": 9.210524660715086e-06, "epoch": 1.8464, "step": 5770 }, { "loss": 0.0525, "grad_norm": 0.3782692551612854, "learning_rate": 9.16024888623008e-06, "epoch": 1.8479999999999999, "step": 5775 }, { "loss": 0.067, "grad_norm": 0.5416700839996338, "learning_rate": 9.110085967715723e-06, "epoch": 1.8496000000000001, "step": 5780 }, { "loss": 0.0577, "grad_norm": 0.5100595951080322, "learning_rate": 9.060036176825188e-06, "epoch": 1.8512, "step": 5785 }, { "loss": 0.0479, "grad_norm": 0.45913222432136536, "learning_rate": 9.010099784599036e-06, "epoch": 1.8528, "step": 5790 }, { "loss": 0.0714, "grad_norm": 1.3072829246520996, "learning_rate": 8.960277061463735e-06, "epoch": 1.8544, "step": 5795 }, { "loss": 0.0625, "grad_norm": 0.6462113261222839, "learning_rate": 8.910568277230148e-06, "epoch": 1.8559999999999999, "step": 5800 }, { "eval_loss": 0.08891498297452927, "eval_f1": 0.753665110775097, "eval_recall": 0.7770096245086079, "eval_accuracy": 0.9682868064168514, "eval_precision": 0.7316824100076589, "eval_classification_report": { "LOC": { "precision": 0.6537102473498233, "recall": 0.6807727690892365, "f1-score": 0.6669671022983326, "support": 1087 }, "ORG": { "precision": 0.6558726673984633, "recall": 0.70543093270366, "f1-score": 0.6797497155858931, "support": 1694 }, "PER": { "precision": 0.8421913327882257, "recall": 0.9006120664529291, "f1-score": 0.8704225352112677, "support": 3431 }, "PRD": { "precision": 0.5838150289017341, "recall": 0.6068669527896996, "f1-score": 0.5951178451178452, "support": 1165 }, "micro avg": { "precision": 0.7316824100076589, "recall": 0.7770096245086079, "f1-score": 0.753665110775097, "support": 7377 }, "macro avg": { "precision": 0.6838973191095616, "recall": 0.7234206802588813, "f1-score": 0.7030642995533346, "support": 7377 }, "weighted avg": { "precision": 0.7308301896311749, "recall": 0.7770096245086079, "f1-score": 0.7531816817504324, "support": 7377 } }, "eval_runtime": 5.1913, "eval_samples_per_second": 789.016, "eval_steps_per_second": 6.164, "epoch": 1.8559999999999999, "step": 5800 } ], "best_metric": null, "best_model_checkpoint": null, "is_local_process_zero": true, "is_world_process_zero": true, "is_hyper_param_search": false, "trial_name": null, "trial_params": null, "stateful_callbacks": { "TrainerControl": { "args": { "should_training_stop": false, "should_epoch_stop": false, "should_save": false, "should_evaluate": false, "should_log": false }, "attributes": {} } } }