End of training
Browse files- README.md +14 -13
- all_results.json +26 -0
- eval_results.json +12 -0
- predict_results.json +10 -0
- predictions.txt +0 -0
- tb/events.out.tfevents.1725570294.c3806e32a2f8.1237.1 +3 -0
- train.log +48 -0
- train_results.json +9 -0
- trainer_state.json +218 -0
README.md
CHANGED
@@ -3,9 +3,10 @@ library_name: transformers
|
|
3 |
license: apache-2.0
|
4 |
base_model: michiyasunaga/BioLinkBERT-base
|
5 |
tags:
|
|
|
6 |
- generated_from_trainer
|
7 |
datasets:
|
8 |
-
- drugtemist-en-9-ner
|
9 |
metrics:
|
10 |
- precision
|
11 |
- recall
|
@@ -18,24 +19,24 @@ model-index:
|
|
18 |
name: Token Classification
|
19 |
type: token-classification
|
20 |
dataset:
|
21 |
-
name: drugtemist-en-9-ner
|
22 |
-
type: drugtemist-en-9-ner
|
23 |
config: DrugTEMIST English NER
|
24 |
split: validation
|
25 |
args: DrugTEMIST English NER
|
26 |
metrics:
|
27 |
- name: Precision
|
28 |
type: precision
|
29 |
-
value: 0.
|
30 |
- name: Recall
|
31 |
type: recall
|
32 |
-
value: 0.
|
33 |
- name: F1
|
34 |
type: f1
|
35 |
-
value: 0.
|
36 |
- name: Accuracy
|
37 |
type: accuracy
|
38 |
-
value: 0.
|
39 |
---
|
40 |
|
41 |
<!-- This model card has been generated automatically according to the information the Trainer had access to. You
|
@@ -43,13 +44,13 @@ should probably proofread and complete it, then remove this comment. -->
|
|
43 |
|
44 |
# output
|
45 |
|
46 |
-
This model is a fine-tuned version of [michiyasunaga/BioLinkBERT-base](https://huggingface.co/michiyasunaga/BioLinkBERT-base) on the drugtemist-en-9-ner dataset.
|
47 |
It achieves the following results on the evaluation set:
|
48 |
-
- Loss: 0.
|
49 |
-
- Precision: 0.
|
50 |
-
- Recall: 0.
|
51 |
-
- F1: 0.
|
52 |
-
- Accuracy: 0.
|
53 |
|
54 |
## Model description
|
55 |
|
|
|
3 |
license: apache-2.0
|
4 |
base_model: michiyasunaga/BioLinkBERT-base
|
5 |
tags:
|
6 |
+
- token-classification
|
7 |
- generated_from_trainer
|
8 |
datasets:
|
9 |
+
- Rodrigo1771/drugtemist-en-9-ner
|
10 |
metrics:
|
11 |
- precision
|
12 |
- recall
|
|
|
19 |
name: Token Classification
|
20 |
type: token-classification
|
21 |
dataset:
|
22 |
+
name: Rodrigo1771/drugtemist-en-9-ner
|
23 |
+
type: Rodrigo1771/drugtemist-en-9-ner
|
24 |
config: DrugTEMIST English NER
|
25 |
split: validation
|
26 |
args: DrugTEMIST English NER
|
27 |
metrics:
|
28 |
- name: Precision
|
29 |
type: precision
|
30 |
+
value: 0.9297597042513863
|
31 |
- name: Recall
|
32 |
type: recall
|
33 |
+
value: 0.9375582479030755
|
34 |
- name: F1
|
35 |
type: f1
|
36 |
+
value: 0.9336426914153132
|
37 |
- name: Accuracy
|
38 |
type: accuracy
|
39 |
+
value: 0.9987999888371054
|
40 |
---
|
41 |
|
42 |
<!-- This model card has been generated automatically according to the information the Trainer had access to. You
|
|
|
44 |
|
45 |
# output
|
46 |
|
47 |
+
This model is a fine-tuned version of [michiyasunaga/BioLinkBERT-base](https://huggingface.co/michiyasunaga/BioLinkBERT-base) on the Rodrigo1771/drugtemist-en-9-ner dataset.
|
48 |
It achieves the following results on the evaluation set:
|
49 |
+
- Loss: 0.0046
|
50 |
+
- Precision: 0.9298
|
51 |
+
- Recall: 0.9376
|
52 |
+
- F1: 0.9336
|
53 |
+
- Accuracy: 0.9988
|
54 |
|
55 |
## Model description
|
56 |
|
all_results.json
ADDED
@@ -0,0 +1,26 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"epoch": 10.0,
|
3 |
+
"eval_accuracy": 0.9987999888371054,
|
4 |
+
"eval_f1": 0.9336426914153132,
|
5 |
+
"eval_loss": 0.004605981521308422,
|
6 |
+
"eval_precision": 0.9297597042513863,
|
7 |
+
"eval_recall": 0.9375582479030755,
|
8 |
+
"eval_runtime": 13.2724,
|
9 |
+
"eval_samples": 6946,
|
10 |
+
"eval_samples_per_second": 523.342,
|
11 |
+
"eval_steps_per_second": 65.474,
|
12 |
+
"predict_accuracy": 0.9986882326988529,
|
13 |
+
"predict_f1": 0.9205633802816902,
|
14 |
+
"predict_loss": 0.004973105154931545,
|
15 |
+
"predict_precision": 0.8938730853391685,
|
16 |
+
"predict_recall": 0.9488966318234611,
|
17 |
+
"predict_runtime": 25.752,
|
18 |
+
"predict_samples_per_second": 571.412,
|
19 |
+
"predict_steps_per_second": 71.451,
|
20 |
+
"total_flos": 1.1151464037050934e+16,
|
21 |
+
"train_loss": 0.002938754050150616,
|
22 |
+
"train_runtime": 1039.0289,
|
23 |
+
"train_samples": 27967,
|
24 |
+
"train_samples_per_second": 269.165,
|
25 |
+
"train_steps_per_second": 4.206
|
26 |
+
}
|
eval_results.json
ADDED
@@ -0,0 +1,12 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"epoch": 10.0,
|
3 |
+
"eval_accuracy": 0.9987999888371054,
|
4 |
+
"eval_f1": 0.9336426914153132,
|
5 |
+
"eval_loss": 0.004605981521308422,
|
6 |
+
"eval_precision": 0.9297597042513863,
|
7 |
+
"eval_recall": 0.9375582479030755,
|
8 |
+
"eval_runtime": 13.2724,
|
9 |
+
"eval_samples": 6946,
|
10 |
+
"eval_samples_per_second": 523.342,
|
11 |
+
"eval_steps_per_second": 65.474
|
12 |
+
}
|
predict_results.json
ADDED
@@ -0,0 +1,10 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"predict_accuracy": 0.9986882326988529,
|
3 |
+
"predict_f1": 0.9205633802816902,
|
4 |
+
"predict_loss": 0.004973105154931545,
|
5 |
+
"predict_precision": 0.8938730853391685,
|
6 |
+
"predict_recall": 0.9488966318234611,
|
7 |
+
"predict_runtime": 25.752,
|
8 |
+
"predict_samples_per_second": 571.412,
|
9 |
+
"predict_steps_per_second": 71.451
|
10 |
+
}
|
predictions.txt
ADDED
The diff for this file is too large to render.
See raw diff
|
|
tb/events.out.tfevents.1725570294.c3806e32a2f8.1237.1
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:8921f39abba2cbc517633b1fc318ac0654a4e096e416d5ce4dde888385d854c5
|
3 |
+
size 560
|
train.log
CHANGED
@@ -1280,3 +1280,51 @@ Training completed. Do not forget to share your model on huggingface.co/models =
|
|
1280 |
{'eval_loss': 0.00707679707556963, 'eval_precision': 0.924860853432282, 'eval_recall': 0.9291705498602051, 'eval_f1': 0.9270106927010694, 'eval_accuracy': 0.9986534758462869, 'eval_runtime': 13.4189, 'eval_samples_per_second': 517.629, 'eval_steps_per_second': 64.76, 'epoch': 10.0}
|
1281 |
{'train_runtime': 1039.0289, 'train_samples_per_second': 269.165, 'train_steps_per_second': 4.206, 'train_loss': 0.002938754050150616, 'epoch': 10.0}
|
1282 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1283 |
0%| | 0/869 [00:00<?, ?it/s]
|
1284 |
1%| | 10/869 [00:00<00:08, 98.30it/s]
|
1285 |
2%|▏ | 20/869 [00:00<00:09, 88.58it/s]
|
1286 |
3%|▎ | 29/869 [00:00<00:09, 88.26it/s]
|
1287 |
4%|▍ | 38/869 [00:00<00:09, 88.48it/s]
|
1288 |
6%|▌ | 48/869 [00:00<00:09, 90.80it/s]
|
1289 |
7%|▋ | 58/869 [00:00<00:08, 91.97it/s]
|
1290 |
8%|▊ | 68/869 [00:00<00:08, 90.67it/s]
|
1291 |
9%|▉ | 78/869 [00:00<00:08, 90.51it/s]
|
1292 |
10%|█ | 89/869 [00:00<00:08, 93.51it/s]
|
1293 |
12%|█▏ | 100/869 [00:01<00:08, 95.79it/s]
|
1294 |
13%|█▎ | 110/869 [00:01<00:08, 94.19it/s]
|
1295 |
14%|█▍ | 120/869 [00:01<00:07, 94.41it/s]
|
1296 |
15%|█▍ | 130/869 [00:01<00:07, 93.70it/s]
|
1297 |
16%|█▌ | 140/869 [00:01<00:07, 94.25it/s]
|
1298 |
17%|█▋ | 150/869 [00:01<00:07, 91.67it/s]
|
1299 |
18%|█▊ | 160/869 [00:01<00:07, 90.44it/s]
|
1300 |
20%|█▉ | 170/869 [00:01<00:07, 88.93it/s]
|
1301 |
21%|██ | 179/869 [00:01<00:07, 88.51it/s]
|
1302 |
22%|██▏ | 189/869 [00:02<00:07, 90.19it/s]
|
1303 |
23%|██▎ | 199/869 [00:02<00:07, 89.37it/s]
|
1304 |
24%|██▍ | 209/869 [00:02<00:07, 90.40it/s]
|
1305 |
25%|██▌ | 219/869 [00:02<00:07, 92.85it/s]
|
1306 |
26%|██▋ | 229/869 [00:02<00:07, 90.28it/s]
|
1307 |
28%|██▊ | 239/869 [00:02<00:07, 87.08it/s]
|
1308 |
29%|██▊ | 248/869 [00:02<00:07, 82.72it/s]
|
1309 |
30%|██▉ | 258/869 [00:02<00:07, 86.16it/s]
|
1310 |
31%|███ | 267/869 [00:02<00:06, 86.93it/s]
|
1311 |
32%|███▏ | 276/869 [00:03<00:06, 87.45it/s]
|
1312 |
33%|███▎ | 285/869 [00:03<00:06, 87.83it/s]
|
1313 |
34%|███▍ | 295/869 [00:03<00:06, 88.99it/s]
|
1314 |
35%|███▍ | 304/869 [00:03<00:06, 87.61it/s]
|
1315 |
36%|███▌ | 313/869 [00:03<00:06, 86.93it/s]
|
1316 |
37%|███▋ | 322/869 [00:03<00:06, 85.96it/s]
|
1317 |
38%|███▊ | 332/869 [00:03<00:06, 88.73it/s]
|
1318 |
39%|███▉ | 341/869 [00:03<00:05, 88.78it/s]
|
1319 |
40%|████ | 350/869 [00:03<00:05, 87.18it/s]
|
1320 |
41%|████▏ | 360/869 [00:04<00:05, 90.49it/s]
|
1321 |
43%|████▎ | 370/869 [00:04<00:05, 92.05it/s]
|
1322 |
44%|████▎ | 380/869 [00:04<00:05, 89.42it/s]
|
1323 |
45%|████▍ | 389/869 [00:04<00:05, 86.57it/s]
|
1324 |
46%|████▌ | 399/869 [00:04<00:05, 89.33it/s]
|
1325 |
47%|████▋ | 409/869 [00:04<00:05, 91.90it/s]
|
1326 |
48%|████▊ | 419/869 [00:04<00:04, 92.49it/s]
|
1327 |
49%|████▉ | 429/869 [00:04<00:04, 92.52it/s]
|
1328 |
51%|█████ | 439/869 [00:04<00:04, 90.46it/s]
|
1329 |
52%|█████▏ | 449/869 [00:04<00:04, 90.02it/s]
|
1330 |
53%|█████▎ | 459/869 [00:05<00:04, 90.68it/s]
|
1331 |
54%|█████▍ | 469/869 [00:05<00:04, 91.56it/s]
|
1332 |
55%|█████▌ | 480/869 [00:05<00:04, 94.03it/s]
|
1333 |
56%|█████▋ | 490/869 [00:05<00:04, 91.51it/s]
|
1334 |
58%|█████▊ | 500/869 [00:05<00:03, 92.57it/s]
|
1335 |
59%|█████▊ | 510/869 [00:05<00:03, 90.78it/s]
|
1336 |
60%|█████▉ | 520/869 [00:05<00:03, 93.29it/s]
|
1337 |
61%|██████ | 530/869 [00:05<00:03, 88.69it/s]
|
1338 |
62%|██████▏ | 539/869 [00:05<00:03, 88.40it/s]
|
1339 |
63%|██████▎ | 549/869 [00:06<00:03, 90.30it/s]
|
1340 |
64%|██████▍ | 559/869 [00:06<00:03, 90.75it/s]
|
1341 |
65%|██████▌ | 569/869 [00:06<00:03, 90.68it/s]
|
1342 |
67%|██████▋ | 579/869 [00:06<00:03, 92.01it/s]
|
1343 |
68%|██████▊ | 589/869 [00:06<00:03, 90.49it/s]
|
1344 |
69%|██████▉ | 599/869 [00:06<00:02, 92.77it/s]
|
1345 |
70%|███████ | 609/869 [00:06<00:02, 93.32it/s]
|
1346 |
71%|███████ | 619/869 [00:06<00:02, 92.00it/s]
|
1347 |
72%|███████▏ | 629/869 [00:06<00:02, 92.91it/s]
|
1348 |
74%|███████▎ | 639/869 [00:07<00:02, 91.85it/s]
|
1349 |
75%|███████▍ | 649/869 [00:07<00:02, 93.27it/s]
|
1350 |
76%|███████▌ | 659/869 [00:07<00:02, 92.46it/s]
|
1351 |
77%|███████▋ | 670/869 [00:07<00:02, 94.83it/s]
|
1352 |
78%|███████▊ | 680/869 [00:07<00:01, 95.72it/s]
|
1353 |
79%|███████▉ | 690/869 [00:07<00:02, 88.88it/s]
|
1354 |
81%|████████ | 700/869 [00:07<00:01, 90.68it/s]
|
1355 |
82%|████████▏ | 710/869 [00:07<00:01, 91.37it/s]
|
1356 |
83%|████████▎ | 720/869 [00:07<00:01, 90.58it/s]
|
1357 |
84%|████████▍ | 730/869 [00:08<00:01, 92.01it/s]
|
1358 |
85%|████████▌ | 740/869 [00:08<00:01, 91.19it/s]
|
1359 |
86%|████████▋ | 750/869 [00:08<00:01, 91.65it/s]
|
1360 |
87%|████████▋ | 760/869 [00:08<00:01, 92.57it/s]
|
1361 |
89%|████████▊ | 770/869 [00:08<00:01, 90.79it/s]
|
1362 |
90%|████████▉ | 780/869 [00:08<00:01, 85.70it/s]
|
1363 |
91%|█████████ | 790/869 [00:08<00:00, 89.02it/s]
|
1364 |
92%|█████████▏| 800/869 [00:08<00:00, 90.94it/s]
|
1365 |
93%|█████████▎| 810/869 [00:08<00:00, 92.20it/s]
|
1366 |
94%|█████████▍| 820/869 [00:09<00:00, 92.22it/s]
|
1367 |
96%|█████████▌| 830/869 [00:09<00:00, 93.70it/s]
|
1368 |
97%|█████████▋| 840/869 [00:09<00:00, 93.56it/s]
|
1369 |
98%|█████████▊| 850/869 [00:09<00:00, 95.06it/s]
|
1370 |
99%|█████████▉| 860/869 [00:09<00:00, 92.13it/s]
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1371 |
0%| | 0/1840 [00:00<?, ?it/s]
|
1372 |
1%| | 11/1840 [00:00<00:18, 100.66it/s]
|
1373 |
1%| | 22/1840 [00:00<00:20, 87.32it/s]
|
1374 |
2%|▏ | 32/1840 [00:00<00:20, 89.81it/s]
|
1375 |
2%|▏ | 42/1840 [00:00<00:19, 91.46it/s]
|
1376 |
3%|▎ | 52/1840 [00:00<00:19, 92.87it/s]
|
1377 |
3%|▎ | 62/1840 [00:00<00:19, 90.69it/s]
|
1378 |
4%|▍ | 72/1840 [00:00<00:19, 91.57it/s]
|
1379 |
4%|▍ | 82/1840 [00:00<00:18, 92.80it/s]
|
1380 |
5%|▌ | 92/1840 [00:01<00:19, 91.99it/s]
|
1381 |
6%|▌ | 102/1840 [00:01<00:18, 93.57it/s]
|
1382 |
6%|▌ | 112/1840 [00:01<00:18, 93.70it/s]
|
1383 |
7%|▋ | 122/1840 [00:01<00:18, 92.44it/s]
|
1384 |
7%|▋ | 132/1840 [00:01<00:18, 91.33it/s]
|
1385 |
8%|▊ | 142/1840 [00:01<00:18, 91.08it/s]
|
1386 |
8%|▊ | 152/1840 [00:01<00:18, 92.76it/s]
|
1387 |
9%|▉ | 162/1840 [00:01<00:18, 89.12it/s]
|
1388 |
9%|▉ | 172/1840 [00:01<00:18, 91.00it/s]
|
1389 |
10%|▉ | 182/1840 [00:01<00:17, 93.30it/s]
|
1390 |
10%|█ | 192/1840 [00:02<00:17, 93.99it/s]
|
1391 |
11%|█ | 202/1840 [00:02<00:17, 92.71it/s]
|
1392 |
12%|█▏ | 212/1840 [00:02<00:17, 92.90it/s]
|
1393 |
12%|█▏ | 222/1840 [00:02<00:17, 93.02it/s]
|
1394 |
13%|█▎ | 232/1840 [00:02<00:17, 90.92it/s]
|
1395 |
13%|█▎ | 242/1840 [00:02<00:17, 89.04it/s]
|
1396 |
14%|█▎ | 251/1840 [00:02<00:17, 89.29it/s]
|
1397 |
14%|█▍ | 261/1840 [00:02<00:17, 90.53it/s]
|
1398 |
15%|█▍ | 271/1840 [00:02<00:17, 92.01it/s]
|
1399 |
15%|█▌ | 282/1840 [00:03<00:16, 94.95it/s]
|
1400 |
16%|█▌ | 292/1840 [00:03<00:16, 94.44it/s]
|
1401 |
16%|█▋ | 302/1840 [00:03<00:16, 95.06it/s]
|
1402 |
17%|█▋ | 312/1840 [00:03<00:16, 94.21it/s]
|
1403 |
18%|█▊ | 322/1840 [00:03<00:16, 94.83it/s]
|
1404 |
18%|█▊ | 332/1840 [00:03<00:15, 95.15it/s]
|
1405 |
19%|█▊ | 343/1840 [00:03<00:15, 96.70it/s]
|
1406 |
19%|█▉ | 353/1840 [00:03<00:15, 93.88it/s]
|
1407 |
20%|█▉ | 363/1840 [00:03<00:15, 94.46it/s]
|
1408 |
20%|██ | 374/1840 [00:04<00:15, 96.46it/s]
|
1409 |
21%|██ | 384/1840 [00:04<00:15, 96.58it/s]
|
1410 |
21%|██▏ | 394/1840 [00:04<00:15, 94.52it/s]
|
1411 |
22%|██▏ | 404/1840 [00:04<00:15, 93.54it/s]
|
1412 |
22%|██▎ | 414/1840 [00:04<00:15, 91.33it/s]
|
1413 |
23%|██▎ | 424/1840 [00:04<00:15, 90.95it/s]
|
1414 |
24%|██▎ | 434/1840 [00:04<00:15, 90.46it/s]
|
1415 |
24%|██▍ | 444/1840 [00:04<00:15, 92.37it/s]
|
1416 |
25%|██▍ | 454/1840 [00:04<00:14, 92.51it/s]
|
1417 |
25%|██▌ | 464/1840 [00:05<00:14, 93.47it/s]
|
1418 |
26%|██▌ | 474/1840 [00:05<00:14, 92.31it/s]
|
1419 |
26%|██▋ | 484/1840 [00:05<00:14, 90.63it/s]
|
1420 |
27%|██▋ | 494/1840 [00:05<00:14, 90.70it/s]
|
1421 |
27%|██▋ | 504/1840 [00:05<00:14, 91.02it/s]
|
1422 |
28%|██▊ | 514/1840 [00:05<00:14, 93.08it/s]
|
1423 |
28%|██▊ | 524/1840 [00:05<00:13, 94.07it/s]
|
1424 |
29%|██▉ | 534/1840 [00:05<00:13, 94.78it/s]
|
1425 |
30%|██▉ | 544/1840 [00:05<00:13, 93.68it/s]
|
1426 |
30%|███ | 554/1840 [00:05<00:13, 94.96it/s]
|
1427 |
31%|███ | 564/1840 [00:06<00:13, 94.31it/s]
|
1428 |
31%|███ | 574/1840 [00:06<00:13, 95.70it/s]
|
1429 |
32%|███▏ | 584/1840 [00:06<00:13, 96.46it/s]
|
1430 |
32%|███▏ | 594/1840 [00:06<00:13, 93.35it/s]
|
1431 |
33%|███▎ | 604/1840 [00:06<00:13, 91.14it/s]
|
1432 |
33%|███▎ | 614/1840 [00:06<00:13, 92.08it/s]
|
1433 |
34%|███▍ | 624/1840 [00:06<00:13, 90.07it/s]
|
1434 |
34%|███▍ | 634/1840 [00:06<00:13, 92.36it/s]
|
1435 |
35%|███▌ | 644/1840 [00:06<00:12, 94.27it/s]
|
1436 |
36%|███▌ | 654/1840 [00:07<00:12, 95.15it/s]
|
1437 |
36%|███▌ | 664/1840 [00:07<00:12, 91.92it/s]
|
1438 |
37%|███▋ | 675/1840 [00:07<00:12, 94.52it/s]
|
1439 |
37%|███▋ | 685/1840 [00:07<00:12, 93.52it/s]
|
1440 |
38%|███▊ | 695/1840 [00:07<00:12, 94.01it/s]
|
1441 |
38%|███▊ | 705/1840 [00:07<00:11, 95.00it/s]
|
1442 |
39%|███▉ | 715/1840 [00:07<00:11, 95.08it/s]
|
1443 |
39%|███▉ | 726/1840 [00:07<00:11, 97.47it/s]
|
1444 |
40%|████ | 737/1840 [00:07<00:11, 98.74it/s]
|
1445 |
41%|████ | 747/1840 [00:08<00:11, 97.58it/s]
|
1446 |
41%|████ | 758/1840 [00:08<00:10, 99.05it/s]
|
1447 |
42%|████▏ | 768/1840 [00:08<00:10, 98.35it/s]
|
1448 |
42%|████▏ | 778/1840 [00:08<00:10, 97.78it/s]
|
1449 |
43%|████▎ | 788/1840 [00:08<00:10, 96.12it/s]
|
1450 |
43%|████▎ | 798/1840 [00:08<00:10, 96.37it/s]
|
1451 |
44%|████▍ | 808/1840 [00:08<00:10, 94.99it/s]
|
1452 |
44%|████▍ | 818/1840 [00:08<00:10, 95.70it/s]
|
1453 |
45%|████▌ | 828/1840 [00:08<00:10, 95.46it/s]
|
1454 |
46%|████▌ | 838/1840 [00:08<00:10, 96.12it/s]
|
1455 |
46%|████▌ | 848/1840 [00:09<00:10, 94.62it/s]
|
1456 |
47%|████▋ | 859/1840 [00:09<00:10, 96.32it/s]
|
1457 |
47%|████▋ | 869/1840 [00:09<00:10, 96.37it/s]
|
1458 |
48%|████▊ | 879/1840 [00:09<00:10, 94.02it/s]
|
1459 |
48%|████▊ | 889/1840 [00:09<00:10, 93.01it/s]
|
1460 |
49%|████▉ | 899/1840 [00:09<00:10, 93.14it/s]
|
1461 |
49%|████▉ | 910/1840 [00:09<00:09, 95.61it/s]
|
1462 |
50%|█████ | 920/1840 [00:09<00:09, 96.63it/s]
|
1463 |
51%|█████ | 930/1840 [00:09<00:09, 96.96it/s]
|
1464 |
51%|█████ | 940/1840 [00:10<00:09, 94.21it/s]
|
1465 |
52%|█████▏ | 950/1840 [00:10<00:09, 91.67it/s]
|
1466 |
52%|█████▏ | 960/1840 [00:10<00:10, 86.59it/s]
|
1467 |
53%|█████▎ | 970/1840 [00:10<00:09, 89.43it/s]
|
1468 |
53%|█████▎ | 980/1840 [00:10<00:09, 91.22it/s]
|
1469 |
54%|█████▍ | 990/1840 [00:10<00:09, 92.21it/s]
|
1470 |
54%|█████▍ | 1000/1840 [00:10<00:09, 92.68it/s]
|
1471 |
55%|█████▍ | 1010/1840 [00:10<00:08, 93.86it/s]
|
1472 |
55%|█████▌ | 1020/1840 [00:10<00:08, 95.17it/s]
|
1473 |
56%|█████▌ | 1031/1840 [00:11<00:08, 96.74it/s]
|
1474 |
57%|█████▋ | 1041/1840 [00:11<00:08, 94.91it/s]
|
1475 |
57%|█████▋ | 1051/1840 [00:11<00:08, 95.37it/s]
|
1476 |
58%|█████▊ | 1061/1840 [00:11<00:08, 94.87it/s]
|
1477 |
58%|█████▊ | 1071/1840 [00:11<00:08, 95.93it/s]
|
1478 |
59%|█████▉ | 1082/1840 [00:11<00:07, 97.63it/s]
|
1479 |
59%|█████▉ | 1092/1840 [00:11<00:07, 96.67it/s]
|
1480 |
60%|█████▉ | 1102/1840 [00:11<00:07, 95.77it/s]
|
1481 |
60%|██████ | 1112/1840 [00:11<00:07, 95.81it/s]
|
1482 |
61%|██████ | 1122/1840 [00:11<00:07, 95.09it/s]
|
1483 |
62%|██████▏ | 1132/1840 [00:12<00:07, 94.79it/s]
|
1484 |
62%|██████▏ | 1142/1840 [00:12<00:07, 94.93it/s]
|
1485 |
63%|██████▎ | 1152/1840 [00:12<00:07, 95.37it/s]
|
1486 |
63%|██████▎ | 1162/1840 [00:12<00:07, 95.48it/s]
|
1487 |
64%|██████▎ | 1172/1840 [00:12<00:07, 92.83it/s]
|
1488 |
64%|██████▍ | 1182/1840 [00:12<00:07, 93.70it/s]
|
1489 |
65%|██████▍ | 1192/1840 [00:12<00:07, 91.77it/s]
|
1490 |
65%|██████▌ | 1202/1840 [00:12<00:06, 93.80it/s]
|
1491 |
66%|██████▌ | 1212/1840 [00:12<00:06, 93.86it/s]
|
1492 |
66%|██████▋ | 1222/1840 [00:13<00:06, 92.60it/s]
|
1493 |
67%|██████▋ | 1232/1840 [00:13<00:06, 92.08it/s]
|
1494 |
68%|██████▊ | 1242/1840 [00:13<00:06, 93.89it/s]
|
1495 |
68%|██████▊ | 1252/1840 [00:13<00:06, 94.06it/s]
|
1496 |
69%|██████▊ | 1262/1840 [00:13<00:06, 92.09it/s]
|
1497 |
69%|██████▉ | 1272/1840 [00:13<00:06, 92.27it/s]
|
1498 |
70%|██████▉ | 1282/1840 [00:13<00:06, 92.93it/s]
|
1499 |
70%|███████ | 1292/1840 [00:13<00:05, 93.23it/s]
|
1500 |
71%|███████ | 1302/1840 [00:13<00:05, 94.53it/s]
|
1501 |
71%|███████▏ | 1313/1840 [00:14<00:05, 96.30it/s]
|
1502 |
72%|███████▏ | 1324/1840 [00:14<00:05, 97.65it/s]
|
1503 |
72%|███████▎ | 1334/1840 [00:14<00:05, 96.65it/s]
|
1504 |
73%|███████▎ | 1344/1840 [00:14<00:05, 96.78it/s]
|
1505 |
74%|███████▎ | 1354/1840 [00:14<00:05, 96.36it/s]
|
1506 |
74%|███████▍ | 1364/1840 [00:14<00:04, 96.05it/s]
|
1507 |
75%|███████▍ | 1374/1840 [00:14<00:04, 96.54it/s]
|
1508 |
75%|███████▌ | 1384/1840 [00:14<00:04, 95.78it/s]
|
1509 |
76%|███████▌ | 1394/1840 [00:14<00:04, 96.55it/s]
|
1510 |
76%|███████▋ | 1404/1840 [00:14<00:04, 96.13it/s]
|
1511 |
77%|███████▋ | 1415/1840 [00:15<00:04, 97.23it/s]
|
1512 |
78%|███████▊ | 1426/1840 [00:15<00:04, 98.32it/s]
|
1513 |
78%|███████▊ | 1436/1840 [00:15<00:04, 95.65it/s]
|
1514 |
79%|███████▊ | 1446/1840 [00:15<00:04, 92.20it/s]
|
1515 |
79%|███████▉ | 1456/1840 [00:15<00:04, 92.15it/s]
|
1516 |
80%|███████▉ | 1466/1840 [00:15<00:03, 93.93it/s]
|
1517 |
80%|████████ | 1476/1840 [00:15<00:03, 94.96it/s]
|
1518 |
81%|████████ | 1486/1840 [00:15<00:03, 92.39it/s]
|
1519 |
81%|████████▏ | 1496/1840 [00:15<00:03, 92.62it/s]
|
1520 |
82%|████████▏ | 1506/1840 [00:16<00:03, 90.94it/s]
|
1521 |
82%|████████▏ | 1516/1840 [00:16<00:03, 89.76it/s]
|
1522 |
83%|████████▎ | 1526/1840 [00:16<00:03, 92.31it/s]
|
1523 |
83%|████████▎ | 1536/1840 [00:16<00:03, 91.36it/s]
|
1524 |
84%|████████▍ | 1546/1840 [00:16<00:03, 91.31it/s]
|
1525 |
85%|████████▍ | 1556/1840 [00:16<00:03, 91.22it/s]
|
1526 |
85%|████████▌ | 1566/1840 [00:16<00:03, 90.24it/s]
|
1527 |
86%|████████▌ | 1576/1840 [00:16<00:02, 89.47it/s]
|
1528 |
86%|████████▌ | 1586/1840 [00:16<00:02, 90.91it/s]
|
1529 |
87%|████████▋ | 1596/1840 [00:17<00:02, 91.45it/s]
|
1530 |
87%|████████▋ | 1606/1840 [00:17<00:02, 92.23it/s]
|
1531 |
88%|████████▊ | 1616/1840 [00:17<00:02, 93.15it/s]
|
1532 |
88%|████████▊ | 1626/1840 [00:17<00:02, 92.75it/s]
|
1533 |
89%|████████▉ | 1636/1840 [00:17<00:02, 86.16it/s]
|
1534 |
89%|████████▉ | 1645/1840 [00:17<00:02, 86.13it/s]
|
1535 |
90%|████████▉ | 1655/1840 [00:17<00:02, 88.10it/s]
|
1536 |
90%|█████████ | 1665/1840 [00:17<00:01, 91.07it/s]
|
1537 |
91%|█████████ | 1675/1840 [00:17<00:01, 91.32it/s]
|
1538 |
92%|█████████▏| 1685/1840 [00:18<00:01, 92.00it/s]
|
1539 |
92%|█████████▏| 1695/1840 [00:18<00:01, 94.04it/s]
|
1540 |
93%|█████████▎| 1705/1840 [00:18<00:01, 92.60it/s]
|
1541 |
93%|█████████▎| 1715/1840 [00:18<00:01, 93.06it/s]
|
1542 |
94%|█████████▍| 1725/1840 [00:18<00:01, 94.15it/s]
|
1543 |
94%|█████████▍| 1735/1840 [00:18<00:01, 91.69it/s]
|
1544 |
95%|█████████▍| 1745/1840 [00:18<00:01, 93.98it/s]
|
1545 |
95%|█████████▌| 1755/1840 [00:18<00:00, 95.51it/s]
|
1546 |
96%|█████████▌| 1765/1840 [00:18<00:00, 96.50it/s]
|
1547 |
96%|█████████▋| 1775/1840 [00:18<00:00, 96.16it/s]
|
1548 |
97%|█████████▋| 1785/1840 [00:19<00:00, 95.98it/s]
|
1549 |
98%|█████████▊| 1795/1840 [00:19<00:00, 96.85it/s]
|
1550 |
98%|█████████▊| 1805/1840 [00:19<00:00, 96.83it/s]
|
1551 |
99%|█████████▊| 1815/1840 [00:19<00:00, 93.07it/s]
|
1552 |
99%|█████████▉| 1825/1840 [00:19<00:00, 94.73it/s]
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1280 |
{'eval_loss': 0.00707679707556963, 'eval_precision': 0.924860853432282, 'eval_recall': 0.9291705498602051, 'eval_f1': 0.9270106927010694, 'eval_accuracy': 0.9986534758462869, 'eval_runtime': 13.4189, 'eval_samples_per_second': 517.629, 'eval_steps_per_second': 64.76, 'epoch': 10.0}
|
1281 |
{'train_runtime': 1039.0289, 'train_samples_per_second': 269.165, 'train_steps_per_second': 4.206, 'train_loss': 0.002938754050150616, 'epoch': 10.0}
|
1282 |
|
1283 |
+
***** train metrics *****
|
1284 |
+
epoch = 10.0
|
1285 |
+
total_flos = 10385610GF
|
1286 |
+
train_loss = 0.0029
|
1287 |
+
train_runtime = 0:17:19.02
|
1288 |
+
train_samples = 27967
|
1289 |
+
train_samples_per_second = 269.165
|
1290 |
+
train_steps_per_second = 4.206
|
1291 |
+
09/05/2024 21:04:40 - INFO - __main__ - *** Evaluate ***
|
1292 |
+
[INFO|trainer.py:811] 2024-09-05 21:04:40,816 >> The following columns in the evaluation set don't have a corresponding argument in `BertForTokenClassification.forward` and have been ignored: id, tokens, ner_tags. If id, tokens, ner_tags are not expected by `BertForTokenClassification.forward`, you can safely ignore this message.
|
1293 |
+
[INFO|trainer.py:3819] 2024-09-05 21:04:40,819 >>
|
1294 |
+
***** Running Evaluation *****
|
1295 |
+
[INFO|trainer.py:3821] 2024-09-05 21:04:40,819 >> Num examples = 6946
|
1296 |
+
[INFO|trainer.py:3824] 2024-09-05 21:04:40,819 >> Batch size = 8
|
1297 |
+
|
1298 |
0%| | 0/869 [00:00<?, ?it/s]
|
1299 |
1%| | 10/869 [00:00<00:08, 98.30it/s]
|
1300 |
2%|▏ | 20/869 [00:00<00:09, 88.58it/s]
|
1301 |
3%|▎ | 29/869 [00:00<00:09, 88.26it/s]
|
1302 |
4%|▍ | 38/869 [00:00<00:09, 88.48it/s]
|
1303 |
6%|▌ | 48/869 [00:00<00:09, 90.80it/s]
|
1304 |
7%|▋ | 58/869 [00:00<00:08, 91.97it/s]
|
1305 |
8%|▊ | 68/869 [00:00<00:08, 90.67it/s]
|
1306 |
9%|▉ | 78/869 [00:00<00:08, 90.51it/s]
|
1307 |
10%|█ | 89/869 [00:00<00:08, 93.51it/s]
|
1308 |
12%|█▏ | 100/869 [00:01<00:08, 95.79it/s]
|
1309 |
13%|█▎ | 110/869 [00:01<00:08, 94.19it/s]
|
1310 |
14%|█▍ | 120/869 [00:01<00:07, 94.41it/s]
|
1311 |
15%|█▍ | 130/869 [00:01<00:07, 93.70it/s]
|
1312 |
16%|█▌ | 140/869 [00:01<00:07, 94.25it/s]
|
1313 |
17%|█▋ | 150/869 [00:01<00:07, 91.67it/s]
|
1314 |
18%|█▊ | 160/869 [00:01<00:07, 90.44it/s]
|
1315 |
20%|█▉ | 170/869 [00:01<00:07, 88.93it/s]
|
1316 |
21%|██ | 179/869 [00:01<00:07, 88.51it/s]
|
1317 |
22%|██▏ | 189/869 [00:02<00:07, 90.19it/s]
|
1318 |
23%|██▎ | 199/869 [00:02<00:07, 89.37it/s]
|
1319 |
24%|██▍ | 209/869 [00:02<00:07, 90.40it/s]
|
1320 |
25%|██▌ | 219/869 [00:02<00:07, 92.85it/s]
|
1321 |
26%|██▋ | 229/869 [00:02<00:07, 90.28it/s]
|
1322 |
28%|██▊ | 239/869 [00:02<00:07, 87.08it/s]
|
1323 |
29%|██▊ | 248/869 [00:02<00:07, 82.72it/s]
|
1324 |
30%|██▉ | 258/869 [00:02<00:07, 86.16it/s]
|
1325 |
31%|███ | 267/869 [00:02<00:06, 86.93it/s]
|
1326 |
32%|███▏ | 276/869 [00:03<00:06, 87.45it/s]
|
1327 |
33%|███▎ | 285/869 [00:03<00:06, 87.83it/s]
|
1328 |
34%|███▍ | 295/869 [00:03<00:06, 88.99it/s]
|
1329 |
35%|███▍ | 304/869 [00:03<00:06, 87.61it/s]
|
1330 |
36%|███▌ | 313/869 [00:03<00:06, 86.93it/s]
|
1331 |
37%|███▋ | 322/869 [00:03<00:06, 85.96it/s]
|
1332 |
38%|███▊ | 332/869 [00:03<00:06, 88.73it/s]
|
1333 |
39%|███▉ | 341/869 [00:03<00:05, 88.78it/s]
|
1334 |
40%|████ | 350/869 [00:03<00:05, 87.18it/s]
|
1335 |
41%|████▏ | 360/869 [00:04<00:05, 90.49it/s]
|
1336 |
43%|████▎ | 370/869 [00:04<00:05, 92.05it/s]
|
1337 |
44%|████▎ | 380/869 [00:04<00:05, 89.42it/s]
|
1338 |
45%|████▍ | 389/869 [00:04<00:05, 86.57it/s]
|
1339 |
46%|████▌ | 399/869 [00:04<00:05, 89.33it/s]
|
1340 |
47%|████▋ | 409/869 [00:04<00:05, 91.90it/s]
|
1341 |
48%|████▊ | 419/869 [00:04<00:04, 92.49it/s]
|
1342 |
49%|████▉ | 429/869 [00:04<00:04, 92.52it/s]
|
1343 |
51%|█████ | 439/869 [00:04<00:04, 90.46it/s]
|
1344 |
52%|█████▏ | 449/869 [00:04<00:04, 90.02it/s]
|
1345 |
53%|█████▎ | 459/869 [00:05<00:04, 90.68it/s]
|
1346 |
54%|█████▍ | 469/869 [00:05<00:04, 91.56it/s]
|
1347 |
55%|█████▌ | 480/869 [00:05<00:04, 94.03it/s]
|
1348 |
56%|█████▋ | 490/869 [00:05<00:04, 91.51it/s]
|
1349 |
58%|█████▊ | 500/869 [00:05<00:03, 92.57it/s]
|
1350 |
59%|█████▊ | 510/869 [00:05<00:03, 90.78it/s]
|
1351 |
60%|█████▉ | 520/869 [00:05<00:03, 93.29it/s]
|
1352 |
61%|██████ | 530/869 [00:05<00:03, 88.69it/s]
|
1353 |
62%|██████▏ | 539/869 [00:05<00:03, 88.40it/s]
|
1354 |
63%|██████▎ | 549/869 [00:06<00:03, 90.30it/s]
|
1355 |
64%|██████▍ | 559/869 [00:06<00:03, 90.75it/s]
|
1356 |
65%|██████▌ | 569/869 [00:06<00:03, 90.68it/s]
|
1357 |
67%|██████▋ | 579/869 [00:06<00:03, 92.01it/s]
|
1358 |
68%|██████▊ | 589/869 [00:06<00:03, 90.49it/s]
|
1359 |
69%|██████▉ | 599/869 [00:06<00:02, 92.77it/s]
|
1360 |
70%|███████ | 609/869 [00:06<00:02, 93.32it/s]
|
1361 |
71%|███████ | 619/869 [00:06<00:02, 92.00it/s]
|
1362 |
72%|███████▏ | 629/869 [00:06<00:02, 92.91it/s]
|
1363 |
74%|███████▎ | 639/869 [00:07<00:02, 91.85it/s]
|
1364 |
75%|███████▍ | 649/869 [00:07<00:02, 93.27it/s]
|
1365 |
76%|███████▌ | 659/869 [00:07<00:02, 92.46it/s]
|
1366 |
77%|███████▋ | 670/869 [00:07<00:02, 94.83it/s]
|
1367 |
78%|███████▊ | 680/869 [00:07<00:01, 95.72it/s]
|
1368 |
79%|███████▉ | 690/869 [00:07<00:02, 88.88it/s]
|
1369 |
81%|████████ | 700/869 [00:07<00:01, 90.68it/s]
|
1370 |
82%|████████▏ | 710/869 [00:07<00:01, 91.37it/s]
|
1371 |
83%|████████▎ | 720/869 [00:07<00:01, 90.58it/s]
|
1372 |
84%|████████▍ | 730/869 [00:08<00:01, 92.01it/s]
|
1373 |
85%|████████▌ | 740/869 [00:08<00:01, 91.19it/s]
|
1374 |
86%|████████▋ | 750/869 [00:08<00:01, 91.65it/s]
|
1375 |
87%|████████▋ | 760/869 [00:08<00:01, 92.57it/s]
|
1376 |
89%|████████▊ | 770/869 [00:08<00:01, 90.79it/s]
|
1377 |
90%|████████▉ | 780/869 [00:08<00:01, 85.70it/s]
|
1378 |
91%|█████████ | 790/869 [00:08<00:00, 89.02it/s]
|
1379 |
92%|█████████▏| 800/869 [00:08<00:00, 90.94it/s]
|
1380 |
93%|█████████▎| 810/869 [00:08<00:00, 92.20it/s]
|
1381 |
94%|█████████▍| 820/869 [00:09<00:00, 92.22it/s]
|
1382 |
96%|█████████▌| 830/869 [00:09<00:00, 93.70it/s]
|
1383 |
97%|█████████▋| 840/869 [00:09<00:00, 93.56it/s]
|
1384 |
98%|█████████▊| 850/869 [00:09<00:00, 95.06it/s]
|
1385 |
99%|█████████▉| 860/869 [00:09<00:00, 92.13it/s]
|
1386 |
+
***** eval metrics *****
|
1387 |
+
epoch = 10.0
|
1388 |
+
eval_accuracy = 0.9988
|
1389 |
+
eval_f1 = 0.9336
|
1390 |
+
eval_loss = 0.0046
|
1391 |
+
eval_precision = 0.9298
|
1392 |
+
eval_recall = 0.9376
|
1393 |
+
eval_runtime = 0:00:13.27
|
1394 |
+
eval_samples = 6946
|
1395 |
+
eval_samples_per_second = 523.342
|
1396 |
+
eval_steps_per_second = 65.474
|
1397 |
+
09/05/2024 21:04:54 - INFO - __main__ - *** Predict ***
|
1398 |
+
[INFO|trainer.py:811] 2024-09-05 21:04:54,097 >> The following columns in the test set don't have a corresponding argument in `BertForTokenClassification.forward` and have been ignored: id, tokens, ner_tags. If id, tokens, ner_tags are not expected by `BertForTokenClassification.forward`, you can safely ignore this message.
|
1399 |
+
[INFO|trainer.py:3819] 2024-09-05 21:04:54,099 >>
|
1400 |
+
***** Running Prediction *****
|
1401 |
+
[INFO|trainer.py:3821] 2024-09-05 21:04:54,099 >> Num examples = 14715
|
1402 |
+
[INFO|trainer.py:3824] 2024-09-05 21:04:54,099 >> Batch size = 8
|
1403 |
+
|
1404 |
0%| | 0/1840 [00:00<?, ?it/s]
|
1405 |
1%| | 11/1840 [00:00<00:18, 100.66it/s]
|
1406 |
1%| | 22/1840 [00:00<00:20, 87.32it/s]
|
1407 |
2%|▏ | 32/1840 [00:00<00:20, 89.81it/s]
|
1408 |
2%|▏ | 42/1840 [00:00<00:19, 91.46it/s]
|
1409 |
3%|▎ | 52/1840 [00:00<00:19, 92.87it/s]
|
1410 |
3%|▎ | 62/1840 [00:00<00:19, 90.69it/s]
|
1411 |
4%|▍ | 72/1840 [00:00<00:19, 91.57it/s]
|
1412 |
4%|▍ | 82/1840 [00:00<00:18, 92.80it/s]
|
1413 |
5%|▌ | 92/1840 [00:01<00:19, 91.99it/s]
|
1414 |
6%|▌ | 102/1840 [00:01<00:18, 93.57it/s]
|
1415 |
6%|▌ | 112/1840 [00:01<00:18, 93.70it/s]
|
1416 |
7%|▋ | 122/1840 [00:01<00:18, 92.44it/s]
|
1417 |
7%|▋ | 132/1840 [00:01<00:18, 91.33it/s]
|
1418 |
8%|▊ | 142/1840 [00:01<00:18, 91.08it/s]
|
1419 |
8%|▊ | 152/1840 [00:01<00:18, 92.76it/s]
|
1420 |
9%|▉ | 162/1840 [00:01<00:18, 89.12it/s]
|
1421 |
9%|▉ | 172/1840 [00:01<00:18, 91.00it/s]
|
1422 |
10%|▉ | 182/1840 [00:01<00:17, 93.30it/s]
|
1423 |
10%|█ | 192/1840 [00:02<00:17, 93.99it/s]
|
1424 |
11%|█ | 202/1840 [00:02<00:17, 92.71it/s]
|
1425 |
12%|█▏ | 212/1840 [00:02<00:17, 92.90it/s]
|
1426 |
12%|█▏ | 222/1840 [00:02<00:17, 93.02it/s]
|
1427 |
13%|█▎ | 232/1840 [00:02<00:17, 90.92it/s]
|
1428 |
13%|█▎ | 242/1840 [00:02<00:17, 89.04it/s]
|
1429 |
14%|█▎ | 251/1840 [00:02<00:17, 89.29it/s]
|
1430 |
14%|█▍ | 261/1840 [00:02<00:17, 90.53it/s]
|
1431 |
15%|█▍ | 271/1840 [00:02<00:17, 92.01it/s]
|
1432 |
15%|█▌ | 282/1840 [00:03<00:16, 94.95it/s]
|
1433 |
16%|█▌ | 292/1840 [00:03<00:16, 94.44it/s]
|
1434 |
16%|█▋ | 302/1840 [00:03<00:16, 95.06it/s]
|
1435 |
17%|█▋ | 312/1840 [00:03<00:16, 94.21it/s]
|
1436 |
18%|█▊ | 322/1840 [00:03<00:16, 94.83it/s]
|
1437 |
18%|█▊ | 332/1840 [00:03<00:15, 95.15it/s]
|
1438 |
19%|█▊ | 343/1840 [00:03<00:15, 96.70it/s]
|
1439 |
19%|█▉ | 353/1840 [00:03<00:15, 93.88it/s]
|
1440 |
20%|█▉ | 363/1840 [00:03<00:15, 94.46it/s]
|
1441 |
20%|██ | 374/1840 [00:04<00:15, 96.46it/s]
|
1442 |
21%|██ | 384/1840 [00:04<00:15, 96.58it/s]
|
1443 |
21%|██▏ | 394/1840 [00:04<00:15, 94.52it/s]
|
1444 |
22%|██▏ | 404/1840 [00:04<00:15, 93.54it/s]
|
1445 |
22%|██▎ | 414/1840 [00:04<00:15, 91.33it/s]
|
1446 |
23%|██▎ | 424/1840 [00:04<00:15, 90.95it/s]
|
1447 |
24%|██▎ | 434/1840 [00:04<00:15, 90.46it/s]
|
1448 |
24%|██▍ | 444/1840 [00:04<00:15, 92.37it/s]
|
1449 |
25%|██▍ | 454/1840 [00:04<00:14, 92.51it/s]
|
1450 |
25%|██▌ | 464/1840 [00:05<00:14, 93.47it/s]
|
1451 |
26%|██▌ | 474/1840 [00:05<00:14, 92.31it/s]
|
1452 |
26%|██▋ | 484/1840 [00:05<00:14, 90.63it/s]
|
1453 |
27%|██▋ | 494/1840 [00:05<00:14, 90.70it/s]
|
1454 |
27%|██▋ | 504/1840 [00:05<00:14, 91.02it/s]
|
1455 |
28%|██▊ | 514/1840 [00:05<00:14, 93.08it/s]
|
1456 |
28%|██▊ | 524/1840 [00:05<00:13, 94.07it/s]
|
1457 |
29%|██▉ | 534/1840 [00:05<00:13, 94.78it/s]
|
1458 |
30%|██▉ | 544/1840 [00:05<00:13, 93.68it/s]
|
1459 |
30%|███ | 554/1840 [00:05<00:13, 94.96it/s]
|
1460 |
31%|███ | 564/1840 [00:06<00:13, 94.31it/s]
|
1461 |
31%|███ | 574/1840 [00:06<00:13, 95.70it/s]
|
1462 |
32%|███▏ | 584/1840 [00:06<00:13, 96.46it/s]
|
1463 |
32%|███▏ | 594/1840 [00:06<00:13, 93.35it/s]
|
1464 |
33%|███▎ | 604/1840 [00:06<00:13, 91.14it/s]
|
1465 |
33%|███▎ | 614/1840 [00:06<00:13, 92.08it/s]
|
1466 |
34%|███▍ | 624/1840 [00:06<00:13, 90.07it/s]
|
1467 |
34%|███▍ | 634/1840 [00:06<00:13, 92.36it/s]
|
1468 |
35%|███▌ | 644/1840 [00:06<00:12, 94.27it/s]
|
1469 |
36%|███▌ | 654/1840 [00:07<00:12, 95.15it/s]
|
1470 |
36%|███▌ | 664/1840 [00:07<00:12, 91.92it/s]
|
1471 |
37%|███▋ | 675/1840 [00:07<00:12, 94.52it/s]
|
1472 |
37%|███▋ | 685/1840 [00:07<00:12, 93.52it/s]
|
1473 |
38%|███▊ | 695/1840 [00:07<00:12, 94.01it/s]
|
1474 |
38%|███▊ | 705/1840 [00:07<00:11, 95.00it/s]
|
1475 |
39%|███▉ | 715/1840 [00:07<00:11, 95.08it/s]
|
1476 |
39%|███▉ | 726/1840 [00:07<00:11, 97.47it/s]
|
1477 |
40%|████ | 737/1840 [00:07<00:11, 98.74it/s]
|
1478 |
41%|████ | 747/1840 [00:08<00:11, 97.58it/s]
|
1479 |
41%|████ | 758/1840 [00:08<00:10, 99.05it/s]
|
1480 |
42%|████▏ | 768/1840 [00:08<00:10, 98.35it/s]
|
1481 |
42%|████▏ | 778/1840 [00:08<00:10, 97.78it/s]
|
1482 |
43%|████▎ | 788/1840 [00:08<00:10, 96.12it/s]
|
1483 |
43%|████▎ | 798/1840 [00:08<00:10, 96.37it/s]
|
1484 |
44%|████▍ | 808/1840 [00:08<00:10, 94.99it/s]
|
1485 |
44%|████▍ | 818/1840 [00:08<00:10, 95.70it/s]
|
1486 |
45%|████▌ | 828/1840 [00:08<00:10, 95.46it/s]
|
1487 |
46%|████▌ | 838/1840 [00:08<00:10, 96.12it/s]
|
1488 |
46%|████▌ | 848/1840 [00:09<00:10, 94.62it/s]
|
1489 |
47%|████▋ | 859/1840 [00:09<00:10, 96.32it/s]
|
1490 |
47%|████▋ | 869/1840 [00:09<00:10, 96.37it/s]
|
1491 |
48%|████▊ | 879/1840 [00:09<00:10, 94.02it/s]
|
1492 |
48%|████▊ | 889/1840 [00:09<00:10, 93.01it/s]
|
1493 |
49%|████▉ | 899/1840 [00:09<00:10, 93.14it/s]
|
1494 |
49%|████▉ | 910/1840 [00:09<00:09, 95.61it/s]
|
1495 |
50%|█████ | 920/1840 [00:09<00:09, 96.63it/s]
|
1496 |
51%|█████ | 930/1840 [00:09<00:09, 96.96it/s]
|
1497 |
51%|█████ | 940/1840 [00:10<00:09, 94.21it/s]
|
1498 |
52%|█████▏ | 950/1840 [00:10<00:09, 91.67it/s]
|
1499 |
52%|█████▏ | 960/1840 [00:10<00:10, 86.59it/s]
|
1500 |
53%|█████▎ | 970/1840 [00:10<00:09, 89.43it/s]
|
1501 |
53%|█████▎ | 980/1840 [00:10<00:09, 91.22it/s]
|
1502 |
54%|█████▍ | 990/1840 [00:10<00:09, 92.21it/s]
|
1503 |
54%|█████▍ | 1000/1840 [00:10<00:09, 92.68it/s]
|
1504 |
55%|█████▍ | 1010/1840 [00:10<00:08, 93.86it/s]
|
1505 |
55%|█████▌ | 1020/1840 [00:10<00:08, 95.17it/s]
|
1506 |
56%|█████▌ | 1031/1840 [00:11<00:08, 96.74it/s]
|
1507 |
57%|█████▋ | 1041/1840 [00:11<00:08, 94.91it/s]
|
1508 |
57%|█████▋ | 1051/1840 [00:11<00:08, 95.37it/s]
|
1509 |
58%|█████▊ | 1061/1840 [00:11<00:08, 94.87it/s]
|
1510 |
58%|█████▊ | 1071/1840 [00:11<00:08, 95.93it/s]
|
1511 |
59%|█████▉ | 1082/1840 [00:11<00:07, 97.63it/s]
|
1512 |
59%|█████▉ | 1092/1840 [00:11<00:07, 96.67it/s]
|
1513 |
60%|█████▉ | 1102/1840 [00:11<00:07, 95.77it/s]
|
1514 |
60%|██████ | 1112/1840 [00:11<00:07, 95.81it/s]
|
1515 |
61%|██████ | 1122/1840 [00:11<00:07, 95.09it/s]
|
1516 |
62%|██████▏ | 1132/1840 [00:12<00:07, 94.79it/s]
|
1517 |
62%|██████▏ | 1142/1840 [00:12<00:07, 94.93it/s]
|
1518 |
63%|██████▎ | 1152/1840 [00:12<00:07, 95.37it/s]
|
1519 |
63%|██████▎ | 1162/1840 [00:12<00:07, 95.48it/s]
|
1520 |
64%|██████▎ | 1172/1840 [00:12<00:07, 92.83it/s]
|
1521 |
64%|██████▍ | 1182/1840 [00:12<00:07, 93.70it/s]
|
1522 |
65%|██████▍ | 1192/1840 [00:12<00:07, 91.77it/s]
|
1523 |
65%|██████▌ | 1202/1840 [00:12<00:06, 93.80it/s]
|
1524 |
66%|██████▌ | 1212/1840 [00:12<00:06, 93.86it/s]
|
1525 |
66%|██████▋ | 1222/1840 [00:13<00:06, 92.60it/s]
|
1526 |
67%|██████▋ | 1232/1840 [00:13<00:06, 92.08it/s]
|
1527 |
68%|██████▊ | 1242/1840 [00:13<00:06, 93.89it/s]
|
1528 |
68%|██████▊ | 1252/1840 [00:13<00:06, 94.06it/s]
|
1529 |
69%|██████▊ | 1262/1840 [00:13<00:06, 92.09it/s]
|
1530 |
69%|██████▉ | 1272/1840 [00:13<00:06, 92.27it/s]
|
1531 |
70%|██████▉ | 1282/1840 [00:13<00:06, 92.93it/s]
|
1532 |
70%|███████ | 1292/1840 [00:13<00:05, 93.23it/s]
|
1533 |
71%|███████ | 1302/1840 [00:13<00:05, 94.53it/s]
|
1534 |
71%|███████▏ | 1313/1840 [00:14<00:05, 96.30it/s]
|
1535 |
72%|███████▏ | 1324/1840 [00:14<00:05, 97.65it/s]
|
1536 |
72%|███████▎ | 1334/1840 [00:14<00:05, 96.65it/s]
|
1537 |
73%|███████▎ | 1344/1840 [00:14<00:05, 96.78it/s]
|
1538 |
74%|███████▎ | 1354/1840 [00:14<00:05, 96.36it/s]
|
1539 |
74%|███████▍ | 1364/1840 [00:14<00:04, 96.05it/s]
|
1540 |
75%|███████▍ | 1374/1840 [00:14<00:04, 96.54it/s]
|
1541 |
75%|███████▌ | 1384/1840 [00:14<00:04, 95.78it/s]
|
1542 |
76%|███████▌ | 1394/1840 [00:14<00:04, 96.55it/s]
|
1543 |
76%|███████▋ | 1404/1840 [00:14<00:04, 96.13it/s]
|
1544 |
77%|███████▋ | 1415/1840 [00:15<00:04, 97.23it/s]
|
1545 |
78%|███████▊ | 1426/1840 [00:15<00:04, 98.32it/s]
|
1546 |
78%|███████▊ | 1436/1840 [00:15<00:04, 95.65it/s]
|
1547 |
79%|███████▊ | 1446/1840 [00:15<00:04, 92.20it/s]
|
1548 |
79%|███████▉ | 1456/1840 [00:15<00:04, 92.15it/s]
|
1549 |
80%|███████▉ | 1466/1840 [00:15<00:03, 93.93it/s]
|
1550 |
80%|████████ | 1476/1840 [00:15<00:03, 94.96it/s]
|
1551 |
81%|████████ | 1486/1840 [00:15<00:03, 92.39it/s]
|
1552 |
81%|████████▏ | 1496/1840 [00:15<00:03, 92.62it/s]
|
1553 |
82%|████████▏ | 1506/1840 [00:16<00:03, 90.94it/s]
|
1554 |
82%|████████▏ | 1516/1840 [00:16<00:03, 89.76it/s]
|
1555 |
83%|████████▎ | 1526/1840 [00:16<00:03, 92.31it/s]
|
1556 |
83%|████████▎ | 1536/1840 [00:16<00:03, 91.36it/s]
|
1557 |
84%|████████▍ | 1546/1840 [00:16<00:03, 91.31it/s]
|
1558 |
85%|████████▍ | 1556/1840 [00:16<00:03, 91.22it/s]
|
1559 |
85%|████████▌ | 1566/1840 [00:16<00:03, 90.24it/s]
|
1560 |
86%|████████▌ | 1576/1840 [00:16<00:02, 89.47it/s]
|
1561 |
86%|████████▌ | 1586/1840 [00:16<00:02, 90.91it/s]
|
1562 |
87%|████████▋ | 1596/1840 [00:17<00:02, 91.45it/s]
|
1563 |
87%|████████▋ | 1606/1840 [00:17<00:02, 92.23it/s]
|
1564 |
88%|████████▊ | 1616/1840 [00:17<00:02, 93.15it/s]
|
1565 |
88%|████████▊ | 1626/1840 [00:17<00:02, 92.75it/s]
|
1566 |
89%|████████▉ | 1636/1840 [00:17<00:02, 86.16it/s]
|
1567 |
89%|████████▉ | 1645/1840 [00:17<00:02, 86.13it/s]
|
1568 |
90%|████████▉ | 1655/1840 [00:17<00:02, 88.10it/s]
|
1569 |
90%|█████████ | 1665/1840 [00:17<00:01, 91.07it/s]
|
1570 |
91%|█████████ | 1675/1840 [00:17<00:01, 91.32it/s]
|
1571 |
92%|█████████▏| 1685/1840 [00:18<00:01, 92.00it/s]
|
1572 |
92%|█████████▏| 1695/1840 [00:18<00:01, 94.04it/s]
|
1573 |
93%|█████████▎| 1705/1840 [00:18<00:01, 92.60it/s]
|
1574 |
93%|█████████▎| 1715/1840 [00:18<00:01, 93.06it/s]
|
1575 |
94%|█████████▍| 1725/1840 [00:18<00:01, 94.15it/s]
|
1576 |
94%|█████████▍| 1735/1840 [00:18<00:01, 91.69it/s]
|
1577 |
95%|█████████▍| 1745/1840 [00:18<00:01, 93.98it/s]
|
1578 |
95%|█████████▌| 1755/1840 [00:18<00:00, 95.51it/s]
|
1579 |
96%|█████████▌| 1765/1840 [00:18<00:00, 96.50it/s]
|
1580 |
96%|█████████▋| 1775/1840 [00:18<00:00, 96.16it/s]
|
1581 |
97%|█████████▋| 1785/1840 [00:19<00:00, 95.98it/s]
|
1582 |
98%|█████████▊| 1795/1840 [00:19<00:00, 96.85it/s]
|
1583 |
98%|█████████▊| 1805/1840 [00:19<00:00, 96.83it/s]
|
1584 |
99%|█████████▊| 1815/1840 [00:19<00:00, 93.07it/s]
|
1585 |
99%|█████████▉| 1825/1840 [00:19<00:00, 94.73it/s]
|
1586 |
+
[INFO|trainer.py:3503] 2024-09-05 21:05:20,650 >> Saving model checkpoint to /content/dissertation/scripts/ner/output
|
1587 |
+
[INFO|configuration_utils.py:472] 2024-09-05 21:05:20,652 >> Configuration saved in /content/dissertation/scripts/ner/output/config.json
|
1588 |
+
[INFO|modeling_utils.py:2799] 2024-09-05 21:05:21,937 >> Model weights saved in /content/dissertation/scripts/ner/output/model.safetensors
|
1589 |
+
[INFO|tokenization_utils_base.py:2684] 2024-09-05 21:05:21,938 >> tokenizer config file saved in /content/dissertation/scripts/ner/output/tokenizer_config.json
|
1590 |
+
[INFO|tokenization_utils_base.py:2693] 2024-09-05 21:05:21,939 >> Special tokens file saved in /content/dissertation/scripts/ner/output/special_tokens_map.json
|
1591 |
+
***** predict metrics *****
|
1592 |
+
predict_accuracy = 0.9987
|
1593 |
+
predict_f1 = 0.9206
|
1594 |
+
predict_loss = 0.005
|
1595 |
+
predict_precision = 0.8939
|
1596 |
+
predict_recall = 0.9489
|
1597 |
+
predict_runtime = 0:00:25.75
|
1598 |
+
predict_samples_per_second = 571.412
|
1599 |
+
predict_steps_per_second = 71.451
|
1600 |
+
|
train_results.json
ADDED
@@ -0,0 +1,9 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"epoch": 10.0,
|
3 |
+
"total_flos": 1.1151464037050934e+16,
|
4 |
+
"train_loss": 0.002938754050150616,
|
5 |
+
"train_runtime": 1039.0289,
|
6 |
+
"train_samples": 27967,
|
7 |
+
"train_samples_per_second": 269.165,
|
8 |
+
"train_steps_per_second": 4.206
|
9 |
+
}
|
trainer_state.json
ADDED
@@ -0,0 +1,218 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"best_metric": 0.9336426914153132,
|
3 |
+
"best_model_checkpoint": "/content/dissertation/scripts/ner/output/checkpoint-1311",
|
4 |
+
"epoch": 10.0,
|
5 |
+
"eval_steps": 500,
|
6 |
+
"global_step": 4370,
|
7 |
+
"is_hyper_param_search": false,
|
8 |
+
"is_local_process_zero": true,
|
9 |
+
"is_world_process_zero": true,
|
10 |
+
"log_history": [
|
11 |
+
{
|
12 |
+
"epoch": 1.0,
|
13 |
+
"eval_accuracy": 0.9984720788100354,
|
14 |
+
"eval_f1": 0.9122645842903078,
|
15 |
+
"eval_loss": 0.004654619377106428,
|
16 |
+
"eval_precision": 0.8994565217391305,
|
17 |
+
"eval_recall": 0.9254426840633737,
|
18 |
+
"eval_runtime": 13.3719,
|
19 |
+
"eval_samples_per_second": 519.448,
|
20 |
+
"eval_steps_per_second": 64.987,
|
21 |
+
"step": 437
|
22 |
+
},
|
23 |
+
{
|
24 |
+
"epoch": 1.1441647597254005,
|
25 |
+
"grad_norm": 0.35407835245132446,
|
26 |
+
"learning_rate": 4.4279176201373e-05,
|
27 |
+
"loss": 0.0144,
|
28 |
+
"step": 500
|
29 |
+
},
|
30 |
+
{
|
31 |
+
"epoch": 2.0,
|
32 |
+
"eval_accuracy": 0.9984511483827756,
|
33 |
+
"eval_f1": 0.9131627056672761,
|
34 |
+
"eval_loss": 0.005305842496454716,
|
35 |
+
"eval_precision": 0.8959641255605382,
|
36 |
+
"eval_recall": 0.9310344827586207,
|
37 |
+
"eval_runtime": 13.2738,
|
38 |
+
"eval_samples_per_second": 523.286,
|
39 |
+
"eval_steps_per_second": 65.467,
|
40 |
+
"step": 874
|
41 |
+
},
|
42 |
+
{
|
43 |
+
"epoch": 2.288329519450801,
|
44 |
+
"grad_norm": 0.008026196621358395,
|
45 |
+
"learning_rate": 3.8558352402745995e-05,
|
46 |
+
"loss": 0.0038,
|
47 |
+
"step": 1000
|
48 |
+
},
|
49 |
+
{
|
50 |
+
"epoch": 3.0,
|
51 |
+
"eval_accuracy": 0.9987999888371054,
|
52 |
+
"eval_f1": 0.9336426914153132,
|
53 |
+
"eval_loss": 0.004605981521308422,
|
54 |
+
"eval_precision": 0.9297597042513863,
|
55 |
+
"eval_recall": 0.9375582479030755,
|
56 |
+
"eval_runtime": 13.2443,
|
57 |
+
"eval_samples_per_second": 524.45,
|
58 |
+
"eval_steps_per_second": 65.613,
|
59 |
+
"step": 1311
|
60 |
+
},
|
61 |
+
{
|
62 |
+
"epoch": 3.4324942791762014,
|
63 |
+
"grad_norm": 0.13839516043663025,
|
64 |
+
"learning_rate": 3.2837528604119e-05,
|
65 |
+
"loss": 0.0022,
|
66 |
+
"step": 1500
|
67 |
+
},
|
68 |
+
{
|
69 |
+
"epoch": 4.0,
|
70 |
+
"eval_accuracy": 0.9985627773281612,
|
71 |
+
"eval_f1": 0.9223616922361693,
|
72 |
+
"eval_loss": 0.005482749082148075,
|
73 |
+
"eval_precision": 0.9202226345083488,
|
74 |
+
"eval_recall": 0.9245107176141659,
|
75 |
+
"eval_runtime": 13.4564,
|
76 |
+
"eval_samples_per_second": 516.186,
|
77 |
+
"eval_steps_per_second": 64.579,
|
78 |
+
"step": 1748
|
79 |
+
},
|
80 |
+
{
|
81 |
+
"epoch": 4.576659038901602,
|
82 |
+
"grad_norm": 0.01504958514124155,
|
83 |
+
"learning_rate": 2.7116704805491993e-05,
|
84 |
+
"loss": 0.0019,
|
85 |
+
"step": 2000
|
86 |
+
},
|
87 |
+
{
|
88 |
+
"epoch": 5.0,
|
89 |
+
"eval_accuracy": 0.9985627773281612,
|
90 |
+
"eval_f1": 0.9231477220432582,
|
91 |
+
"eval_loss": 0.005268561653792858,
|
92 |
+
"eval_precision": 0.9118181818181819,
|
93 |
+
"eval_recall": 0.934762348555452,
|
94 |
+
"eval_runtime": 13.2814,
|
95 |
+
"eval_samples_per_second": 522.986,
|
96 |
+
"eval_steps_per_second": 65.43,
|
97 |
+
"step": 2185
|
98 |
+
},
|
99 |
+
{
|
100 |
+
"epoch": 5.720823798627002,
|
101 |
+
"grad_norm": 0.11815565079450607,
|
102 |
+
"learning_rate": 2.139588100686499e-05,
|
103 |
+
"loss": 0.0014,
|
104 |
+
"step": 2500
|
105 |
+
},
|
106 |
+
{
|
107 |
+
"epoch": 6.0,
|
108 |
+
"eval_accuracy": 0.998618591800854,
|
109 |
+
"eval_f1": 0.92243381328379,
|
110 |
+
"eval_loss": 0.005419280380010605,
|
111 |
+
"eval_precision": 0.9194444444444444,
|
112 |
+
"eval_recall": 0.9254426840633737,
|
113 |
+
"eval_runtime": 13.1103,
|
114 |
+
"eval_samples_per_second": 529.811,
|
115 |
+
"eval_steps_per_second": 66.284,
|
116 |
+
"step": 2622
|
117 |
+
},
|
118 |
+
{
|
119 |
+
"epoch": 6.864988558352403,
|
120 |
+
"grad_norm": 0.012974879704415798,
|
121 |
+
"learning_rate": 1.5675057208237986e-05,
|
122 |
+
"loss": 0.0009,
|
123 |
+
"step": 3000
|
124 |
+
},
|
125 |
+
{
|
126 |
+
"epoch": 7.0,
|
127 |
+
"eval_accuracy": 0.9986255686099406,
|
128 |
+
"eval_f1": 0.9289055191768008,
|
129 |
+
"eval_loss": 0.007274709176272154,
|
130 |
+
"eval_precision": 0.9323943661971831,
|
131 |
+
"eval_recall": 0.9254426840633737,
|
132 |
+
"eval_runtime": 13.1693,
|
133 |
+
"eval_samples_per_second": 527.439,
|
134 |
+
"eval_steps_per_second": 65.987,
|
135 |
+
"step": 3059
|
136 |
+
},
|
137 |
+
{
|
138 |
+
"epoch": 8.0,
|
139 |
+
"eval_accuracy": 0.9986604526553735,
|
140 |
+
"eval_f1": 0.9297752808988765,
|
141 |
+
"eval_loss": 0.006545887794345617,
|
142 |
+
"eval_precision": 0.9341486359360301,
|
143 |
+
"eval_recall": 0.9254426840633737,
|
144 |
+
"eval_runtime": 13.1956,
|
145 |
+
"eval_samples_per_second": 526.386,
|
146 |
+
"eval_steps_per_second": 65.855,
|
147 |
+
"step": 3496
|
148 |
+
},
|
149 |
+
{
|
150 |
+
"epoch": 8.009153318077804,
|
151 |
+
"grad_norm": 0.0015736627392470837,
|
152 |
+
"learning_rate": 9.954233409610985e-06,
|
153 |
+
"loss": 0.0005,
|
154 |
+
"step": 3500
|
155 |
+
},
|
156 |
+
{
|
157 |
+
"epoch": 9.0,
|
158 |
+
"eval_accuracy": 0.9986674294644602,
|
159 |
+
"eval_f1": 0.930905695611578,
|
160 |
+
"eval_loss": 0.006908744107931852,
|
161 |
+
"eval_precision": 0.9326473339569691,
|
162 |
+
"eval_recall": 0.9291705498602051,
|
163 |
+
"eval_runtime": 13.5052,
|
164 |
+
"eval_samples_per_second": 514.321,
|
165 |
+
"eval_steps_per_second": 64.346,
|
166 |
+
"step": 3933
|
167 |
+
},
|
168 |
+
{
|
169 |
+
"epoch": 9.153318077803204,
|
170 |
+
"grad_norm": 0.0005491800257004797,
|
171 |
+
"learning_rate": 4.233409610983982e-06,
|
172 |
+
"loss": 0.0004,
|
173 |
+
"step": 4000
|
174 |
+
},
|
175 |
+
{
|
176 |
+
"epoch": 10.0,
|
177 |
+
"eval_accuracy": 0.9986534758462869,
|
178 |
+
"eval_f1": 0.9270106927010694,
|
179 |
+
"eval_loss": 0.00707679707556963,
|
180 |
+
"eval_precision": 0.924860853432282,
|
181 |
+
"eval_recall": 0.9291705498602051,
|
182 |
+
"eval_runtime": 13.4189,
|
183 |
+
"eval_samples_per_second": 517.629,
|
184 |
+
"eval_steps_per_second": 64.76,
|
185 |
+
"step": 4370
|
186 |
+
},
|
187 |
+
{
|
188 |
+
"epoch": 10.0,
|
189 |
+
"step": 4370,
|
190 |
+
"total_flos": 1.1151464037050934e+16,
|
191 |
+
"train_loss": 0.002938754050150616,
|
192 |
+
"train_runtime": 1039.0289,
|
193 |
+
"train_samples_per_second": 269.165,
|
194 |
+
"train_steps_per_second": 4.206
|
195 |
+
}
|
196 |
+
],
|
197 |
+
"logging_steps": 500,
|
198 |
+
"max_steps": 4370,
|
199 |
+
"num_input_tokens_seen": 0,
|
200 |
+
"num_train_epochs": 10,
|
201 |
+
"save_steps": 500,
|
202 |
+
"stateful_callbacks": {
|
203 |
+
"TrainerControl": {
|
204 |
+
"args": {
|
205 |
+
"should_epoch_stop": false,
|
206 |
+
"should_evaluate": false,
|
207 |
+
"should_log": false,
|
208 |
+
"should_save": true,
|
209 |
+
"should_training_stop": true
|
210 |
+
},
|
211 |
+
"attributes": {}
|
212 |
+
}
|
213 |
+
},
|
214 |
+
"total_flos": 1.1151464037050934e+16,
|
215 |
+
"train_batch_size": 32,
|
216 |
+
"trial_name": null,
|
217 |
+
"trial_params": null
|
218 |
+
}
|