Rodrigo1771
commited on
Commit
•
d457a4b
1
Parent(s):
fe65bfb
End of training
Browse files- README.md +13 -12
- all_results.json +26 -0
- eval_results.json +12 -0
- predict_results.json +10 -0
- predictions.txt +0 -0
- tb/events.out.tfevents.1725527837.6cb9bed92fd1.4510.1 +3 -0
- train.log +48 -0
- train_results.json +9 -0
- trainer_state.json +232 -0
README.md
CHANGED
@@ -3,9 +3,10 @@ library_name: transformers
|
|
3 |
license: apache-2.0
|
4 |
base_model: michiyasunaga/BioLinkBERT-base
|
5 |
tags:
|
|
|
6 |
- generated_from_trainer
|
7 |
datasets:
|
8 |
-
- drugtemist-en-75-ner
|
9 |
metrics:
|
10 |
- precision
|
11 |
- recall
|
@@ -18,24 +19,24 @@ model-index:
|
|
18 |
name: Token Classification
|
19 |
type: token-classification
|
20 |
dataset:
|
21 |
-
name: drugtemist-en-75-ner
|
22 |
-
type: drugtemist-en-75-ner
|
23 |
config: DrugTEMIST English NER
|
24 |
split: validation
|
25 |
args: DrugTEMIST English NER
|
26 |
metrics:
|
27 |
- name: Precision
|
28 |
type: precision
|
29 |
-
value: 0.
|
30 |
- name: Recall
|
31 |
type: recall
|
32 |
-
value: 0.
|
33 |
- name: F1
|
34 |
type: f1
|
35 |
-
value: 0.
|
36 |
- name: Accuracy
|
37 |
type: accuracy
|
38 |
-
value: 0.
|
39 |
---
|
40 |
|
41 |
<!-- This model card has been generated automatically according to the information the Trainer had access to. You
|
@@ -43,12 +44,12 @@ should probably proofread and complete it, then remove this comment. -->
|
|
43 |
|
44 |
# output
|
45 |
|
46 |
-
This model is a fine-tuned version of [michiyasunaga/BioLinkBERT-base](https://huggingface.co/michiyasunaga/BioLinkBERT-base) on the drugtemist-en-75-ner dataset.
|
47 |
It achieves the following results on the evaluation set:
|
48 |
-
- Loss: 0.
|
49 |
-
- Precision: 0.
|
50 |
-
- Recall: 0.
|
51 |
-
- F1: 0.
|
52 |
- Accuracy: 0.9987
|
53 |
|
54 |
## Model description
|
|
|
3 |
license: apache-2.0
|
4 |
base_model: michiyasunaga/BioLinkBERT-base
|
5 |
tags:
|
6 |
+
- token-classification
|
7 |
- generated_from_trainer
|
8 |
datasets:
|
9 |
+
- Rodrigo1771/drugtemist-en-75-ner
|
10 |
metrics:
|
11 |
- precision
|
12 |
- recall
|
|
|
19 |
name: Token Classification
|
20 |
type: token-classification
|
21 |
dataset:
|
22 |
+
name: Rodrigo1771/drugtemist-en-75-ner
|
23 |
+
type: Rodrigo1771/drugtemist-en-75-ner
|
24 |
config: DrugTEMIST English NER
|
25 |
split: validation
|
26 |
args: DrugTEMIST English NER
|
27 |
metrics:
|
28 |
- name: Precision
|
29 |
type: precision
|
30 |
+
value: 0.9342105263157895
|
31 |
- name: Recall
|
32 |
type: recall
|
33 |
+
value: 0.9263746505125815
|
34 |
- name: F1
|
35 |
type: f1
|
36 |
+
value: 0.930276087973795
|
37 |
- name: Accuracy
|
38 |
type: accuracy
|
39 |
+
value: 0.9987162671280663
|
40 |
---
|
41 |
|
42 |
<!-- This model card has been generated automatically according to the information the Trainer had access to. You
|
|
|
44 |
|
45 |
# output
|
46 |
|
47 |
+
This model is a fine-tuned version of [michiyasunaga/BioLinkBERT-base](https://huggingface.co/michiyasunaga/BioLinkBERT-base) on the Rodrigo1771/drugtemist-en-75-ner dataset.
|
48 |
It achieves the following results on the evaluation set:
|
49 |
+
- Loss: 0.0065
|
50 |
+
- Precision: 0.9342
|
51 |
+
- Recall: 0.9264
|
52 |
+
- F1: 0.9303
|
53 |
- Accuracy: 0.9987
|
54 |
|
55 |
## Model description
|
all_results.json
ADDED
@@ -0,0 +1,26 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"epoch": 10.0,
|
3 |
+
"eval_accuracy": 0.9987162671280663,
|
4 |
+
"eval_f1": 0.930276087973795,
|
5 |
+
"eval_loss": 0.0064844791777431965,
|
6 |
+
"eval_precision": 0.9342105263157895,
|
7 |
+
"eval_recall": 0.9263746505125815,
|
8 |
+
"eval_runtime": 13.1881,
|
9 |
+
"eval_samples": 6946,
|
10 |
+
"eval_samples_per_second": 526.688,
|
11 |
+
"eval_steps_per_second": 65.893,
|
12 |
+
"predict_accuracy": 0.9986488402874071,
|
13 |
+
"predict_f1": 0.9212396929201025,
|
14 |
+
"predict_loss": 0.006953952368348837,
|
15 |
+
"predict_precision": 0.9025069637883009,
|
16 |
+
"predict_recall": 0.9407665505226481,
|
17 |
+
"predict_runtime": 26.178,
|
18 |
+
"predict_samples_per_second": 562.112,
|
19 |
+
"predict_steps_per_second": 70.288,
|
20 |
+
"total_flos": 1.394320679130096e+16,
|
21 |
+
"train_loss": 0.0030765269683407886,
|
22 |
+
"train_runtime": 1249.6681,
|
23 |
+
"train_samples": 32232,
|
24 |
+
"train_samples_per_second": 257.924,
|
25 |
+
"train_steps_per_second": 4.033
|
26 |
+
}
|
eval_results.json
ADDED
@@ -0,0 +1,12 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"epoch": 10.0,
|
3 |
+
"eval_accuracy": 0.9987162671280663,
|
4 |
+
"eval_f1": 0.930276087973795,
|
5 |
+
"eval_loss": 0.0064844791777431965,
|
6 |
+
"eval_precision": 0.9342105263157895,
|
7 |
+
"eval_recall": 0.9263746505125815,
|
8 |
+
"eval_runtime": 13.1881,
|
9 |
+
"eval_samples": 6946,
|
10 |
+
"eval_samples_per_second": 526.688,
|
11 |
+
"eval_steps_per_second": 65.893
|
12 |
+
}
|
predict_results.json
ADDED
@@ -0,0 +1,10 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"predict_accuracy": 0.9986488402874071,
|
3 |
+
"predict_f1": 0.9212396929201025,
|
4 |
+
"predict_loss": 0.006953952368348837,
|
5 |
+
"predict_precision": 0.9025069637883009,
|
6 |
+
"predict_recall": 0.9407665505226481,
|
7 |
+
"predict_runtime": 26.178,
|
8 |
+
"predict_samples_per_second": 562.112,
|
9 |
+
"predict_steps_per_second": 70.288
|
10 |
+
}
|
predictions.txt
ADDED
The diff for this file is too large to render.
See raw diff
|
|
tb/events.out.tfevents.1725527837.6cb9bed92fd1.4510.1
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:24dfba6e51d8a0727fb46592f105dea95aa3a1892fc3bb573fbee1166feae180
|
3 |
+
size 560
|
train.log
CHANGED
@@ -1289,3 +1289,51 @@ Training completed. Do not forget to share your model on huggingface.co/models =
|
|
1289 |
{'eval_loss': 0.008253143168985844, 'eval_precision': 0.921028466483012, 'eval_recall': 0.934762348555452, 'eval_f1': 0.9278445883441258, 'eval_accuracy': 0.9986883598917199, 'eval_runtime': 13.7257, 'eval_samples_per_second': 506.057, 'eval_steps_per_second': 63.312, 'epoch': 10.0}
|
1290 |
{'train_runtime': 1249.6681, 'train_samples_per_second': 257.924, 'train_steps_per_second': 4.033, 'train_loss': 0.0030765269683407886, 'epoch': 10.0}
|
1291 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1292 |
0%| | 0/869 [00:00<?, ?it/s]
|
1293 |
1%| | 10/869 [00:00<00:08, 97.10it/s]
|
1294 |
2%|▏ | 20/869 [00:00<00:09, 90.01it/s]
|
1295 |
3%|▎ | 30/869 [00:00<00:09, 89.18it/s]
|
1296 |
5%|▍ | 40/869 [00:00<00:09, 90.30it/s]
|
1297 |
6%|▌ | 50/869 [00:00<00:08, 92.35it/s]
|
1298 |
7%|▋ | 60/869 [00:00<00:08, 92.23it/s]
|
1299 |
8%|▊ | 70/869 [00:00<00:08, 91.59it/s]
|
1300 |
9%|▉ | 81/869 [00:00<00:08, 94.55it/s]
|
1301 |
10%|█ | 91/869 [00:00<00:08, 96.01it/s]
|
1302 |
12%|█▏ | 101/869 [00:01<00:07, 97.12it/s]
|
1303 |
13%|█▎ | 111/869 [00:01<00:07, 94.84it/s]
|
1304 |
14%|█▍ | 121/869 [00:01<00:07, 95.04it/s]
|
1305 |
15%|█▌ | 131/869 [00:01<00:07, 93.42it/s]
|
1306 |
16%|█▌ | 141/869 [00:01<00:07, 93.24it/s]
|
1307 |
17%|█▋ | 151/869 [00:01<00:07, 93.28it/s]
|
1308 |
19%|█▊ | 161/869 [00:01<00:07, 92.79it/s]
|
1309 |
20%|█▉ | 171/869 [00:01<00:07, 91.74it/s]
|
1310 |
21%|██ | 181/869 [00:01<00:07, 92.14it/s]
|
1311 |
22%|██▏ | 191/869 [00:02<00:07, 91.28it/s]
|
1312 |
23%|██▎ | 201/869 [00:02<00:07, 91.88it/s]
|
1313 |
24%|██▍ | 211/869 [00:02<00:07, 92.23it/s]
|
1314 |
25%|██▌ | 221/869 [00:02<00:06, 93.41it/s]
|
1315 |
27%|██▋ | 231/869 [00:02<00:06, 92.39it/s]
|
1316 |
28%|██▊ | 241/869 [00:02<00:07, 86.85it/s]
|
1317 |
29%|██▉ | 251/869 [00:02<00:06, 89.50it/s]
|
1318 |
30%|███ | 261/869 [00:02<00:06, 92.32it/s]
|
1319 |
31%|███ | 271/869 [00:02<00:06, 92.37it/s]
|
1320 |
32%|███▏ | 281/869 [00:03<00:06, 91.65it/s]
|
1321 |
33%|███▎ | 291/869 [00:03<00:06, 91.10it/s]
|
1322 |
35%|███▍ | 301/869 [00:03<00:06, 90.53it/s]
|
1323 |
36%|███▌ | 311/869 [00:03<00:06, 90.71it/s]
|
1324 |
37%|███▋ | 321/869 [00:03<00:06, 90.54it/s]
|
1325 |
38%|███▊ | 331/869 [00:03<00:05, 92.74it/s]
|
1326 |
39%|███▉ | 341/869 [00:03<00:05, 92.85it/s]
|
1327 |
40%|████ | 351/869 [00:03<00:05, 89.75it/s]
|
1328 |
42%|████▏ | 361/869 [00:03<00:05, 91.39it/s]
|
1329 |
43%|████▎ | 371/869 [00:04<00:05, 92.76it/s]
|
1330 |
44%|████▍ | 381/869 [00:04<00:05, 90.33it/s]
|
1331 |
45%|████▍ | 391/869 [00:04<00:05, 87.76it/s]
|
1332 |
46%|████▌ | 401/869 [00:04<00:05, 89.51it/s]
|
1333 |
47%|████▋ | 411/869 [00:04<00:04, 91.66it/s]
|
1334 |
48%|████▊ | 421/869 [00:04<00:04, 92.04it/s]
|
1335 |
50%|████▉ | 431/869 [00:04<00:04, 93.80it/s]
|
1336 |
51%|█████ | 441/869 [00:04<00:04, 90.42it/s]
|
1337 |
52%|█████▏ | 451/869 [00:04<00:04, 90.86it/s]
|
1338 |
53%|█████▎ | 461/869 [00:05<00:04, 90.51it/s]
|
1339 |
54%|█████▍ | 471/869 [00:05<00:04, 92.69it/s]
|
1340 |
55%|█████▌ | 482/869 [00:05<00:04, 95.27it/s]
|
1341 |
57%|█████▋ | 492/869 [00:05<00:04, 92.82it/s]
|
1342 |
58%|█████▊ | 502/869 [00:05<00:04, 91.33it/s]
|
1343 |
59%|█████▉ | 512/869 [00:05<00:03, 93.04it/s]
|
1344 |
60%|██████ | 522/869 [00:05<00:03, 90.12it/s]
|
1345 |
61%|██████ | 532/869 [00:05<00:03, 91.17it/s]
|
1346 |
62%|██████▏ | 542/869 [00:05<00:03, 90.57it/s]
|
1347 |
64%|██████▎ | 552/869 [00:06<00:03, 92.71it/s]
|
1348 |
65%|██████▍ | 562/869 [00:06<00:03, 94.03it/s]
|
1349 |
66%|██████▌ | 572/869 [00:06<00:03, 93.29it/s]
|
1350 |
67%|██████▋ | 582/869 [00:06<00:03, 91.83it/s]
|
1351 |
68%|██████▊ | 592/869 [00:06<00:03, 91.81it/s]
|
1352 |
69%|██████▉ | 602/869 [00:06<00:02, 93.95it/s]
|
1353 |
70%|███████ | 612/869 [00:06<00:02, 92.85it/s]
|
1354 |
72%|███████▏ | 622/869 [00:06<00:02, 92.26it/s]
|
1355 |
73%|███████▎ | 632/869 [00:06<00:02, 92.74it/s]
|
1356 |
74%|███████▍ | 642/869 [00:06<00:02, 91.09it/s]
|
1357 |
75%|███████▌ | 652/869 [00:07<00:02, 92.01it/s]
|
1358 |
76%|███████▌ | 662/869 [00:07<00:02, 91.29it/s]
|
1359 |
77%|███████▋ | 672/869 [00:07<00:02, 93.02it/s]
|
1360 |
78%|███████▊ | 682/869 [00:07<00:02, 86.69it/s]
|
1361 |
80%|███████▉ | 692/869 [00:07<00:01, 89.55it/s]
|
1362 |
81%|████████ | 702/869 [00:07<00:01, 89.54it/s]
|
1363 |
82%|████████▏ | 712/869 [00:07<00:01, 91.99it/s]
|
1364 |
83%|████████▎ | 722/869 [00:07<00:01, 91.92it/s]
|
1365 |
84%|████████▍ | 732/869 [00:07<00:01, 93.22it/s]
|
1366 |
85%|████████▌ | 742/869 [00:08<00:01, 93.18it/s]
|
1367 |
87%|████████▋ | 752/869 [00:08<00:01, 92.20it/s]
|
1368 |
88%|████████▊ | 763/869 [00:08<00:01, 95.00it/s]
|
1369 |
89%|████████▉ | 773/869 [00:08<00:01, 92.74it/s]
|
1370 |
90%|█████████ | 783/869 [00:08<00:00, 87.07it/s]
|
1371 |
91%|█████████▏| 793/869 [00:08<00:00, 89.65it/s]
|
1372 |
92%|█████████▏| 803/869 [00:08<00:00, 91.00it/s]
|
1373 |
94%|█████████▎| 813/869 [00:08<00:00, 90.83it/s]
|
1374 |
95%|█████████▍| 823/869 [00:08<00:00, 91.40it/s]
|
1375 |
96%|█████████▌| 833/869 [00:09<00:00, 90.29it/s]
|
1376 |
97%|█████████▋| 843/869 [00:09<00:00, 92.01it/s]
|
1377 |
98%|█████████▊| 853/869 [00:09<00:00, 93.22it/s]
|
1378 |
99%|█████████▉| 863/869 [00:09<00:00, 90.24it/s]
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1379 |
0%| | 0/1840 [00:00<?, ?it/s]
|
1380 |
1%| | 11/1840 [00:00<00:17, 101.79it/s]
|
1381 |
1%| | 22/1840 [00:00<00:19, 91.52it/s]
|
1382 |
2%|▏ | 32/1840 [00:00<00:19, 94.89it/s]
|
1383 |
2%|▏ | 42/1840 [00:00<00:18, 94.92it/s]
|
1384 |
3%|▎ | 52/1840 [00:00<00:18, 94.68it/s]
|
1385 |
3%|▎ | 62/1840 [00:00<00:19, 91.54it/s]
|
1386 |
4%|▍ | 72/1840 [00:00<00:19, 91.04it/s]
|
1387 |
4%|▍ | 82/1840 [00:00<00:19, 92.43it/s]
|
1388 |
5%|▌ | 92/1840 [00:00<00:19, 90.95it/s]
|
1389 |
6%|▌ | 102/1840 [00:01<00:18, 92.60it/s]
|
1390 |
6%|▌ | 112/1840 [00:01<00:18, 93.15it/s]
|
1391 |
7%|▋ | 122/1840 [00:01<00:18, 92.68it/s]
|
1392 |
7%|▋ | 132/1840 [00:01<00:18, 92.00it/s]
|
1393 |
8%|▊ | 142/1840 [00:01<00:18, 91.89it/s]
|
1394 |
8%|▊ | 152/1840 [00:01<00:18, 92.54it/s]
|
1395 |
9%|▉ | 162/1840 [00:01<00:18, 89.05it/s]
|
1396 |
9%|▉ | 172/1840 [00:01<00:18, 90.17it/s]
|
1397 |
10%|▉ | 182/1840 [00:01<00:17, 92.61it/s]
|
1398 |
10%|█ | 192/1840 [00:02<00:17, 93.99it/s]
|
1399 |
11%|█ | 202/1840 [00:02<00:17, 92.45it/s]
|
1400 |
12%|█▏ | 212/1840 [00:02<00:17, 92.36it/s]
|
1401 |
12%|█▏ | 222/1840 [00:02<00:17, 91.82it/s]
|
1402 |
13%|█▎ | 232/1840 [00:02<00:17, 90.21it/s]
|
1403 |
13%|█▎ | 242/1840 [00:02<00:18, 87.53it/s]
|
1404 |
14%|█▎ | 251/1840 [00:02<00:18, 87.80it/s]
|
1405 |
14%|█▍ | 260/1840 [00:02<00:18, 87.51it/s]
|
1406 |
15%|█▍ | 270/1840 [00:02<00:17, 89.69it/s]
|
1407 |
15%|█▌ | 280/1840 [00:03<00:16, 92.41it/s]
|
1408 |
16%|█▌ | 290/1840 [00:03<00:16, 92.07it/s]
|
1409 |
16%|█▋ | 300/1840 [00:03<00:16, 93.43it/s]
|
1410 |
17%|█▋ | 310/1840 [00:03<00:16, 93.16it/s]
|
1411 |
17%|█▋ | 320/1840 [00:03<00:16, 93.68it/s]
|
1412 |
18%|█▊ | 330/1840 [00:03<00:15, 95.47it/s]
|
1413 |
18%|█▊ | 340/1840 [00:03<00:15, 95.33it/s]
|
1414 |
19%|█▉ | 350/1840 [00:03<00:15, 95.92it/s]
|
1415 |
20%|█▉ | 360/1840 [00:03<00:15, 93.05it/s]
|
1416 |
20%|██ | 370/1840 [00:04<00:15, 93.79it/s]
|
1417 |
21%|██ | 380/1840 [00:04<00:15, 94.45it/s]
|
1418 |
21%|██ | 390/1840 [00:04<00:15, 94.48it/s]
|
1419 |
22%|██▏ | 400/1840 [00:04<00:15, 94.37it/s]
|
1420 |
22%|██▏ | 410/1840 [00:04<00:15, 94.57it/s]
|
1421 |
23%|██▎ | 420/1840 [00:04<00:15, 90.65it/s]
|
1422 |
23%|██▎ | 430/1840 [00:04<00:15, 90.83it/s]
|
1423 |
24%|██▍ | 440/1840 [00:04<00:15, 92.12it/s]
|
1424 |
24%|██▍ | 450/1840 [00:04<00:15, 91.31it/s]
|
1425 |
25%|██▌ | 460/1840 [00:04<00:14, 93.20it/s]
|
1426 |
26%|██▌ | 470/1840 [00:05<00:14, 94.92it/s]
|
1427 |
26%|██▌ | 480/1840 [00:05<00:15, 90.56it/s]
|
1428 |
27%|██▋ | 490/1840 [00:05<00:14, 91.66it/s]
|
1429 |
27%|██▋ | 500/1840 [00:05<00:14, 90.02it/s]
|
1430 |
28%|██▊ | 510/1840 [00:05<00:14, 90.04it/s]
|
1431 |
28%|██▊ | 520/1840 [00:05<00:14, 91.31it/s]
|
1432 |
29%|██▉ | 530/1840 [00:05<00:14, 93.38it/s]
|
1433 |
29%|██▉ | 540/1840 [00:05<00:14, 91.96it/s]
|
1434 |
30%|██▉ | 550/1840 [00:05<00:13, 92.37it/s]
|
1435 |
30%|███ | 560/1840 [00:06<00:13, 91.82it/s]
|
1436 |
31%|███ | 570/1840 [00:06<00:13, 93.85it/s]
|
1437 |
32%|███▏ | 580/1840 [00:06<00:13, 95.51it/s]
|
1438 |
32%|███▏ | 590/1840 [00:06<00:13, 94.30it/s]
|
1439 |
33%|███▎ | 600/1840 [00:06<00:13, 92.23it/s]
|
1440 |
33%|███▎ | 610/1840 [00:06<00:13, 92.56it/s]
|
1441 |
34%|███▎ | 620/1840 [00:06<00:13, 90.55it/s]
|
1442 |
34%|███▍ | 630/1840 [00:06<00:13, 90.47it/s]
|
1443 |
35%|███▍ | 640/1840 [00:06<00:13, 92.07it/s]
|
1444 |
35%|███▌ | 650/1840 [00:07<00:12, 92.80it/s]
|
1445 |
36%|███▌ | 660/1840 [00:07<00:13, 90.37it/s]
|
1446 |
36%|███▋ | 670/1840 [00:07<00:12, 91.10it/s]
|
1447 |
37%|███▋ | 680/1840 [00:07<00:12, 90.92it/s]
|
1448 |
38%|███▊ | 690/1840 [00:07<00:12, 91.68it/s]
|
1449 |
38%|███▊ | 700/1840 [00:07<00:12, 91.93it/s]
|
1450 |
39%|███▊ | 710/1840 [00:07<00:12, 90.93it/s]
|
1451 |
39%|███▉ | 720/1840 [00:07<00:12, 93.12it/s]
|
1452 |
40%|███▉ | 731/1840 [00:07<00:11, 95.27it/s]
|
1453 |
40%|████ | 741/1840 [00:08<00:11, 96.12it/s]
|
1454 |
41%|████ | 751/1840 [00:08<00:11, 95.23it/s]
|
1455 |
41%|████▏ | 761/1840 [00:08<00:11, 95.80it/s]
|
1456 |
42%|████▏ | 771/1840 [00:08<00:11, 94.59it/s]
|
1457 |
42%|████▏ | 781/1840 [00:08<00:11, 94.61it/s]
|
1458 |
43%|████▎ | 791/1840 [00:08<00:11, 92.06it/s]
|
1459 |
44%|████▎ | 801/1840 [00:08<00:11, 92.15it/s]
|
1460 |
44%|████▍ | 811/1840 [00:08<00:11, 90.36it/s]
|
1461 |
45%|████▍ | 821/1840 [00:08<00:11, 90.07it/s]
|
1462 |
45%|████▌ | 831/1840 [00:09<00:11, 90.22it/s]
|
1463 |
46%|████▌ | 841/1840 [00:09<00:10, 90.83it/s]
|
1464 |
46%|████▋ | 851/1840 [00:09<00:10, 92.27it/s]
|
1465 |
47%|████▋ | 861/1840 [00:09<00:10, 93.30it/s]
|
1466 |
47%|████▋ | 871/1840 [00:09<00:10, 95.19it/s]
|
1467 |
48%|████▊ | 881/1840 [00:09<00:10, 94.02it/s]
|
1468 |
48%|████▊ | 891/1840 [00:09<00:10, 94.19it/s]
|
1469 |
49%|████▉ | 901/1840 [00:09<00:10, 93.31it/s]
|
1470 |
50%|████▉ | 911/1840 [00:09<00:09, 94.54it/s]
|
1471 |
50%|█████ | 921/1840 [00:09<00:09, 94.97it/s]
|
1472 |
51%|█████ | 931/1840 [00:10<00:09, 94.63it/s]
|
1473 |
51%|█████ | 941/1840 [00:10<00:09, 91.83it/s]
|
1474 |
52%|█████▏ | 951/1840 [00:10<00:09, 91.34it/s]
|
1475 |
52%|█████▏ | 961/1840 [00:10<00:09, 90.38it/s]
|
1476 |
53%|█████▎ | 971/1840 [00:10<00:09, 91.94it/s]
|
1477 |
53%|█████▎ | 981/1840 [00:10<00:09, 91.00it/s]
|
1478 |
54%|█████▍ | 991/1840 [00:10<00:09, 92.18it/s]
|
1479 |
54%|█████▍ | 1001/1840 [00:10<00:09, 92.24it/s]
|
1480 |
55%|█████▍ | 1011/1840 [00:10<00:08, 92.66it/s]
|
1481 |
55%|█████▌ | 1021/1840 [00:11<00:08, 94.00it/s]
|
1482 |
56%|█████▌ | 1031/1840 [00:11<00:08, 95.57it/s]
|
1483 |
57%|█████▋ | 1041/1840 [00:11<00:08, 94.84it/s]
|
1484 |
57%|█████▋ | 1051/1840 [00:11<00:08, 94.30it/s]
|
1485 |
58%|█████▊ | 1061/1840 [00:11<00:08, 93.87it/s]
|
1486 |
58%|█████▊ | 1071/1840 [00:11<00:08, 94.72it/s]
|
1487 |
59%|█████▉ | 1081/1840 [00:11<00:08, 85.95it/s]
|
1488 |
59%|█████▉ | 1091/1840 [00:11<00:08, 89.18it/s]
|
1489 |
60%|█████▉ | 1101/1840 [00:11<00:08, 91.72it/s]
|
1490 |
60%|██████ | 1111/1840 [00:12<00:07, 92.63it/s]
|
1491 |
61%|██████ | 1121/1840 [00:12<00:07, 92.18it/s]
|
1492 |
61%|██████▏ | 1131/1840 [00:12<00:07, 92.87it/s]
|
1493 |
62%|██████▏ | 1141/1840 [00:12<00:07, 92.80it/s]
|
1494 |
63%|██████▎ | 1151/1840 [00:12<00:07, 93.16it/s]
|
1495 |
63%|██████▎ | 1161/1840 [00:12<00:07, 94.60it/s]
|
1496 |
64%|██████▎ | 1171/1840 [00:12<00:07, 92.20it/s]
|
1497 |
64%|██████▍ | 1181/1840 [00:12<00:07, 92.67it/s]
|
1498 |
65%|██████▍ | 1191/1840 [00:12<00:07, 90.14it/s]
|
1499 |
65%|██████▌ | 1201/1840 [00:12<00:06, 91.67it/s]
|
1500 |
66%|██████▌ | 1211/1840 [00:13<00:06, 93.09it/s]
|
1501 |
66%|██████▋ | 1221/1840 [00:13<00:06, 91.59it/s]
|
1502 |
67%|██████▋ | 1231/1840 [00:13<00:06, 91.56it/s]
|
1503 |
67%|██████▋ | 1241/1840 [00:13<00:06, 92.51it/s]
|
1504 |
68%|██████▊ | 1251/1840 [00:13<00:06, 93.68it/s]
|
1505 |
69%|██████▊ | 1261/1840 [00:13<00:06, 91.47it/s]
|
1506 |
69%|██████▉ | 1271/1840 [00:13<00:06, 91.63it/s]
|
1507 |
70%|██████▉ | 1281/1840 [00:13<00:06, 92.14it/s]
|
1508 |
70%|███████ | 1291/1840 [00:13<00:05, 93.88it/s]
|
1509 |
71%|███████ | 1301/1840 [00:14<00:05, 94.62it/s]
|
1510 |
71%|███████▏ | 1312/1840 [00:14<00:05, 96.68it/s]
|
1511 |
72%|███████▏ | 1322/1840 [00:14<00:05, 96.97it/s]
|
1512 |
72%|███████▏ | 1332/1840 [00:14<00:05, 96.37it/s]
|
1513 |
73%|███████▎ | 1342/1840 [00:14<00:05, 96.05it/s]
|
1514 |
73%|███████▎ | 1352/1840 [00:14<00:05, 95.04it/s]
|
1515 |
74%|███████▍ | 1362/1840 [00:14<00:05, 94.28it/s]
|
1516 |
75%|███████▍ | 1372/1840 [00:14<00:04, 94.95it/s]
|
1517 |
75%|███████▌ | 1382/1840 [00:14<00:04, 94.71it/s]
|
1518 |
76%|███████▌ | 1393/1840 [00:15<00:04, 96.04it/s]
|
1519 |
76%|███████▋ | 1403/1840 [00:15<00:04, 96.26it/s]
|
1520 |
77%|███████▋ | 1414/1840 [00:15<00:04, 97.83it/s]
|
1521 |
77%|███████▋ | 1425/1840 [00:15<00:04, 99.04it/s]
|
1522 |
78%|███████▊ | 1435/1840 [00:15<00:04, 97.69it/s]
|
1523 |
79%|███████▊ | 1445/1840 [00:15<00:04, 94.55it/s]
|
1524 |
79%|███████▉ | 1455/1840 [00:15<00:04, 93.85it/s]
|
1525 |
80%|███████▉ | 1466/1840 [00:15<00:03, 95.88it/s]
|
1526 |
80%|████████ | 1476/1840 [00:15<00:03, 96.72it/s]
|
1527 |
81%|████████ | 1486/1840 [00:15<00:03, 92.87it/s]
|
1528 |
81%|████████▏ | 1496/1840 [00:16<00:03, 92.19it/s]
|
1529 |
82%|████████▏ | 1506/1840 [00:16<00:03, 90.11it/s]
|
1530 |
82%|████████▏ | 1516/1840 [00:16<00:03, 89.92it/s]
|
1531 |
83%|████████▎ | 1526/1840 [00:16<00:03, 91.95it/s]
|
1532 |
83%|████████▎ | 1536/1840 [00:16<00:03, 90.92it/s]
|
1533 |
84%|████████▍ | 1546/1840 [00:16<00:03, 90.96it/s]
|
1534 |
85%|████████▍ | 1556/1840 [00:16<00:03, 90.87it/s]
|
1535 |
85%|████████▌ | 1566/1840 [00:16<00:02, 91.37it/s]
|
1536 |
86%|████████▌ | 1576/1840 [00:16<00:02, 90.52it/s]
|
1537 |
86%|████████▌ | 1586/1840 [00:17<00:02, 90.93it/s]
|
1538 |
87%|████████▋ | 1596/1840 [00:17<00:02, 91.78it/s]
|
1539 |
87%|████████▋ | 1606/1840 [00:17<00:02, 92.49it/s]
|
1540 |
88%|████████▊ | 1616/1840 [00:17<00:02, 93.36it/s]
|
1541 |
88%|████████▊ | 1626/1840 [00:17<00:02, 92.76it/s]
|
1542 |
89%|████████▉ | 1636/1840 [00:17<00:02, 86.37it/s]
|
1543 |
89%|████████▉ | 1646/1840 [00:17<00:02, 87.94it/s]
|
1544 |
90%|█████████ | 1656/1840 [00:17<00:02, 89.41it/s]
|
1545 |
91%|█████████ | 1666/1840 [00:17<00:01, 90.62it/s]
|
1546 |
91%|█████████ | 1676/1840 [00:18<00:01, 90.24it/s]
|
1547 |
92%|█████████▏| 1686/1840 [00:18<00:01, 90.23it/s]
|
1548 |
92%|█████████▏| 1696/1840 [00:18<00:01, 92.31it/s]
|
1549 |
93%|█████████▎| 1706/1840 [00:18<00:01, 91.76it/s]
|
1550 |
93%|█████████▎| 1716/1840 [00:18<00:01, 91.75it/s]
|
1551 |
94%|█████████▍| 1726/1840 [00:18<00:01, 92.98it/s]
|
1552 |
94%|█████████▍| 1736/1840 [00:18<00:01, 90.98it/s]
|
1553 |
95%|█████████▍| 1746/1840 [00:18<00:01, 91.89it/s]
|
1554 |
95%|█████████▌| 1756/1840 [00:18<00:00, 93.73it/s]
|
1555 |
96%|█████████▌| 1766/1840 [00:19<00:00, 95.14it/s]
|
1556 |
97%|█████████▋| 1776/1840 [00:19<00:00, 94.88it/s]
|
1557 |
97%|█████████▋| 1786/1840 [00:19<00:00, 95.14it/s]
|
1558 |
98%|█████████▊| 1796/1840 [00:19<00:00, 96.49it/s]
|
1559 |
98%|█████████▊| 1806/1840 [00:19<00:00, 94.16it/s]
|
1560 |
99%|█████████▊| 1816/1840 [00:19<00:00, 93.01it/s]
|
1561 |
99%|█████████▉| 1826/1840 [00:19<00:00, 94.52it/s]
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1289 |
{'eval_loss': 0.008253143168985844, 'eval_precision': 0.921028466483012, 'eval_recall': 0.934762348555452, 'eval_f1': 0.9278445883441258, 'eval_accuracy': 0.9986883598917199, 'eval_runtime': 13.7257, 'eval_samples_per_second': 506.057, 'eval_steps_per_second': 63.312, 'epoch': 10.0}
|
1290 |
{'train_runtime': 1249.6681, 'train_samples_per_second': 257.924, 'train_steps_per_second': 4.033, 'train_loss': 0.0030765269683407886, 'epoch': 10.0}
|
1291 |
|
1292 |
+
***** train metrics *****
|
1293 |
+
epoch = 10.0
|
1294 |
+
total_flos = 12985623GF
|
1295 |
+
train_loss = 0.0031
|
1296 |
+
train_runtime = 0:20:49.66
|
1297 |
+
train_samples = 32232
|
1298 |
+
train_samples_per_second = 257.924
|
1299 |
+
train_steps_per_second = 4.033
|
1300 |
+
09/05/2024 09:17:04 - INFO - __main__ - *** Evaluate ***
|
1301 |
+
[INFO|trainer.py:811] 2024-09-05 09:17:04,803 >> The following columns in the evaluation set don't have a corresponding argument in `BertForTokenClassification.forward` and have been ignored: tokens, id, ner_tags. If tokens, id, ner_tags are not expected by `BertForTokenClassification.forward`, you can safely ignore this message.
|
1302 |
+
[INFO|trainer.py:3819] 2024-09-05 09:17:04,805 >>
|
1303 |
+
***** Running Evaluation *****
|
1304 |
+
[INFO|trainer.py:3821] 2024-09-05 09:17:04,805 >> Num examples = 6946
|
1305 |
+
[INFO|trainer.py:3824] 2024-09-05 09:17:04,805 >> Batch size = 8
|
1306 |
+
|
1307 |
0%| | 0/869 [00:00<?, ?it/s]
|
1308 |
1%| | 10/869 [00:00<00:08, 97.10it/s]
|
1309 |
2%|▏ | 20/869 [00:00<00:09, 90.01it/s]
|
1310 |
3%|▎ | 30/869 [00:00<00:09, 89.18it/s]
|
1311 |
5%|▍ | 40/869 [00:00<00:09, 90.30it/s]
|
1312 |
6%|▌ | 50/869 [00:00<00:08, 92.35it/s]
|
1313 |
7%|▋ | 60/869 [00:00<00:08, 92.23it/s]
|
1314 |
8%|▊ | 70/869 [00:00<00:08, 91.59it/s]
|
1315 |
9%|▉ | 81/869 [00:00<00:08, 94.55it/s]
|
1316 |
10%|█ | 91/869 [00:00<00:08, 96.01it/s]
|
1317 |
12%|█▏ | 101/869 [00:01<00:07, 97.12it/s]
|
1318 |
13%|█▎ | 111/869 [00:01<00:07, 94.84it/s]
|
1319 |
14%|█▍ | 121/869 [00:01<00:07, 95.04it/s]
|
1320 |
15%|█▌ | 131/869 [00:01<00:07, 93.42it/s]
|
1321 |
16%|█▌ | 141/869 [00:01<00:07, 93.24it/s]
|
1322 |
17%|█▋ | 151/869 [00:01<00:07, 93.28it/s]
|
1323 |
19%|█▊ | 161/869 [00:01<00:07, 92.79it/s]
|
1324 |
20%|█▉ | 171/869 [00:01<00:07, 91.74it/s]
|
1325 |
21%|██ | 181/869 [00:01<00:07, 92.14it/s]
|
1326 |
22%|██▏ | 191/869 [00:02<00:07, 91.28it/s]
|
1327 |
23%|██▎ | 201/869 [00:02<00:07, 91.88it/s]
|
1328 |
24%|██▍ | 211/869 [00:02<00:07, 92.23it/s]
|
1329 |
25%|██▌ | 221/869 [00:02<00:06, 93.41it/s]
|
1330 |
27%|██▋ | 231/869 [00:02<00:06, 92.39it/s]
|
1331 |
28%|██▊ | 241/869 [00:02<00:07, 86.85it/s]
|
1332 |
29%|██▉ | 251/869 [00:02<00:06, 89.50it/s]
|
1333 |
30%|███ | 261/869 [00:02<00:06, 92.32it/s]
|
1334 |
31%|███ | 271/869 [00:02<00:06, 92.37it/s]
|
1335 |
32%|███▏ | 281/869 [00:03<00:06, 91.65it/s]
|
1336 |
33%|███▎ | 291/869 [00:03<00:06, 91.10it/s]
|
1337 |
35%|███▍ | 301/869 [00:03<00:06, 90.53it/s]
|
1338 |
36%|███▌ | 311/869 [00:03<00:06, 90.71it/s]
|
1339 |
37%|███▋ | 321/869 [00:03<00:06, 90.54it/s]
|
1340 |
38%|███▊ | 331/869 [00:03<00:05, 92.74it/s]
|
1341 |
39%|███▉ | 341/869 [00:03<00:05, 92.85it/s]
|
1342 |
40%|████ | 351/869 [00:03<00:05, 89.75it/s]
|
1343 |
42%|████▏ | 361/869 [00:03<00:05, 91.39it/s]
|
1344 |
43%|████▎ | 371/869 [00:04<00:05, 92.76it/s]
|
1345 |
44%|████▍ | 381/869 [00:04<00:05, 90.33it/s]
|
1346 |
45%|████▍ | 391/869 [00:04<00:05, 87.76it/s]
|
1347 |
46%|████▌ | 401/869 [00:04<00:05, 89.51it/s]
|
1348 |
47%|████▋ | 411/869 [00:04<00:04, 91.66it/s]
|
1349 |
48%|████▊ | 421/869 [00:04<00:04, 92.04it/s]
|
1350 |
50%|████▉ | 431/869 [00:04<00:04, 93.80it/s]
|
1351 |
51%|█████ | 441/869 [00:04<00:04, 90.42it/s]
|
1352 |
52%|█████▏ | 451/869 [00:04<00:04, 90.86it/s]
|
1353 |
53%|█████▎ | 461/869 [00:05<00:04, 90.51it/s]
|
1354 |
54%|█████▍ | 471/869 [00:05<00:04, 92.69it/s]
|
1355 |
55%|█████▌ | 482/869 [00:05<00:04, 95.27it/s]
|
1356 |
57%|█████▋ | 492/869 [00:05<00:04, 92.82it/s]
|
1357 |
58%|█████▊ | 502/869 [00:05<00:04, 91.33it/s]
|
1358 |
59%|█████▉ | 512/869 [00:05<00:03, 93.04it/s]
|
1359 |
60%|██████ | 522/869 [00:05<00:03, 90.12it/s]
|
1360 |
61%|██████ | 532/869 [00:05<00:03, 91.17it/s]
|
1361 |
62%|██████▏ | 542/869 [00:05<00:03, 90.57it/s]
|
1362 |
64%|██████▎ | 552/869 [00:06<00:03, 92.71it/s]
|
1363 |
65%|██████▍ | 562/869 [00:06<00:03, 94.03it/s]
|
1364 |
66%|██████▌ | 572/869 [00:06<00:03, 93.29it/s]
|
1365 |
67%|██████▋ | 582/869 [00:06<00:03, 91.83it/s]
|
1366 |
68%|██████▊ | 592/869 [00:06<00:03, 91.81it/s]
|
1367 |
69%|██████▉ | 602/869 [00:06<00:02, 93.95it/s]
|
1368 |
70%|███████ | 612/869 [00:06<00:02, 92.85it/s]
|
1369 |
72%|███████▏ | 622/869 [00:06<00:02, 92.26it/s]
|
1370 |
73%|███████▎ | 632/869 [00:06<00:02, 92.74it/s]
|
1371 |
74%|███████▍ | 642/869 [00:06<00:02, 91.09it/s]
|
1372 |
75%|███████▌ | 652/869 [00:07<00:02, 92.01it/s]
|
1373 |
76%|███████▌ | 662/869 [00:07<00:02, 91.29it/s]
|
1374 |
77%|███████▋ | 672/869 [00:07<00:02, 93.02it/s]
|
1375 |
78%|███████▊ | 682/869 [00:07<00:02, 86.69it/s]
|
1376 |
80%|███████▉ | 692/869 [00:07<00:01, 89.55it/s]
|
1377 |
81%|████████ | 702/869 [00:07<00:01, 89.54it/s]
|
1378 |
82%|████████▏ | 712/869 [00:07<00:01, 91.99it/s]
|
1379 |
83%|████████▎ | 722/869 [00:07<00:01, 91.92it/s]
|
1380 |
84%|████████▍ | 732/869 [00:07<00:01, 93.22it/s]
|
1381 |
85%|████████▌ | 742/869 [00:08<00:01, 93.18it/s]
|
1382 |
87%|████████▋ | 752/869 [00:08<00:01, 92.20it/s]
|
1383 |
88%|████████▊ | 763/869 [00:08<00:01, 95.00it/s]
|
1384 |
89%|████████▉ | 773/869 [00:08<00:01, 92.74it/s]
|
1385 |
90%|█████████ | 783/869 [00:08<00:00, 87.07it/s]
|
1386 |
91%|█████████▏| 793/869 [00:08<00:00, 89.65it/s]
|
1387 |
92%|█████████▏| 803/869 [00:08<00:00, 91.00it/s]
|
1388 |
94%|█████████▎| 813/869 [00:08<00:00, 90.83it/s]
|
1389 |
95%|█████████▍| 823/869 [00:08<00:00, 91.40it/s]
|
1390 |
96%|█████████▌| 833/869 [00:09<00:00, 90.29it/s]
|
1391 |
97%|█████████▋| 843/869 [00:09<00:00, 92.01it/s]
|
1392 |
98%|█████████▊| 853/869 [00:09<00:00, 93.22it/s]
|
1393 |
99%|█████████▉| 863/869 [00:09<00:00, 90.24it/s]
|
1394 |
+
***** eval metrics *****
|
1395 |
+
epoch = 10.0
|
1396 |
+
eval_accuracy = 0.9987
|
1397 |
+
eval_f1 = 0.9303
|
1398 |
+
eval_loss = 0.0065
|
1399 |
+
eval_precision = 0.9342
|
1400 |
+
eval_recall = 0.9264
|
1401 |
+
eval_runtime = 0:00:13.18
|
1402 |
+
eval_samples = 6946
|
1403 |
+
eval_samples_per_second = 526.688
|
1404 |
+
eval_steps_per_second = 65.893
|
1405 |
+
09/05/2024 09:17:17 - INFO - __main__ - *** Predict ***
|
1406 |
+
[INFO|trainer.py:811] 2024-09-05 09:17:17,999 >> The following columns in the test set don't have a corresponding argument in `BertForTokenClassification.forward` and have been ignored: tokens, id, ner_tags. If tokens, id, ner_tags are not expected by `BertForTokenClassification.forward`, you can safely ignore this message.
|
1407 |
+
[INFO|trainer.py:3819] 2024-09-05 09:17:18,002 >>
|
1408 |
+
***** Running Prediction *****
|
1409 |
+
[INFO|trainer.py:3821] 2024-09-05 09:17:18,002 >> Num examples = 14715
|
1410 |
+
[INFO|trainer.py:3824] 2024-09-05 09:17:18,002 >> Batch size = 8
|
1411 |
+
|
1412 |
0%| | 0/1840 [00:00<?, ?it/s]
|
1413 |
1%| | 11/1840 [00:00<00:17, 101.79it/s]
|
1414 |
1%| | 22/1840 [00:00<00:19, 91.52it/s]
|
1415 |
2%|▏ | 32/1840 [00:00<00:19, 94.89it/s]
|
1416 |
2%|▏ | 42/1840 [00:00<00:18, 94.92it/s]
|
1417 |
3%|▎ | 52/1840 [00:00<00:18, 94.68it/s]
|
1418 |
3%|▎ | 62/1840 [00:00<00:19, 91.54it/s]
|
1419 |
4%|▍ | 72/1840 [00:00<00:19, 91.04it/s]
|
1420 |
4%|▍ | 82/1840 [00:00<00:19, 92.43it/s]
|
1421 |
5%|▌ | 92/1840 [00:00<00:19, 90.95it/s]
|
1422 |
6%|▌ | 102/1840 [00:01<00:18, 92.60it/s]
|
1423 |
6%|▌ | 112/1840 [00:01<00:18, 93.15it/s]
|
1424 |
7%|▋ | 122/1840 [00:01<00:18, 92.68it/s]
|
1425 |
7%|▋ | 132/1840 [00:01<00:18, 92.00it/s]
|
1426 |
8%|▊ | 142/1840 [00:01<00:18, 91.89it/s]
|
1427 |
8%|▊ | 152/1840 [00:01<00:18, 92.54it/s]
|
1428 |
9%|▉ | 162/1840 [00:01<00:18, 89.05it/s]
|
1429 |
9%|▉ | 172/1840 [00:01<00:18, 90.17it/s]
|
1430 |
10%|▉ | 182/1840 [00:01<00:17, 92.61it/s]
|
1431 |
10%|█ | 192/1840 [00:02<00:17, 93.99it/s]
|
1432 |
11%|█ | 202/1840 [00:02<00:17, 92.45it/s]
|
1433 |
12%|█▏ | 212/1840 [00:02<00:17, 92.36it/s]
|
1434 |
12%|█▏ | 222/1840 [00:02<00:17, 91.82it/s]
|
1435 |
13%|█▎ | 232/1840 [00:02<00:17, 90.21it/s]
|
1436 |
13%|█▎ | 242/1840 [00:02<00:18, 87.53it/s]
|
1437 |
14%|█▎ | 251/1840 [00:02<00:18, 87.80it/s]
|
1438 |
14%|█▍ | 260/1840 [00:02<00:18, 87.51it/s]
|
1439 |
15%|█▍ | 270/1840 [00:02<00:17, 89.69it/s]
|
1440 |
15%|█▌ | 280/1840 [00:03<00:16, 92.41it/s]
|
1441 |
16%|█▌ | 290/1840 [00:03<00:16, 92.07it/s]
|
1442 |
16%|█▋ | 300/1840 [00:03<00:16, 93.43it/s]
|
1443 |
17%|█▋ | 310/1840 [00:03<00:16, 93.16it/s]
|
1444 |
17%|█▋ | 320/1840 [00:03<00:16, 93.68it/s]
|
1445 |
18%|█▊ | 330/1840 [00:03<00:15, 95.47it/s]
|
1446 |
18%|█▊ | 340/1840 [00:03<00:15, 95.33it/s]
|
1447 |
19%|█▉ | 350/1840 [00:03<00:15, 95.92it/s]
|
1448 |
20%|█▉ | 360/1840 [00:03<00:15, 93.05it/s]
|
1449 |
20%|██ | 370/1840 [00:04<00:15, 93.79it/s]
|
1450 |
21%|██ | 380/1840 [00:04<00:15, 94.45it/s]
|
1451 |
21%|██ | 390/1840 [00:04<00:15, 94.48it/s]
|
1452 |
22%|██▏ | 400/1840 [00:04<00:15, 94.37it/s]
|
1453 |
22%|██▏ | 410/1840 [00:04<00:15, 94.57it/s]
|
1454 |
23%|██▎ | 420/1840 [00:04<00:15, 90.65it/s]
|
1455 |
23%|██▎ | 430/1840 [00:04<00:15, 90.83it/s]
|
1456 |
24%|██▍ | 440/1840 [00:04<00:15, 92.12it/s]
|
1457 |
24%|██▍ | 450/1840 [00:04<00:15, 91.31it/s]
|
1458 |
25%|██▌ | 460/1840 [00:04<00:14, 93.20it/s]
|
1459 |
26%|██▌ | 470/1840 [00:05<00:14, 94.92it/s]
|
1460 |
26%|██▌ | 480/1840 [00:05<00:15, 90.56it/s]
|
1461 |
27%|██▋ | 490/1840 [00:05<00:14, 91.66it/s]
|
1462 |
27%|██▋ | 500/1840 [00:05<00:14, 90.02it/s]
|
1463 |
28%|██▊ | 510/1840 [00:05<00:14, 90.04it/s]
|
1464 |
28%|██▊ | 520/1840 [00:05<00:14, 91.31it/s]
|
1465 |
29%|██▉ | 530/1840 [00:05<00:14, 93.38it/s]
|
1466 |
29%|██▉ | 540/1840 [00:05<00:14, 91.96it/s]
|
1467 |
30%|██▉ | 550/1840 [00:05<00:13, 92.37it/s]
|
1468 |
30%|███ | 560/1840 [00:06<00:13, 91.82it/s]
|
1469 |
31%|███ | 570/1840 [00:06<00:13, 93.85it/s]
|
1470 |
32%|███▏ | 580/1840 [00:06<00:13, 95.51it/s]
|
1471 |
32%|███▏ | 590/1840 [00:06<00:13, 94.30it/s]
|
1472 |
33%|███▎ | 600/1840 [00:06<00:13, 92.23it/s]
|
1473 |
33%|███▎ | 610/1840 [00:06<00:13, 92.56it/s]
|
1474 |
34%|███▎ | 620/1840 [00:06<00:13, 90.55it/s]
|
1475 |
34%|███▍ | 630/1840 [00:06<00:13, 90.47it/s]
|
1476 |
35%|███▍ | 640/1840 [00:06<00:13, 92.07it/s]
|
1477 |
35%|███▌ | 650/1840 [00:07<00:12, 92.80it/s]
|
1478 |
36%|███▌ | 660/1840 [00:07<00:13, 90.37it/s]
|
1479 |
36%|███▋ | 670/1840 [00:07<00:12, 91.10it/s]
|
1480 |
37%|███▋ | 680/1840 [00:07<00:12, 90.92it/s]
|
1481 |
38%|███▊ | 690/1840 [00:07<00:12, 91.68it/s]
|
1482 |
38%|███▊ | 700/1840 [00:07<00:12, 91.93it/s]
|
1483 |
39%|███▊ | 710/1840 [00:07<00:12, 90.93it/s]
|
1484 |
39%|███▉ | 720/1840 [00:07<00:12, 93.12it/s]
|
1485 |
40%|███▉ | 731/1840 [00:07<00:11, 95.27it/s]
|
1486 |
40%|████ | 741/1840 [00:08<00:11, 96.12it/s]
|
1487 |
41%|████ | 751/1840 [00:08<00:11, 95.23it/s]
|
1488 |
41%|████▏ | 761/1840 [00:08<00:11, 95.80it/s]
|
1489 |
42%|████▏ | 771/1840 [00:08<00:11, 94.59it/s]
|
1490 |
42%|████▏ | 781/1840 [00:08<00:11, 94.61it/s]
|
1491 |
43%|████▎ | 791/1840 [00:08<00:11, 92.06it/s]
|
1492 |
44%|████▎ | 801/1840 [00:08<00:11, 92.15it/s]
|
1493 |
44%|████▍ | 811/1840 [00:08<00:11, 90.36it/s]
|
1494 |
45%|████▍ | 821/1840 [00:08<00:11, 90.07it/s]
|
1495 |
45%|████▌ | 831/1840 [00:09<00:11, 90.22it/s]
|
1496 |
46%|████▌ | 841/1840 [00:09<00:10, 90.83it/s]
|
1497 |
46%|████▋ | 851/1840 [00:09<00:10, 92.27it/s]
|
1498 |
47%|████▋ | 861/1840 [00:09<00:10, 93.30it/s]
|
1499 |
47%|████▋ | 871/1840 [00:09<00:10, 95.19it/s]
|
1500 |
48%|████▊ | 881/1840 [00:09<00:10, 94.02it/s]
|
1501 |
48%|████▊ | 891/1840 [00:09<00:10, 94.19it/s]
|
1502 |
49%|████▉ | 901/1840 [00:09<00:10, 93.31it/s]
|
1503 |
50%|████▉ | 911/1840 [00:09<00:09, 94.54it/s]
|
1504 |
50%|█████ | 921/1840 [00:09<00:09, 94.97it/s]
|
1505 |
51%|█████ | 931/1840 [00:10<00:09, 94.63it/s]
|
1506 |
51%|█████ | 941/1840 [00:10<00:09, 91.83it/s]
|
1507 |
52%|█████▏ | 951/1840 [00:10<00:09, 91.34it/s]
|
1508 |
52%|█████▏ | 961/1840 [00:10<00:09, 90.38it/s]
|
1509 |
53%|█████▎ | 971/1840 [00:10<00:09, 91.94it/s]
|
1510 |
53%|█████▎ | 981/1840 [00:10<00:09, 91.00it/s]
|
1511 |
54%|█████▍ | 991/1840 [00:10<00:09, 92.18it/s]
|
1512 |
54%|█████▍ | 1001/1840 [00:10<00:09, 92.24it/s]
|
1513 |
55%|█████▍ | 1011/1840 [00:10<00:08, 92.66it/s]
|
1514 |
55%|█████▌ | 1021/1840 [00:11<00:08, 94.00it/s]
|
1515 |
56%|█████▌ | 1031/1840 [00:11<00:08, 95.57it/s]
|
1516 |
57%|█████▋ | 1041/1840 [00:11<00:08, 94.84it/s]
|
1517 |
57%|█████▋ | 1051/1840 [00:11<00:08, 94.30it/s]
|
1518 |
58%|█████▊ | 1061/1840 [00:11<00:08, 93.87it/s]
|
1519 |
58%|█████▊ | 1071/1840 [00:11<00:08, 94.72it/s]
|
1520 |
59%|█████▉ | 1081/1840 [00:11<00:08, 85.95it/s]
|
1521 |
59%|█████▉ | 1091/1840 [00:11<00:08, 89.18it/s]
|
1522 |
60%|█████▉ | 1101/1840 [00:11<00:08, 91.72it/s]
|
1523 |
60%|██████ | 1111/1840 [00:12<00:07, 92.63it/s]
|
1524 |
61%|██████ | 1121/1840 [00:12<00:07, 92.18it/s]
|
1525 |
61%|██████▏ | 1131/1840 [00:12<00:07, 92.87it/s]
|
1526 |
62%|██████▏ | 1141/1840 [00:12<00:07, 92.80it/s]
|
1527 |
63%|██████▎ | 1151/1840 [00:12<00:07, 93.16it/s]
|
1528 |
63%|██████▎ | 1161/1840 [00:12<00:07, 94.60it/s]
|
1529 |
64%|██████▎ | 1171/1840 [00:12<00:07, 92.20it/s]
|
1530 |
64%|██████▍ | 1181/1840 [00:12<00:07, 92.67it/s]
|
1531 |
65%|██████▍ | 1191/1840 [00:12<00:07, 90.14it/s]
|
1532 |
65%|██████▌ | 1201/1840 [00:12<00:06, 91.67it/s]
|
1533 |
66%|██████▌ | 1211/1840 [00:13<00:06, 93.09it/s]
|
1534 |
66%|██████▋ | 1221/1840 [00:13<00:06, 91.59it/s]
|
1535 |
67%|██████▋ | 1231/1840 [00:13<00:06, 91.56it/s]
|
1536 |
67%|██████▋ | 1241/1840 [00:13<00:06, 92.51it/s]
|
1537 |
68%|██████▊ | 1251/1840 [00:13<00:06, 93.68it/s]
|
1538 |
69%|██████▊ | 1261/1840 [00:13<00:06, 91.47it/s]
|
1539 |
69%|██████▉ | 1271/1840 [00:13<00:06, 91.63it/s]
|
1540 |
70%|██████▉ | 1281/1840 [00:13<00:06, 92.14it/s]
|
1541 |
70%|███████ | 1291/1840 [00:13<00:05, 93.88it/s]
|
1542 |
71%|███████ | 1301/1840 [00:14<00:05, 94.62it/s]
|
1543 |
71%|███████▏ | 1312/1840 [00:14<00:05, 96.68it/s]
|
1544 |
72%|███████▏ | 1322/1840 [00:14<00:05, 96.97it/s]
|
1545 |
72%|███████▏ | 1332/1840 [00:14<00:05, 96.37it/s]
|
1546 |
73%|███████▎ | 1342/1840 [00:14<00:05, 96.05it/s]
|
1547 |
73%|███████▎ | 1352/1840 [00:14<00:05, 95.04it/s]
|
1548 |
74%|███████▍ | 1362/1840 [00:14<00:05, 94.28it/s]
|
1549 |
75%|███████▍ | 1372/1840 [00:14<00:04, 94.95it/s]
|
1550 |
75%|███████▌ | 1382/1840 [00:14<00:04, 94.71it/s]
|
1551 |
76%|███████▌ | 1393/1840 [00:15<00:04, 96.04it/s]
|
1552 |
76%|███████▋ | 1403/1840 [00:15<00:04, 96.26it/s]
|
1553 |
77%|███████▋ | 1414/1840 [00:15<00:04, 97.83it/s]
|
1554 |
77%|███████▋ | 1425/1840 [00:15<00:04, 99.04it/s]
|
1555 |
78%|███████▊ | 1435/1840 [00:15<00:04, 97.69it/s]
|
1556 |
79%|███████▊ | 1445/1840 [00:15<00:04, 94.55it/s]
|
1557 |
79%|███████▉ | 1455/1840 [00:15<00:04, 93.85it/s]
|
1558 |
80%|███████▉ | 1466/1840 [00:15<00:03, 95.88it/s]
|
1559 |
80%|████████ | 1476/1840 [00:15<00:03, 96.72it/s]
|
1560 |
81%|████████ | 1486/1840 [00:15<00:03, 92.87it/s]
|
1561 |
81%|████████▏ | 1496/1840 [00:16<00:03, 92.19it/s]
|
1562 |
82%|████████▏ | 1506/1840 [00:16<00:03, 90.11it/s]
|
1563 |
82%|████████▏ | 1516/1840 [00:16<00:03, 89.92it/s]
|
1564 |
83%|████████▎ | 1526/1840 [00:16<00:03, 91.95it/s]
|
1565 |
83%|████████▎ | 1536/1840 [00:16<00:03, 90.92it/s]
|
1566 |
84%|████████▍ | 1546/1840 [00:16<00:03, 90.96it/s]
|
1567 |
85%|████████▍ | 1556/1840 [00:16<00:03, 90.87it/s]
|
1568 |
85%|████████▌ | 1566/1840 [00:16<00:02, 91.37it/s]
|
1569 |
86%|████████▌ | 1576/1840 [00:16<00:02, 90.52it/s]
|
1570 |
86%|████████▌ | 1586/1840 [00:17<00:02, 90.93it/s]
|
1571 |
87%|████████▋ | 1596/1840 [00:17<00:02, 91.78it/s]
|
1572 |
87%|████████▋ | 1606/1840 [00:17<00:02, 92.49it/s]
|
1573 |
88%|████████▊ | 1616/1840 [00:17<00:02, 93.36it/s]
|
1574 |
88%|████████▊ | 1626/1840 [00:17<00:02, 92.76it/s]
|
1575 |
89%|████████▉ | 1636/1840 [00:17<00:02, 86.37it/s]
|
1576 |
89%|████████▉ | 1646/1840 [00:17<00:02, 87.94it/s]
|
1577 |
90%|█████████ | 1656/1840 [00:17<00:02, 89.41it/s]
|
1578 |
91%|█████████ | 1666/1840 [00:17<00:01, 90.62it/s]
|
1579 |
91%|█████████ | 1676/1840 [00:18<00:01, 90.24it/s]
|
1580 |
92%|█████████▏| 1686/1840 [00:18<00:01, 90.23it/s]
|
1581 |
92%|█████████▏| 1696/1840 [00:18<00:01, 92.31it/s]
|
1582 |
93%|█████████▎| 1706/1840 [00:18<00:01, 91.76it/s]
|
1583 |
93%|█████████▎| 1716/1840 [00:18<00:01, 91.75it/s]
|
1584 |
94%|█████████▍| 1726/1840 [00:18<00:01, 92.98it/s]
|
1585 |
94%|█████████▍| 1736/1840 [00:18<00:01, 90.98it/s]
|
1586 |
95%|█████████▍| 1746/1840 [00:18<00:01, 91.89it/s]
|
1587 |
95%|█████████▌| 1756/1840 [00:18<00:00, 93.73it/s]
|
1588 |
96%|█████████▌| 1766/1840 [00:19<00:00, 95.14it/s]
|
1589 |
97%|█████████▋| 1776/1840 [00:19<00:00, 94.88it/s]
|
1590 |
97%|█████████▋| 1786/1840 [00:19<00:00, 95.14it/s]
|
1591 |
98%|█████████▊| 1796/1840 [00:19<00:00, 96.49it/s]
|
1592 |
98%|█████████▊| 1806/1840 [00:19<00:00, 94.16it/s]
|
1593 |
99%|█████████▊| 1816/1840 [00:19<00:00, 93.01it/s]
|
1594 |
99%|█████████▉| 1826/1840 [00:19<00:00, 94.52it/s]
|
1595 |
+
[INFO|trainer.py:3503] 2024-09-05 09:17:44,719 >> Saving model checkpoint to /content/dissertation/scripts/ner/output
|
1596 |
+
[INFO|configuration_utils.py:472] 2024-09-05 09:17:44,720 >> Configuration saved in /content/dissertation/scripts/ner/output/config.json
|
1597 |
+
[INFO|modeling_utils.py:2799] 2024-09-05 09:17:46,010 >> Model weights saved in /content/dissertation/scripts/ner/output/model.safetensors
|
1598 |
+
[INFO|tokenization_utils_base.py:2684] 2024-09-05 09:17:46,011 >> tokenizer config file saved in /content/dissertation/scripts/ner/output/tokenizer_config.json
|
1599 |
+
[INFO|tokenization_utils_base.py:2693] 2024-09-05 09:17:46,011 >> Special tokens file saved in /content/dissertation/scripts/ner/output/special_tokens_map.json
|
1600 |
+
***** predict metrics *****
|
1601 |
+
predict_accuracy = 0.9986
|
1602 |
+
predict_f1 = 0.9212
|
1603 |
+
predict_loss = 0.007
|
1604 |
+
predict_precision = 0.9025
|
1605 |
+
predict_recall = 0.9408
|
1606 |
+
predict_runtime = 0:00:26.17
|
1607 |
+
predict_samples_per_second = 562.112
|
1608 |
+
predict_steps_per_second = 70.288
|
1609 |
+
|
train_results.json
ADDED
@@ -0,0 +1,9 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"epoch": 10.0,
|
3 |
+
"total_flos": 1.394320679130096e+16,
|
4 |
+
"train_loss": 0.0030765269683407886,
|
5 |
+
"train_runtime": 1249.6681,
|
6 |
+
"train_samples": 32232,
|
7 |
+
"train_samples_per_second": 257.924,
|
8 |
+
"train_steps_per_second": 4.033
|
9 |
+
}
|
trainer_state.json
ADDED
@@ -0,0 +1,232 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"best_metric": 0.930276087973795,
|
3 |
+
"best_model_checkpoint": "/content/dissertation/scripts/ner/output/checkpoint-2016",
|
4 |
+
"epoch": 10.0,
|
5 |
+
"eval_steps": 500,
|
6 |
+
"global_step": 5040,
|
7 |
+
"is_hyper_param_search": false,
|
8 |
+
"is_local_process_zero": true,
|
9 |
+
"is_world_process_zero": true,
|
10 |
+
"log_history": [
|
11 |
+
{
|
12 |
+
"epoch": 0.9920634920634921,
|
13 |
+
"grad_norm": 0.06281786412000656,
|
14 |
+
"learning_rate": 4.503968253968254e-05,
|
15 |
+
"loss": 0.0189,
|
16 |
+
"step": 500
|
17 |
+
},
|
18 |
+
{
|
19 |
+
"epoch": 1.0,
|
20 |
+
"eval_accuracy": 0.9983953339100828,
|
21 |
+
"eval_f1": 0.9040358744394619,
|
22 |
+
"eval_loss": 0.00518822530284524,
|
23 |
+
"eval_precision": 0.8712186689714779,
|
24 |
+
"eval_recall": 0.9394221808014911,
|
25 |
+
"eval_runtime": 13.2131,
|
26 |
+
"eval_samples_per_second": 525.689,
|
27 |
+
"eval_steps_per_second": 65.768,
|
28 |
+
"step": 504
|
29 |
+
},
|
30 |
+
{
|
31 |
+
"epoch": 1.9841269841269842,
|
32 |
+
"grad_norm": 0.20689290761947632,
|
33 |
+
"learning_rate": 4.007936507936508e-05,
|
34 |
+
"loss": 0.0047,
|
35 |
+
"step": 1000
|
36 |
+
},
|
37 |
+
{
|
38 |
+
"epoch": 2.0,
|
39 |
+
"eval_accuracy": 0.9986883598917199,
|
40 |
+
"eval_f1": 0.9244402985074627,
|
41 |
+
"eval_loss": 0.004834321793168783,
|
42 |
+
"eval_precision": 0.9253034547152195,
|
43 |
+
"eval_recall": 0.923578751164958,
|
44 |
+
"eval_runtime": 13.2434,
|
45 |
+
"eval_samples_per_second": 524.487,
|
46 |
+
"eval_steps_per_second": 65.618,
|
47 |
+
"step": 1008
|
48 |
+
},
|
49 |
+
{
|
50 |
+
"epoch": 2.9761904761904763,
|
51 |
+
"grad_norm": 0.02256501279771328,
|
52 |
+
"learning_rate": 3.511904761904762e-05,
|
53 |
+
"loss": 0.0027,
|
54 |
+
"step": 1500
|
55 |
+
},
|
56 |
+
{
|
57 |
+
"epoch": 3.0,
|
58 |
+
"eval_accuracy": 0.9986255686099406,
|
59 |
+
"eval_f1": 0.9239384041063929,
|
60 |
+
"eval_loss": 0.005881821736693382,
|
61 |
+
"eval_precision": 0.9252336448598131,
|
62 |
+
"eval_recall": 0.9226467847157502,
|
63 |
+
"eval_runtime": 13.1715,
|
64 |
+
"eval_samples_per_second": 527.351,
|
65 |
+
"eval_steps_per_second": 65.976,
|
66 |
+
"step": 1512
|
67 |
+
},
|
68 |
+
{
|
69 |
+
"epoch": 3.9682539682539684,
|
70 |
+
"grad_norm": 0.0326182059943676,
|
71 |
+
"learning_rate": 3.0158730158730158e-05,
|
72 |
+
"loss": 0.0015,
|
73 |
+
"step": 2000
|
74 |
+
},
|
75 |
+
{
|
76 |
+
"epoch": 4.0,
|
77 |
+
"eval_accuracy": 0.9987162671280663,
|
78 |
+
"eval_f1": 0.930276087973795,
|
79 |
+
"eval_loss": 0.0064844791777431965,
|
80 |
+
"eval_precision": 0.9342105263157895,
|
81 |
+
"eval_recall": 0.9263746505125815,
|
82 |
+
"eval_runtime": 13.5939,
|
83 |
+
"eval_samples_per_second": 510.963,
|
84 |
+
"eval_steps_per_second": 63.926,
|
85 |
+
"step": 2016
|
86 |
+
},
|
87 |
+
{
|
88 |
+
"epoch": 4.9603174603174605,
|
89 |
+
"grad_norm": 0.1600140929222107,
|
90 |
+
"learning_rate": 2.5198412698412697e-05,
|
91 |
+
"loss": 0.0011,
|
92 |
+
"step": 2500
|
93 |
+
},
|
94 |
+
{
|
95 |
+
"epoch": 5.0,
|
96 |
+
"eval_accuracy": 0.9986116149917673,
|
97 |
+
"eval_f1": 0.923076923076923,
|
98 |
+
"eval_loss": 0.007346163038164377,
|
99 |
+
"eval_precision": 0.9072907290729073,
|
100 |
+
"eval_recall": 0.9394221808014911,
|
101 |
+
"eval_runtime": 13.3438,
|
102 |
+
"eval_samples_per_second": 520.543,
|
103 |
+
"eval_steps_per_second": 65.124,
|
104 |
+
"step": 2520
|
105 |
+
},
|
106 |
+
{
|
107 |
+
"epoch": 5.9523809523809526,
|
108 |
+
"grad_norm": 0.09641193598508835,
|
109 |
+
"learning_rate": 2.023809523809524e-05,
|
110 |
+
"loss": 0.0005,
|
111 |
+
"step": 3000
|
112 |
+
},
|
113 |
+
{
|
114 |
+
"epoch": 6.0,
|
115 |
+
"eval_accuracy": 0.998444171573689,
|
116 |
+
"eval_f1": 0.9204281060958585,
|
117 |
+
"eval_loss": 0.009004838764667511,
|
118 |
+
"eval_precision": 0.9191449814126395,
|
119 |
+
"eval_recall": 0.9217148182665424,
|
120 |
+
"eval_runtime": 13.3195,
|
121 |
+
"eval_samples_per_second": 521.491,
|
122 |
+
"eval_steps_per_second": 65.243,
|
123 |
+
"step": 3024
|
124 |
+
},
|
125 |
+
{
|
126 |
+
"epoch": 6.944444444444445,
|
127 |
+
"grad_norm": 0.029784763231873512,
|
128 |
+
"learning_rate": 1.527777777777778e-05,
|
129 |
+
"loss": 0.0007,
|
130 |
+
"step": 3500
|
131 |
+
},
|
132 |
+
{
|
133 |
+
"epoch": 7.0,
|
134 |
+
"eval_accuracy": 0.9985767309463344,
|
135 |
+
"eval_f1": 0.9190432382704691,
|
136 |
+
"eval_loss": 0.008385799825191498,
|
137 |
+
"eval_precision": 0.9073569482288828,
|
138 |
+
"eval_recall": 0.9310344827586207,
|
139 |
+
"eval_runtime": 13.4485,
|
140 |
+
"eval_samples_per_second": 516.487,
|
141 |
+
"eval_steps_per_second": 64.617,
|
142 |
+
"step": 3528
|
143 |
+
},
|
144 |
+
{
|
145 |
+
"epoch": 7.936507936507937,
|
146 |
+
"grad_norm": 0.0010318646673113108,
|
147 |
+
"learning_rate": 1.0317460317460318e-05,
|
148 |
+
"loss": 0.0004,
|
149 |
+
"step": 4000
|
150 |
+
},
|
151 |
+
{
|
152 |
+
"epoch": 8.0,
|
153 |
+
"eval_accuracy": 0.9985558005190746,
|
154 |
+
"eval_f1": 0.9213793103448276,
|
155 |
+
"eval_loss": 0.008501913398504257,
|
156 |
+
"eval_precision": 0.9092558983666061,
|
157 |
+
"eval_recall": 0.9338303821062441,
|
158 |
+
"eval_runtime": 13.3613,
|
159 |
+
"eval_samples_per_second": 519.86,
|
160 |
+
"eval_steps_per_second": 65.039,
|
161 |
+
"step": 4032
|
162 |
+
},
|
163 |
+
{
|
164 |
+
"epoch": 8.928571428571429,
|
165 |
+
"grad_norm": 0.0005677491426467896,
|
166 |
+
"learning_rate": 5.357142857142857e-06,
|
167 |
+
"loss": 0.0003,
|
168 |
+
"step": 4500
|
169 |
+
},
|
170 |
+
{
|
171 |
+
"epoch": 9.0,
|
172 |
+
"eval_accuracy": 0.9987023135098931,
|
173 |
+
"eval_f1": 0.9270544783010157,
|
174 |
+
"eval_loss": 0.008021777495741844,
|
175 |
+
"eval_precision": 0.918572735590119,
|
176 |
+
"eval_recall": 0.9356943150046598,
|
177 |
+
"eval_runtime": 13.487,
|
178 |
+
"eval_samples_per_second": 515.014,
|
179 |
+
"eval_steps_per_second": 64.432,
|
180 |
+
"step": 4536
|
181 |
+
},
|
182 |
+
{
|
183 |
+
"epoch": 9.920634920634921,
|
184 |
+
"grad_norm": 0.0005077613168396056,
|
185 |
+
"learning_rate": 3.9682539682539683e-07,
|
186 |
+
"loss": 0.0002,
|
187 |
+
"step": 5000
|
188 |
+
},
|
189 |
+
{
|
190 |
+
"epoch": 10.0,
|
191 |
+
"eval_accuracy": 0.9986883598917199,
|
192 |
+
"eval_f1": 0.9278445883441258,
|
193 |
+
"eval_loss": 0.008253143168985844,
|
194 |
+
"eval_precision": 0.921028466483012,
|
195 |
+
"eval_recall": 0.934762348555452,
|
196 |
+
"eval_runtime": 13.7257,
|
197 |
+
"eval_samples_per_second": 506.057,
|
198 |
+
"eval_steps_per_second": 63.312,
|
199 |
+
"step": 5040
|
200 |
+
},
|
201 |
+
{
|
202 |
+
"epoch": 10.0,
|
203 |
+
"step": 5040,
|
204 |
+
"total_flos": 1.394320679130096e+16,
|
205 |
+
"train_loss": 0.0030765269683407886,
|
206 |
+
"train_runtime": 1249.6681,
|
207 |
+
"train_samples_per_second": 257.924,
|
208 |
+
"train_steps_per_second": 4.033
|
209 |
+
}
|
210 |
+
],
|
211 |
+
"logging_steps": 500,
|
212 |
+
"max_steps": 5040,
|
213 |
+
"num_input_tokens_seen": 0,
|
214 |
+
"num_train_epochs": 10,
|
215 |
+
"save_steps": 500,
|
216 |
+
"stateful_callbacks": {
|
217 |
+
"TrainerControl": {
|
218 |
+
"args": {
|
219 |
+
"should_epoch_stop": false,
|
220 |
+
"should_evaluate": false,
|
221 |
+
"should_log": false,
|
222 |
+
"should_save": true,
|
223 |
+
"should_training_stop": true
|
224 |
+
},
|
225 |
+
"attributes": {}
|
226 |
+
}
|
227 |
+
},
|
228 |
+
"total_flos": 1.394320679130096e+16,
|
229 |
+
"train_batch_size": 32,
|
230 |
+
"trial_name": null,
|
231 |
+
"trial_params": null
|
232 |
+
}
|