Update README.md
Browse files
README.md
CHANGED
@@ -16,7 +16,11 @@ Sample usage:
|
|
16 |
```python
|
17 |
from transformers import pipeline
|
18 |
|
19 |
-
oracle = pipeline('ner', model='dicta-il/dictabert-ner')
|
|
|
|
|
|
|
|
|
20 |
|
21 |
sentence = 'ืืื ืืจืืื ืฉืืฉ: ืฉืขืจ ืฉื ืกืืจืืง ืืืืืืฃ ืืขื ืืง ืืืื ืืจืื ื ืืฆืืื ืฉื ื ืืฉืืืฉื ืืฉืืงืื ืืขืืืื ืืขื ืืงื ืืืืื.'
|
22 |
oracle(sentence)
|
@@ -26,28 +30,25 @@ Output:
|
|
26 |
```json
|
27 |
[
|
28 |
{
|
29 |
-
"
|
30 |
-
"score": "0.
|
31 |
-
"index": 7,
|
32 |
"word": "ืกืืจืืง",
|
33 |
-
"start": 22,
|
34 |
-
"end": 27
|
35 |
},
|
36 |
{
|
37 |
-
"
|
38 |
-
"score": "0.
|
39 |
-
"index": 10,
|
40 |
"word": "ืืื",
|
41 |
-
"start": 41,
|
42 |
-
"end": 44
|
43 |
},
|
44 |
{
|
45 |
-
"
|
46 |
-
"score": "0.
|
47 |
-
"index": 12,
|
48 |
"word": "ืืจืื",
|
49 |
-
"start": 46,
|
50 |
-
"end": 50
|
51 |
}
|
52 |
]
|
53 |
```
|
|
|
16 |
```python
|
17 |
from transformers import pipeline
|
18 |
|
19 |
+
oracle = pipeline('ner', model='dicta-il/dictabert-ner', aggregation_strategy='simple')
|
20 |
+
|
21 |
+
# When aggregation_strategy is set to 'simple', a decoder must be defined for the tokenizer. Note that the last wordpiece of a group will still be emitted.
|
22 |
+
from tokenizers.decoders import WordPiece
|
23 |
+
oracle.tokenizer.backend_tokenizer.decoder = WordPiece()
|
24 |
|
25 |
sentence = 'ืืื ืืจืืื ืฉืืฉ: ืฉืขืจ ืฉื ืกืืจืืง ืืืืืืฃ ืืขื ืืง ืืืื ืืจืื ื ืืฆืืื ืฉื ื ืืฉืืืฉื ืืฉืืงืื ืืขืืืื ืืขื ืืงื ืืืืื.'
|
26 |
oracle(sentence)
|
|
|
30 |
```json
|
31 |
[
|
32 |
{
|
33 |
+
"entity_group": "PER",
|
34 |
+
"score": "0.99978834",
|
|
|
35 |
"word": "ืกืืจืืง",
|
36 |
+
"start": "22",
|
37 |
+
"end": "27"
|
38 |
},
|
39 |
{
|
40 |
+
"entity_group": "PER",
|
41 |
+
"score": "0.99994457",
|
|
|
42 |
"word": "ืืื",
|
43 |
+
"start": "41",
|
44 |
+
"end": "44"
|
45 |
},
|
46 |
{
|
47 |
+
"entity_group": "PER",
|
48 |
+
"score": "0.99993944",
|
|
|
49 |
"word": "ืืจืื",
|
50 |
+
"start": "46",
|
51 |
+
"end": "50"
|
52 |
}
|
53 |
]
|
54 |
```
|