Shaltiel committed on
Commit
87e6b6c
•
1 Parent(s): 4e0cd2d

Update README.md

Browse files
Files changed (1) hide show
  1. README.md +17 -16
README.md CHANGED
@@ -16,7 +16,11 @@ Sample usage:
16
  ```python
17
  from transformers import pipeline
18
 
19
- oracle = pipeline('ner', model='dicta-il/dictabert-ner')
 
 
 
 
20
 
21
  sentence = 'הכי דרמטי שיש: שער של סדריק המחליף העניק לזיו אריה ניצחון שני בשלושה משחקים ועלייה מעל הקו האדום.'
22
  oracle(sentence)
@@ -26,28 +30,25 @@ Output:
26
  ```json
27
  [
28
  {
29
- "entity": "B-PER",
30
- "score": "0.99827814",
31
- "index": 7,
32
  "word": "סדריק",
33
- "start": 22,
34
- "end": 27
35
  },
36
  {
37
- "entity": "B-PER",
38
- "score": "0.9995943",
39
- "index": 10,
40
  "word": "לזי",
41
- "start": 41,
42
- "end": 44
43
  },
44
  {
45
- "entity": "I-PER",
46
- "score": "0.9995154",
47
- "index": 12,
48
  "word": "אריה",
49
- "start": 46,
50
- "end": 50
51
  }
52
  ]
53
  ```
 
16
  ```python
17
  from transformers import pipeline
18
 
19
+ oracle = pipeline('ner', model='dicta-il/dictabert-ner', aggregation_strategy='simple')
20
+
21
+ # If we set aggregation_strategy to 'simple', we need to define a decoder for the tokenizer. Note that the last wordpiece of a group will still be omitted.
22
+ from tokenizers.decoders import WordPiece
23
+ oracle.tokenizer.backend_tokenizer.decoder = WordPiece()
24
 
25
  sentence = 'הכי דרמטי שיש: שער של סדריק המחליף העניק לזיו אריה ניצחון שני בשלושה משחקים ועלייה מעל הקו האדום.'
26
  oracle(sentence)
 
30
  ```json
31
  [
32
  {
33
+ "entity_group": "PER",
34
+ "score": "0.99978834",
 
35
  "word": "סדריק",
36
+ "start": "22",
37
+ "end": "27"
38
  },
39
  {
40
+ "entity_group": "PER",
41
+ "score": "0.99994457",
 
42
  "word": "לזי",
43
+ "start": "41",
44
+ "end": "44"
45
  },
46
  {
47
+ "entity_group": "PER",
48
+ "score": "0.99993944",
 
49
  "word": "אריה",
50
+ "start": "46",
51
+ "end": "50"
52
  }
53
  ]
54
  ```