dicta-il
/

dictabert-ner

Token Classification

Inference Endpoints

Model card Files Files and versions Community

Shaltiel committed on Dec 28, 2023

Commit

87e6b6c

•

1 Parent(s): 4e0cd2d

Update README.md

Files changed (1) hide show

README.md +17 -16

README.md CHANGED Viewed

@@ -16,7 +16,11 @@ Sample usage:
 ```python
 from transformers import pipeline
-oracle = pipeline('ner', model='dicta-il/dictabert-ner')
 sentence = 'הכי דרמטי שיש: שער של סדריק המחליף העניק לזיו אריה ניצחון שני בשלושה משחקים ועלייה מעל הקו האדום.'
 oracle(sentence)
@@ -26,28 +30,25 @@ Output:
 ```json
 [
   {
-    "entity": "B-PER",
-    "score": "0.99827814",
-    "index": 7,
     "word": "סדריק",
-    "start": 22,
-    "end": 27
   },
   {
-    "entity": "B-PER",
-    "score": "0.9995943",
-    "index": 10,
     "word": "לזי",
-    "start": 41,
-    "end": 44
   },
   {
-    "entity": "I-PER",
-    "score": "0.9995154",
-    "index": 12,
     "word": "אריה",
-    "start": 46,
-    "end": 50
   }
 ]
 ```

 ```python
 from transformers import pipeline
+oracle = pipeline('ner', model='dicta-il/dictabert-ner', aggregation_strategy='simple')
+# When aggregation_strategy is set to 'simple', a decoder must be defined for the tokenizer. Note that the last wordpiece of a group will still be emitted.
+from tokenizers.decoders import WordPiece
+oracle.tokenizer.backend_tokenizer.decoder = WordPiece()
 sentence = 'הכי דרמטי שיש: שער של סדריק המחליף העניק לזיו אריה ניצחון שני בשלושה משחקים ועלייה מעל הקו האדום.'
 oracle(sentence)
 ```json
 [
   {
+    "entity_group": "PER",
+    "score": "0.99978834",
     "word": "סדריק",
+    "start": "22",
+    "end": "27"
   },
   {
+    "entity_group": "PER",
+    "score": "0.99994457",
     "word": "לזי",
+    "start": "41",
+    "end": "44"
   },
   {
+    "entity_group": "PER",
+    "score": "0.99993944",
     "word": "אריה",
+    "start": "46",
+    "end": "50"
   }
 ]
 ```