Spaces:

vonewman
/

ner_app

Runtime error

vonewman committed on Oct 28, 2023

Commit

fe3337c

1 Parent(s): f5b6e30

add align_word_ids

Files changed (1) hide show

app.py CHANGED Viewed

@@ -26,6 +26,35 @@ def load_model():
     tokenizer = AutoTokenizer.from_pretrained("vonewman/wolof-finetuned-ner")
     return trainer, model, tokenizer
 def predict_ner_labels(model, tokenizer, sentence):
     use_cuda = torch.cuda.is_available()
     device = torch.device("cuda" if use_cuda else "cpu")

     tokenizer = AutoTokenizer.from_pretrained("vonewman/wolof-finetuned-ner")
     return trainer, model, tokenizer
+def align_word_ids(texts):
+    tokenized_inputs = tokenizer(texts, padding='max_length', max_length=218, truncation=True)
+    word_ids = tokenized_inputs.word_ids()
+    previous_word_idx = None
+    label_ids = []
+    for word_idx in word_ids:
+        if word_idx is None:
+            label_ids.append(-100)
+        elif word_idx != previous_word_idx:
+            try:
+                label_ids.append(1)
+            except:
+                label_ids.append(-100)
+        else:
+            try:
+                label_ids.append(1 if label_all_tokens else -100)
+            except:
+                label_ids.append(-100)
+        previous_word_idx = word_idx
+    return label_ids
 def predict_ner_labels(model, tokenizer, sentence):
     use_cuda = torch.cuda.is_available()
     device = torch.device("cuda" if use_cuda else "cpu")