add align_word_ids
Browse files
app.py
CHANGED
@@ -26,6 +26,35 @@ def load_model():
|
|
26 |
tokenizer = AutoTokenizer.from_pretrained("vonewman/wolof-finetuned-ner")
|
27 |
return trainer, model, tokenizer
|
28 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
29 |
def predict_ner_labels(model, tokenizer, sentence):
|
30 |
use_cuda = torch.cuda.is_available()
|
31 |
device = torch.device("cuda" if use_cuda else "cpu")
|
|
|
26 |
tokenizer = AutoTokenizer.from_pretrained("vonewman/wolof-finetuned-ner")
|
27 |
return trainer, model, tokenizer
|
28 |
|
29 |
+
def align_word_ids(texts, tokenizer=None, label_all_tokens=None, max_length=218):
    """Build a per-token label mask aligned with the tokenizer's word ids.

    The text is tokenized with padding and truncation to ``max_length``,
    then every produced token position is mapped to a marker:

    * ``-100`` for positions with no word id (special and padding tokens);
    * ``1`` for the first sub-token of each word;
    * ``1`` for subsequent sub-tokens of the same word when
      ``label_all_tokens`` is truthy, otherwise ``-100``.

    NOTE(review): ``-100`` is presumably used because it is the default
    ``ignore_index`` of PyTorch's cross-entropy loss -- confirm with the
    code that consumes this mask.

    Args:
        texts: Input passed straight to the tokenizer. ``word_ids()`` is
            called without a batch index, so only one sequence is
            described by the result (assumed to be a single sentence).
        tokenizer: Callable returning an encoding that exposes
            ``word_ids()``. When omitted, the module-level ``tokenizer``
            is used, which is what the original implementation relied on.
        label_all_tokens: Whether continuation sub-tokens are marked with
            ``1``. When omitted, a module-level ``label_all_tokens`` is
            used if one exists; otherwise it is treated as ``False``.
        max_length: Length the tokenizer pads/truncates to.

    Returns:
        A list of ints (``1`` or ``-100``), one per token position.

    Raises:
        NameError: If no tokenizer is passed and no module-level
            ``tokenizer`` is defined.
    """
    if tokenizer is None:
        # Backward compatibility: earlier callers relied on a global
        # tokenizer. The parameter shadows that name, so look it up
        # explicitly in the module namespace.
        try:
            tokenizer = globals()["tokenizer"]
        except KeyError:
            raise NameError("name 'tokenizer' is not defined") from None

    if label_all_tokens is None:
        # The original code read an optional global flag and silently fell
        # back to -100 (via a bare ``except``) when it was undefined; keep
        # that outcome without hiding unrelated errors.
        label_all_tokens = globals().get("label_all_tokens", False)

    tokenized_inputs = tokenizer(
        texts,
        padding='max_length',
        max_length=max_length,
        truncation=True,
    )
    word_ids = tokenized_inputs.word_ids()

    # Marker for the 2nd, 3rd, ... sub-token of a word; loop-invariant.
    continuation_label = 1 if label_all_tokens else -100

    label_ids = []
    previous_word_idx = None
    for word_idx in word_ids:
        if word_idx is None:
            # Special or padding token: no word behind it.
            label_ids.append(-100)
        elif word_idx != previous_word_idx:
            # First sub-token of a new word.
            label_ids.append(1)
        else:
            # Same word as the previous token.
            label_ids.append(continuation_label)
        previous_word_idx = word_idx

    return label_ids
|
56 |
+
|
57 |
+
|
58 |
def predict_ner_labels(model, tokenizer, sentence):
|
59 |
use_cuda = torch.cuda.is_available()
|
60 |
device = torch.device("cuda" if use_cuda else "cpu")
|