maximuspowers committed
Commit bfb7e61 · Parent(s): 90f989d

Update pipeline.py

pipeline.py: +3 -10
pipeline.py CHANGED
@@ -1,11 +1,8 @@
-from typing import
+from typing import List, Dict
 import json
 import torch
-import numpy as np
 from transformers import BertTokenizerFast, BertForTokenClassification
 
-# this is so that we can use a custom pipeline (mostly parsing outputs) with the pipeline module
-
 class BiasNERPipeline:
     def __init__(self, model_path: str = 'maximuspowers/bias-detection-ner'):
         self.tokenizer = BertTokenizerFast.from_pretrained('bert-base-uncased')
@@ -13,7 +10,6 @@ class BiasNERPipeline:
         self.model.eval()
         self.model.to('cuda' if torch.cuda.is_available() else 'cpu')
 
-        # label mapping
         self.id2label = {
             0: 'O',
             1: 'B-STEREO',
@@ -24,20 +20,17 @@ class BiasNERPipeline:
             6: 'I-UNFAIR'
         }
 
-    def __call__(self, inputs: str) ->
-        # tokenize
+    def __call__(self, inputs: str) -> str:
         tokenized_inputs = self.tokenizer(inputs, return_tensors="pt", padding=True, truncation=True, max_length=128)
         input_ids = tokenized_inputs['input_ids'].to(self.model.device)
         attention_mask = tokenized_inputs['attention_mask'].to(self.model.device)
 
-        # run model
         with torch.no_grad():
             outputs = self.model(input_ids=input_ids, attention_mask=attention_mask)
             logits = outputs.logits
             probabilities = torch.sigmoid(logits)
             predicted_labels = (probabilities > 0.5).int()
 
-        # format output
         result = []
         tokens = self.tokenizer.convert_ids_to_tokens(input_ids[0])
         for i, token in enumerate(tokens):
@@ -46,4 +39,4 @@ class BiasNERPipeline:
             labels = [self.id2label[idx.item()] for idx in label_indices] if label_indices.numel() > 0 else ['O']
             result.append({"token": token, "labels": labels})
 
-        return result
+        return json.dumps(result, indent=4)
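As a quick sanity check of the new behavior, here is a minimal usage sketch (not part of the commit): since __call__ now returns json.dumps(result, indent=4) rather than the raw list, a caller that needs structured data would decode the string with json.loads. The import path and the sample sentence are illustrative assumptions.

import json
from pipeline import BiasNERPipeline  # assumes pipeline.py is on the import path

nlp = BiasNERPipeline()  # loads maximuspowers/bias-detection-ner by default

# __call__ now returns a JSON string, so parse it back into a list of
# {"token": ..., "labels": [...]} records before iterating.
output = nlp("Example sentence to classify.")  # illustrative input
for entry in json.loads(output):
    print(entry["token"], entry["labels"])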