Commit bfb7e61
maximuspowers committed
Parent: 90f989d

Update pipeline.py

Files changed (1): pipeline.py (+3, -10)
pipeline.py CHANGED
@@ -1,11 +1,8 @@
-from typing import Dict, List
+from typing import List, Dict
 import json
 import torch
-import numpy as np
 from transformers import BertTokenizerFast, BertForTokenClassification
 
-# this is so that we can use a custom pipeline (mostly parsing outputs) with the pipeline module
-
 class BiasNERPipeline:
     def __init__(self, model_path: str = 'maximuspowers/bias-detection-ner'):
         self.tokenizer = BertTokenizerFast.from_pretrained('bert-base-uncased')
@@ -13,7 +10,6 @@ class BiasNERPipeline:
         self.model.eval()
         self.model.to('cuda' if torch.cuda.is_available() else 'cpu')
 
-        # label mapping
         self.id2label = {
             0: 'O',
             1: 'B-STEREO',
@@ -24,20 +20,17 @@ class BiasNERPipeline:
             6: 'I-UNFAIR'
         }
 
-    def __call__(self, inputs: str) -> List[Dict[str, float]]:
-        # tokenize
+    def __call__(self, inputs: str) -> str:
         tokenized_inputs = self.tokenizer(inputs, return_tensors="pt", padding=True, truncation=True, max_length=128)
         input_ids = tokenized_inputs['input_ids'].to(self.model.device)
         attention_mask = tokenized_inputs['attention_mask'].to(self.model.device)
 
-        # run model
         with torch.no_grad():
             outputs = self.model(input_ids=input_ids, attention_mask=attention_mask)
             logits = outputs.logits
             probabilities = torch.sigmoid(logits)
             predicted_labels = (probabilities > 0.5).int()
 
-        # format output
         result = []
         tokens = self.tokenizer.convert_ids_to_tokens(input_ids[0])
         for i, token in enumerate(tokens):
@@ -46,4 +39,4 @@ class BiasNERPipeline:
             labels = [self.id2label[idx.item()] for idx in label_indices] if label_indices.numel() > 0 else ['O']
             result.append({"token": token, "labels": labels})
 
-        return result
+        return json.dumps(result, indent=4)
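
For readers skimming the diff: the unchanged decoding logic treats this as multi-label token classification. Each of the seven labels gets an independent sigmoid probability per token, and every label scoring above 0.5 is kept, so a single token can carry several labels at once. A self-contained sketch of that thresholding step, using a made-up logits tensor (the values below are illustrative, not model output):

import torch

# Made-up logits for 1 sequence x 2 tokens x 7 labels (illustrative values only).
logits = torch.tensor([[[ 2.0, -1.0, -3.0,  1.5, -2.0, -1.0, -4.0],
                        [-3.0,  2.5, -1.0,  2.2, -0.5, -2.0, -3.0]]])

probabilities = torch.sigmoid(logits)           # independent probability per label
predicted_labels = (probabilities > 0.5).int()  # keep every label above threshold

print(predicted_labels[0])
# tensor([[1, 0, 0, 1, 0, 0, 0],    <- token 0: label ids 0 and 3 fire
#         [0, 1, 0, 1, 0, 0, 0]])   <- token 1: label ids 1 ('B-STEREO') and 3 fire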
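And a minimal usage sketch of the class after this commit. The import path, example sentence, and printed shape are assumptions for illustration (they are not part of the commit); running it requires the full pipeline.py from this repo plus access to the model weights:

# Hypothetical usage; assumes pipeline.py from this repo is importable.
from pipeline import BiasNERPipeline

pipe = BiasNERPipeline()  # defaults to maximuspowers/bias-detection-ner

# After this commit, __call__ returns a JSON string (json.dumps) rather
# than a Python list of dicts.
output = pipe("Tall people make better leaders.")  # example sentence is made up
print(output)
# Expected shape (tokens and labels illustrative):
# [
#     {
#         "token": "[CLS]",
#         "labels": ["O"]
#     },
#     ...
# ]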