maximuspowers committed · Commit 248b1f9 · Parent: 0e51559

Update README.md

README.md CHANGED

@@ -20,4 +20,65 @@ co2_eq_emissions:
  training_type: "fine-tuning"
  geographical_location: "Phoenix, AZ"
  hardware_used: "T4"
---

# Social Bias NER

This NER model is fine-tuned from BERT for *multi-label* token classification of:

- (GEN)eralizations
- (UNFAIR)ness
- (STEREO)types
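
Each category uses BIO tags (`B-`/`I-` prefixes, see `id2label` in the code below), and because the task is multi-label, a single token can carry tags from more than one category at once. A purely illustrative output record (made-up values, not actual model output) has this shape:

```python
# illustrative shape of one record returned by predict_ner_tags below
# (the token and labels here are made up, not model output)
{"token": "example", "labels": ["B-GEN", "B-STEREO"]}
```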

You can [try it out in Spaces](https://huggingface.co/spaces/maximuspowers/bias-detection-ner) :).

## How to Get Started with the Model

The Transformers pipeline doesn't have a class for multi-label token classification, but you can use the code below to load the model, run it, and format the output.

```python
import json
import torch
from transformers import BertTokenizerFast, BertForTokenClassification

# init important things
tokenizer = BertTokenizerFast.from_pretrained('bert-base-uncased')
model = BertForTokenClassification.from_pretrained('maximuspowers/bias-detection-ner')
model.eval()
model.to('cuda' if torch.cuda.is_available() else 'cpu')

# ids to labels we want to display
id2label = {
    0: 'O',
    1: 'B-STEREO',
    2: 'I-STEREO',
    3: 'B-GEN',
    4: 'I-GEN',
    5: 'B-UNFAIR',
    6: 'I-UNFAIR'
}

# prediction function you'll want to use in your own code
def predict_ner_tags(sentence):
    inputs = tokenizer(sentence, return_tensors="pt", padding=True, truncation=True, max_length=128)
    input_ids = inputs['input_ids'].to(model.device)
    attention_mask = inputs['attention_mask'].to(model.device)

    with torch.no_grad():
        outputs = model(input_ids=input_ids, attention_mask=attention_mask)
        logits = outputs.logits
        # sigmoid instead of softmax, so each label is scored independently (multi-label)
        probabilities = torch.sigmoid(logits)
        predicted_labels = (probabilities > 0.5).int()  # remember to try your own threshold

    result = []
    tokens = tokenizer.convert_ids_to_tokens(input_ids[0])
    for i, token in enumerate(tokens):
        if token not in tokenizer.all_special_tokens:
            label_indices = (predicted_labels[0][i] == 1).nonzero(as_tuple=False).squeeze(-1)
            labels = [id2label[idx.item()] for idx in label_indices] if label_indices.numel() > 0 else ['O']
            result.append({"token": token, "labels": labels})

    return json.dumps(result, indent=4)
```
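
A quick way to try it (the sentence here is only a placeholder, substitute your own):

```python
# call the function on any sentence and print the formatted JSON
print(predict_ner_tags("Everyone from that city is always late."))
```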
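
The 0.5 cutoff inside `predict_ner_tags` is only a starting point. A minimal sketch for comparing other cutoffs (a hypothetical helper, not part of this model card) reuses the tokenizer and model loaded above:

```python
# hypothetical helper: same forward pass, but with the sigmoid cutoff exposed
def predicted_label_matrix(sentence, threshold=0.5):
    inputs = tokenizer(sentence, return_tensors="pt", padding=True, truncation=True, max_length=128)
    with torch.no_grad():
        outputs = model(input_ids=inputs['input_ids'].to(model.device),
                        attention_mask=inputs['attention_mask'].to(model.device))
    # rows are tokens, columns are label ids; 1 means that label fired
    return (torch.sigmoid(outputs.logits[0]) > threshold).int()

# see how many labels fire as the cutoff moves
for t in (0.3, 0.5, 0.7):
    print(t, predicted_label_matrix("Your sentence here.", t).sum().item())
```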