Spaces:

adrianmoses
/

hate-speech-detection

Runtime error

App Files Files Community

adrianmoses committed on Nov 5, 2021

Commit

ef4cddb

1 Parent(s): 99dc8a3

this works haha

Browse files

Files changed (2) hide show

app.py +103 -2
requirements.txt +5 -0

app.py CHANGED Viewed

@@ -1,4 +1,105 @@
 import streamlit as st
-x = st.slider('Select a value')
-st.write(x, 'squared is', x * x)

 import streamlit as st
+import re
+import torch
+from transformers import AlbertTokenizer, AlbertModel
+import pytorch_lightning as pl
+from huggingface_hub import hf_hub_download
+def download_torch_model(repo_id="adrianmoses/hate-speech-detection", filename="pytorch_hs_model.net"):
+    """Download the classifier weights file from the Hugging Face Hub.
+
+    Args:
+        repo_id: Hub repository to download from. Defaults to the
+            repository this app was written for.
+        filename: Name of the file inside that repository.
+
+    Returns:
+        Whatever ``hf_hub_download`` returns for the file (used by the
+        caller as the path passed to ``torch.load``).
+    """
+    model_path = hf_hub_download(repo_id=repo_id, filename=filename)
+    # Echo the resolved location to stdout so it shows up in the app logs.
+    print(model_path)
+    return model_path
+def load_model():
+    """Return the pretrained ``albert-base-v2`` encoder as an ``AlbertModel``."""
+    return AlbertModel.from_pretrained("albert-base-v2")
+def load_tokenizer():
+    """Return the ``AlbertTokenizer`` matching the ``albert-base-v2`` checkpoint."""
+    return AlbertTokenizer.from_pretrained("albert-base-v2")
+def clean_tweet(tweet):
+    """Remove every ``@handle`` (plus an optional trailing colon) from ``tweet``.
+
+    Only the mention itself is deleted; surrounding whitespace is kept.
+    """
+    mention_pattern = re.compile(r'@\w+:?', flags=re.IGNORECASE)
+    return mention_pattern.sub("", tweet)
+def tokenize(tweet):
+    """Strip mentions from ``tweet`` and encode it with the ALBERT tokenizer.
+
+    The encoding is requested as PyTorch tensors (``return_tensors='pt'``)
+    with padding enabled and truncation at 64 tokens.
+    """
+    cleaned = clean_tweet(tweet)
+    albert_tokenizer = load_tokenizer()
+    encoding = albert_tokenizer(
+        cleaned,
+        padding=True,
+        truncation=True,
+        max_length=64,
+        return_tensors='pt',
+    )
+    return encoding
+class HateSpeechClassifier(pl.LightningModule):
+    """Encoder model followed by a two-layer classification head.
+
+    ``forward`` returns log-probabilities (``log_softmax`` over dim 1), which
+    is the input format ``NLLLoss`` expects.
+
+    Args:
+        albert_model: Encoder module; called with ``input_ids``,
+            ``attention_mask`` and ``token_type_ids``.
+        dropout: Dropout probability applied after the first linear layer.
+        hidden_dim: Width of the encoder output and of the first linear layer.
+        output_dim: Number of output classes.
+    """
+
+    def __init__(self, albert_model, dropout, hidden_dim, output_dim):
+        super().__init__()
+        # Attribute names are part of the saved state_dict keys; do not rename.
+        self.model = albert_model
+        self.l1 = torch.nn.Linear(hidden_dim, hidden_dim)
+        self.dropout = torch.nn.Dropout(dropout)
+        self.l2 = torch.nn.Linear(hidden_dim, output_dim)
+        self.loss = torch.nn.NLLLoss()
+
+    def forward(self, input_ids, attention_mask, token_type_ids):
+        """Return per-class log-probabilities for each row of the batch."""
+        # Element [0] of the encoder output is indexed per position below;
+        # presumably this is the last hidden state -- TODO confirm.
+        x = self.model(
+            input_ids,
+            attention_mask=attention_mask,
+            token_type_ids=token_type_ids,
+        )[0]
+        # Keep only the vector at the first sequence position
+        # (presumably the [CLS] token -- TODO confirm).
+        x = x[:, 0]
+        x = self.dropout(torch.relu(self.l1(x)))
+        return torch.log_softmax(self.l2(x), dim=1)
+
+    def _shared_step(self, batch):
+        """Run a forward pass on ``batch`` and return the NLL loss."""
+        input_ids, attention_masks, token_type_ids, y = batch
+        y_hat = self(input_ids, attention_masks, token_type_ids)
+        # Targets are flattened to 1-D to match what NLLLoss expects.
+        return self.loss(y_hat, y.view(-1))
+
+    def training_step(self, batch, batch_idx):
+        """Return the loss for one training batch."""
+        return self._shared_step(batch)
+
+    def validation_step(self, batch, batch_idx):
+        """Return the loss for one validation batch."""
+        return self._shared_step(batch)
+
+    def configure_optimizers(self):
+        """Return an Adam optimizer over all parameters with lr=1e-5."""
+        return torch.optim.Adam(self.parameters(), lr=1e-5)
+def setup_model():
+    """Build the classifier, load its downloaded weights on CPU, and return it in eval mode."""
+    weights_path = download_torch_model()
+    classifier = HateSpeechClassifier(load_model(), 0.5, 768, 2)
+    state = torch.load(weights_path, map_location=torch.device('cpu'))
+    classifier.load_state_dict(state)
+    # eval() switches off dropout for inference.
+    classifier.eval()
+    return classifier
+# Streamlit page: load the classifier, read one line of text, show the verdict.
+model = setup_model()
+st.title("Hate Speech Detection")
+st.title("Text will be truncated to 64 tokens")
+text = st.text_input("Enter text")
+# Skip inference until the user has entered something; classifying the empty
+# string would only display a meaningless verdict on first page load.
+if text:
+    encoded_input = tokenize(text)
+    # Inference only: disable autograd so no gradient graph is built.
+    # The weights are loaded onto the CPU in setup_model(), so the encoded
+    # tensors are used as-is without moving them to another device.
+    with torch.no_grad():
+        pred = model(
+            encoded_input['input_ids'],
+            encoded_input['attention_mask'],
+            encoded_input['token_type_ids'],
+        )
+    # A single text is encoded per run, so argmax over dim 1 yields one index;
+    # .item() converts it to a plain int for the comparison below.
+    label = pred.argmax(dim=1).item()
+    is_hate_speech = "YES" if label == 1 else "NO"
+    st.write(f"Is this hate speech?: {is_hate_speech}")

requirements.txt ADDED Viewed

	@@ -0,0 +1,5 @@

+transformers==4.12.3
+SentencePiece
+torch
+pytorch-lightning==1.5.0
+huggingface-hub