abdulmatinomotoso committed on
Commit
4d45cee
1 Parent(s): c39b6a4

Create app.py

Browse files
Files changed (1) hide show
  1. app.py +65 -0
app.py ADDED
@@ -0,0 +1,65 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ #importing the necessary libraries
2
+ import gradio as gr
3
+ import numpy as np
4
+ import pandas as pd
5
+ import re
6
+ from transformers import AutoTokenizer, AutoModelForSequenceClassification
7
+ import torch
8
+
9
# Class names; get_category indexes this list with the position of the
# highest-scoring model output.
labels = ["Explicit", "Not_Explicit"]

# Load the tokenizer and the sequence-classification model identified by
# model_name (the two loads are independent of each other).
model_name = 'valurank/finetuned-distilbert-explicit_content_detection'
tokenizer = AutoTokenizer.from_pretrained(model_name)
model = AutoModelForSequenceClassification.from_pretrained(model_name)
16
+
17
+ #Reading in the text file
18
def read_in_text(url):
    """Return the full contents of the text file at ``url``.

    Args:
        url: Location of the file to read. Despite the parameter name it is
            handed straight to ``open``, so it must be a local path.

    Returns:
        The file's contents as a single string.
    """
    # Decode explicitly as UTF-8 so the result does not depend on the
    # platform's default locale encoding.
    with open(url, 'r', encoding='utf-8') as file:
        return file.read()
23
+
24
# Patterns used by clean_text, compiled once at import time.
_WHITESPACE_RE = re.compile(r"\s+")
_DATE_RE = re.compile(r'Date\s\d{1,2}\/\d{1,2}\/\d{4}')   # e.g. "Date 12/05/2021"
_TIME_RE = re.compile(r'\d{1,2}:\d{2}\s[A-Z]+\s[A-Z]+')   # e.g. "10:30 AM EST"


def clean_text(url):
    """Normalise raw article text before it is tokenized.

    Steps, in order:
      1. Drop every non-ASCII character.
      2. Collapse each run of whitespace (spaces, tabs, newlines, carriage
         returns, ...) into a single space and trim the ends.
      3. Remove "Date d/m/yyyy" stamps and "h:mm XX XXX" time stamps.
      4. Collapse whitespace again, so removing a stamp cannot leave a
         doubled or trailing space behind.

    Args:
        url: The text to clean. Despite the parameter name this is the
            text itself, not a location.

    Returns:
        The cleaned text as a single-line string.
    """
    # Round-trip through ASCII, silently discarding anything that does not fit.
    text = url.encode("ascii", errors="ignore").decode("ascii")

    # Normalise first so the single-\s stamp patterns also match stamps that
    # were originally split by several spaces or a line break.
    text = _WHITESPACE_RE.sub(" ", text).strip()

    text = _DATE_RE.sub("", text)
    text = _TIME_RE.sub("", text)

    # Second pass tidies the gaps left where stamps were cut out.
    return _WHITESPACE_RE.sub(" ", text).strip()
42
+
43
+ #Defining a function to get the category of the news article
44
def get_category(file):
    """Classify a piece of text and return its label.

    Args:
        file: The raw text to classify. Despite the parameter name it is
            treated as text, not a path: it goes through ``clean_text`` and
            is then tokenized directly.

    Returns:
        The entry of the module-level ``labels`` list at the index of the
        model's highest-scoring output.
    """
    text = clean_text(file)

    # truncation=True asks the tokenizer to clip inputs that are too long.
    input_ids = tokenizer.encode(text, return_tensors='pt', truncation=True)

    # Inference only: no_grad avoids building an autograd graph for the
    # forward pass, so no detach is needed afterwards.
    with torch.no_grad():
        logits = model(input_ids).logits

    # Softmax is monotonic, so the arg-max of the raw logits is the same
    # class the arg-max of the probabilities would select.
    predicted_index = int(torch.argmax(logits, dim=1)[0])

    return labels[predicted_index]
57
+
58
# Creating the interface for the Gradio app.
# The legacy `gr.inputs` namespace was deprecated in Gradio 3 and removed in
# Gradio 4, so prefer the top-level Textbox component and fall back to the
# old location only on installs that predate it.
_textbox = gr.Textbox if hasattr(gr, "Textbox") else gr.inputs.Textbox

demo = gr.Interface(
    get_category,
    inputs=_textbox(label='Drop your articles here'),
    outputs='text',
    title='Explicit Content Detection',
)

# Launching the Gradio app only when this file is run as a script.
if __name__ == '__main__':
    demo.launch(debug=True)