sgonzalezsilot
committed on
Commit
•
65d7807
1
Parent(s):
312338f
Create app.py
Browse files
app.py
ADDED
@@ -0,0 +1,45 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import gradio as gr

from huggingface_hub import from_pretrained_keras

# Download the fine-tuned Keras fake-news classifier from the Hugging Face Hub
# and keep the loaded model in `m` for the Gradio handler below.
m = from_pretrained_keras('sgonzalezsilot/FakeNews-Detection-Twitter-Thesis')
|
4 |
+
|
5 |
+
def bert_encode(tokenizer, data, maximum_length):
    """Encode a sequence of texts for a BERT-style model.

    Args:
        tokenizer: Hugging Face tokenizer exposing ``encode_plus``.
        data: sequence of input strings.
        maximum_length: every encoding is padded/truncated to this many tokens.

    Returns:
        Tuple ``(input_ids, attention_masks)`` of numpy arrays, each of
        shape ``(len(data), maximum_length)``.
    """
    # Local import: numpy is used here but was never imported at file level,
    # so the original raised NameError on the return statement.
    import numpy as np

    input_ids = []
    attention_masks = []

    for text in data:
        encoded = tokenizer.encode_plus(
            text,
            add_special_tokens=True,
            max_length=maximum_length,
            # `padding='max_length'` is the documented replacement for the
            # deprecated `pad_to_max_length=True` flag.
            padding='max_length',
            truncation=True,
            return_attention_mask=True,
        )
        input_ids.append(encoded['input_ids'])
        attention_masks.append(encoded['attention_mask'])

    return np.array(input_ids), np.array(attention_masks)
|
25 |
+
|
26 |
+
# NOTE(review): this whole section references names that are never defined in
# this file (`tokenizer`, `train_texts`, `test_texts`, `getTokenizerAndModel`),
# so importing the module raises NameError before the UI ever starts. It looks
# like leftover training/evaluation code pasted into the inference app —
# confirm and remove. Also note `tokenizer` is used on the next two lines
# *before* it is assigned further down.
train_encodings = tokenizer(train_texts, truncation=True, padding=True)
test_encodings = tokenizer(test_texts, truncation=True, padding=True)

# Hub id of the base checkpoint the thesis model was fine-tuned from.
MODEL = "digitalepidemiologylab/covid-twitter-bert-v2"
# NOTE(review): `getTokenizerAndModel` is not defined or imported anywhere in
# this file — presumably a helper from the thesis training repo; verify.
tokenizer, roberta_model = getTokenizerAndModel(MODEL, model_normalization=False, from_pt = False, regularization=False)

# Fixed token length used when encoding the tweet datasets.
sentence_length = 110
train_input_ids,train_attention_masks = bert_encode(tokenizer,train_texts,sentence_length)
test_input_ids,test_attention_masks = bert_encode(tokenizer,test_texts,sentence_length)
36 |
+
def get_news(input_text):
    """Gradio handler: classify a tweet/news text and return the result."""
    # NOTE(review): `sentiment` is never defined in this file, so this handler
    # raises NameError when invoked. The loaded model is bound to `m` above —
    # presumably this should encode `input_text` and call `m` instead; confirm.
    return sentiment(input_text)
|
38 |
+
|
39 |
+
# Assemble the Gradio UI: one free-text input, one text output, wired to the
# `get_news` handler.
_interface_config = dict(
    fn=get_news,
    inputs="text",
    outputs=['text'],
    title='Fake News',
    description="",
)
iface = gr.Interface(**_interface_config)

# Serve the app; inline=False keeps it from embedding in a notebook cell.
iface.launch(inline = False)
|