File size: 1,165 Bytes
2c330bd 84442d0 2c330bd 84442d0 2c330bd |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 |
import gradio as gr
import nltk
from fincat_utils import extract_context_words
from fincat_utils import bert_embedding_extract
import pickle
# Load the pickled classifier once at import time so every request reuses it.
# The context manager closes the file handle as soon as unpickling finishes.
# NOTE(review): unpickling executes arbitrary code embedded in the file; this
# is only safe while "lr_clf_FiNCAT.pickle" is a trusted artefact shipped with
# the application.
with open("lr_clf_FiNCAT.pickle", 'rb') as model_file:
    lr_clf = pickle.load(model_file)

# Fetch the NLTK 'punkt' resource at start-up.
# NOTE(review): presumably required by the fincat_utils helpers -- confirm.
nltk.download('punkt')
def score_fincat(txt):
    """Classify every numeral-bearing token of *txt* as in-claim or out-of-claim.

    The text is padded with one space on each side and split on whitespace.
    Every token containing at least one digit has a single trailing
    punctuation character removed (if present), is located in the padded
    text, and is handed to ``extract_context_words`` and
    ``bert_embedding_extract``.  The resulting feature vector is reshaped to
    ``(1, 768)`` and classified with the module-level ``lr_clf``.

    Args:
        txt: The input text to analyse.

    Returns:
        A list of ``(token, label)`` tuples, one per classified token, where
        the label is ``' In-claim'`` or ``'Out-of-Claim'``.  When no token
        was classified, the list holds the single tuple
        ``(padded_text, 'None')``.
    """
    trailing_punctuation = ('.', ',', ';', ":", "-", "!", "?", ")", '"', "'")
    highlight = []

    # Offsets passed to the helpers index into this padded string.
    txt = " " + txt + " "

    # Tokens come back from split() in document order, so searching forward
    # from the end of the previous token yields each token's own position.
    # This keeps offsets correct for repeated tokens and for tokens that are
    # delimited by newlines or tabs rather than plain spaces.
    cursor = 0
    for token in txt.split():
        st = txt.find(token, cursor)
        cursor = st + len(token)

        if not any(char.isdigit() for char in token):
            continue

        # Drop one trailing punctuation mark so "10," is scored as "10".
        word = token[:-1] if token.endswith(trailing_punctuation) else token
        ed = st + len(word)

        x = {'paragraph': txt, 'offset_start': st, 'offset_end': ed}
        context_text = extract_context_words(x)
        features = bert_embedding_extract(context_text, word)

        # NOTE(review): the helper appears to signal "no embedding" with a
        # leading 'None' string -- confirm against fincat_utils.
        if features[0] == 'None':
            continue

        prediction = lr_clf.predict(features.reshape(1, 768))
        highlight.append((word, ' In-claim' if prediction == 1 else 'Out-of-Claim'))

    # Always return at least one entry so the caller has something to show.
    if not highlight:
        highlight.append((txt, 'None'))
    return highlight