Spaces:
Sleeping
Sleeping
File size: 1,352 Bytes
ee32637 869b44c cb09d55 ee32637 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 |
import streamlit as st
import joblib
import pandas as pd
import string
import re
import nltk
nltk.download('stopwords')
from sklearn.feature_extraction.text import TfidfVectorizer
# Pre-trained RandomForest classifier serialized with joblib.
model = joblib.load("randomforestmodel.pkl")
# Training corpus; used below to fit the TF-IDF vocabulary so that
# transform() at predict time matches the features the model saw.
data = pd.read_csv("text.csv")
# Porter stemmer and English stopword list shared by clean_text().
ps = nltk.PorterStemmer()
stopwords = nltk.corpus.stopwords.words('english')
def clean_text(text):
    """Normalize raw text into a list of stemmed tokens.

    Steps: lowercase each character, drop punctuation, split on
    non-word characters, remove English stopwords, Porter-stem the rest.

    Used as the ``analyzer`` for the TfidfVectorizer below, so it must
    return a list of tokens (not a joined string).
    """
    # Strip punctuation and lowercase in a single pass over characters.
    text = "".join([ch.lower()
                    for ch in text if ch not in string.punctuation])
    # Raw string for the regex — '\W+' without r-prefix is an invalid
    # escape sequence (warning in modern Python).
    tokens = re.split(r'\W+', text)
    # Drop stopwords, then stem; relies on module-level `ps`/`stopwords`.
    text = [ps.stem(word) for word in tokens if word not in stopwords]
    return text
# TF-IDF vectorizer that tokenizes via clean_text; fitted once on the
# training texts so predict-time transforms share the same vocabulary.
vectoriz = TfidfVectorizer(analyzer=clean_text)
vectorizer = vectoriz.fit(data["text"])
def count_punct(text):
    """Return the percentage of non-space characters that are punctuation.

    The ratio is rounded to 3 decimal places before scaling by 100,
    matching the feature used when the model was trained.

    Returns 0.0 for empty or all-space input instead of raising
    ZeroDivisionError (the original crashed on such input).
    """
    count = sum(1 for char in text if char in string.punctuation)
    denom = len(text) - text.count(" ")  # non-space character count
    if denom == 0:
        return 0.0
    return round(count / denom, 3) * 100
st.title("Sentiment analysis classification")
text = st.text_input("Type the text here")
if st.button("Predict"):
    # TF-IDF features for the input, using the vocabulary fitted above.
    trans = vectorizer.transform([text])
    # Hand-crafted features: non-space length and punctuation percentage.
    body_len = len(text) - text.count(" ")
    punct = count_punct(text)
    k = {"body_len": [body_len], "punc%": [punct]}
    df = pd.DataFrame(k)
    # Column order must match training: [body_len, punc%, tfidf...].
    test_vect = pd.concat([df.reset_index(drop=True),
                           pd.DataFrame(trans.toarray())], axis=1)
    prediction = model.predict(test_vect)
    st.write(prediction[0])
|