"""Complete Text Analysis App.

A Streamlit app bundling five small TF-IDF text models: spam/ham detection,
sentiment analysis, stress detection, hate & offensive content detection,
and sarcasm detection. Run with: streamlit run <this file>
"""

import string

import nltk
import pandas as pd
import streamlit as st
from nltk.corpus import stopwords
from nltk.stem.porter import PorterStemmer
from nltk.tokenize import word_tokenize
from sklearn.ensemble import RandomForestClassifier
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import train_test_split
from sklearn.tree import DecisionTreeRegressor

nltk.download('punkt')
nltk.download('punkt_tab')  # required by word_tokenize on newer NLTK releases
nltk.download('stopwords')
sw = stopwords.words("english")

rad = st.sidebar.radio(
    "Navigation",
    ["Home", "Spam or Ham Detection", "Sentiment Analysis", "Stress Detection",
     "Hate and Offensive Content Detection", "Sarcasm Detection"],
)

# Home page
if rad == "Home":
    st.title("Complete Text Analysis App")
    st.image("Complete Text Analysis Home Page.jpg")
    st.text(" ")
    st.text("The Following Text Analysis Options Are Available ->")
    st.text(" ")
    st.text("1. Spam or Ham Detection")
    st.text("2. Sentiment Analysis")
    st.text("3. Stress Detection")
    st.text("4. Hate and Offensive Content Detection")
    st.text("5. Sarcasm Detection")


# Clean and normalise raw user input: lowercase, tokenize, keep only
# alphanumeric tokens, drop English stopwords and punctuation, then
# reduce each token to its Porter stem.
def transform_text(text):
    tokens = word_tokenize(text.lower())
    tokens = [t for t in tokens if t.isalnum()]
    tokens = [t for t in tokens if t not in sw and t not in string.punctuation]
    ps = PorterStemmer()
    return " ".join(ps.stem(t) for t in tokens)


# --- Spam detection: TF-IDF + logistic regression ---
tfidf1 = TfidfVectorizer(stop_words=sw, max_features=20)

def transform1(txt):
    # Fit the vectorizer on the training corpus and return dense features.
    return tfidf1.fit_transform(txt).toarray()

df1 = pd.read_csv("Spam Detection.csv")
df1.columns = ["Label", "Text"]
x = transform1(df1["Text"])
y = df1["Label"]
x_train1, x_test1, y_train1, y_test1 = train_test_split(x, y, test_size=0.1, random_state=0)
model1 = LogisticRegression()
model1.fit(x_train1, y_train1)

# Spam detection page
if rad == "Spam or Ham Detection":
    st.header("Detect Whether A Text Is Spam Or Ham??")
    sent1 = st.text_area("Enter The Text")
    if st.button("Predict"):
        transformed_sent1 = transform_text(sent1)
        vector_sent1 = tfidf1.transform([transformed_sent1])
        prediction1 = model1.predict(vector_sent1)[0]
        if prediction1 == "spam":
            st.warning("Spam Text!!")
        elif prediction1 == "ham":
            st.success("Ham Text!!")

# --- Sentiment analysis: TF-IDF + logistic regression ---
tfidf2 = TfidfVectorizer(stop_words=sw, max_features=20)

def transform2(txt):
    return tfidf2.fit_transform(txt).toarray()

df2 = pd.read_csv("Sentiment Analysis.csv")
df2.columns = ["Text", "Label"]
x = transform2(df2["Text"])
y = df2["Label"]
x_train2, x_test2, y_train2, y_test2 = train_test_split(x, y, test_size=0.1, random_state=0)
model2 = LogisticRegression()
model2.fit(x_train2, y_train2)

# Sentiment analysis page
if rad == "Sentiment Analysis":
    st.header("Detect The Sentiment Of The Text!!")
    sent2 = st.text_area("Enter The Text")
    if st.button("Predict"):
        transformed_sent2 = transform_text(sent2)
        vector_sent2 = tfidf2.transform([transformed_sent2])
        prediction2 = model2.predict(vector_sent2)[0]
        if prediction2 == 0:
            st.warning("Negative Text!!")
        elif prediction2 == 1:
            st.success("Positive Text!!")

# --- Stress detection: TF-IDF + decision-tree regressor ---
tfidf3 = TfidfVectorizer(stop_words=sw, max_features=20)

def transform3(txt):
    return tfidf3.fit_transform(txt).toarray()

df3 = pd.read_csv("Stress Detection.csv")
df3 = df3.drop(["subreddit", "post_id", "sentence_range", "syntax_fk_grade"], axis=1)
df3.columns = ["Text", "Sentiment", "Stress Level"]
x = transform3(df3["Text"])
y = df3["Stress Level"].to_numpy()
x_train3, x_test3, y_train3, y_test3 = train_test_split(x, y, test_size=0.1, random_state=0)
model3 = DecisionTreeRegressor(max_leaf_nodes=2000)
model3.fit(x_train3, y_train3)

# Stress detection page
if rad == "Stress Detection":
    st.header("Detect The Amount Of Stress In The Text!!")
    sent3 = st.text_area("Enter The Text")
    if st.button("Predict"):
        transformed_sent3 = transform_text(sent3)
        vector_sent3 = tfidf3.transform([transformed_sent3])
        prediction3 = model3.predict(vector_sent3)[0]
        # The stress labels are assumed binary (0/1), so the regressor's
        # output is thresholded at 0.5; a threshold of 0 would flag every
        # input as stressful.
        if prediction3 >= 0.5:
            st.warning("Stressful Text!!")
        else:
            st.success("Not A Stressful Text!!")

# --- Hate & offensive content: TF-IDF + random forest classifier ---
tfidf4 = TfidfVectorizer(stop_words=sw, max_features=20)

def transform4(txt):
    return tfidf4.fit_transform(txt).toarray()

df4 = pd.read_csv("Hate Content Detection.csv")
df4 = df4.drop(["Unnamed: 0", "count", "neither"], axis=1)
df4.columns = ["Hate Level", "Offensive Level", "Class Level", "Text"]
x = transform4(df4["Text"])
y = df4["Class Level"]
x_train4, x_test4, y_train4, y_test4 = train_test_split(x, y, test_size=0.1, random_state=0)
model4 = RandomForestClassifier()
model4.fit(x_train4, y_train4)

# Hate & offensive content page
if rad == "Hate and Offensive Content Detection":
    st.header("Detect The Level Of Hate & Offensive Content In The Text!!")
    sent4 = st.text_area("Enter The Text")
    if st.button("Predict"):
        transformed_sent4 = transform_text(sent4)
        vector_sent4 = tfidf4.transform([transformed_sent4])
        prediction4 = model4.predict(vector_sent4)[0]
        if prediction4 == 0:
            # st.error renders the message in red; st.exception is meant
            # for Exception objects, not plain strings.
            st.error("Highly Offensive Text!!")
        elif prediction4 == 1:
            st.warning("Offensive Text!!")
        elif prediction4 == 2:
            st.success("Non-Offensive Text!!")

# --- Sarcasm detection: TF-IDF + logistic regression ---
tfidf5 = TfidfVectorizer(stop_words=sw, max_features=20)

def transform5(txt):
    return tfidf5.fit_transform(txt).toarray()

df5 = pd.read_csv("Sarcasm Detection.csv")
df5.columns = ["Text", "Label"]
x = transform5(df5["Text"])
y = df5["Label"]
x_train5, x_test5, y_train5, y_test5 = train_test_split(x, y, test_size=0.1, random_state=0)
model5 = LogisticRegression()
model5.fit(x_train5, y_train5)

# Sarcasm detection page
if rad == "Sarcasm Detection":
    st.header("Detect Whether The Text Is Sarcastic Or Not!!")
    sent5 = st.text_area("Enter The Text")
    if st.button("Predict"):
        transformed_sent5 = transform_text(sent5)
        vector_sent5 = tfidf5.transform([transformed_sent5])
        prediction5 = model5.predict(vector_sent5)[0]
        if prediction5 == 1:
            st.error("Sarcastic Text!!")
        elif prediction5 == 0:
            st.success("Non-Sarcastic Text!!")
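
# --- Optional sanity check: a minimal sketch, not required by any page ---
# Each block above creates a held-out test split that is otherwise never
# consulted. These prints report each model's score on its own test split
# (accuracy for the classifiers, R^2 for the stress regressor) in the
# terminal at startup. With max_features=20 the vocabularies are tiny, so
# treat the numbers as rough indicators only; delete this block if unwanted.
print("Spam model accuracy:          ", model1.score(x_test1, y_test1))
print("Sentiment model accuracy:     ", model2.score(x_test2, y_test2))
print("Stress model R^2:             ", model3.score(x_test3, y_test3))
print("Hate/offensive model accuracy:", model4.score(x_test4, y_test4))
print("Sarcasm model accuracy:       ", model5.score(x_test5, y_test5))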