Spaces:

shubham5027
/

Text_Analysis_NLP

Sleeping

App Files Files Community

shubham5027 committed on Jun 8, 2024

Commit

e6e5910

verified ·

1 Parent(s): 36e8e91

Upload 9 files

Browse files

Files changed (9) hide show

Complete Text Analysis Home Page.jpg +0 -0
Hate Content Detection.csv +0 -0
Sarcasm Detection.csv +0 -0
Sentiment Analysis.csv +0 -0
Spam Detection.csv +0 -0
Stress Detection.csv +0 -0
app.py +198 -0
nltk.txt +2 -0
requirements.txt +5 -0

Complete Text Analysis Home Page.jpg ADDED Viewed

Hate Content Detection.csv ADDED Viewed

The diff for this file is too large to render. See raw diff

Sarcasm Detection.csv ADDED Viewed

The diff for this file is too large to render. See raw diff

Sentiment Analysis.csv ADDED Viewed

The diff for this file is too large to render. See raw diff

Spam Detection.csv ADDED Viewed

The diff for this file is too large to render. See raw diff

Stress Detection.csv ADDED Viewed

The diff for this file is too large to render. See raw diff

app.py ADDED Viewed

	@@ -0,0 +1,198 @@

+from tracemalloc import stop
+import streamlit as st
+import numpy as np
+import pandas as pd
+import re
+import string
+import nltk
+from nltk.corpus import stopwords
+from nltk.tokenize import word_tokenize
+from nltk.stem.porter import PorterStemmer
+from sklearn.feature_extraction.text import TfidfVectorizer
+from sklearn.model_selection import train_test_split
+from sklearn.linear_model import LogisticRegression
+from sklearn.tree import DecisionTreeRegressor
+from sklearn.ensemble import RandomForestClassifier
+nltk.download('punkt')
+nltk.download('stopwords')
+sw=nltk.corpus.stopwords.words("english")
+rad=st.sidebar.radio("Navigation",["Home","Spam or Ham Detection","Sentiment Analysis","Stress Detection","Hate and Offensive Content Detection","Sarcasm Detection"])
+#Home Page
+if rad=="Home":
+    st.title("Complete Text Analysis App")
+    st.image("Complete Text Analysis Home Page.jpg")
+    st.text(" ")
+    st.text("The Following Text Analysis Options Are Available->")
+    st.text(" ")
+    st.text("1. Spam or Ham Detection")
+    st.text("2. Sentiment Analysis")
+    st.text("3. Stress Detection")
+    st.text("4. Hate and Offensive Content Detection")
+    st.text("5. Sarcasm Detection")
+#function to clean and transform the user input which is in raw format
+def transform_text(text):
+    text=text.lower()
+    text=nltk.word_tokenize(text)
+    y=[]
+    for i in text:
+        if i.isalnum():
+            y.append(i)
+    text=y[:]
+    y.clear()
+    for i in text:
+        if i not in stopwords.words('english') and i not in string.punctuation:
+            y.append(i)
+    text=y[:]
+    y.clear()
+    ps=PorterStemmer()
+    for i in text:
+        y.append(ps.stem(i))
+    return " ".join(y)
+#Spam Detection Prediction
+tfidf1=TfidfVectorizer(stop_words=sw,max_features=20)
+def transform1(txt1):
+    txt2=tfidf1.fit_transform(txt1)
+    return txt2.toarray()
+df1=pd.read_csv("Spam Detection.csv")
+df1.columns=["Label","Text"]
+x=transform1(df1["Text"])
+y=df1["Label"]
+x_train1,x_test1,y_train1,y_test1=train_test_split(x,y,test_size=0.1,random_state=0)
+model1=LogisticRegression()
+model1.fit(x_train1,y_train1)
+#Spam Detection Analysis Page
+if rad=="Spam or Ham Detection":
+    st.header("Detect Whether A Text Is Spam Or Ham??")
+    sent1=st.text_area("Enter The Text")
+    transformed_sent1=transform_text(sent1)
+    vector_sent1=tfidf1.transform([transformed_sent1])
+    prediction1=model1.predict(vector_sent1)[0]
+    if st.button("Predict"):
+        if prediction1=="spam":
+            st.warning("Spam Text!!")
+        elif prediction1=="ham":
+            st.success("Ham Text!!")
+#Sentiment Analysis Prediction
+tfidf2=TfidfVectorizer(stop_words=sw,max_features=20)
+def transform2(txt1):
+    txt2=tfidf2.fit_transform(txt1)
+    return txt2.toarray()
+df2=pd.read_csv("Sentiment Analysis.csv")
+df2.columns=["Text","Label"]
+x=transform2(df2["Text"])
+y=df2["Label"]
+x_train2,x_test2,y_train2,y_test2=train_test_split(x,y,test_size=0.1,random_state=0)
+model2=LogisticRegression()
+model2.fit(x_train2,y_train2)
+#Sentiment Analysis Page
+if rad=="Sentiment Analysis":
+    st.header("Detect The Sentiment Of The Text!!")
+    sent2=st.text_area("Enter The Text")
+    transformed_sent2=transform_text(sent2)
+    vector_sent2=tfidf2.transform([transformed_sent2])
+    prediction2=model2.predict(vector_sent2)[0]
+    if st.button("Predict"):
+        if prediction2==0:
+            st.warning("Negetive Text!!")
+        elif prediction2==1:
+            st.success("Positive Text!!")
+#Stress Detection Prediction
+tfidf3=TfidfVectorizer(stop_words=sw,max_features=20)
+def transform3(txt1):
+    txt2=tfidf3.fit_transform(txt1)
+    return txt2.toarray()
+df3=pd.read_csv("Stress Detection.csv")
+df3=df3.drop(["subreddit","post_id","sentence_range","syntax_fk_grade"],axis=1)
+df3.columns=["Text","Sentiment","Stress Level"]
+x=transform3(df3["Text"])
+y=df3["Stress Level"].to_numpy()
+x_train3,x_test3,y_train3,y_test3=train_test_split(x,y,test_size=0.1,random_state=0)
+model3=DecisionTreeRegressor(max_leaf_nodes=2000)
+model3.fit(x_train3,y_train3)
+#Stress Detection Page
+if rad=="Stress Detection":
+    st.header("Detect The Amount Of Stress In The Text!!")
+    sent3=st.text_area("Enter The Text")
+    transformed_sent3=transform_text(sent3)
+    vector_sent3=tfidf3.transform([transformed_sent3])
+    prediction3=model3.predict(vector_sent3)[0]
+    if st.button("Predict"):
+        if prediction3>=0:
+            st.warning("Stressful Text!!")
+        elif prediction3<0:
+            st.success("Not A Stressful Text!!")
+#Hate & Offensive Content Prediction
+tfidf4=TfidfVectorizer(stop_words=sw,max_features=20)
+def transform4(txt1):
+    txt2=tfidf4.fit_transform(txt1)
+    return txt2.toarray()
+df4=pd.read_csv("Hate Content Detection.csv")
+df4=df4.drop(["Unnamed: 0","count","neither"],axis=1)
+df4.columns=["Hate Level","Offensive Level","Class Level","Text"]
+x=transform4(df4["Text"])
+y=df4["Class Level"]
+x_train4,x_test4,y_train4,y_test4=train_test_split(x,y,test_size=0.1,random_state=0)
+model4=RandomForestClassifier()
+model4.fit(x_train4,y_train4)
+#Hate & Offensive Content Page
+if rad=="Hate and Offensive Content Detection":
+    st.header("Detect The Level Of Hate & Offensive Content In The Text!!")
+    sent4=st.text_area("Enter The Text")
+    transformed_sent4=transform_text(sent4)
+    vector_sent4=tfidf4.transform([transformed_sent4])
+    prediction4=model4.predict(vector_sent4)[0]
+    if st.button("Predict"):
+        if prediction4==0:
+            st.exception("Highly Offensive Text!!")
+        elif prediction4==1:
+            st.warning("Offensive Text!!")
+        elif prediction4==2:
+            st.success("Non Offensive Text!!")
+#Sarcasm Detection Prediction
+tfidf5=TfidfVectorizer(stop_words=sw,max_features=20)
+def transform5(txt1):
+    txt2=tfidf5.fit_transform(txt1)
+    return txt2.toarray()
+df5=pd.read_csv("Sarcasm Detection.csv")
+df5.columns=["Text","Label"]
+x=transform5(df5["Text"])
+y=df5["Label"]
+x_train5,x_test5,y_train5,y_test5=train_test_split(x,y,test_size=0.1,random_state=0)
+model5=LogisticRegression()
+model5.fit(x_train5,y_train5)
+#Sarcasm Detection Page
+if rad=="Sarcasm Detection":
+    st.header("Detect Whether The Text Is Sarcastic Or Not!!")
+    sent5=st.text_area("Enter The Text")
+    transformed_sent5=transform_text(sent5)
+    vector_sent5=tfidf5.transform([transformed_sent5])
+    prediction5=model5.predict(vector_sent5)[0]
+    if st.button("Predict"):
+        if prediction5==1:
+            st.exception("Sarcastic Text!!")
+        elif prediction5==0:
+            st.success("Non Sarcastic Text!!")

nltk.txt ADDED Viewed

	@@ -0,0 +1,2 @@


1	+ stopwords
2	+ punkt

requirements.txt ADDED Viewed

	@@ -0,0 +1,5 @@

+nltk==3.6.3
+numpy==1.21.1
+pandas==1.3.0
+scikit_learn==1.0.2
+streamlit==1.3.0