Spaces:

shubham5027
/

Text_Analysis_NLP

Sleeping

App Files Files Community

shubham5027 committed on Jun 8, 2024

Commit

029d4c7

verified ·

1 Parent(s): 5df254f

Update app.py

Browse files

Files changed (1) hide show

app.py +198 -198

app.py CHANGED Viewed

@@ -1,198 +1,198 @@
-from tracemalloc import stop
-import streamlit as st
-import numpy as np
-import pandas as pd
-import re
-import string
-import nltk
-from nltk.corpus import stopwords
-from nltk.tokenize import word_tokenize
-from nltk.stem.porter import PorterStemmer
-from sklearn.feature_extraction.text import TfidfVectorizer
-from sklearn.model_selection import train_test_split
-from sklearn.linear_model import LogisticRegression
-from sklearn.tree import DecisionTreeRegressor
-from sklearn.ensemble import RandomForestClassifier
-nltk.download('punkt')
-nltk.download('stopwords')
-sw=nltk.corpus.stopwords.words("english")
-rad=st.sidebar.radio("Navigation",["Home","Spam or Ham Detection","Sentiment Analysis","Stress Detection","Hate and Offensive Content Detection","Sarcasm Detection"])
-#Home Page
-if rad=="Home":
-    st.title("Complete Text Analysis App")
-    st.image("Complete Text Analysis Home Page.jpg")
-    st.text(" ")
-    st.text("The Following Text Analysis Options Are Available->")
-    st.text(" ")
-    st.text("1. Spam or Ham Detection")
-    st.text("2. Sentiment Analysis")
-    st.text("3. Stress Detection")
-    st.text("4. Hate and Offensive Content Detection")
-    st.text("5. Sarcasm Detection")
-#function to clean and transform the user input which is in raw format
-def transform_text(text):
-    text=text.lower()
-    text=nltk.word_tokenize(text)
-    y=[]
-    for i in text:
-        if i.isalnum():
-            y.append(i)
-    text=y[:]
-    y.clear()
-    for i in text:
-        if i not in stopwords.words('english') and i not in string.punctuation:
-            y.append(i)
-    text=y[:]
-    y.clear()
-    ps=PorterStemmer()
-    for i in text:
-        y.append(ps.stem(i))
-    return " ".join(y)
-#Spam Detection Prediction
-tfidf1=TfidfVectorizer(stop_words=sw,max_features=20)
-def transform1(txt1):
-    txt2=tfidf1.fit_transform(txt1)
-    return txt2.toarray()
-df1=pd.read_csv("Spam Detection.csv")
-df1.columns=["Label","Text"]
-x=transform1(df1["Text"])
-y=df1["Label"]
-x_train1,x_test1,y_train1,y_test1=train_test_split(x,y,test_size=0.1,random_state=0)
-model1=LogisticRegression()
-model1.fit(x_train1,y_train1)
-#Spam Detection Analysis Page
-if rad=="Spam or Ham Detection":
-    st.header("Detect Whether A Text Is Spam Or Ham??")
-    sent1=st.text_area("Enter The Text")
-    transformed_sent1=transform_text(sent1)
-    vector_sent1=tfidf1.transform([transformed_sent1])
-    prediction1=model1.predict(vector_sent1)[0]
-    if st.button("Predict"):
-        if prediction1=="spam":
-            st.warning("Spam Text!!")
-        elif prediction1=="ham":
-            st.success("Ham Text!!")
-#Sentiment Analysis Prediction
-tfidf2=TfidfVectorizer(stop_words=sw,max_features=20)
-def transform2(txt1):
-    txt2=tfidf2.fit_transform(txt1)
-    return txt2.toarray()
-df2=pd.read_csv("Sentiment Analysis.csv")
-df2.columns=["Text","Label"]
-x=transform2(df2["Text"])
-y=df2["Label"]
-x_train2,x_test2,y_train2,y_test2=train_test_split(x,y,test_size=0.1,random_state=0)
-model2=LogisticRegression()
-model2.fit(x_train2,y_train2)
-#Sentiment Analysis Page
-if rad=="Sentiment Analysis":
-    st.header("Detect The Sentiment Of The Text!!")
-    sent2=st.text_area("Enter The Text")
-    transformed_sent2=transform_text(sent2)
-    vector_sent2=tfidf2.transform([transformed_sent2])
-    prediction2=model2.predict(vector_sent2)[0]
-    if st.button("Predict"):
-        if prediction2==0:
-            st.warning("Negetive Text!!")
-        elif prediction2==1:
-            st.success("Positive Text!!")
-#Stress Detection Prediction
-tfidf3=TfidfVectorizer(stop_words=sw,max_features=20)
-def transform3(txt1):
-    txt2=tfidf3.fit_transform(txt1)
-    return txt2.toarray()
-df3=pd.read_csv("Stress Detection.csv")
-df3=df3.drop(["subreddit","post_id","sentence_range","syntax_fk_grade"],axis=1)
-df3.columns=["Text","Sentiment","Stress Level"]
-x=transform3(df3["Text"])
-y=df3["Stress Level"].to_numpy()
-x_train3,x_test3,y_train3,y_test3=train_test_split(x,y,test_size=0.1,random_state=0)
-model3=DecisionTreeRegressor(max_leaf_nodes=2000)
-model3.fit(x_train3,y_train3)
-#Stress Detection Page
-if rad=="Stress Detection":
-    st.header("Detect The Amount Of Stress In The Text!!")
-    sent3=st.text_area("Enter The Text")
-    transformed_sent3=transform_text(sent3)
-    vector_sent3=tfidf3.transform([transformed_sent3])
-    prediction3=model3.predict(vector_sent3)[0]
-    if st.button("Predict"):
-        if prediction3>=0:
-            st.warning("Stressful Text!!")
-        elif prediction3<0:
-            st.success("Not A Stressful Text!!")
-#Hate & Offensive Content Prediction
-tfidf4=TfidfVectorizer(stop_words=sw,max_features=20)
-def transform4(txt1):
-    txt2=tfidf4.fit_transform(txt1)
-    return txt2.toarray()
-df4=pd.read_csv("Hate Content Detection.csv")
-df4=df4.drop(["Unnamed: 0","count","neither"],axis=1)
-df4.columns=["Hate Level","Offensive Level","Class Level","Text"]
-x=transform4(df4["Text"])
-y=df4["Class Level"]
-x_train4,x_test4,y_train4,y_test4=train_test_split(x,y,test_size=0.1,random_state=0)
-model4=RandomForestClassifier()
-model4.fit(x_train4,y_train4)
-#Hate & Offensive Content Page
-if rad=="Hate and Offensive Content Detection":
-    st.header("Detect The Level Of Hate & Offensive Content In The Text!!")
-    sent4=st.text_area("Enter The Text")
-    transformed_sent4=transform_text(sent4)
-    vector_sent4=tfidf4.transform([transformed_sent4])
-    prediction4=model4.predict(vector_sent4)[0]
-    if st.button("Predict"):
-        if prediction4==0:
-            st.exception("Highly Offensive Text!!")
-        elif prediction4==1:
-            st.warning("Offensive Text!!")
-        elif prediction4==2:
-            st.success("Non Offensive Text!!")
-#Sarcasm Detection Prediction
-tfidf5=TfidfVectorizer(stop_words=sw,max_features=20)
-def transform5(txt1):
-    txt2=tfidf5.fit_transform(txt1)
-    return txt2.toarray()
-df5=pd.read_csv("Sarcasm Detection.csv")
-df5.columns=["Text","Label"]
-x=transform5(df5["Text"])
-y=df5["Label"]
-x_train5,x_test5,y_train5,y_test5=train_test_split(x,y,test_size=0.1,random_state=0)
-model5=LogisticRegression()
-model5.fit(x_train5,y_train5)
-#Sarcasm Detection Page
-if rad=="Sarcasm Detection":
-    st.header("Detect Whether The Text Is Sarcastic Or Not!!")
-    sent5=st.text_area("Enter The Text")
-    transformed_sent5=transform_text(sent5)
-    vector_sent5=tfidf5.transform([transformed_sent5])
-    prediction5=model5.predict(vector_sent5)[0]
-    if st.button("Predict"):
-        if prediction5==1:
-            st.exception("Sarcastic Text!!")
-        elif prediction5==0:
-            st.success("Non Sarcastic Text!!")

+from tracemalloc import stop
+import streamlit as st
+import numpy as np
+import pandas as pd
+import re
+import string
+import nltk
+from nltk.corpus import stopwords
+from nltk.tokenize import word_tokenize
+from nltk.stem.porter import PorterStemmer
+from sklearn.feature_extraction.text import TfidfVectorizer
+from sklearn.model_selection import train_test_split
+from sklearn.linear_model import LogisticRegression
+from sklearn.tree import DecisionTreeRegressor
+from sklearn.ensemble import RandomForestClassifier
+nltk.download('punkt')
+nltk.download('stopwords')
+sw=nltk.corpus.stopwords.words("english")
+rad=st.sidebar.radio("Navigation",["Home","Spam or Ham Detection","Sentiment Analysis","Stress Detection","Hate and Offensive Content Detection","Sarcasm Detection"])
+#Home Page
+if rad=="Home":
+    st.title("Complete Text Analysis App")
+    st.image("SEO-articles-V2_Text-Analysis.png")
+    st.text(" ")
+    st.text("The Following Text Analysis Options Are Available->")
+    st.text(" ")
+    st.text("1. Spam or Ham Detection")
+    st.text("2. Sentiment Analysis")
+    st.text("3. Stress Detection")
+    st.text("4. Hate and Offensive Content Detection")
+    st.text("5. Sarcasm Detection")
+#function to clean and transform the user input which is in raw format
+def transform_text(text):
+    text=text.lower()
+    text=nltk.word_tokenize(text)
+    y=[]
+    for i in text:
+        if i.isalnum():
+            y.append(i)
+    text=y[:]
+    y.clear()
+    for i in text:
+        if i not in stopwords.words('english') and i not in string.punctuation:
+            y.append(i)
+    text=y[:]
+    y.clear()
+    ps=PorterStemmer()
+    for i in text:
+        y.append(ps.stem(i))
+    return " ".join(y)
+#Spam Detection Prediction
+tfidf1=TfidfVectorizer(stop_words=sw,max_features=20)
+def transform1(txt1):
+    txt2=tfidf1.fit_transform(txt1)
+    return txt2.toarray()
+df1=pd.read_csv("Spam Detection.csv")
+df1.columns=["Label","Text"]
+x=transform1(df1["Text"])
+y=df1["Label"]
+x_train1,x_test1,y_train1,y_test1=train_test_split(x,y,test_size=0.1,random_state=0)
+model1=LogisticRegression()
+model1.fit(x_train1,y_train1)
+#Spam Detection Analysis Page
+if rad=="Spam or Ham Detection":
+    st.header("Detect Whether A Text Is Spam Or Ham??")
+    sent1=st.text_area("Enter The Text")
+    transformed_sent1=transform_text(sent1)
+    vector_sent1=tfidf1.transform([transformed_sent1])
+    prediction1=model1.predict(vector_sent1)[0]
+    if st.button("Predict"):
+        if prediction1=="spam":
+            st.warning("Spam Text!!")
+        elif prediction1=="ham":
+            st.success("Ham Text!!")
+#Sentiment Analysis Prediction
+tfidf2=TfidfVectorizer(stop_words=sw,max_features=20)
+def transform2(txt1):
+    txt2=tfidf2.fit_transform(txt1)
+    return txt2.toarray()
+df2=pd.read_csv("Sentiment Analysis.csv")
+df2.columns=["Text","Label"]
+x=transform2(df2["Text"])
+y=df2["Label"]
+x_train2,x_test2,y_train2,y_test2=train_test_split(x,y,test_size=0.1,random_state=0)
+model2=LogisticRegression()
+model2.fit(x_train2,y_train2)
+#Sentiment Analysis Page
+if rad=="Sentiment Analysis":
+    st.header("Detect The Sentiment Of The Text!!")
+    sent2=st.text_area("Enter The Text")
+    transformed_sent2=transform_text(sent2)
+    vector_sent2=tfidf2.transform([transformed_sent2])
+    prediction2=model2.predict(vector_sent2)[0]
+    if st.button("Predict"):
+        if prediction2==0:
+            st.warning("Negetive Text!!")
+        elif prediction2==1:
+            st.success("Positive Text!!")
+#Stress Detection Prediction
+tfidf3=TfidfVectorizer(stop_words=sw,max_features=20)
+def transform3(txt1):
+    txt2=tfidf3.fit_transform(txt1)
+    return txt2.toarray()
+df3=pd.read_csv("Stress Detection.csv")
+df3=df3.drop(["subreddit","post_id","sentence_range","syntax_fk_grade"],axis=1)
+df3.columns=["Text","Sentiment","Stress Level"]
+x=transform3(df3["Text"])
+y=df3["Stress Level"].to_numpy()
+x_train3,x_test3,y_train3,y_test3=train_test_split(x,y,test_size=0.1,random_state=0)
+model3=DecisionTreeRegressor(max_leaf_nodes=2000)
+model3.fit(x_train3,y_train3)
+#Stress Detection Page
+if rad=="Stress Detection":
+    st.header("Detect The Amount Of Stress In The Text!!")
+    sent3=st.text_area("Enter The Text")
+    transformed_sent3=transform_text(sent3)
+    vector_sent3=tfidf3.transform([transformed_sent3])
+    prediction3=model3.predict(vector_sent3)[0]
+    if st.button("Predict"):
+        if prediction3>=0:
+            st.warning("Stressful Text!!")
+        elif prediction3<0:
+            st.success("Not A Stressful Text!!")
+#Hate & Offensive Content Prediction
+tfidf4=TfidfVectorizer(stop_words=sw,max_features=20)
+def transform4(txt1):
+    txt2=tfidf4.fit_transform(txt1)
+    return txt2.toarray()
+df4=pd.read_csv("Hate Content Detection.csv")
+df4=df4.drop(["Unnamed: 0","count","neither"],axis=1)
+df4.columns=["Hate Level","Offensive Level","Class Level","Text"]
+x=transform4(df4["Text"])
+y=df4["Class Level"]
+x_train4,x_test4,y_train4,y_test4=train_test_split(x,y,test_size=0.1,random_state=0)
+model4=RandomForestClassifier()
+model4.fit(x_train4,y_train4)
+#Hate & Offensive Content Page
+if rad=="Hate and Offensive Content Detection":
+    st.header("Detect The Level Of Hate & Offensive Content In The Text!!")
+    sent4=st.text_area("Enter The Text")
+    transformed_sent4=transform_text(sent4)
+    vector_sent4=tfidf4.transform([transformed_sent4])
+    prediction4=model4.predict(vector_sent4)[0]
+    if st.button("Predict"):
+        if prediction4==0:
+            st.exception("Highly Offensive Text!!")
+        elif prediction4==1:
+            st.warning("Offensive Text!!")
+        elif prediction4==2:
+            st.success("Non Offensive Text!!")
+#Sarcasm Detection Prediction
+tfidf5=TfidfVectorizer(stop_words=sw,max_features=20)
+def transform5(txt1):
+    txt2=tfidf5.fit_transform(txt1)
+    return txt2.toarray()
+df5=pd.read_csv("Sarcasm Detection.csv")
+df5.columns=["Text","Label"]
+x=transform5(df5["Text"])
+y=df5["Label"]
+x_train5,x_test5,y_train5,y_test5=train_test_split(x,y,test_size=0.1,random_state=0)
+model5=LogisticRegression()
+model5.fit(x_train5,y_train5)
+#Sarcasm Detection Page
+if rad=="Sarcasm Detection":
+    st.header("Detect Whether The Text Is Sarcastic Or Not!!")
+    sent5=st.text_area("Enter The Text")
+    transformed_sent5=transform_text(sent5)
+    vector_sent5=tfidf5.transform([transformed_sent5])
+    prediction5=model5.predict(vector_sent5)[0]
+    if st.button("Predict"):
+        if prediction5==1:
+            st.exception("Sarcastic Text!!")
+        elif prediction5==0:
+            st.success("Non Sarcastic Text!!")