shubham5027 committed on
Commit e6e5910 · verified · 1 Parent(s): 36e8e91

Upload 9 files
Complete Text Analysis Home Page.jpg ADDED
Hate Content Detection.csv ADDED
The diff for this file is too large to render.
 
Sarcasm Detection.csv ADDED
The diff for this file is too large to render.
 
Sentiment Analysis.csv ADDED
The diff for this file is too large to render.
 
Spam Detection.csv ADDED
The diff for this file is too large to render.
 
Stress Detection.csv ADDED
The diff for this file is too large to render.
 
app.py ADDED
@@ -0,0 +1,198 @@
+ import streamlit as st
+ import numpy as np
+ import pandas as pd
+ import re
+ import string
+ import nltk
+ from nltk.corpus import stopwords
+ from nltk.tokenize import word_tokenize
+ from nltk.stem.porter import PorterStemmer
+ from sklearn.feature_extraction.text import TfidfVectorizer
+ from sklearn.model_selection import train_test_split
+ from sklearn.linear_model import LogisticRegression
+ from sklearn.tree import DecisionTreeRegressor
+ from sklearn.ensemble import RandomForestClassifier
+
+ nltk.download('punkt')
+ nltk.download('stopwords')
+ sw=nltk.corpus.stopwords.words("english")
+
+ rad=st.sidebar.radio("Navigation",["Home","Spam or Ham Detection","Sentiment Analysis","Stress Detection","Hate and Offensive Content Detection","Sarcasm Detection"])
+
+ #Home Page
+ if rad=="Home":
+     st.title("Complete Text Analysis App")
+     st.image("Complete Text Analysis Home Page.jpg")
+     st.text(" ")
+     st.text("The Following Text Analysis Options Are Available->")
+     st.text(" ")
+     st.text("1. Spam or Ham Detection")
+     st.text("2. Sentiment Analysis")
+     st.text("3. Stress Detection")
+     st.text("4. Hate and Offensive Content Detection")
+     st.text("5. Sarcasm Detection")
+
+ #function to clean and transform the user input which is in raw format
+ def transform_text(text):
+     text=text.lower()
+     text=nltk.word_tokenize(text)
+     y=[]
+     for i in text:
+         if i.isalnum():
+             y.append(i)
+     text=y[:]
+     y.clear()
+     for i in text:
+         if i not in stopwords.words('english') and i not in string.punctuation:
+             y.append(i)
+     text=y[:]
+     y.clear()
+     ps=PorterStemmer()
+     for i in text:
+         y.append(ps.stem(i))
+     return " ".join(y)
+
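+ # Illustrative example of the cleaning pipeline above (lowercase, tokenize,
+ # drop non-alphanumeric tokens, remove stopwords, Porter-stem):
+ # transform_text("I loved the movie, it was great!") -> "love movi great"
+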
+ #Spam Detection Prediction
+ tfidf1=TfidfVectorizer(stop_words=sw,max_features=20)
+ def transform1(txt1):
+     txt2=tfidf1.fit_transform(txt1)
+     return txt2.toarray()
+
+ df1=pd.read_csv("Spam Detection.csv")
+ df1.columns=["Label","Text"]
+ x=transform1(df1["Text"])
+ y=df1["Label"]
+ x_train1,x_test1,y_train1,y_test1=train_test_split(x,y,test_size=0.1,random_state=0)
+ model1=LogisticRegression()
+ model1.fit(x_train1,y_train1)
+
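+ # Optional sanity check (a minimal sketch): mean accuracy of the spam model
+ # on the 10% hold-out split created above; spam_test_accuracy is an
+ # illustrative name, not used elsewhere in the app.
+ spam_test_accuracy=model1.score(x_test1,y_test1)
+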
+ #Spam Detection Analysis Page
+ if rad=="Spam or Ham Detection":
+     st.header("Detect Whether A Text Is Spam Or Ham??")
+     sent1=st.text_area("Enter The Text")
+     transformed_sent1=transform_text(sent1)
+     vector_sent1=tfidf1.transform([transformed_sent1])
+     prediction1=model1.predict(vector_sent1)[0]
+
+     if st.button("Predict"):
+         if prediction1=="spam":
+             st.warning("Spam Text!!")
+         elif prediction1=="ham":
+             st.success("Ham Text!!")
+
+ #Sentiment Analysis Prediction
+ tfidf2=TfidfVectorizer(stop_words=sw,max_features=20)
+ def transform2(txt1):
+     txt2=tfidf2.fit_transform(txt1)
+     return txt2.toarray()
+
+ df2=pd.read_csv("Sentiment Analysis.csv")
+ df2.columns=["Text","Label"]
+ x=transform2(df2["Text"])
+ y=df2["Label"]
+ x_train2,x_test2,y_train2,y_test2=train_test_split(x,y,test_size=0.1,random_state=0)
+ model2=LogisticRegression()
+ model2.fit(x_train2,y_train2)
+
+ #Sentiment Analysis Page
+ if rad=="Sentiment Analysis":
+     st.header("Detect The Sentiment Of The Text!!")
+     sent2=st.text_area("Enter The Text")
+     transformed_sent2=transform_text(sent2)
+     vector_sent2=tfidf2.transform([transformed_sent2])
+     prediction2=model2.predict(vector_sent2)[0]
+
+     if st.button("Predict"):
+         if prediction2==0:
+             st.warning("Negative Text!!")
+         elif prediction2==1:
+             st.success("Positive Text!!")
+
+ #Stress Detection Prediction
+ tfidf3=TfidfVectorizer(stop_words=sw,max_features=20)
+ def transform3(txt1):
+     txt2=tfidf3.fit_transform(txt1)
+     return txt2.toarray()
+
+ df3=pd.read_csv("Stress Detection.csv")
+ df3=df3.drop(["subreddit","post_id","sentence_range","syntax_fk_grade"],axis=1)
+ df3.columns=["Text","Sentiment","Stress Level"]
+ x=transform3(df3["Text"])
+ y=df3["Stress Level"].to_numpy()
+ x_train3,x_test3,y_train3,y_test3=train_test_split(x,y,test_size=0.1,random_state=0)
+ model3=DecisionTreeRegressor(max_leaf_nodes=2000)
+ model3.fit(x_train3,y_train3)
+
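+ # Note: "Stress Level" is fit as a continuous target, so model3 returns
+ # scores rather than class labels; its hold-out fit can be checked with the
+ # regressor's R^2 score (stress_test_r2 is an illustrative name).
+ stress_test_r2=model3.score(x_test3,y_test3)
+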
+ #Stress Detection Page
+ if rad=="Stress Detection":
+     st.header("Detect The Amount Of Stress In The Text!!")
+     sent3=st.text_area("Enter The Text")
+     transformed_sent3=transform_text(sent3)
+     vector_sent3=tfidf3.transform([transformed_sent3])
+     prediction3=model3.predict(vector_sent3)[0]
+
+     if st.button("Predict"):
+         # assuming 0/1 stress labels, treat a regression output of 0.5 or more as stressful
+         if prediction3>=0.5:
+             st.warning("Stressful Text!!")
+         else:
+             st.success("Not A Stressful Text!!")
+
+ #Hate & Offensive Content Prediction
+ tfidf4=TfidfVectorizer(stop_words=sw,max_features=20)
+ def transform4(txt1):
+     txt2=tfidf4.fit_transform(txt1)
+     return txt2.toarray()
+
+ df4=pd.read_csv("Hate Content Detection.csv")
+ df4=df4.drop(["Unnamed: 0","count","neither"],axis=1)
+ df4.columns=["Hate Level","Offensive Level","Class Level","Text"]
+ x=transform4(df4["Text"])
+ y=df4["Class Level"]
+ x_train4,x_test4,y_train4,y_test4=train_test_split(x,y,test_size=0.1,random_state=0)
+ model4=RandomForestClassifier()
+ model4.fit(x_train4,y_train4)
+
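+ # "Class Level" values (assuming the usual hate-speech dataset convention):
+ # 0 = hate speech, 1 = offensive language, 2 = neither.
+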
+ #Hate & Offensive Content Page
+ if rad=="Hate and Offensive Content Detection":
+     st.header("Detect The Level Of Hate & Offensive Content In The Text!!")
+     sent4=st.text_area("Enter The Text")
+     transformed_sent4=transform_text(sent4)
+     vector_sent4=tfidf4.transform([transformed_sent4])
+     prediction4=model4.predict(vector_sent4)[0]
+
+     if st.button("Predict"):
+         if prediction4==0:
+             st.error("Highly Offensive Text!!")
+         elif prediction4==1:
+             st.warning("Offensive Text!!")
+         elif prediction4==2:
+             st.success("Non Offensive Text!!")
+
+ #Sarcasm Detection Prediction
+ tfidf5=TfidfVectorizer(stop_words=sw,max_features=20)
+ def transform5(txt1):
+     txt2=tfidf5.fit_transform(txt1)
+     return txt2.toarray()
+
+ df5=pd.read_csv("Sarcasm Detection.csv")
+ df5.columns=["Text","Label"]
+ x=transform5(df5["Text"])
+ y=df5["Label"]
+ x_train5,x_test5,y_train5,y_test5=train_test_split(x,y,test_size=0.1,random_state=0)
+ model5=LogisticRegression()
+ model5.fit(x_train5,y_train5)
+
+ #Sarcasm Detection Page
+ if rad=="Sarcasm Detection":
+     st.header("Detect Whether The Text Is Sarcastic Or Not!!")
+     sent5=st.text_area("Enter The Text")
+     transformed_sent5=transform_text(sent5)
+     vector_sent5=tfidf5.transform([transformed_sent5])
+     prediction5=model5.predict(vector_sent5)[0]
+
+     if st.button("Predict"):
+         if prediction5==1:
+             st.error("Sarcastic Text!!")
+         elif prediction5==0:
+             st.success("Non Sarcastic Text!!")
nltk.txt ADDED
@@ -0,0 +1,2 @@
+ stopwords
+ punkt
requirements.txt ADDED
@@ -0,0 +1,5 @@
+ nltk==3.6.3
+ numpy==1.21.1
+ pandas==1.3.0
+ scikit_learn==1.0.2
+ streamlit==1.3.0
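
With the pinned dependencies above installed, the app starts locally with: streamlit run app.py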