shubham5027 committed on
Commit
029d4c7
·
verified ·
1 Parent(s): 5df254f

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +198 -198
app.py CHANGED
@@ -1,198 +1,198 @@
1
- from tracemalloc import stop
2
- import streamlit as st
3
- import numpy as np
4
- import pandas as pd
5
- import re
6
- import string
7
- import nltk
8
- from nltk.corpus import stopwords
9
- from nltk.tokenize import word_tokenize
10
- from nltk.stem.porter import PorterStemmer
11
- from sklearn.feature_extraction.text import TfidfVectorizer
12
- from sklearn.model_selection import train_test_split
13
- from sklearn.linear_model import LogisticRegression
14
- from sklearn.tree import DecisionTreeRegressor
15
- from sklearn.ensemble import RandomForestClassifier
16
-
17
- nltk.download('punkt')
18
- nltk.download('stopwords')
19
- sw=nltk.corpus.stopwords.words("english")
20
-
21
- rad=st.sidebar.radio("Navigation",["Home","Spam or Ham Detection","Sentiment Analysis","Stress Detection","Hate and Offensive Content Detection","Sarcasm Detection"])
22
-
23
- #Home Page
24
- if rad=="Home":
25
- st.title("Complete Text Analysis App")
26
- st.image("Complete Text Analysis Home Page.jpg")
27
- st.text(" ")
28
- st.text("The Following Text Analysis Options Are Available->")
29
- st.text(" ")
30
- st.text("1. Spam or Ham Detection")
31
- st.text("2. Sentiment Analysis")
32
- st.text("3. Stress Detection")
33
- st.text("4. Hate and Offensive Content Detection")
34
- st.text("5. Sarcasm Detection")
35
-
36
- #function to clean and transform the user input which is in raw format
37
- def transform_text(text):
38
- text=text.lower()
39
- text=nltk.word_tokenize(text)
40
- y=[]
41
- for i in text:
42
- if i.isalnum():
43
- y.append(i)
44
- text=y[:]
45
- y.clear()
46
- for i in text:
47
- if i not in stopwords.words('english') and i not in string.punctuation:
48
- y.append(i)
49
- text=y[:]
50
- y.clear()
51
- ps=PorterStemmer()
52
- for i in text:
53
- y.append(ps.stem(i))
54
- return " ".join(y)
55
-
56
- #Spam Detection Prediction
57
- tfidf1=TfidfVectorizer(stop_words=sw,max_features=20)
58
- def transform1(txt1):
59
- txt2=tfidf1.fit_transform(txt1)
60
- return txt2.toarray()
61
-
62
- df1=pd.read_csv("Spam Detection.csv")
63
- df1.columns=["Label","Text"]
64
- x=transform1(df1["Text"])
65
- y=df1["Label"]
66
- x_train1,x_test1,y_train1,y_test1=train_test_split(x,y,test_size=0.1,random_state=0)
67
- model1=LogisticRegression()
68
- model1.fit(x_train1,y_train1)
69
-
70
- #Spam Detection Analysis Page
71
- if rad=="Spam or Ham Detection":
72
- st.header("Detect Whether A Text Is Spam Or Ham??")
73
- sent1=st.text_area("Enter The Text")
74
- transformed_sent1=transform_text(sent1)
75
- vector_sent1=tfidf1.transform([transformed_sent1])
76
- prediction1=model1.predict(vector_sent1)[0]
77
-
78
- if st.button("Predict"):
79
- if prediction1=="spam":
80
- st.warning("Spam Text!!")
81
- elif prediction1=="ham":
82
- st.success("Ham Text!!")
83
-
84
- #Sentiment Analysis Prediction
85
- tfidf2=TfidfVectorizer(stop_words=sw,max_features=20)
86
- def transform2(txt1):
87
- txt2=tfidf2.fit_transform(txt1)
88
- return txt2.toarray()
89
-
90
- df2=pd.read_csv("Sentiment Analysis.csv")
91
- df2.columns=["Text","Label"]
92
- x=transform2(df2["Text"])
93
- y=df2["Label"]
94
- x_train2,x_test2,y_train2,y_test2=train_test_split(x,y,test_size=0.1,random_state=0)
95
- model2=LogisticRegression()
96
- model2.fit(x_train2,y_train2)
97
-
98
- #Sentiment Analysis Page
99
- if rad=="Sentiment Analysis":
100
- st.header("Detect The Sentiment Of The Text!!")
101
- sent2=st.text_area("Enter The Text")
102
- transformed_sent2=transform_text(sent2)
103
- vector_sent2=tfidf2.transform([transformed_sent2])
104
- prediction2=model2.predict(vector_sent2)[0]
105
-
106
- if st.button("Predict"):
107
- if prediction2==0:
108
- st.warning("Negetive Text!!")
109
- elif prediction2==1:
110
- st.success("Positive Text!!")
111
-
112
- #Stress Detection Prediction
113
- tfidf3=TfidfVectorizer(stop_words=sw,max_features=20)
114
- def transform3(txt1):
115
- txt2=tfidf3.fit_transform(txt1)
116
- return txt2.toarray()
117
-
118
- df3=pd.read_csv("Stress Detection.csv")
119
- df3=df3.drop(["subreddit","post_id","sentence_range","syntax_fk_grade"],axis=1)
120
- df3.columns=["Text","Sentiment","Stress Level"]
121
- x=transform3(df3["Text"])
122
- y=df3["Stress Level"].to_numpy()
123
- x_train3,x_test3,y_train3,y_test3=train_test_split(x,y,test_size=0.1,random_state=0)
124
- model3=DecisionTreeRegressor(max_leaf_nodes=2000)
125
- model3.fit(x_train3,y_train3)
126
-
127
- #Stress Detection Page
128
- if rad=="Stress Detection":
129
- st.header("Detect The Amount Of Stress In The Text!!")
130
- sent3=st.text_area("Enter The Text")
131
- transformed_sent3=transform_text(sent3)
132
- vector_sent3=tfidf3.transform([transformed_sent3])
133
- prediction3=model3.predict(vector_sent3)[0]
134
-
135
- if st.button("Predict"):
136
- if prediction3>=0:
137
- st.warning("Stressful Text!!")
138
- elif prediction3<0:
139
- st.success("Not A Stressful Text!!")
140
-
141
- #Hate & Offensive Content Prediction
142
- tfidf4=TfidfVectorizer(stop_words=sw,max_features=20)
143
- def transform4(txt1):
144
- txt2=tfidf4.fit_transform(txt1)
145
- return txt2.toarray()
146
-
147
- df4=pd.read_csv("Hate Content Detection.csv")
148
- df4=df4.drop(["Unnamed: 0","count","neither"],axis=1)
149
- df4.columns=["Hate Level","Offensive Level","Class Level","Text"]
150
- x=transform4(df4["Text"])
151
- y=df4["Class Level"]
152
- x_train4,x_test4,y_train4,y_test4=train_test_split(x,y,test_size=0.1,random_state=0)
153
- model4=RandomForestClassifier()
154
- model4.fit(x_train4,y_train4)
155
-
156
- #Hate & Offensive Content Page
157
- if rad=="Hate and Offensive Content Detection":
158
- st.header("Detect The Level Of Hate & Offensive Content In The Text!!")
159
- sent4=st.text_area("Enter The Text")
160
- transformed_sent4=transform_text(sent4)
161
- vector_sent4=tfidf4.transform([transformed_sent4])
162
- prediction4=model4.predict(vector_sent4)[0]
163
-
164
- if st.button("Predict"):
165
- if prediction4==0:
166
- st.exception("Highly Offensive Text!!")
167
- elif prediction4==1:
168
- st.warning("Offensive Text!!")
169
- elif prediction4==2:
170
- st.success("Non Offensive Text!!")
171
-
172
- #Sarcasm Detection Prediction
173
- tfidf5=TfidfVectorizer(stop_words=sw,max_features=20)
174
- def transform5(txt1):
175
- txt2=tfidf5.fit_transform(txt1)
176
- return txt2.toarray()
177
-
178
- df5=pd.read_csv("Sarcasm Detection.csv")
179
- df5.columns=["Text","Label"]
180
- x=transform5(df5["Text"])
181
- y=df5["Label"]
182
- x_train5,x_test5,y_train5,y_test5=train_test_split(x,y,test_size=0.1,random_state=0)
183
- model5=LogisticRegression()
184
- model5.fit(x_train5,y_train5)
185
-
186
- #Sarcasm Detection Page
187
- if rad=="Sarcasm Detection":
188
- st.header("Detect Whether The Text Is Sarcastic Or Not!!")
189
- sent5=st.text_area("Enter The Text")
190
- transformed_sent5=transform_text(sent5)
191
- vector_sent5=tfidf5.transform([transformed_sent5])
192
- prediction5=model5.predict(vector_sent5)[0]
193
-
194
- if st.button("Predict"):
195
- if prediction5==1:
196
- st.exception("Sarcastic Text!!")
197
- elif prediction5==0:
198
- st.success("Non Sarcastic Text!!")
 
1
+ from tracemalloc import stop
2
+ import streamlit as st
3
+ import numpy as np
4
+ import pandas as pd
5
+ import re
6
+ import string
7
+ import nltk
8
+ from nltk.corpus import stopwords
9
+ from nltk.tokenize import word_tokenize
10
+ from nltk.stem.porter import PorterStemmer
11
+ from sklearn.feature_extraction.text import TfidfVectorizer
12
+ from sklearn.model_selection import train_test_split
13
+ from sklearn.linear_model import LogisticRegression
14
+ from sklearn.tree import DecisionTreeRegressor
15
+ from sklearn.ensemble import RandomForestClassifier
16
+
17
# Fetch the NLTK resources the app relies on (tokenizer model and stop-word
# corpus), then keep the English stop-word list for the TF-IDF vectorizers.
for _nltk_resource in ('punkt', 'stopwords'):
    nltk.download(_nltk_resource)
sw = nltk.corpus.stopwords.words("english")

# Sidebar radio button that selects which page of the app is rendered.
rad = st.sidebar.radio(
    "Navigation",
    [
        "Home",
        "Spam or Ham Detection",
        "Sentiment Analysis",
        "Stress Detection",
        "Hate and Offensive Content Detection",
        "Sarcasm Detection",
    ],
)
22
+
23
# Home page: title, banner image and the list of available analyses.
if rad == "Home":
    st.title("Complete Text Analysis App")
    st.image("SEO-articles-V2_Text-Analysis.png")
    # Each entry is rendered as its own fixed-width text line, in order;
    # the single-space entries act as vertical spacers.
    for home_line in (
        " ",
        "The Following Text Analysis Options Are Available->",
        " ",
        "1. Spam or Ham Detection",
        "2. Sentiment Analysis",
        "3. Stress Detection",
        "4. Hate and Offensive Content Detection",
        "5. Sarcasm Detection",
    ):
        st.text(home_line)
35
+
36
# Function to clean and transform the raw user input.
def transform_text(text):
    """Normalise raw text into a space-separated string of stemmed tokens.

    The text is lower-cased and tokenized with NLTK; only alphanumeric
    tokens that are not English stop words are kept, and each survivor is
    reduced to its Porter stem.

    Args:
        text: The raw input string.

    Returns:
        The stemmed tokens joined by single spaces (an empty string when no
        token survives the filters).
    """
    # Read the stop-word list once per call rather than once per token, and
    # use a set so each membership test is O(1).
    stop_words = set(stopwords.words('english'))
    stemmer = PorterStemmer()
    # isalnum() already rejects every punctuation-only token, so no separate
    # punctuation check is needed.
    return " ".join(
        stemmer.stem(token)
        for token in nltk.word_tokenize(text.lower())
        if token.isalnum() and token not in stop_words
    )
55
+
56
# Spam Detection Prediction
tfidf1 = TfidfVectorizer(stop_words=sw, max_features=20)


def transform1(txt1):
    """Fit ``tfidf1`` on the given texts and return the dense TF-IDF matrix."""
    txt2 = tfidf1.fit_transform(txt1)
    return txt2.toarray()


# Spam Detection Analysis Page
# Streamlit re-executes the whole script on every interaction, so the data
# set is loaded and the model fitted only while this page is selected.
if rad == "Spam or Ham Detection":
    df1 = pd.read_csv("Spam Detection.csv")
    df1.columns = ["Label", "Text"]
    x = transform1(df1["Text"])
    y = df1["Label"]
    x_train1, x_test1, y_train1, y_test1 = train_test_split(
        x, y, test_size=0.1, random_state=0
    )
    model1 = LogisticRegression()
    model1.fit(x_train1, y_train1)

    st.header("Detect Whether A Text Is Spam Or Ham??")
    sent1 = st.text_area("Enter The Text")

    # Vectorise and classify only when the user asks for a prediction.
    if st.button("Predict"):
        # NOTE(review): tfidf1 is fitted on the raw CSV text, while the user
        # input is stemmed by transform_text first - confirm this mismatch
        # between training and inference preprocessing is intended.
        transformed_sent1 = transform_text(sent1)
        vector_sent1 = tfidf1.transform([transformed_sent1])
        prediction1 = model1.predict(vector_sent1)[0]

        if prediction1 == "spam":
            st.warning("Spam Text!!")
        elif prediction1 == "ham":
            st.success("Ham Text!!")
83
+
84
# Sentiment Analysis Prediction
tfidf2 = TfidfVectorizer(stop_words=sw, max_features=20)


def transform2(txt1):
    """Fit ``tfidf2`` on the given texts and return the dense TF-IDF matrix."""
    txt2 = tfidf2.fit_transform(txt1)
    return txt2.toarray()


# Sentiment Analysis Page
# Streamlit re-executes the whole script on every interaction, so the data
# set is loaded and the model fitted only while this page is selected.
if rad == "Sentiment Analysis":
    df2 = pd.read_csv("Sentiment Analysis.csv")
    df2.columns = ["Text", "Label"]
    x = transform2(df2["Text"])
    y = df2["Label"]
    x_train2, x_test2, y_train2, y_test2 = train_test_split(
        x, y, test_size=0.1, random_state=0
    )
    model2 = LogisticRegression()
    model2.fit(x_train2, y_train2)

    st.header("Detect The Sentiment Of The Text!!")
    sent2 = st.text_area("Enter The Text")

    # Vectorise and classify only when the user asks for a prediction.
    if st.button("Predict"):
        # NOTE(review): tfidf2 is fitted on the raw CSV text, while the user
        # input is stemmed by transform_text first - confirm this mismatch
        # between training and inference preprocessing is intended.
        transformed_sent2 = transform_text(sent2)
        vector_sent2 = tfidf2.transform([transformed_sent2])
        prediction2 = model2.predict(vector_sent2)[0]

        if prediction2 == 0:
            st.warning("Negetive Text!!")
        elif prediction2 == 1:
            st.success("Positive Text!!")
111
+
112
# Stress Detection Prediction
tfidf3 = TfidfVectorizer(stop_words=sw, max_features=20)


def transform3(txt1):
    """Fit ``tfidf3`` on the given texts and return the dense TF-IDF matrix."""
    txt2 = tfidf3.fit_transform(txt1)
    return txt2.toarray()


# Stress Detection Page
# Streamlit re-executes the whole script on every interaction, so the data
# set is loaded and the model fitted only while this page is selected.
if rad == "Stress Detection":
    df3 = pd.read_csv("Stress Detection.csv")
    df3 = df3.drop(
        ["subreddit", "post_id", "sentence_range", "syntax_fk_grade"], axis=1
    )
    df3.columns = ["Text", "Sentiment", "Stress Level"]
    x = transform3(df3["Text"])
    y = df3["Stress Level"].to_numpy()
    x_train3, x_test3, y_train3, y_test3 = train_test_split(
        x, y, test_size=0.1, random_state=0
    )
    # The tree is refitted on each rerun; a fixed seed keeps tie-breaking
    # between equally good splits (and so the predictions) reproducible.
    model3 = DecisionTreeRegressor(max_leaf_nodes=2000, random_state=0)
    model3.fit(x_train3, y_train3)

    st.header("Detect The Amount Of Stress In The Text!!")
    sent3 = st.text_area("Enter The Text")

    # Vectorise and score only when the user asks for a prediction.
    if st.button("Predict"):
        # NOTE(review): tfidf3 is fitted on the raw CSV text, while the user
        # input is stemmed by transform_text first - confirm this mismatch
        # between training and inference preprocessing is intended.
        transformed_sent3 = transform_text(sent3)
        vector_sent3 = tfidf3.transform([transformed_sent3])
        prediction3 = model3.predict(vector_sent3)[0]

        # NOTE(review): if "Stress Level" never takes negative values, the
        # regressor output is never negative either and the first branch is
        # always taken - confirm the intended threshold against the data.
        if prediction3 >= 0:
            st.warning("Stressful Text!!")
        elif prediction3 < 0:
            st.success("Not A Stressful Text!!")
140
+
141
# Hate & Offensive Content Prediction
tfidf4 = TfidfVectorizer(stop_words=sw, max_features=20)


def transform4(txt1):
    """Fit ``tfidf4`` on the given texts and return the dense TF-IDF matrix."""
    txt2 = tfidf4.fit_transform(txt1)
    return txt2.toarray()


# Hate & Offensive Content Page
# Streamlit re-executes the whole script on every interaction, so the data
# set is loaded and the model fitted only while this page is selected.
if rad == "Hate and Offensive Content Detection":
    df4 = pd.read_csv("Hate Content Detection.csv")
    df4 = df4.drop(["Unnamed: 0", "count", "neither"], axis=1)
    df4.columns = ["Hate Level", "Offensive Level", "Class Level", "Text"]
    x = transform4(df4["Text"])
    y = df4["Class Level"]
    x_train4, x_test4, y_train4, y_test4 = train_test_split(
        x, y, test_size=0.1, random_state=0
    )
    # The forest is refitted on each rerun; without a fixed seed the same
    # text could be labelled differently from one click to the next.
    model4 = RandomForestClassifier(random_state=0)
    model4.fit(x_train4, y_train4)

    st.header("Detect The Level Of Hate & Offensive Content In The Text!!")
    sent4 = st.text_area("Enter The Text")

    # Vectorise and classify only when the user asks for a prediction.
    if st.button("Predict"):
        # NOTE(review): tfidf4 is fitted on the raw CSV text, while the user
        # input is stemmed by transform_text first - confirm this mismatch
        # between training and inference preprocessing is intended.
        transformed_sent4 = transform_text(sent4)
        vector_sent4 = tfidf4.transform([transformed_sent4])
        prediction4 = model4.predict(vector_sent4)[0]

        if prediction4 == 0:
            # st.exception() is meant for exception objects; st.error()
            # is the call for showing a plain error-styled message.
            st.error("Highly Offensive Text!!")
        elif prediction4 == 1:
            st.warning("Offensive Text!!")
        elif prediction4 == 2:
            st.success("Non Offensive Text!!")
171
+
172
# Sarcasm Detection Prediction
tfidf5 = TfidfVectorizer(stop_words=sw, max_features=20)


def transform5(txt1):
    """Fit ``tfidf5`` on the given texts and return the dense TF-IDF matrix."""
    txt2 = tfidf5.fit_transform(txt1)
    return txt2.toarray()


# Sarcasm Detection Page
# Streamlit re-executes the whole script on every interaction, so the data
# set is loaded and the model fitted only while this page is selected.
if rad == "Sarcasm Detection":
    df5 = pd.read_csv("Sarcasm Detection.csv")
    df5.columns = ["Text", "Label"]
    x = transform5(df5["Text"])
    y = df5["Label"]
    x_train5, x_test5, y_train5, y_test5 = train_test_split(
        x, y, test_size=0.1, random_state=0
    )
    model5 = LogisticRegression()
    model5.fit(x_train5, y_train5)

    st.header("Detect Whether The Text Is Sarcastic Or Not!!")
    sent5 = st.text_area("Enter The Text")

    # Vectorise and classify only when the user asks for a prediction.
    if st.button("Predict"):
        # NOTE(review): tfidf5 is fitted on the raw CSV text, while the user
        # input is stemmed by transform_text first - confirm this mismatch
        # between training and inference preprocessing is intended.
        transformed_sent5 = transform_text(sent5)
        vector_sent5 = tfidf5.transform([transformed_sent5])
        prediction5 = model5.predict(vector_sent5)[0]

        if prediction5 == 1:
            # st.exception() is meant for exception objects; st.error()
            # is the call for showing a plain error-styled message.
            st.error("Sarcastic Text!!")
        elif prediction5 == 0:
            st.success("Non Sarcastic Text!!")