Jiranuwat committed on
Commit
e328e30
·
1 Parent(s): fe67cb0

Create app.py

Browse files
Files changed (1) hide show
  1. app.py +239 -0
app.py ADDED
@@ -0,0 +1,239 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import streamlit as st
2
+ import pickle
3
+ import pandas as pd
4
+ from sentence_transformers import SentenceTransformer, util
5
+ import numpy as np
6
+ import urllib.parse
7
+ import requests
8
+
9
#initial state
# Default value for every session key used by this script. A key is only
# written when it is not already present, so values survive Streamlit reruns.
# NOTE: 'congentital_disease' is misspelled but is a runtime session key that
# other parts of the file read and write, so the spelling is kept as-is.
_SESSION_DEFAULTS = {
    'state_p1': 0,
    'state_p2': 0,
    'age': 0,
    'weight': 0,
    'height': 0,
    'gender': 0,
    'food_allergy': 0,
    'drug_allergy': 0,
    'congentital_disease': 0,
    'queries': None,
    'sbert_searched_df': None,
    'queries_p2': None,
    'sbert_searched_df_p2': None,
}
# Ten score slots: score_1 ... score_10, each starting as 'NA'.
_SESSION_DEFAULTS.update({'score_' + str(slot): 'NA' for slot in range(1, 11)})
_SESSION_DEFAULTS['current_page'] = 1

for _key, _default in _SESSION_DEFAULTS.items():
    if _key not in st.session_state:
        st.session_state[_key] = _default
41
+
42
def set_state_p1(state):
    """Store ``state`` under the ``state_p1`` session key (used as a button callback)."""
    st.session_state['state_p1'] = state
44
+
45
def set_state_p2(state):
    """Store ``state`` under the ``state_p2`` session key (used as a button callback)."""
    st.session_state['state_p2'] = state
47
+
48
def split_text(text):
    """Return the list of pieces of ``text`` found between commas."""
    separator = ','
    pieces = text.split(separator)
    return pieces
50
+
51
#import data
# Streamlit re-executes this script from the top on every widget interaction.
# Loading the model, the CSV and the embeddings through a cached function means
# the expensive work is done once per server process instead of on every rerun.
# Older Streamlit releases that predate ``st.cache_resource`` fall back to the
# legacy ``st.cache`` decorator.
_cache_resource = getattr(st, "cache_resource", None) or st.cache(allow_output_mutation=True)


@_cache_resource
def _load_resources():
    """Load the sentence encoder, the article table and the corpus embeddings.

    Returns:
        A ``(model, articles, embeddings)`` tuple: the SentenceTransformer
        model, the DataFrame read from ``articles_data.csv`` and the object
        unpickled from ``corpus_embeddings.pickle``.
    """
    model = SentenceTransformer('paraphrase-multilingual-mpnet-base-v2')
    articles = pd.read_csv('articles_data.csv')

    # NOTE(security): pickle.load can execute arbitrary code contained in the
    # file. Only ship a corpus_embeddings.pickle produced by a trusted source.
    with open('corpus_embeddings.pickle', 'rb') as file:
        embeddings = pickle.load(file)

    return model, articles, embeddings


# Module-level names kept unchanged: the rest of the file refers to them.
sbert_model, data, corpus_embeddings = _load_resources()
58
+
59
+ #local function
60
def personal_check(age, weight, height, gender):
    """Turn raw profile values into the Thai descriptor words used for searching.

    Args:
        age: Age in years; 60 and above maps to the elderly label.
        weight: Weight in kilograms.
        height: Height in centimetres (converted to metres for the BMI).
        gender: The string 'หญิง' selects the female label; anything else
            selects the male label.

    Returns:
        A tuple ``(age_label, gender_label, bmi_label)`` of strings. The BMI
        label is the empty string for a BMI in the range [18.5, 23).
    """
    age_label = 'ผู้สูงอายุ' if age >= 60 else 'ทำงาน'
    gender_label = 'ผู้หญิง' if gender == 'หญิง' else 'ผู้ชาย'

    body_mass_index = weight / ((height / 100) ** 2)

    # Lower bounds checked from highest to lowest; the first one reached wins.
    bmi_bands = (
        (30, 'อ้วนมาก'),
        (23, 'อ้วน'),
        (18.5, ''),
    )
    bmi_label = 'ผอม'
    for lower_bound, label in bmi_bands:
        if body_mass_index >= lower_bound:
            bmi_label = label
            break

    return age_label, gender_label, bmi_label
89
+
90
def sbert_search(queries, data, embeddiing, sbert_model=sbert_model, top_k=10):
    """Return the rows of ``data`` that best match ``queries`` semantically.

    Args:
        queries: Query text to encode. Only the first result list returned by
            ``util.semantic_search`` is used, so pass a single query.
        data: DataFrame whose row positions line up with the entries of
            ``embeddiing`` (rows are selected with ``iloc`` by ``corpus_id``).
        embeddiing: Corpus embeddings to search against. The parameter name is
            misspelled but kept so keyword callers keep working.
        sbert_model: Encoder exposing ``encode``; defaults to the module-level
            model.
        top_k: Maximum number of hits to return (defaults to 10).

    Returns:
        A new DataFrame with the matched rows plus a ``score`` column holding
        each hit's similarity score, in the order the hits were returned.
    """
    query_embedding = sbert_model.encode(queries, convert_to_tensor=True)
    hits = util.semantic_search(query_embedding, embeddiing, top_k=top_k)[0]

    index_lst = [hit['corpus_id'] for hit in hits]
    score_lst = [hit['score'] for hit in hits]

    # Take an explicit copy before adding the column: assigning onto the
    # result of an indexing operation is the pattern pandas flags with
    # SettingWithCopyWarning, and the copy guarantees ``data`` is never touched.
    sbert_searched = data.iloc[index_lst].copy()
    sbert_searched['score'] = score_lst

    return sbert_searched
106
+
107
def _render_articles(results):
    """Draw the result list: one title/keywords/link/banner entry per row.

    ``results`` is a DataFrame of matched articles; the columns ``title``,
    ``vote_keywords``, ``url`` and ``banner`` are read from every row. Must be
    called inside the container (form) the entries should appear in.
    """
    divider = "---------------------------------------------------------------------------------------"

    st.markdown("<h1 style='text-align: center; color: black;'>📰บทความสำหรับคุณ😆</h1>", unsafe_allow_html=True)
    st.write(divider)

    for rank, (_, article) in enumerate(results.iterrows(), start=1):
        # f-strings so a non-string cell is rendered instead of raising TypeError.
        st.header(f"{rank}. {article['title']}")
        st.markdown(f"**Keywords :** {article['vote_keywords']}")
        st.markdown(f"[Page source (Click here.)]({article['url']})")

        try:
            banner_url = urllib.parse.quote(article['banner'], safe=':/')
            response = requests.get(banner_url, timeout=5)
            # Treat HTTP error responses as failures instead of handing an
            # error page body to st.image.
            response.raise_for_status()
            st.image(response.content)
        except Exception:
            # Best effort: any failure (missing/invalid URL, network error,
            # undecodable image) falls back to a placeholder picture.
            st.image('https://icon-library.com/images/no-photo-icon/no-photo-icon-1.jpg')
        finally:
            st.write(divider)


def page1_recommendation():
    """Render the recommendation page: profile form, search, then results.

    The page is driven by ``st.session_state.state_p1``:
    0 = only the profile form is shown, 1 = the form was just submitted (the
    query is built and searched, then the state advances to 2), 2 = the stored
    results are displayed. Submitting the results form resets the state to 0.
    """
    #header
    st.markdown("<h1 style='text-align: center; color: black;'>---ระบบแนะนำบทความสุขภาพ---</h1>", unsafe_allow_html=True)

    with st.form('user_info'):

        #personal information input
        age = st.slider("อายุ", 10, 100, 25)

        col1, col2 = st.columns(2)
        with col1:
            weight = st.number_input("น้ำหนัก (Kg.): ",30.0,120.0,step=1.0,value=50.0)
        with col2:
            height = st.number_input("ส่วนสูง (cm.): ",100.0,250.0,step=1.0,value=150.0)

        col3, col4, col5 = st.columns(3)
        with col3:
            gender = st.selectbox('เพศ',('ชาย', 'หญิง'))
        with col4:
            food_allergy = st.selectbox('แพ้อาหาร?',('ไม่แพ้', 'แพ้อาหาร'))
        with col5:
            drug_allergy = st.selectbox('แพ้ยา?',('ไม่แพ้', 'แพ้ยา'))
        # NOTE(review): assumed to sit below the three columns rather than
        # inside the last one - confirm against the intended layout.
        congentital_disease = st.text_input('โรคประจำตัวของคุณ (ถ้าหากไม่มี ไม่ต้องกรอก หรือใส่ "ไม่มี")')

        st.form_submit_button(on_click=set_state_p1,args=(1,))

    if st.session_state.state_p1 == 1:

        #assign state: remember the raw inputs exactly as entered
        st.session_state.age = age
        st.session_state.weight = weight
        st.session_state.height = height
        st.session_state.gender = gender
        st.session_state.food_allergy = food_allergy
        st.session_state.drug_allergy = drug_allergy
        st.session_state.congentital_disease = congentital_disease

        #algorithm
        age_word, gender_word, bmi_word = personal_check(age, weight, height, gender)

        # "No allergy" contributes nothing to the query text.
        food_word = '' if food_allergy == 'ไม่แพ้' else food_allergy
        drug_word = '' if drug_allergy == 'ไม่แพ้' else drug_allergy

        # Strip surrounding whitespace so blank-looking input or a padded
        # "ไม่มี" is treated as "no disease" instead of becoming the query.
        disease = congentital_disease.strip()
        if disease == 'ไม่มี':
            disease = ''

        # A stated disease is searched on its own; otherwise the query is the
        # concatenated profile descriptors.
        if disease:
            queries = disease
        else:
            queries = gender_word + age_word + bmi_word + food_word + drug_word

        #SBERT semantic search
        sbert_searched = sbert_search(queries,data,corpus_embeddings)

        st.session_state.sbert_searched_df = sbert_searched
        st.session_state.queries = queries
        st.session_state.state_p1 = 2

    if st.session_state.state_p1 == 2:

        with st.form('recommendations'):
            _render_articles(st.session_state.sbert_searched_df)
            st.form_submit_button('Submit',on_click=set_state_p1,args=(0,))


def page2_search_engine():
    """Render the free-text search page: query form, search, then results.

    Driven by ``st.session_state.state_p2`` with the same meaning as on the
    recommendation page: 1 = query just submitted (search runs, state becomes
    2), 2 = stored results are displayed, and the results form resets it to 0.
    """
    st.title("Search engine")

    with st.form('queries'):
        queries = st.text_input('คำหรือหัวข้อที่ต้องการค้นหา')
        st.form_submit_button(on_click=set_state_p2,args=(1,))

    if st.session_state.state_p2 == 1:
        sbert_searched = sbert_search(queries,data,corpus_embeddings)

        st.session_state.sbert_searched_df_p2 = sbert_searched
        st.session_state.queries_p2 = queries
        st.session_state.state_p2 = 2

    if st.session_state.state_p2 == 2:
        with st.form('recommendations'):
            _render_articles(st.session_state.sbert_searched_df_p2)
            st.form_submit_button('Submit',on_click=set_state_p2,args=(0,))
222
+
223
+ #main
224
def main():
    """Show the sidebar page selector and render the page that was chosen.

    The choice is also recorded in ``st.session_state.current_page``
    (1 for the recommendation system, 2 for anything else).
    """
    st.sidebar.title("Navigation")
    page = st.sidebar.selectbox("Select a page:", ("Recommendation System", "Search Engine"))

    wants_recommendation = page == "Recommendation System"
    st.session_state.current_page = 1 if wants_recommendation else 2

    if wants_recommendation:
        page1_recommendation()
    elif page == "Search Engine":
        page2_search_engine()


if __name__ == "__main__":
    main()