hoson committed on
Commit
8404039
·
verified ·
1 Parent(s): 72ffbd7

Create app.py

Browse files
Files changed (1) hide show
  1. app.py +192 -0
app.py ADDED
@@ -0,0 +1,192 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
import base64
import html
import json
import os

import gradio as gr
import requests
4
+
5
+
6
# ANSI escape sequences used to colorize console output.
GREEN = '\033[1;32m'
BLUE = '\033[1;34m'
RESET = '\033[0m'

# Base URL of the query service; endpoint names are appended to it.
URL = "https://ai1071.4dstaging.com/v1/"

# Response status codes. VALID_ANSWER is compared with the "code" field of a
# service response; the other two are declared for completeness.
VALID_ANSWER, QUERY_FAIL, INVALID_ANSWER = 0, 1, 2

# Indexes into MODES, one per customer configuration.
# VICTORIA_HARBOUR is the harbour restaurant group customer.
VICTORIA_HARBOUR, MIC = 0, 1
CUSTOMER = MIC

# One entry per customer: UI title, query settings sent to the service, and
# the example questions offered in the chat UI.
MODES = [
    {
        "name": " ",
        "query_mode_indx": 5,
        "retrieval_temperature": 0.2,  # EC19Jun2024
        "path": r"E:\workspace\RAG_data\20240412_superQuery\db\EC_test_all\20240603_haigang_qa",
        "sample_questions": [
            # NOTE: the comma after "訂枱" was missing, which made Python
            # concatenate it with the next literal into a single question.
            "這裡可以book位嗎?", "可以book位嗎?", "Hi", "蟹", "魚", "會員", "訂枱",
            "锡我?", "可唔可以幫我寫一張菜單?",
            "可以加大長腳蟹嗎?", "想查詢最新堂食優惠",
            "有什麼優惠", "宴會菜單", "有長腳蟹?", "積分如何運作?", "點加入會員?",
            "套餐可轉其他菜式嗎?", "網購限定優惠可以堂食嗎?", "當日海鮮供應情況?",
        ],
    },
    {
        "name": "MiC Modular Integrated Construction - HK (Beta)",
        "query_mode_indx": 4,
        "retrieval_temperature": 0.2,  # EC19Jun2024
        "path": r"E:\workspace\RAG_data\20240412_superQuery\db\EC_test_all\20240619_mic_demo",
        "sample_questions": [
            "What is MIC?", "優惠措施", "Please introduce CIC", "Key Technologies of MIC",
            "組裝合成建築法", "物料或產品規格", "MIC safety.",
        ],
    },
]

# Example questions shown in the chat UI for the active customer.
questions = MODES[CUSTOMER]['sample_questions']
45
+
46
def the_answer(response: dict) -> str:
    """Extract the answer text from a service response.

    The answer is the part of ``response['msg']`` that follows the
    ``'Answer(GPT4):'`` marker, up to the ``'References:'`` marker (or the
    end of the message when that marker is absent), with leading and
    trailing whitespace removed.

    Raises:
        KeyError: if *response* has no ``'msg'`` entry.
        IndexError: if the message has no ``'Answer(GPT4):'`` marker.
    """
    after_marker = response['msg'].split('Answer(GPT4):')[1]
    answer = after_marker.split('References:')[0]
    # str.strip() returns a new string, so its result must be returned;
    # calling it as a bare statement leaves the text untouched.
    return answer.strip()
50
+
51
def the_references(response: dict, user_query: str) -> list:
    """Collect the reference contents of *response*, minus exact repeats.

    When ``response["code"]`` equals ``VALID_ANSWER``, the ``"page_content"``
    of every entry in ``response["data"]["source_docs"]`` is collected. The
    collected list is then passed through ``filter_repeated`` together with
    *user_query*.

    A list is returned on every path: a response with any other code yields
    whatever ``filter_repeated`` returns for an empty list.

    Raises:
        KeyError: if a valid response lacks ``"data"``, ``"source_docs"`` or
            a document lacks ``"page_content"``.
    """
    ref_contents = []
    if response["code"] == VALID_ANSWER:
        ref_contents = [
            doc["page_content"] for doc in response["data"]["source_docs"]
        ]
    # EC04Jun2024: drop references whose question repeats the user's query.
    return filter_repeated(user_query, ref_contents)
61
+
62
def filter_repeated(user_query, ref_contents: list) -> list:
    """Drop references whose question is identical to the user's query.

    For each reference, the question is taken as the first value of the
    mapping stored under the ``'問題'`` key. A reference is removed only when
    that question equals *user_query* exactly. References from which no
    question can be read (not a mapping, key missing, empty mapping) are
    always kept.

    Args:
        user_query: The question the user asked.
        ref_contents: Reference contents to filter; order is preserved.

    Returns:
        A new list with the surviving references.
    """
    # EC04Jun2024
    ref_contents_filtered = []
    for ref in ref_contents:
        try:
            question = next(iter(ref.get('問題').values()))
        except StopIteration:
            # Empty question mapping: there is nothing to compare against.
            # Keep the reference instead of falling through with `question`
            # unbound or left over from the previous iteration.
            ref_contents_filtered.append(ref)
            continue
        except Exception as e:
            # Deliberately broad: any reference that does not have the
            # expected shape is kept rather than aborting the whole filter.
            print(e)
            ref_contents_filtered.append(ref)
            continue
        print(question)
        print("question == user_query: " + str(question == user_query))
        if question != user_query:
            ref_contents_filtered.append(ref)
    return ref_contents_filtered
82
+
83
def get_images_from_source(source_docs):
    """Build inline HTML ``<img>`` tags for the images among *source_docs*.

    Each document's ``'source'`` value is checked for a ``.jpg``, ``.jpeg``
    or ``.png`` extension (case-insensitive). Matching sources are fetched
    from the service's ``images`` endpoint with the source passed as the
    ``image_id`` query parameter, and embedded as base64 ``data:`` URIs.

    Documents without a string ``'source'``, sources that are not images, and
    images that cannot be fetched are skipped; a fetch failure is reported on
    stdout.

    Args:
        source_docs: Iterable of mappings, each optionally holding a
            ``'source'`` entry.

    Returns:
        The tags concatenated, each preceded by a newline, or ``""`` when no
        image was embedded.
    """
    mime_by_ext = {
        ".jpg": "image/jpeg",
        ".jpeg": "image/jpeg",
        ".png": "image/png",
    }

    img_tags = []
    for doc in source_docs:
        source = doc.get('source') if isinstance(doc, dict) else None
        if not isinstance(source, str):
            continue
        mime = mime_by_ext.get(os.path.splitext(source)[1].lower())
        if mime is None:
            continue

        try:
            # `params` lets requests percent-encode the identifier, so
            # characters such as '&', '#' or spaces cannot corrupt the query.
            response = requests.get(
                URL + "images", params={"image_id": source}, timeout=30
            )
        except requests.RequestException as exc:
            print(f"Error fetching image: {exc}")
            continue
        if response.status_code != 200:
            print("Error fetching image")
            continue

        base64_image = base64.b64encode(response.content).decode("ascii")
        # Use the file name as alt text; the source may use either kind of
        # path separator, so normalize before taking the last component.
        img_name = source.replace("\\", "/").rsplit("/", 1)[-1]
        alt_text = html.escape(img_name, quote=True)
        img_tags.append(
            f'<img src="data:{mime};base64,{base64_image}" alt="{alt_text}">'
        )

    return "".join("\n" + tag for tag in img_tags)
105
+
106
def all_info(response):
    """Render every entry of *response* as a ``key: value`` line.

    Keys are wrapped in the ``GREEN``/``RESET`` escape sequences. Lines are
    joined with newlines; an empty mapping gives an empty string.
    """
    return "\n".join(
        f"{GREEN}{key}{RESET}: {value}" for key, value in response.items()
    )
109
+
110
def request_stream_chat(question: str, history):
    """Stream the query service's reply to *question* for the chat UI.

    This is a generator. While the service is streaming, it yields the reply
    accumulated so far after every chunk. When the service sends the chunk
    marked ``finished``, it yields one final message made of the service's
    ``msg``, the reference text, and any embedded source images, then stops.

    Args:
        question: The user's message. An empty value yields a greeting and
            sends no request.
        history: Accepted because the chat interface passes it; not used.

    Yields:
        Progressive reply strings, ending with the complete formatted reply,
        or an apology message when the service cannot be reached.
    """
    if not question:
        yield "Hello! What would you like to know?"
        return

    mode = MODES[CUSTOMER]
    payload = {
        "prompt": question,
        # EC19Jun2024: taken from the customer's mode (0.2 for every mode
        # currently defined) instead of repeating the literal here.
        "retrieval_temperature": mode['retrieval_temperature'],
        "query_mode_indx": mode['query_mode_indx'],
        "path": mode['path'],
        "stream": True,
        "LLM_type": "gpt",
    }

    reply_buffer = ""
    try:
        with requests.post(url=URL + "query", json=payload, stream=True) as r_stream:
            r_stream.raise_for_status()
            for raw_line in r_stream.iter_lines():
                if not raw_line:
                    continue  # blank lines between chunks carry no data
                chunk = json.loads(raw_line)

                if not chunk['finished']:
                    # Intermediate chunk: show everything received so far.
                    reply_buffer += chunk['reply']
                    yield reply_buffer
                    continue

                # Final chunk: it carries the full message and its sources.
                msg = chunk['msg']
                if payload['query_mode_indx'] == 5:
                    docs_text = "\n".join(
                        str(content)
                        for content in the_references(chunk, question)
                    )
                    response_str = msg + "\n\nSource documents:\n" + docs_text
                else:
                    # EC19Jun2024: 'reference' may be absent; fall back to an
                    # empty string so the concatenation cannot fail on None.
                    response_str = msg + "\n\n" + (chunk.get('reference') or "")

                source_docs = chunk['data']['source_docs']
                response_str += "\n" + get_images_from_source(source_docs)
                yield response_str
                break
    except requests.RequestException as exc:
        print(f"Error querying service: {exc}")
        yield "Sorry, the query service could not be reached. Please try again later."
160
+
161
+ # response=the_answer(response)+'\n' + str(the_references(response))
162
+ # return response
163
+
164
def my_generator(x):
    """Yield the integers from ``0`` up to, but not including, *x*."""
    yield from range(x)
167
+
168
if __name__ == "__main__":
    # Build the chat UI around the streaming query function and serve it.
    # The title and example questions come from the active customer's mode.
    gr.ChatInterface(
        request_stream_chat,
        examples=questions,
        chatbot=gr.Chatbot(height=450),
        textbox=gr.Textbox(placeholder="喺呢度問我問題.", container=False, scale=7),
        title=MODES[CUSTOMER]['name'],
        description="智能査詢",
        theme="soft",
        cache_examples=False,
        retry_btn=None,
        undo_btn="Delete Previous",
        clear_btn="Clear",
        fill_height=True,
    ).launch(share=True)  # share=True requests a public Gradio share link