Spaces:
Sleeping
Sleeping
Chananchida
committed on
Commit
•
7501763
1
Parent(s):
bf56fe9
Update app.py
Browse files
app.py
CHANGED
@@ -11,8 +11,6 @@ from transformers import AutoTokenizer, AutoModelForQuestionAnswering
|
|
11 |
from sentence_transformers import SentenceTransformer,util
|
12 |
from pythainlp import Tokenizer
|
13 |
import pickle
|
14 |
-
import evaluate
|
15 |
-
from sklearn.metrics.pairwise import cosine_similarity,euclidean_distances
|
16 |
import gradio as gr
|
17 |
|
18 |
print(torch.cuda.is_available())
|
@@ -148,14 +146,41 @@ class Chatbot:
|
|
148 |
question_vector = self.get_embeddings(message)
|
149 |
question_vector=self.prepare_sentences_vector([question_vector])
|
150 |
similar_questions, similar_contexts, distances,indices = self.faiss_search(question_vector)
|
151 |
-
Answer = self.model_pipeline(
|
152 |
-
|
153 |
-
|
154 |
-
|
155 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
156 |
"""#Gradio"""
|
157 |
-
|
158 |
-
|
159 |
-
|
160 |
-
|
161 |
-
|
|
|
|
|
|
|
|
|
|
11 |
from sentence_transformers import SentenceTransformer,util
|
12 |
from pythainlp import Tokenizer
|
13 |
import pickle
|
|
|
|
|
14 |
import gradio as gr
|
15 |
|
16 |
print(torch.cuda.is_available())
|
|
|
146 |
question_vector = self.get_embeddings(message)
|
147 |
question_vector=self.prepare_sentences_vector([question_vector])
|
148 |
similar_questions, similar_contexts, distances,indices = self.faiss_search(question_vector)
|
149 |
+
Answer = self.model_pipeline(message, similar_contexts)
|
150 |
+
start_index = similar_contexts.find(Answer)
|
151 |
+
end_index = start_index + len(Answer)
|
152 |
+
_time = time.time() - t
|
153 |
+
output = {
|
154 |
+
"user_question": message,
|
155 |
+
"answer": df['Answer'][indices[0][0]],
|
156 |
+
"totaltime": round(_time, 3),
|
157 |
+
"distance": round(distances[0][0], 4),
|
158 |
+
"highlight_start": start_index,
|
159 |
+
"highlight_end": end_index
|
160 |
+
}
|
161 |
+
return output
|
162 |
+
def highlight_text(text, start_index, end_index):
|
163 |
+
if start_index < 0:
|
164 |
+
start_index = 0
|
165 |
+
if end_index > len(text):
|
166 |
+
end_index = len(text)
|
167 |
+
highlighted_text = ""
|
168 |
+
for i, char in enumerate(text):
|
169 |
+
if i == start_index:
|
170 |
+
highlighted_text += "<mark>"
|
171 |
+
highlighted_text += char
|
172 |
+
if i == end_index - 1:
|
173 |
+
highlighted_text += "</mark>"
|
174 |
+
return highlighted_text
|
175 |
+
|
176 |
+
|
177 |
"""#Gradio"""
|
178 |
+
if __name__ == "__main__":
    # Build the chatbot once at startup; Gradio reuses it per request.
    bot = ChatbotModel()

    def chat_interface(question, history):
        """Answer one chat turn, wrapping the answer span in <mark> tags.

        NOTE(review): reaches into the private ``_chatbot`` attribute and
        relies on module-level ``model``/``tokenizer``/``embedding_model``/
        ``df``/``index`` globals — presumably defined earlier in app.py;
        confirm against the full file.
        """
        result = bot._chatbot.predict(model, tokenizer, embedding_model, df, question, index)
        return highlight_text(result["answer"], result["highlight_start"], result["highlight_end"])

    # EXAMPLE = ["หลิน ไห่เฟิง มีชื่อเรียกอีกชื่อว่าอะไร" , "ใครเป็นผู้ตั้งสภาเศรษฐกิจโลกขึ้นในปี พ.ศ. 2514 โดยทุกปีจะมีการประชุมที่ประเทศสวิตเซอร์แลนด์", "โปรดิวเซอร์ของอัลบั้มตลอดกาล ของวงคีรีบูนคือใคร", "สกุลเดิมของหม่อมครูนุ่ม นวรัตน ณ อยุธยา คืออะไร"]
    demo = gr.ChatInterface(fn=chat_interface, title="CE66-04_Thai Question Answering System by using Deep Learning")
    demo.launch()
|