Update core/chatbot/retrieval_chatbot.py
Browse files
core/chatbot/retrieval_chatbot.py
CHANGED
@@ -6,31 +6,9 @@ from models import BaseModel, GPT4Model
|
|
6 |
from prompts import DecomposePrompt, QAPrompt, SummaryPrompt, ReferencePrompt
|
7 |
import ast
|
8 |
from utils.image_encoder import encode_image
|
|
|
|
|
9 |
|
10 |
-
# QA_PROMPT = "\
|
11 |
-
# You are a Question-Answering Chatbot. \
|
12 |
-
# Given some references and a question, please answer the question according to the references. \
|
13 |
-
# If you find the references insufficient, you can answer the question according to your own knowledge. \
|
14 |
-
# ONLY output the answer. \
|
15 |
-
# "
|
16 |
-
# QUESTION_PROMPT = "\
|
17 |
-
# You are a Question Refiner. \
|
18 |
-
# Given a question, you need to break it down to several subquestions and output a list of string: [\"<subquestion1>\", \"<subquestion2>\", ...]. \
|
19 |
-
# MAKE SURE there are no vague concepts in each subquestion that require reference to other subquestions, such as determiners, pronominal and so on. \
|
20 |
-
# If the question cannot be broken down, you need to rephrase it in 3 ways and output a list of string: [\"<rephrase1>\", \"<rephrase2>\", \"<rephrase3>\"]. \
|
21 |
-
# ONLY output the list of subquestions or rephrases. \
|
22 |
-
# "
|
23 |
-
# SUMMARY_PROMPT = "\
|
24 |
-
# You are a Summary Refiner. \
|
25 |
-
# Given a question and several answers to it, you need to organize and summarize the answers to form one coherent answer to the question. \
|
26 |
-
# ONLY output the summarized answer. \
|
27 |
-
# "
|
28 |
-
# REFERENCE_PROMPT = "\
|
29 |
-
# You are a Reference Refiner. \
|
30 |
-
# Given paragraphs extract from a paper, you need to remove the unnecessary and messy symbols to make it more readable. \
|
31 |
-
# But keep the original expression and sentences as much as possible. \
|
32 |
-
# ONLY output the refined paragraphs. \
|
33 |
-
# "
|
34 |
class RetrievalChatbot(BaseChatbot):
|
35 |
def __init__(self,
|
36 |
model: BaseModel = None,
|
@@ -56,57 +34,64 @@ class RetrievalChatbot(BaseChatbot):
|
|
56 |
self.summarizer = summarizer if summarizer \
|
57 |
else SimpleRefiner(model=GPT4Model(), sys_prompt=SummaryPrompt.content)
|
58 |
|
59 |
-
def response(self, message: str,
|
|
|
60 |
print("Query: {message}".format(message=message))
|
61 |
-
question = self.decomposer.refine(message, None,
|
62 |
print(question)
|
63 |
question = question.replace('"', "'").replace("', '", "','").lstrip("['").rstrip("']")
|
64 |
sub_questions = question.split("','")
|
65 |
print("Decomposed your query into subquestions: {sub_questions}".format(sub_questions=sub_questions))
|
66 |
-
|
|
|
67 |
for sub_question in sub_questions:
|
68 |
print("="*20)
|
69 |
-
print(f"Subquestion: {sub_question}")
|
70 |
-
|
71 |
print(f"Retrieving pdf papers for references...\n")
|
72 |
-
|
73 |
-
|
74 |
-
|
75 |
-
|
76 |
-
|
77 |
-
|
78 |
-
sub_answerer_context = "Sub Question References: {sub_retrieve_reference}\nQuestion: {question}\n".format(sub_retrieve_reference=sub_retrieve_reference, question=sub_question)
|
79 |
-
|
80 |
-
print(sub_answerer_context)
|
81 |
-
print(self.memory)
|
82 |
-
print(image_path)
|
83 |
-
sub_answer = self.answerer.refine(sub_answerer_context, self.memory, image_path)
|
84 |
-
|
85 |
-
print(f"Subanswer: {sub_answer}")
|
86 |
-
|
87 |
-
references += "Subquestion: {sub_question}\nSubanswer: {sub_answer}\n\n\n".format(sub_question=sub_question, sub_answer=sub_answer)
|
88 |
-
|
89 |
refs = self.retriever.retrieve(message)
|
90 |
for ref in refs:
|
91 |
references += "Related research for the user query: {ref}\n".format(ref=ref)
|
92 |
|
93 |
summarizer_context = "Question References: {references}\nQuestion: {message}\n".format(references=references, message=message)
|
94 |
-
answer = self.summarizer.refine(summarizer_context, None,
|
95 |
-
|
|
|
96 |
#todo 记忆管理
|
97 |
-
if
|
98 |
self.memory.append([{"role": "user", "content": [
|
99 |
{"type": "text", "text": f"{message}"},
|
100 |
]}, {"role": "assistant", "content": answer}])
|
101 |
else:
|
102 |
-
|
103 |
-
|
104 |
-
|
105 |
-
|
|
|
|
|
106 |
print("="*20)
|
107 |
print(f"Final answer: {answer}".format(answer=answer))
|
|
|
|
|
|
|
|
|
108 |
|
109 |
if return_logs:
|
110 |
return answer, references
|
111 |
else:
|
112 |
-
return answer
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
6 |
from prompts import DecomposePrompt, QAPrompt, SummaryPrompt, ReferencePrompt
|
7 |
import ast
|
8 |
from utils.image_encoder import encode_image
|
9 |
+
import asyncio
|
10 |
+
import time
|
11 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
12 |
class RetrievalChatbot(BaseChatbot):
|
13 |
def __init__(self,
|
14 |
model: BaseModel = None,
|
|
|
34 |
self.summarizer = summarizer if summarizer \
|
35 |
else SimpleRefiner(model=GPT4Model(), sys_prompt=SummaryPrompt.content)
|
36 |
|
37 |
+
async def response(self, message: str, image_paths=None, return_logs=False) -> "str | tuple[str, str]":
    """Answer ``message`` by decomposing it, answering the parts and summarizing.

    Steps, in order:
      1. ``self.decomposer.refine(message, None, image_paths)`` produces a
         textual list of sub-questions (or rephrasings).
      2. Each sub-question is passed to ``self.subquestion_answerer`` and the
         returned strings are concatenated in sub-question order.
      3. ``self.retriever.retrieve(message)`` results are appended to those
         references.
      4. ``self.summarizer.refine(...)`` produces the final answer.
      5. The (user, assistant) exchange is appended to ``self.memory``.

    Args:
        message: The user query.
        image_paths: ``None``, a single object, or a list of objects whose
            ``.name`` attribute is passed to ``encode_image`` when the
            exchange is stored in memory.
        return_logs: When true, also return the accumulated reference text.

    Returns:
        The answer, or ``(answer, references)`` when ``return_logs`` is true.
    """
    # The refine/retrieve calls are ordinary blocking functions; running them
    # on the default executor keeps the event loop responsive while they work.
    loop = asyncio.get_running_loop()

    time1 = time.time()
    print("Query: {message}".format(message=message))
    question = await loop.run_in_executor(
        None, self.decomposer.refine, message, None, image_paths
    )
    print(question)

    # Prefer a real literal parse: the strip()-based parsing removes *sets* of
    # characters and therefore eats quotes/brackets that belong to the
    # question text itself (e.g. a sub-question ending in a quoted term).
    try:
        parsed = ast.literal_eval(question.strip())
    except (ValueError, SyntaxError, TypeError, MemoryError, RecursionError):
        parsed = None
    if isinstance(parsed, (list, tuple)) and all(isinstance(q, str) for q in parsed):
        candidates = list(parsed)
    else:
        # Fallback for output that is not a valid Python list literal.
        flattened = question.replace('"', "'").replace("', '", "','")
        candidates = flattened.lstrip("['").rstrip("']").split("','")
    # Never fan out on empty sub-questions; fall back to the query itself.
    sub_questions = [q for q in candidates if q.strip()] or [message]
    print("Decomposed your query into subquestions: {sub_questions}".format(sub_questions=sub_questions))

    tasks = []
    time2 = time.time()
    for sub_question in sub_questions:
        print("=" * 20)
        print(f"Subquestion: {sub_question}")
        print("Retrieving pdf papers for references...\n")
        tasks.append(asyncio.ensure_future(self.subquestion_answerer(sub_question, image_paths)))
    # gather() preserves the order of ``tasks``, so the references stay in
    # sub-question order regardless of which task finishes first.
    results = await asyncio.gather(*tasks)
    references = "".join(results)
    time3 = time.time()
    print("Sub references are ", references)

    refs = await loop.run_in_executor(None, self.retriever.retrieve, message)
    references += "".join(
        "Related research for the user query: {ref}\n".format(ref=ref) for ref in refs
    )

    summarizer_context = "Question References: {references}\nQuestion: {message}\n".format(
        references=references, message=message
    )
    answer = await loop.run_in_executor(
        None, self.summarizer.refine, summarizer_context, None, image_paths
    )
    time4 = time.time()

    # TODO: memory management
    memory_user = [{"type": "text", "text": f"{message}"}]
    if image_paths is not None:
        if not isinstance(image_paths, list):
            image_paths = [image_paths]
        for image_path in image_paths:
            memory_user.append({
                "type": "image_url",
                "image_url": {"url": f"data:image/jpeg;base64,{encode_image(image_path.name)}"},
            })
    self.memory.append([
        {"role": "user", "content": memory_user},
        {"role": "assistant", "content": answer},
    ])

    print("=" * 20)
    # Plain f-string only: chaining .format() onto it re-parsed the answer as
    # a template and raised on any answer containing '{' or '}'.
    print(f"Final answer: {answer}")

    print(f"Decompose: {time2-time1}")
    print(f"Answer Subquestions: {time3-time2}")
    print(f"Summarize: {time4-time3}")

    if return_logs:
        return answer, references
    return answer
|
88 |
+
|
89 |
+
async def subquestion_answerer(self, sub_question: str, image_paths=None, return_logs=False) -> str:
    """Retrieve references for one sub-question and answer it.

    The blocking ``self.retriever.retrieve`` and ``self.answerer.refine``
    calls are awaited on the event loop's default executor. Without an
    ``await`` inside this coroutine, tasks created from it run one after
    another and scheduling several of them gains nothing.

    NOTE(review): this assumes the retriever and answerer tolerate being
    called from several worker threads at once - confirm for the concrete
    implementations in use.

    Args:
        sub_question: The sub-question to answer.
        image_paths: Forwarded unchanged to ``self.answerer.refine``.
        return_logs: Accepted but not used by this method.

    Returns:
        A ``"Subquestion: ...\\nSubanswer: ...\\n\\n\\n"`` formatted string.
    """
    loop = asyncio.get_running_loop()

    sub_retrieve = await loop.run_in_executor(None, self.retriever.retrieve, sub_question)
    sub_retrieve_reference = "".join(
        "Related research: {ref}\n".format(ref=ref) for ref in sub_retrieve
    )
    sub_answerer_context = (
        "Sub Question References: {sub_retrieve_reference}\nQuestion: {question}\n".format(
            sub_retrieve_reference=sub_retrieve_reference, question=sub_question
        )
    )

    # self.memory is only read here; it is passed through to the answerer.
    sub_answer = await loop.run_in_executor(
        None, self.answerer.refine, sub_answerer_context, self.memory, image_paths
    )
    print(f"Subanswer: {sub_answer}")
    return "Subquestion: {sub_question}\nSubanswer: {sub_answer}\n\n\n".format(
        sub_question=sub_question, sub_answer=sub_answer
    )
|