lindsay-qu committed on
Commit
ea54126
·
verified ·
1 Parent(s): 62bb2b9

Update core/chatbot/retrieval_chatbot.py

Browse files
Files changed (1) hide show
  1. core/chatbot/retrieval_chatbot.py +39 -54
core/chatbot/retrieval_chatbot.py CHANGED
@@ -6,31 +6,9 @@ from models import BaseModel, GPT4Model
6
  from prompts import DecomposePrompt, QAPrompt, SummaryPrompt, ReferencePrompt
7
  import ast
8
  from utils.image_encoder import encode_image
 
 
9
 
10
- # QA_PROMPT = "\
11
- # You are a Question-Answering Chatbot. \
12
- # Given some references and a question, please answer the question according to the references. \
13
- # If you find the references insufficient, you can answer the question according to your own knowledge. \
14
- # ONLY output the answer. \
15
- # "
16
- # QUESTION_PROMPT = "\
17
- # You are a Question Refiner. \
18
- # Given a question, you need to break it down to several subquestions and output a list of string: [\"<subquestion1>\", \"<subquestion2>\", ...]. \
19
- # MAKE SURE there are no vague concepts in each subquestion that require reference to other subquestions, such as determiners, pronominal and so on. \
20
- # If the question cannot be broken down, you need to rephrase it in 3 ways and output a list of string: [\"<rephrase1>\", \"<rephrase2>\", \"<rephrase3>\"]. \
21
- # ONLY output the list of subquestions or rephrases. \
22
- # "
23
- # SUMMARY_PROMPT = "\
24
- # You are a Summary Refiner. \
25
- # Given a question and several answers to it, you need to organize and summarize the answers to form one coherent answer to the question. \
26
- # ONLY output the summarized answer. \
27
- # "
28
- # REFERENCE_PROMPT = "\
29
- # You are a Reference Refiner. \
30
- # Given paragraphs extract from a paper, you need to remove the unnecessary and messy symbols to make it more readable. \
31
- # But keep the original expression and sentences as much as possible. \
32
- # ONLY output the refined paragraphs. \
33
- # "
34
  class RetrievalChatbot(BaseChatbot):
35
  def __init__(self,
36
  model: BaseModel = None,
@@ -56,57 +34,64 @@ class RetrievalChatbot(BaseChatbot):
56
  self.summarizer = summarizer if summarizer \
57
  else SimpleRefiner(model=GPT4Model(), sys_prompt=SummaryPrompt.content)
58
 
59
- def response(self, message: str, image_path=None, return_logs=False) -> str:
 
60
  print("Query: {message}".format(message=message))
61
- question = self.decomposer.refine(message, None, image_path)
62
  print(question)
63
  question = question.replace('"', "'").replace("', '", "','").lstrip("['").rstrip("']")
64
  sub_questions = question.split("','")
65
  print("Decomposed your query into subquestions: {sub_questions}".format(sub_questions=sub_questions))
66
- references = ""
 
67
  for sub_question in sub_questions:
68
  print("="*20)
69
- print(f"Subquestion: {sub_question}")
70
-
71
  print(f"Retrieving pdf papers for references...\n")
72
- sub_retrieve_reference = references
73
- sub_retrieve = self.retriever.retrieve(sub_question)
74
- for ref in sub_retrieve:
75
- sub_retrieve_reference += "Related research: {ref}\n".format(ref=ref)
76
- # context = self.memory.messages + [{"role": "user", "content": "References: {references}\nQuestion: {question}".format(references=reference, question=sub_question)}]
77
- # sub_answer = self.model.respond(context)
78
- sub_answerer_context = "Sub Question References: {sub_retrieve_reference}\nQuestion: {question}\n".format(sub_retrieve_reference=sub_retrieve_reference, question=sub_question)
79
-
80
- print(sub_answerer_context)
81
- print(self.memory)
82
- print(image_path)
83
- sub_answer = self.answerer.refine(sub_answerer_context, self.memory, image_path)
84
-
85
- print(f"Subanswer: {sub_answer}")
86
-
87
- references += "Subquestion: {sub_question}\nSubanswer: {sub_answer}\n\n\n".format(sub_question=sub_question, sub_answer=sub_answer)
88
-
89
  refs = self.retriever.retrieve(message)
90
  for ref in refs:
91
  references += "Related research for the user query: {ref}\n".format(ref=ref)
92
 
93
  summarizer_context = "Question References: {references}\nQuestion: {message}\n".format(references=references, message=message)
94
- answer = self.summarizer.refine(summarizer_context, None, image_path)
95
-
 
96
  #todo 记忆管理
97
- if image_path is None:
98
  self.memory.append([{"role": "user", "content": [
99
  {"type": "text", "text": f"{message}"},
100
  ]}, {"role": "assistant", "content": answer}])
101
  else:
102
- self.memory.append([{"role": "user", "content": [
103
- {"type": "text", "text": f"{message}"},
104
- {"type": "image_url", "image_url": {"url": f"data:image/jpeg;base64,{encode_image(image_path)}"}},
105
- ]}, {"role": "assistant", "content": answer}])
 
 
106
  print("="*20)
107
  print(f"Final answer: {answer}".format(answer=answer))
 
 
 
 
108
 
109
  if return_logs:
110
  return answer, references
111
  else:
112
- return answer
 
 
 
 
 
 
 
 
 
 
 
6
  from prompts import DecomposePrompt, QAPrompt, SummaryPrompt, ReferencePrompt
7
  import ast
8
  from utils.image_encoder import encode_image
9
+ import asyncio
10
+ import time
11
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
12
  class RetrievalChatbot(BaseChatbot):
13
  def __init__(self,
14
  model: BaseModel = None,
 
34
  self.summarizer = summarizer if summarizer \
35
  else SimpleRefiner(model=GPT4Model(), sys_prompt=SummaryPrompt.content)
36
 
37
async def response(self, message: str, image_paths=None, return_logs=False) -> str:
    """Answer ``message`` by decomposing it, answering each part, and summarizing.

    Pipeline:
      1. ``self.decomposer`` turns the query into a list of sub-questions.
      2. Every sub-question is scheduled as its own asyncio task running
         ``self.subquestion_answerer``; the results are concatenated in
         sub-question order.
      3. References retrieved for the full query are appended and
         ``self.summarizer`` produces the final answer.
      4. The exchange (text plus any images) is appended to ``self.memory``.

    Args:
        message: The user's query.
        image_paths: Optional image, or list of images, accompanying the
            query. Each item may be a path string or an object exposing the
            path as ``.name`` (presumably an uploaded-file wrapper -- confirm
            against the caller).
        return_logs: When true, also return the reference text that was handed
            to the summarizer.

    Returns:
        The answer, or an ``(answer, references)`` tuple when ``return_logs``
        is true.
    """
    time1 = time.time()
    print("Query: {message}".format(message=message))
    question = self.decomposer.refine(message, None, image_paths)
    print(question)

    # The decomposer is expected to emit a list literal. Parse it as one first
    # so quotes, commas and brackets inside a sub-question survive intact.
    try:
        parsed = ast.literal_eval(question.strip())
    except (ValueError, SyntaxError, TypeError, MemoryError, RecursionError):
        parsed = None
    if isinstance(parsed, (list, tuple)) and all(isinstance(q, str) for q in parsed):
        sub_questions = list(parsed)
    else:
        # Fallback for output that is not a valid literal: the original
        # best-effort quote normalisation and split.
        flattened = question.replace('"', "'").replace("', '", "','").lstrip("['").rstrip("']")
        sub_questions = flattened.split("','")
    # Drop blank entries so no task is spent retrieving for an empty question.
    sub_questions = [q.strip() for q in sub_questions if q.strip()]
    print("Decomposed your query into subquestions: {sub_questions}".format(sub_questions=sub_questions))

    tasks = []
    time2 = time.time()
    for sub_question in sub_questions:
        print("=" * 20)
        print(f"Subquestion: {sub_question}")
        print("Retrieving pdf papers for references...\n")
        tasks.append(asyncio.create_task(self.subquestion_answerer(sub_question, image_paths)))
    # gather() returns results in task order, so the text stays deterministic.
    results = await asyncio.gather(*tasks)
    references = "".join(results)
    time3 = time.time()
    print("Sub references are ", references)

    refs = self.retriever.retrieve(message)
    references += "".join(
        "Related research for the user query: {ref}\n".format(ref=ref) for ref in refs
    )

    summarizer_context = "Question References: {references}\nQuestion: {message}\n".format(references=references, message=message)
    answer = self.summarizer.refine(summarizer_context, None, image_paths)
    time4 = time.time()

    # TODO: memory management
    memory_user = [{"type": "text", "text": f"{message}"}]
    if image_paths is not None:
        if not isinstance(image_paths, list):
            image_paths = [image_paths]
        for image_path in image_paths:
            # Plain string paths are used as-is; anything else is expected to
            # carry its path in ``.name`` as before.
            path = image_path if isinstance(image_path, str) else image_path.name
            memory_user.append({"type": "image_url", "image_url": {"url": f"data:image/jpeg;base64,{encode_image(path)}"}})
    self.memory.append([{"role": "user", "content": memory_user}, {"role": "assistant", "content": answer}])

    print("=" * 20)
    # Plain f-string only: re-running str.format over the interpolated text
    # raised whenever the answer itself contained '{' or '}'.
    print(f"Final answer: {answer}")

    print(f"Decompose: {time2-time1}")
    print(f"Answer Subquestions: {time3-time2}")
    # Note: this interval also covers the retrieval for the full query.
    print(f"Summarize: {time4-time3}")

    if return_logs:
        return answer, references
    else:
        return answer
88
+
89
+ async def subquestion_answerer(self, sub_question: str, image_paths=None, return_logs=False) -> str:
90
+ sub_retrieve_reference=""
91
+ sub_retrieve = self.retriever.retrieve(sub_question)
92
+ for ref in sub_retrieve:
93
+ sub_retrieve_reference += "Related research: {ref}\n".format(ref=ref)
94
+ sub_answerer_context = "Sub Question References: {sub_retrieve_reference}\nQuestion: {question}\n".format(sub_retrieve_reference=sub_retrieve_reference, question=sub_question)
95
+ sub_answer = self.answerer.refine(sub_answerer_context, self.memory, image_paths)
96
+ print(f"Subanswer: {sub_answer}")
97
+ return "Subquestion: {sub_question}\nSubanswer: {sub_answer}\n\n\n".format(sub_question=sub_question, sub_answer=sub_answer)