Corrected bugs causing errors in async mode
- app.py +73 -31
- climateqa/engine/embeddings.py +3 -3
- climateqa/engine/llm.py +1 -1
- climateqa/engine/prompts.py +0 -1
- climateqa/engine/rag.py +7 -6
- climateqa/engine/reformulation.py +20 -6
- climateqa/engine/retriever.py +6 -4
- climateqa/engine/utils.py +2 -0
- climateqa/engine/vectorstore.py +2 -1
app.py
CHANGED
@@ -104,7 +104,7 @@ def serialize_docs(docs):
     return new_docs
 
 
-async def chat(query,history,audience,sources,reports):
+def chat(query,history,audience,sources,reports):
     """taking a query and a message history, use a pipeline (reformulation, retriever, answering) to yield a tuple of:
     (messages in gradio format, messages in langchain format, source documents)"""
 
@@ -144,62 +144,102 @@ async def chat(query,history,audience,sources,reports):
     # memory.chat_memory.add_message(message)
 
     inputs = {"query": query,"audience": audience_prompt}
-    result = rag_chain.astream_log(inputs)
+    # result = rag_chain.astream_log(inputs)
+    result = rag_chain.stream(inputs)
 
     reformulated_question_path_id = "/logs/flatten_dict/final_output"
     retriever_path_id = "/logs/Retriever/final_output"
     streaming_output_path_id = "/logs/AzureChatOpenAI:2/streamed_output_str/-"
     final_output_path_id = "/streamed_output/-"
 
-    docs_html = ""
+    docs_html = "No sources found for this question"
     output_query = ""
     output_language = ""
     gallery = []
 
-    async for op in result:
-
-        op = op.ops[0]
-
-        if op['path'] == reformulated_question_path_id: # reforulated question
-            output_language = op['value']["language"] # str
-            output_query = op["value"]["question"]
-
-        elif op['path'] == retriever_path_id: # documents
+    for output in result:
+
+        if "language" in output:
+            output_language = output["language"]
+        if "question" in output:
+            output_query = output["question"]
+        if "docs" in output:
             try:
-                docs = op['value']['documents'] # List[Document]
+                docs = output['docs'] # List[Document]
                 docs_html = []
                 for i, d in enumerate(docs, 1):
                     docs_html.append(make_html_source(d, i))
                 docs_html = "".join(docs_html)
             except TypeError:
                 print("No documents found")
-                print("op: ",op)
                 continue
 
-        elif op['path'] == streaming_output_path_id: # final answer
-            new_token = op['value'] # str
+        if "answer" in output:
+            new_token = output["answer"] # str
             time.sleep(0.03)
             answer_yet = history[-1][1] + new_token
            answer_yet = parse_output_llm_with_sources(answer_yet)
             history[-1] = (query,answer_yet)
 
-        # elif op['path'] == final_output_path_id:
-        #     final_output = op['value']
-        #     if "answer" in final_output:
-        #         final_output = final_output["answer"]
-        #         print(final_output)
-        #         answer = history[-1][1] + final_output
-        #         answer = parse_output_llm_with_sources(answer)
-        #         history[-1] = (query,answer)
-
-        else:
-            continue
-
-        history = [tuple(x) for x in history]
-        yield history,docs_html,output_query,output_language,gallery
+        yield history,docs_html,output_query,output_language,gallery
+
+    # async def fallback_iterator(iterable):
+    #     async for item in iterable:
+    #         try:
+    #             yield item
+    #         except Exception as e:
+    #             print(f"Error in fallback iterator: {e}")
+    #             raise gr.Error(f"ClimateQ&A Error: {e}\nThe error has been noted, try another question and if the error remains, you can contact us :)")
+
+    # async for op in fallback_iterator(result):
+
+    #     op = op.ops[0]
+    #     print("yo",op)
+
+    #     if op['path'] == reformulated_question_path_id: # reforulated question
+    #         output_language = op['value']["language"] # str
+    #         output_query = op["value"]["question"]
+
+    #     elif op['path'] == retriever_path_id: # documents
+    #         try:
+    #             docs = op['value']['documents'] # List[Document]
+    #             docs_html = []
+    #             for i, d in enumerate(docs, 1):
+    #                 docs_html.append(make_html_source(d, i))
+    #             docs_html = "".join(docs_html)
+    #         except TypeError:
+    #             print("No documents found")
+    #             print("op: ",op)
+    #             continue
+
+    #     elif op['path'] == streaming_output_path_id: # final answer
+    #         new_token = op['value'] # str
+    #         time.sleep(0.03)
+    #         answer_yet = history[-1][1] + new_token
+    #         answer_yet = parse_output_llm_with_sources(answer_yet)
+    #         history[-1] = (query,answer_yet)
+
+    #     # elif op['path'] == final_output_path_id:
+    #     #     final_output = op['value']
+    #     #     if "answer" in final_output:
+    #     #         final_output = final_output["answer"]
+    #     #         print(final_output)
+    #     #         answer = history[-1][1] + final_output
+    #     #         answer = parse_output_llm_with_sources(answer)
+    #     #         history[-1] = (query,answer)
+
+    #     else:
+    #         continue
+
+    #     history = [tuple(x) for x in history]
+    #     yield history,docs_html,output_query,output_language,gallery
 
     # Log answer on Azure Blob Storage
     if os.getenv("GRADIO_ENV") != "local":
@@ -295,12 +335,12 @@ def log_on_azure(file, logs, share_client):
 init_prompt = """
 Hello, I am ClimateQ&A, a conversational assistant designed to help you understand climate change and biodiversity loss. I will answer your questions by **sifting through the IPCC and IPBES scientific reports**.
 
-How to use
+❓ How to use
 - **Language**: You can ask me your questions in any language.
 - **Audience**: You can specify your audience (children, general public, experts) to get a more adapted answer.
 - **Sources**: You can choose to search in the IPCC or IPBES reports, or both.
 
-Limitations
+⚠️ Limitations
 *Please note that the AI is not perfect and may sometimes give irrelevant answers. If you are not satisfied with the answer, please ask a more specific question or report your feedback to help us improve the system.*
 
 What do you want to learn ?
@@ -326,7 +366,7 @@ with gr.Blocks(title="Climate Q&A", css="style.css", theme=theme,elem_id = "main
     chatbot = gr.Chatbot(
         value=[(None,init_prompt)],
         show_copy_button=True,show_label = False,elem_id="chatbot",layout = "panel",
-        avatar_images = ("https://i.ibb.co/YNyd5W2/logo4.png"
+        avatar_images = (None,"https://i.ibb.co/YNyd5W2/logo4.png"),
     )#,avatar_images = ("assets/logo4.png",None))
 
     # bot.like(vote,None,None)
@@ -408,6 +448,8 @@ with gr.Blocks(title="Climate Q&A", css="style.css", theme=theme,elem_id = "main
 
     def start_chat(query,history):
         history = history + [(query,"")]
+        history = [tuple(x) for x in history]
+        print(history)
         return (gr.update(interactive = False),gr.update(selected=1),history)
 
     def finish_chat():
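Note on the fix above: the async-mode crash came from consuming rag_chain.astream_log(...), which yields JSONPatch-style ops addressed by path strings (hence op.ops[0], op['path'], op['value']) and must be driven with "async for". The rewritten chat() switches to the synchronous stream(...) API, whose chunks are plain dicts. A minimal sketch of the new consumption pattern, with an invented stub generator standing in for the real chain:

    def fake_stream():
        # Stand-in for rag_chain.stream(inputs): yields partial result dicts,
        # one key at a time, then the answer token by token.
        yield {"language": "english"}
        yield {"question": "What is climate change?"}
        yield {"docs": ["<doc 1>", "<doc 2>"]}
        for token in ["Climate ", "change ", "is ", "..."]:
            yield {"answer": token}

    answer = ""
    for output in fake_stream():
        if "answer" in output:
            answer += output["answer"]  # accumulate streamed tokens, as chat() does
    print(answer)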
climateqa/engine/embeddings.py
CHANGED
@@ -1,6 +1,6 @@
 
-from …
-from …
+from langchain_community.embeddings import HuggingFaceBgeEmbeddings
+from langchain_community.embeddings import HuggingFaceEmbeddings
 
 def get_embeddings_function(version = "v1.2"):
 
@@ -22,4 +22,4 @@ def get_embeddings_function(version = "v1.2"):
 
         embeddings_function = HuggingFaceEmbeddings(model_name = "sentence-transformers/multi-qa-mpnet-base-dot-v1")
 
-    return embeddings_function
+    return embeddings_function
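Note: this file and the ones below are the langchain 0.1 package split, with community integrations moving to langchain_community and core abstractions to langchain_core (the removed import lines are truncated in the extracted view and shown as "from …"). A hypothetical smoke test for the moved import, not part of the commit, assuming langchain_community and sentence-transformers are installed:

    from langchain_community.embeddings import HuggingFaceEmbeddings

    emb = HuggingFaceEmbeddings(model_name="sentence-transformers/multi-qa-mpnet-base-dot-v1")
    vector = emb.embed_query("test sentence")  # returns a List[float]
    print(len(vector))  # embedding dimensionality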
climateqa/engine/llm.py
CHANGED
@@ -1,4 +1,4 @@
-from …
+from langchain_community.chat_models import AzureChatOpenAI
 import os
 # LOAD ENVIRONMENT VARIABLES
 try:
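Note: same migration for the chat model. A hedged construction sketch, not this repo's configuration (the deployment name and API version below are placeholders, and the class also expects the usual Azure OpenAI credentials in the environment):

    from langchain_community.chat_models import AzureChatOpenAI

    llm = AzureChatOpenAI(
        deployment_name="my-deployment",  # placeholder Azure deployment name
        openai_api_version="2023-05-15",  # placeholder API version
        streaming=True,                   # enables the token-by-token output chat() consumes
    )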
climateqa/engine/prompts.py
CHANGED
@@ -63,7 +63,6 @@ Answer in {language} with the passages citations:
 answer_prompt_without_docs_template = """
 You are ClimateQ&A, an AI Assistant created by Ekimetrics. Your role is to explain climate-related questions using info from the IPCC and/or IPBES reports.
 Always stay true to climate science and do not make up information. If you do not know the answer, just say you do not know.
-If the …
 
 Guidelines:
 - Start by explaining clearly that you could not find the answer in the IPCC/IPBES reports, so your answer is based on your own knowledge and must be taken with great caution because it's AI generated.
climateqa/engine/rag.py
CHANGED
@@ -1,15 +1,16 @@
 from operator import itemgetter
 
-from …
-from …
-from …
-from …
-from …
+from langchain_core.prompts import ChatPromptTemplate
+from langchain_core.output_parsers import StrOutputParser
+from langchain_core.runnables import RunnablePassthrough, RunnableLambda, RunnableBranch
+from langchain_core.prompts.prompt import PromptTemplate
+from langchain_core.prompts.base import format_document
 
 from climateqa.engine.reformulation import make_reformulation_chain
 from climateqa.engine.prompts import answer_prompt_template,answer_prompt_without_docs_template
 from climateqa.engine.utils import pass_values, flatten_dict
 
+
 DEFAULT_DOCUMENT_PROMPT = PromptTemplate.from_template(template="{page_content}")
 
 def _combine_documents(
@@ -72,7 +73,7 @@ def make_rag_chain(retriever,llm):
 
     # ------- FINAL CHAIN
     # Build the final chain
-    rag_chain = reformulation | find_documents | …
+    rag_chain = reformulation | find_documents | answer
 
     return rag_chain
 
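Note: the last hunk pins the final chain to reformulation | find_documents | answer. A toy LCEL composition of the same shape, assuming each stage maps a dict to an enriched dict (stage bodies here are invented stand-ins, not the real chain):

    from langchain_core.runnables import RunnableLambda

    reformulation = RunnableLambda(lambda x: {**x, "question": x["query"].rstrip("?") + "?"})
    find_documents = RunnableLambda(lambda x: {**x, "docs": ["<retrieved doc>"]})
    answer = RunnableLambda(lambda x: {**x, "answer": f"Based on {len(x['docs'])} doc(s)..."})

    rag_chain = reformulation | find_documents | answer
    print(rag_chain.invoke({"query": "what is the IPCC"}))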
climateqa/engine/reformulation.py
CHANGED
@@ -1,11 +1,10 @@
 
-from langchain.output_parsers import StructuredOutputParser, ResponseSchema
-from …
-from …
-from langchain.chat_models import ChatOpenAI
+from langchain.output_parsers.structured import StructuredOutputParser, ResponseSchema
+from langchain_core.prompts import PromptTemplate
+from langchain_core.runnables import RunnablePassthrough, RunnableLambda, RunnableBranch
 
 from climateqa.engine.prompts import reformulation_prompt_template
-
+from climateqa.engine.utils import pass_values, flatten_dict
 
 
 response_schemas = [
@@ -15,6 +14,12 @@ response_schemas = [
 output_parser = StructuredOutputParser.from_response_schemas(response_schemas)
 format_instructions = output_parser.get_format_instructions()
 
+def fallback_default_values(x):
+    if x["question"] is None:
+        x["question"] = x["query"]
+        x["language"] = "english"
+
+    return x
 
 def make_reformulation_chain(llm):
 
@@ -25,4 +30,13 @@ def make_reformulation_chain(llm):
     )
 
     chain = (prompt | llm.bind(stop=["```"]) | output_parser)
-    …
+
+    reformulation_chain = (
+        {"reformulation":chain,**pass_values(["query"])}
+        | RunnablePassthrough()
+        | flatten_dict
+        | fallback_default_values
+    )
+
+
+    return reformulation_chain
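Note: the new fallback_default_values step guards against the structured parser returning no reformulated question, one of the failure modes this commit addresses. Its behavior on a made-up payload:

    def fallback_default_values(x):
        # Copied from the hunk above: default to the raw query and English
        # when reformulation produced nothing.
        if x["question"] is None:
            x["question"] = x["query"]
            x["language"] = "english"

        return x

    print(fallback_default_values({"query": "c'est quoi le GIEC ?", "question": None, "language": None}))
    # {'query': "c'est quoi le GIEC ?", 'question': "c'est quoi le GIEC ?", 'language': 'english'}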
climateqa/engine/retriever.py
CHANGED
@@ -2,10 +2,12 @@
 
 import pandas as pd
 
-from …
-from …
-from …
-from …
+from langchain_core.retrievers import BaseRetriever
+from langchain_core.vectorstores import VectorStoreRetriever
+from langchain_core.documents.base import Document
+from langchain_core.vectorstores import VectorStore
+from langchain_core.callbacks.manager import CallbackManagerForRetrieverRun
+
 from typing import List
 from pydantic import Field
 
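Note: CallbackManagerForRetrieverRun is the run_manager type a custom retriever receives when subclassing BaseRetriever, which is presumably why it is imported here. A minimal sketch of that pattern (class name and fields are invented, not this repo's retriever):

    from typing import List
    from langchain_core.retrievers import BaseRetriever
    from langchain_core.documents.base import Document
    from langchain_core.vectorstores import VectorStore
    from langchain_core.callbacks.manager import CallbackManagerForRetrieverRun

    class ReportRetriever(BaseRetriever):
        vectorstore: VectorStore  # pydantic field; BaseRetriever is a pydantic model
        k: int = 4

        def _get_relevant_documents(
            self, query: str, *, run_manager: CallbackManagerForRetrieverRun
        ) -> List[Document]:
            # Delegate to the underlying vector store's similarity search.
            return self.vectorstore.similarity_search(query, k=self.k)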
climateqa/engine/utils.py
CHANGED
@@ -48,3 +48,5 @@ def flatten_dict(
     """
     flat_dict = {k: v for k, v in _flatten_dict(nested_dict, parent_key, sep)}
     return flat_dict
+
+
climateqa/engine/vectorstore.py
CHANGED
@@ -3,7 +3,7 @@
 # And https://python.langchain.com/docs/integrations/vectorstores/pinecone
 import os
 import pinecone
-from …
+from langchain_community.vectorstores import Pinecone
 
 # LOAD ENVIRONMENT VARIABLES
 try:
@@ -23,6 +23,7 @@ def get_pinecone_vectorstore(embeddings,text_key = "text"):
 
     index_name = os.getenv("PINECONE_API_INDEX")
     vectorstore = Pinecone.from_existing_index(index_name, embeddings,text_key = text_key)
+
     return vectorstore
 
 
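Note: only the import moved; the from_existing_index call is unchanged. A hedged usage sketch with placeholder names (assumes the Pinecone client is initialised earlier, as this module does):

    from langchain_community.vectorstores import Pinecone

    # embeddings_function = get_embeddings_function()  # from climateqa.engine.embeddings
    # vectorstore = Pinecone.from_existing_index("my-index", embeddings_function, text_key="text")
    # retriever = vectorstore.as_retriever(search_kwargs={"k": 4})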