update display and fix search only
Browse files
- app.py +25 -23
- climateqa/engine/chains/retrieve_documents.py +42 -31
- climateqa/engine/chains/retrieve_papers.py +2 -2
- climateqa/event_handler.py +2 -1
- climateqa/knowledge/openalex.py +3 -2
- front/utils.py +5 -2
app.py
CHANGED
@@ -46,7 +46,7 @@ from climateqa.engine.graph import make_graph_agent
|
|
46 |
from climateqa.engine.embeddings import get_embeddings_function
|
47 |
from climateqa.engine.chains.retrieve_papers import find_papers
|
48 |
|
49 |
-
from front.utils import serialize_docs,process_figures
|
50 |
|
51 |
from climateqa.event_handler import init_audience, handle_retrieved_documents, stream_answer,handle_retrieved_owid_graphs
|
52 |
|
@@ -409,7 +409,6 @@ with gr.Blocks(title="Climate Q&A", css_paths=os.getcwd()+ "/style.css", theme=t
|
|
409 |
|
410 |
with gr.Tab("Sources",elem_id = "tab-sources",id = 1) as tab_sources:
|
411 |
sources_textbox = gr.HTML(show_label=False, elem_id="sources-textbox")
|
412 |
-
docs_textbox = gr.State("")
|
413 |
|
414 |
|
415 |
|
@@ -439,7 +438,6 @@ with gr.Blocks(title="Climate Q&A", css_paths=os.getcwd()+ "/style.css", theme=t
|
|
439 |
# Fenêtre simulée pour les Relevant Papers
|
440 |
with gr.Accordion(visible=True, elem_id="papers-relevant-popup",label= "See relevant papers", open= False) as relevant_popup:
|
441 |
papers_html = gr.HTML(show_label=False, elem_id="papers-textbox")
|
442 |
-
docs_textbox = gr.State("")
|
443 |
|
444 |
btn_citations_network = gr.Button("Explore papers citations network")
|
445 |
# Fenêtre simulée pour le Citations Network
|
@@ -458,21 +456,15 @@ with gr.Blocks(title="Climate Q&A", css_paths=os.getcwd()+ "/style.css", theme=t
|
|
458 |
gr.Markdown("Reminders: You can talk in any language, ClimateQ&A is multi-lingual!")
|
459 |
|
460 |
|
461 |
-
with gr.Row():
|
462 |
|
463 |
-
|
464 |
-
|
465 |
-
|
466 |
-
|
467 |
-
|
468 |
-
|
469 |
-
|
470 |
-
["IPCC figures","OpenAlex", "OurWorldInData"],
|
471 |
-
label="Select database to search for relevant content",
|
472 |
-
value=["IPCC figures"],
|
473 |
-
interactive=True,
|
474 |
-
)
|
475 |
-
|
476 |
dropdown_reports = gr.Dropdown(
|
477 |
POSSIBLE_REPORTS,
|
478 |
label="Or select specific reports",
|
@@ -480,8 +472,15 @@ with gr.Blocks(title="Climate Q&A", css_paths=os.getcwd()+ "/style.css", theme=t
|
|
480 |
value=None,
|
481 |
interactive=True,
|
482 |
)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
483 |
|
484 |
-
search_only = gr.Checkbox(label="Search only without chating", value=False, interactive=True, elem_id="checkbox-chat")
|
485 |
|
486 |
|
487 |
dropdown_audience = gr.Dropdown(
|
@@ -501,7 +500,7 @@ with gr.Blocks(title="Climate Q&A", css_paths=os.getcwd()+ "/style.css", theme=t
|
|
501 |
|
502 |
dropdown_external_sources.change(lambda x: gr.update(visible = True ) if "OpenAlex" in x else gr.update(visible=False) , inputs=[dropdown_external_sources], outputs=[after])
|
503 |
|
504 |
-
close_config_modal = gr.Button("Close",elem_id="close-config-modal")
|
505 |
close_config_modal.click(fn=update_config_modal_visibility, inputs=[config_open], outputs=[config_modal, config_open])
|
506 |
# dropdown_external_sources.change(lambda x: gr.update(visible = True ) if "OpenAlex" in x else gr.update(visible=False) , inputs=[dropdown_external_sources], outputs=[after], visible=True)
|
507 |
|
@@ -589,9 +588,12 @@ with gr.Blocks(title="Climate Q&A", css_paths=os.getcwd()+ "/style.css", theme=t
|
|
589 |
|
590 |
|
591 |
|
592 |
-
def start_chat(query,history):
|
593 |
history = history + [ChatMessage(role="user", content=query)]
|
594 |
-
|
|
|
|
|
|
|
595 |
|
596 |
def finish_chat():
|
597 |
return gr.update(interactive = True,value = "")
|
@@ -630,14 +632,14 @@ with gr.Blocks(title="Climate Q&A", css_paths=os.getcwd()+ "/style.css", theme=t
|
|
630 |
return gr.update(label = recommended_content_notif_label), gr.update(label = sources_notif_label), gr.update(label = figures_notif_label), gr.update(label = graphs_notif_label), gr.update(label = papers_notif_label)
|
631 |
|
632 |
(textbox
|
633 |
-
.submit(start_chat, [textbox,chatbot], [textbox,tabs,chatbot],queue = False,api_name = "start_chat_textbox")
|
634 |
.then(chat, [textbox,chatbot,dropdown_audience, dropdown_sources,dropdown_reports, dropdown_external_sources, search_only] ,[chatbot,sources_textbox,output_query,output_language, sources_raw, current_graphs],concurrency_limit = 8,api_name = "chat_textbox")
|
635 |
.then(finish_chat, None, [textbox],api_name = "finish_chat_textbox")
|
636 |
# .then(update_sources_number_display, [sources_textbox, figures_cards, current_graphs,papers_html],[tab_sources, tab_figures, tab_graphs, tab_papers] )
|
637 |
)
|
638 |
|
639 |
(examples_hidden
|
640 |
-
.change(start_chat, [examples_hidden,chatbot], [textbox,tabs,chatbot],queue = False,api_name = "start_chat_examples")
|
641 |
.then(chat, [examples_hidden,chatbot,dropdown_audience, dropdown_sources,dropdown_reports, dropdown_external_sources, search_only] ,[chatbot,sources_textbox,output_query,output_language, sources_raw, current_graphs],concurrency_limit = 8,api_name = "chat_textbox")
|
642 |
.then(finish_chat, None, [textbox],api_name = "finish_chat_examples")
|
643 |
# .then(update_sources_number_display, [sources_textbox, figures_cards, current_graphs,papers_html],[tab_sources, tab_figures, tab_graphs, tab_papers] )
|
|
|
46 |
from climateqa.engine.embeddings import get_embeddings_function
|
47 |
from climateqa.engine.chains.retrieve_papers import find_papers
|
48 |
|
49 |
+
from front.utils import serialize_docs,process_figures
|
50 |
|
51 |
from climateqa.event_handler import init_audience, handle_retrieved_documents, stream_answer,handle_retrieved_owid_graphs
|
52 |
|
|
|
409 |
|
410 |
with gr.Tab("Sources",elem_id = "tab-sources",id = 1) as tab_sources:
|
411 |
sources_textbox = gr.HTML(show_label=False, elem_id="sources-textbox")
|
|
|
412 |
|
413 |
|
414 |
|
|
|
438 |
# Fenêtre simulée pour les Relevant Papers
|
439 |
with gr.Accordion(visible=True, elem_id="papers-relevant-popup",label= "See relevant papers", open= False) as relevant_popup:
|
440 |
papers_html = gr.HTML(show_label=False, elem_id="papers-textbox")
|
|
|
441 |
|
442 |
btn_citations_network = gr.Button("Explore papers citations network")
|
443 |
# Fenêtre simulée pour le Citations Network
|
|
|
456 |
gr.Markdown("Reminders: You can talk in any language, ClimateQ&A is multi-lingual!")
|
457 |
|
458 |
|
459 |
+
# with gr.Row():
|
460 |
|
461 |
+
dropdown_sources = gr.CheckboxGroup(
|
462 |
+
["IPCC", "IPBES","IPOS"],
|
463 |
+
label="Select source (by default search in all sources)",
|
464 |
+
value=["IPCC"],
|
465 |
+
interactive=True,
|
466 |
+
)
|
467 |
+
|
|
|
|
|
|
|
|
|
|
|
|
|
468 |
dropdown_reports = gr.Dropdown(
|
469 |
POSSIBLE_REPORTS,
|
470 |
label="Or select specific reports",
|
|
|
472 |
value=None,
|
473 |
interactive=True,
|
474 |
)
|
475 |
+
|
476 |
+
dropdown_external_sources = gr.CheckboxGroup(
|
477 |
+
["IPCC figures","OpenAlex", "OurWorldInData"],
|
478 |
+
label="Select database to search for relevant content",
|
479 |
+
value=["IPCC figures"],
|
480 |
+
interactive=True,
|
481 |
+
)
|
482 |
|
483 |
+
search_only = gr.Checkbox(label="Search only for recommended content without chating", value=False, interactive=True, elem_id="checkbox-chat")
|
484 |
|
485 |
|
486 |
dropdown_audience = gr.Dropdown(
|
|
|
500 |
|
501 |
dropdown_external_sources.change(lambda x: gr.update(visible = True ) if "OpenAlex" in x else gr.update(visible=False) , inputs=[dropdown_external_sources], outputs=[after])
|
502 |
|
503 |
+
close_config_modal = gr.Button("Validate and Close",elem_id="close-config-modal")
|
504 |
close_config_modal.click(fn=update_config_modal_visibility, inputs=[config_open], outputs=[config_modal, config_open])
|
505 |
# dropdown_external_sources.change(lambda x: gr.update(visible = True ) if "OpenAlex" in x else gr.update(visible=False) , inputs=[dropdown_external_sources], outputs=[after], visible=True)
|
506 |
|
|
|
588 |
|
589 |
|
590 |
|
591 |
+
def start_chat(query,history,search_only):
|
592 |
history = history + [ChatMessage(role="user", content=query)]
|
593 |
+
if search_only:
|
594 |
+
return (gr.update(interactive = False),gr.update(selected=1),history)
|
595 |
+
else:
|
596 |
+
return (gr.update(interactive = False),gr.update(selected=2),history)
|
597 |
|
598 |
def finish_chat():
|
599 |
return gr.update(interactive = True,value = "")
|
|
|
632 |
return gr.update(label = recommended_content_notif_label), gr.update(label = sources_notif_label), gr.update(label = figures_notif_label), gr.update(label = graphs_notif_label), gr.update(label = papers_notif_label)
|
633 |
|
634 |
(textbox
|
635 |
+
.submit(start_chat, [textbox,chatbot, search_only], [textbox,tabs,chatbot],queue = False,api_name = "start_chat_textbox")
|
636 |
.then(chat, [textbox,chatbot,dropdown_audience, dropdown_sources,dropdown_reports, dropdown_external_sources, search_only] ,[chatbot,sources_textbox,output_query,output_language, sources_raw, current_graphs],concurrency_limit = 8,api_name = "chat_textbox")
|
637 |
.then(finish_chat, None, [textbox],api_name = "finish_chat_textbox")
|
638 |
# .then(update_sources_number_display, [sources_textbox, figures_cards, current_graphs,papers_html],[tab_sources, tab_figures, tab_graphs, tab_papers] )
|
639 |
)
|
640 |
|
641 |
(examples_hidden
|
642 |
+
.change(start_chat, [examples_hidden,chatbot, search_only], [textbox,tabs,chatbot],queue = False,api_name = "start_chat_examples")
|
643 |
.then(chat, [examples_hidden,chatbot,dropdown_audience, dropdown_sources,dropdown_reports, dropdown_external_sources, search_only] ,[chatbot,sources_textbox,output_query,output_language, sources_raw, current_graphs],concurrency_limit = 8,api_name = "chat_textbox")
|
644 |
.then(finish_chat, None, [textbox],api_name = "finish_chat_examples")
|
645 |
# .then(update_sources_number_display, [sources_textbox, figures_cards, current_graphs,papers_html],[tab_sources, tab_figures, tab_graphs, tab_papers] )
|
climateqa/engine/chains/retrieve_documents.py
CHANGED
@@ -115,6 +115,7 @@ async def get_IPCC_relevant_documents(
|
|
115 |
k_images: int = 5,
|
116 |
namespace:str = "vectors",
|
117 |
min_size:int = 200,
|
|
|
118 |
) :
|
119 |
|
120 |
# Check if all elements in the list are either IPCC or IPBES
|
@@ -136,41 +137,49 @@ async def get_IPCC_relevant_documents(
|
|
136 |
docs_full = []
|
137 |
docs_images = []
|
138 |
|
139 |
-
|
140 |
-
|
141 |
-
|
142 |
-
|
143 |
-
|
144 |
-
|
145 |
-
|
146 |
-
|
147 |
-
|
148 |
-
|
149 |
-
|
150 |
-
|
151 |
-
|
152 |
-
**filters,
|
153 |
-
"chunk_type":"text",
|
154 |
-
"report_type": { "$nin":["SPM"]},
|
155 |
-
}
|
156 |
-
k_full = k_total - len(docs_summaries)
|
157 |
-
docs_full = vectorstore.similarity_search_with_score(query=query,filter = filters_full,k = k_full)
|
158 |
-
|
159 |
-
if search_figures:
|
160 |
-
# Images
|
161 |
-
filters_image = {
|
162 |
**filters,
|
163 |
-
"chunk_type":"image"
|
|
|
164 |
}
|
165 |
-
docs_images = vectorstore.similarity_search_with_score(query=query,filter = filters_image,k = k_images)
|
166 |
|
|
|
|
|
167 |
|
168 |
-
|
169 |
-
|
170 |
-
|
171 |
-
|
172 |
-
|
173 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
174 |
|
175 |
return {
|
176 |
"docs_summaries" : docs_summaries,
|
@@ -214,6 +223,7 @@ async def retrieve_documents(state,config, vectorstore,reranker,llm,rerank_by_qu
|
|
214 |
related_content = []
|
215 |
|
216 |
search_figures = "IPCC figures" in state["relevant_content_sources"]
|
|
|
217 |
|
218 |
# Get the current question
|
219 |
current_question = state["remaining_questions"][0]
|
@@ -242,6 +252,7 @@ async def retrieve_documents(state,config, vectorstore,reranker,llm,rerank_by_qu
|
|
242 |
k_total = k_before_reranking,
|
243 |
k_images = k_images_by_question,
|
244 |
threshold = 0.5,
|
|
|
245 |
)
|
246 |
|
247 |
|
|
|
115 |
k_images: int = 5,
|
116 |
namespace:str = "vectors",
|
117 |
min_size:int = 200,
|
118 |
+
search_only:bool = False,
|
119 |
) :
|
120 |
|
121 |
# Check if all elements in the list are either IPCC or IPBES
|
|
|
137 |
docs_full = []
|
138 |
docs_images = []
|
139 |
|
140 |
+
if search_only:
|
141 |
+
# Only search for images if search_only is True
|
142 |
+
if search_figures:
|
143 |
+
filters_image = {
|
144 |
+
**filters,
|
145 |
+
"chunk_type":"image"
|
146 |
+
}
|
147 |
+
docs_images = vectorstore.similarity_search_with_score(query=query,filter = filters_image,k = k_images)
|
148 |
+
docs_images = _add_metadata_and_score(docs_images)
|
149 |
+
else:
|
150 |
+
# Regular search flow for text and optionally images
|
151 |
+
# Search for k_summary documents in the summaries dataset
|
152 |
+
filters_summaries = {
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
153 |
**filters,
|
154 |
+
"chunk_type":"text",
|
155 |
+
"report_type": { "$in":["SPM"]},
|
156 |
}
|
|
|
157 |
|
158 |
+
docs_summaries = vectorstore.similarity_search_with_score(query=query,filter = filters_summaries,k = k_summary)
|
159 |
+
docs_summaries = [x for x in docs_summaries if x[1] > threshold]
|
160 |
|
161 |
+
# Search for k_total - k_summary documents in the full reports dataset
|
162 |
+
filters_full = {
|
163 |
+
**filters,
|
164 |
+
"chunk_type":"text",
|
165 |
+
"report_type": { "$nin":["SPM"]},
|
166 |
+
}
|
167 |
+
k_full = k_total - len(docs_summaries)
|
168 |
+
docs_full = vectorstore.similarity_search_with_score(query=query,filter = filters_full,k = k_full)
|
169 |
+
|
170 |
+
if search_figures:
|
171 |
+
# Images
|
172 |
+
filters_image = {
|
173 |
+
**filters,
|
174 |
+
"chunk_type":"image"
|
175 |
+
}
|
176 |
+
docs_images = vectorstore.similarity_search_with_score(query=query,filter = filters_image,k = k_images)
|
177 |
+
|
178 |
+
docs_summaries, docs_full, docs_images = _add_metadata_and_score(docs_summaries), _add_metadata_and_score(docs_full), _add_metadata_and_score(docs_images)
|
179 |
+
|
180 |
+
# Filter if length are below threshold
|
181 |
+
docs_summaries = [x for x in docs_summaries if len(x.page_content) > min_size]
|
182 |
+
docs_full = [x for x in docs_full if len(x.page_content) > min_size]
|
183 |
|
184 |
return {
|
185 |
"docs_summaries" : docs_summaries,
|
|
|
223 |
related_content = []
|
224 |
|
225 |
search_figures = "IPCC figures" in state["relevant_content_sources"]
|
226 |
+
search_only = state["search_only"]
|
227 |
|
228 |
# Get the current question
|
229 |
current_question = state["remaining_questions"][0]
|
|
|
252 |
k_total = k_before_reranking,
|
253 |
k_images = k_images_by_question,
|
254 |
threshold = 0.5,
|
255 |
+
search_only = search_only,
|
256 |
)
|
257 |
|
258 |
|
climateqa/engine/chains/retrieve_papers.py
CHANGED
@@ -2,7 +2,7 @@ from climateqa.engine.keywords import make_keywords_chain
|
|
2 |
from climateqa.engine.llm import get_llm
|
3 |
from climateqa.knowledge.openalex import OpenAlex
|
4 |
from climateqa.engine.chains.answer_rag import make_rag_papers_chain
|
5 |
-
from front.utils import make_html_df
|
6 |
from climateqa.engine.reranker import get_reranker
|
7 |
|
8 |
oa = OpenAlex()
|
@@ -47,7 +47,7 @@ async def find_papers(query,after, relevant_content_sources, reranker= reranker)
|
|
47 |
df_works = df_works.sort_values("rerank_score",ascending=False)
|
48 |
docs_html = []
|
49 |
for i in range(10):
|
50 |
-
docs_html.append(make_html_df(df_works, i))
|
51 |
docs_html = "".join(docs_html)
|
52 |
G = oa.make_network(df_works)
|
53 |
|
|
|
2 |
from climateqa.engine.llm import get_llm
|
3 |
from climateqa.knowledge.openalex import OpenAlex
|
4 |
from climateqa.engine.chains.answer_rag import make_rag_papers_chain
|
5 |
+
from front.utils import make_html_papers
|
6 |
from climateqa.engine.reranker import get_reranker
|
7 |
|
8 |
oa = OpenAlex()
|
|
|
47 |
df_works = df_works.sort_values("rerank_score",ascending=False)
|
48 |
docs_html = []
|
49 |
for i in range(10):
|
50 |
+
docs_html.append(make_html_papers(df_works, i))
|
51 |
docs_html = "".join(docs_html)
|
52 |
G = oa.make_network(df_works)
|
53 |
|
climateqa/event_handler.py
CHANGED
@@ -36,7 +36,8 @@ def handle_retrieved_documents(event: StreamEvent, history : list[ChatMessage],
|
|
36 |
docs_html.append(make_html_source(d, i))
|
37 |
|
38 |
used_documents = used_documents + [f"{d.metadata['short_name']} - {d.metadata['name']}" for d in docs]
|
39 |
-
|
|
|
40 |
|
41 |
docs_html = "".join(docs_html)
|
42 |
|
|
|
36 |
docs_html.append(make_html_source(d, i))
|
37 |
|
38 |
used_documents = used_documents + [f"{d.metadata['short_name']} - {d.metadata['name']}" for d in docs]
|
39 |
+
if used_documents!=[]:
|
40 |
+
history[-1].content = "Adding sources :\n\n - " + "\n - ".join(np.unique(used_documents))
|
41 |
|
42 |
docs_html = "".join(docs_html)
|
43 |
|
climateqa/knowledge/openalex.py
CHANGED
@@ -55,8 +55,9 @@ class OpenAlex():
|
|
55 |
df_works["num_tokens"] = df_works["content"].map(lambda x : num_tokens_from_string(x))
|
56 |
|
57 |
df_works = df_works.drop(columns = ["abstract_inverted_index"])
|
58 |
-
|
59 |
-
|
|
|
60 |
return df_works
|
61 |
else:
|
62 |
raise Exception("Keywords must be a string")
|
|
|
55 |
df_works["num_tokens"] = df_works["content"].map(lambda x : num_tokens_from_string(x))
|
56 |
|
57 |
df_works = df_works.drop(columns = ["abstract_inverted_index"])
|
58 |
+
df_works["display_name"] = df_works["primary_location"].apply(lambda x :x["source"] if type(x) == dict and 'source' in x else "").apply(lambda x : x["display_name"] if type(x) == dict and "display_name" in x else "")
|
59 |
+
df_works["subtitle"] = df_works["title"].astype(str) + " - " + df_works["display_name"].astype(str) + " - " + df_works["publication_year"].astype(str)
|
60 |
+
|
61 |
return df_works
|
62 |
else:
|
63 |
raise Exception("Keywords must be a string")
|
front/utils.py
CHANGED
@@ -228,11 +228,12 @@ def make_html_source(source,i):
|
|
228 |
return card
|
229 |
|
230 |
|
231 |
-
def make_html_df(df,i):
|
232 |
title = df['title'][i]
|
233 |
content = df['abstract'][i]
|
234 |
url = df['doi'][i]
|
235 |
publication_date = df['publication_year'][i]
|
|
|
236 |
|
237 |
card = f"""
|
238 |
<div class="card" id="doc{i}">
|
@@ -241,8 +242,10 @@ def make_html_df(df,i):
|
|
241 |
<p>{content}</p>
|
242 |
</div>
|
243 |
<div class="card-footer">
|
244 |
-
<span>{
|
245 |
<a href="{url}" target="_blank" class="pdf-link">
|
|
|
|
|
246 |
</div>
|
247 |
</div>
|
248 |
"""
|
|
|
228 |
return card
|
229 |
|
230 |
|
231 |
+
def make_html_papers(df,i):
|
232 |
title = df['title'][i]
|
233 |
content = df['abstract'][i]
|
234 |
url = df['doi'][i]
|
235 |
publication_date = df['publication_year'][i]
|
236 |
+
subtitle = df['subtitle'][i]
|
237 |
|
238 |
card = f"""
|
239 |
<div class="card" id="doc{i}">
|
|
|
242 |
<p>{content}</p>
|
243 |
</div>
|
244 |
<div class="card-footer">
|
245 |
+
<span>{subtitle}</span>
|
246 |
<a href="{url}" target="_blank" class="pdf-link">
|
247 |
+
<span role="img" aria-label="Open paper">🔗</span>
|
248 |
+
</a>
|
249 |
</div>
|
250 |
</div>
|
251 |
"""
|