Spaces:

Ekimetrics
/

climate-question-answering

Running

App Files Files Community

timeki committed on Nov 19, 2024

Commit

c3b815e

1 Parent(s): 14a5a97

Add OpenAlex papers recommendation

Browse files

Files changed (4) hide show

app.py +136 -34
climateqa/engine/chains/answer_rag.py +19 -17
climateqa/knowledge/openalex.py +3 -4
front/utils.py +22 -0

app.py CHANGED Viewed

@@ -8,6 +8,7 @@ from sentence_transformers import CrossEncoder
 oa = OpenAlex()
 import gradio as gr
 import pandas as pd
 import numpy as np
 import os
@@ -42,10 +43,12 @@ from climateqa.sample_questions import QUESTIONS
 from climateqa.constants import POSSIBLE_REPORTS
 from climateqa.utils import get_image_from_azure_blob_storage
 from climateqa.engine.keywords import make_keywords_chain
-# from climateqa.engine.chains.answer_rag import make_rag_papers_chain
 from climateqa.engine.graph import make_graph_agent,display_graph
-from front.utils import make_html_source, make_html_figure_sources,parse_output_llm_with_sources,serialize_docs,make_toolbox
 # Load environment variables in local mode
 try:
@@ -106,7 +109,7 @@ CITATION_TEXT = r"""@misc{climateqa,
 # Create vectorstore and retriever
 vectorstore = get_pinecone_vectorstore(embeddings_function)
 llm = get_llm(provider="openai",max_tokens = 1024,temperature = 0.0)
-reranker = get_reranker("large")
 agent = make_graph_agent(llm,vectorstore,reranker)
@@ -326,13 +329,11 @@ def generate_keywords(query):
 papers_cols_widths = {
-    "doc":50,
     "id":100,
     "title":300,
     "doi":100,
     "publication_year":100,
     "abstract":500,
-    "rerank_score":100,
     "is_oa":50,
 }
@@ -340,6 +341,62 @@ papers_cols = list(papers_cols_widths.keys())
 papers_cols_widths = list(papers_cols_widths.values())
 # --------------------------------------------------------------------
 # Gradio
 # --------------------------------------------------------------------
@@ -429,7 +486,7 @@ with gr.Blocks(title="Climate Q&A", css_paths=os.getcwd()+ "/style.css", theme=t
                             samples.append(group_examples)
-                    with gr.Tab("Sources",elem_id = "tab-citations",id = 1):
                         sources_textbox = gr.HTML(show_label=False, elem_id="sources-textbox")
                         docs_textbox = gr.State("")
@@ -437,36 +494,29 @@ with gr.Blocks(title="Climate Q&A", css_paths=os.getcwd()+ "/style.css", theme=t
                     # with Modal(visible = False) as config_modal:
-                    with gr.Tab("Configuration",elem_id = "tab-config",id = 2):
-                        gr.Markdown("Reminder: You can talk in any language, ClimateQ&A is multi-lingual!")
-                        dropdown_sources = gr.CheckboxGroup(
-                            ["IPCC", "IPBES","IPOS"],
-                            label="Select source",
-                            value=["IPCC"],
-                            interactive=True,
-                        )
-                        dropdown_reports = gr.Dropdown(
-                            POSSIBLE_REPORTS,
-                            label="Or select specific reports",
-                            multiselect=True,
-                            value=None,
-                            interactive=True,
-                        )
-                        dropdown_audience = gr.Dropdown(
-                            ["Children","General public","Experts"],
-                            label="Select audience",
-                            value="Experts",
-                            interactive=True,
-                        )
-                        output_query = gr.Textbox(label="Query used for retrieval",show_label = True,elem_id = "reformulated-query",lines = 2,interactive = False)
-                        output_language = gr.Textbox(label="Language",show_label = True,elem_id = "language",lines = 1,interactive = False)
                     with gr.Tab("Figures",elem_id = "tab-figures",id = 3):
                         with Modal(visible=False, elem_id="modal_figure_galery") as modal:
@@ -490,6 +540,38 @@ with gr.Blocks(title="Climate Q&A", css_paths=os.getcwd()+ "/style.css", theme=t
     # with gr.Tab("Figures",elem_id = "tab-images",elem_classes = "max-height other-tabs"):
     #     gallery_component = gr.Gallery(object_fit='cover')
     # with gr.Tab("Papers (beta)",elem_id = "tab-papers",elem_classes = "max-height other-tabs"):
     #     with gr.Row():
@@ -546,6 +628,21 @@ with gr.Blocks(title="Climate Q&A", css_paths=os.getcwd()+ "/style.css", theme=t
     def finish_chat():
         return (gr.update(interactive = True,value = ""))
     (textbox
         .submit(start_chat, [textbox,chatbot], [textbox,tabs,chatbot],queue = False,api_name = "start_chat_textbox")
@@ -570,6 +667,11 @@ with gr.Blocks(title="Climate Q&A", css_paths=os.getcwd()+ "/style.css", theme=t
     dropdown_samples.change(change_sample_questions,dropdown_samples,samples)
     demo.queue()

 oa = OpenAlex()
 import gradio as gr
+from gradio_modal import Modal
 import pandas as pd
 import numpy as np
 import os
 from climateqa.constants import POSSIBLE_REPORTS
 from climateqa.utils import get_image_from_azure_blob_storage
 from climateqa.engine.keywords import make_keywords_chain
+from climateqa.engine.chains.answer_rag import make_rag_papers_chain
+from climateqa.engine.graph import make_graph_agent,display_graph
 from climateqa.engine.graph import make_graph_agent,display_graph
+from front.utils import make_html_source, make_html_figure_sources,parse_output_llm_with_sources,serialize_docs,make_toolbox,make_html_df
 # Load environment variables in local mode
 try:
 # Create vectorstore and retriever
 vectorstore = get_pinecone_vectorstore(embeddings_function)
 llm = get_llm(provider="openai",max_tokens = 1024,temperature = 0.0)
+reranker = get_reranker("nano")
 agent = make_graph_agent(llm,vectorstore,reranker)
 papers_cols_widths = {
     "id":100,
     "title":300,
     "doi":100,
     "publication_year":100,
     "abstract":500,
     "is_oa":50,
 }
 papers_cols_widths = list(papers_cols_widths.values())
+async def find_papers(query,after):
+    """Stream OpenAlex paper recommendations for a user query.
+    Keywords generated from `query` are searched on OpenAlex with the `after` bound, works without an abstract are dropped, and the remaining works are reranked against the query.
+    Args:
+        query: The user's question; used for keyword generation, reranking and the summary prompt.
+        after: Value forwarded to `oa.search(..., after=after)`.
+    Yields:
+        `(docs_html, network_html, summary)` tuples: first with an empty summary once the cards and the citation network are built, then once per streamed summary token with the summary accumulated so far.
+    """
+    top_k = 10  # papers rendered as cards and handed to the summary chain
+    summary = ""
+    keywords = generate_keywords(query)
+    df_works = oa.search(keywords,after = after)
+    df_works = df_works.dropna(subset=["abstract"])
+    if df_works.empty:
+        # No usable work: clear the three outputs instead of failing further down.
+        yield "", "", summary
+        return
+    df_works = oa.rerank(query,df_works,reranker)
+    df_works = df_works.sort_values("rerank_score",ascending=False)
+    # Sorting keeps the old index labels. Re-index the best rows 0..n-1 so that make_html_df(df, i)
+    # receives the i-th ranked paper, and so that fewer than top_k results cannot raise.
+    df_top = df_works.head(top_k).reset_index(drop = True)
+    docs_html = "".join(make_html_df(df_top, i) for i in range(len(df_top)))
+    G = oa.make_network(df_works)
+    height = "750px"
+    network = oa.show_network(G,color_by = "rerank_score",notebook=False,height = height)
+    network_html = network.generate_html()
+    # The page is embedded in a single-quoted srcdoc attribute below, so it must not contain single quotes.
+    network_html = network_html.replace("'", "\"")
+    css_to_inject = "<style>#mynetwork { border: none !important; } .card { border: none !important; }</style>"
+    network_html = network_html + css_to_inject
+    network_html = f"""<iframe style="width: 100%; height: {height};margin:0 auto" name="result" allow="midi; geolocation; microphone; camera;
+    display-capture; encrypted-media;" sandbox="allow-modals allow-forms
+    allow-scripts allow-same-origin allow-popups
+    allow-top-navigation-by-user-activation allow-downloads" allowfullscreen=""
+    allowpaymentrequest="" frameborder="0" srcdoc='{network_html}'></iframe>"""
+    docs = df_top["content"].tolist()
+    # First update: show the cards and the network right away, before the summary starts streaming.
+    yield docs_html, network_html, summary
+    chain = make_rag_papers_chain(llm)
+    result = chain.astream_log({"question": query,"docs": docs,"language":"English"})
+    path_answer = "/logs/StrOutputParser/streamed_output/-"
+    async for op in result:
+        op = op.ops[0]
+        # Only entries on the StrOutputParser streamed-output path are appended to the summary.
+        if op['path'] != path_answer:
+            continue
+        summary += op['value']
+        yield docs_html, network_html, summary
 # --------------------------------------------------------------------
 # Gradio
 # --------------------------------------------------------------------
                             samples.append(group_examples)
+                    with gr.Tab("Sources",elem_id = "tab-sources",id = 1):
                         sources_textbox = gr.HTML(show_label=False, elem_id="sources-textbox")
                         docs_textbox = gr.State("")
                     # with Modal(visible = False) as config_modal:
+                    # "Papers" tab: three buttons, each paired with a container that starts hidden.
+                    with gr.Tab("Papers",elem_id = "tab-citations",id = 4):
+                        btn_summary = gr.Button("Summary")
+                        # Simulated pop-up for the summary
+                        with gr.Group(visible=False, elem_id="papers-summary-popup") as summary_popup:
+                            papers_summary = gr.Markdown("### Summary Content", visible=True, elem_id="papers-summary")
+                        btn_relevant_papers = gr.Button("Relevant papers")
+                        # Simulated pop-up for the relevant papers
+                        with gr.Group(visible=False, elem_id="papers-relevant-popup") as relevant_popup:
+                            # NOTE(review): elem_id "sources-textbox" is also used by sources_textbox in the Sources tab,
+                            # which gives two elements the same DOM id — confirm this is intended for styling.
+                            papers_html = gr.HTML(show_label=False, elem_id="sources-textbox")
+                            # NOTE(review): this rebinds docs_textbox, which the Sources tab already assigned;
+                            # later references to docs_textbox resolve to this State — verify against the callers.
+                            docs_textbox = gr.State("")
+                        btn_citations_network = gr.Button("Citations network")
+                        # Modal window for the citations network
+                        # NOTE(review): the Figures tab reuses the name `modal` right after this block; the click
+                        # handler below is registered before that rebinding, so it still targets this Modal.
+                        with Modal(visible=False) as modal:
+                            citations_network = gr.HTML("<h3>Citations Network Graph</h3>", visible=True, elem_id="papers-citations-network")
+                        btn_citations_network.click(lambda: Modal(visible=True), None, modal)
                     with gr.Tab("Figures",elem_id = "tab-figures",id = 3):
                         with Modal(visible=False, elem_id="modal_figure_galery") as modal:
     # with gr.Tab("Figures",elem_id = "tab-images",elem_classes = "max-height other-tabs"):
     #     gallery_component = gr.Gallery(object_fit='cover')
+    # "Settings" tab: source/report/audience selectors, the publication-date slider passed to find_papers,
+    # and two read-only text boxes for the reformulated query and the language.
+    with gr.Tab("Settings",elem_id = "tab-config",id = 2):
+        gr.Markdown("Reminder: You can talk in any language, ClimateQ&A is multi-lingual!")
+        dropdown_sources = gr.CheckboxGroup(
+            ["IPCC", "IPBES","IPOS", "OpenAlex"],
+            label="Select source",
+            value=["IPCC"],
+            interactive=True,
+        )
+        dropdown_reports = gr.Dropdown(
+            POSSIBLE_REPORTS,
+            label="Or select specific reports",
+            multiselect=True,
+            value=None,
+            interactive=True,
+        )
+        dropdown_audience = gr.Dropdown(
+            ["Children","General public","Experts"],
+            label="Select audience",
+            value="Experts",
+            interactive=True,
+        )
+        # NOTE(review): the upper bound is hard-coded to 2023 — confirm whether it should follow the current year.
+        after = gr.Slider(minimum=1950,maximum=2023,step=1,value=1960,label="Publication date",show_label=True,interactive=True,elem_id="date-papers")
+        # Read-only fields (interactive = False).
+        output_query = gr.Textbox(label="Query used for retrieval",show_label = True,elem_id = "reformulated-query",lines = 2,interactive = False)
+        output_language = gr.Textbox(label="Language",show_label = True,elem_id = "language",lines = 1,interactive = False)
     # with gr.Tab("Papers (beta)",elem_id = "tab-papers",elem_classes = "max-height other-tabs"):
     #     with gr.Row():
     def finish_chat():
         return (gr.update(interactive = True,value = ""))
+    # Initial visibility of the summary and relevant-papers containers (both start hidden).
+    # NOTE(review): these flags are module globals mutated by the handlers below, so they appear to be
+    # shared by every session served by this process — consider per-session gr.State; confirm.
+    summary_visible = False
+    relevant_visible = False
+    # Handlers that flip a flag and return the matching Gradio visibility update.
+    def toggle_summary_visibility():
+        """Flip `summary_visible` and return `gr.update(visible=...)` with the new value."""
+        global summary_visible
+        summary_visible = not summary_visible
+        return gr.update(visible=summary_visible)
+    def toggle_relevant_visibility():
+        """Flip `relevant_visible` and return `gr.update(visible=...)` with the new value."""
+        global relevant_visible
+        relevant_visible = not relevant_visible
+        return gr.update(visible=relevant_visible)
     (textbox
         .submit(start_chat, [textbox,chatbot], [textbox,tabs,chatbot],queue = False,api_name = "start_chat_textbox")
     dropdown_samples.change(change_sample_questions,dropdown_samples,samples)
+    textbox.submit(find_papers,[textbox,after], [papers_html,citations_network,papers_summary])
+    examples_hidden.change(find_papers,[examples_hidden,after], [papers_html,citations_network,papers_summary])
+    btn_summary.click(toggle_summary_visibility, outputs=summary_popup)
+    btn_relevant_papers.click(toggle_relevant_visibility, outputs=relevant_popup)
     demo.queue()

climateqa/engine/chains/answer_rag.py CHANGED Viewed

@@ -7,6 +7,8 @@ from langchain_core.prompts.base import format_document
 from climateqa.engine.chains.prompts import answer_prompt_template,answer_prompt_without_docs_template,answer_prompt_images_template
 from climateqa.engine.chains.prompts import papers_prompt_template
 DEFAULT_DOCUMENT_PROMPT = PromptTemplate.from_template(template="{page_content}")
@@ -68,32 +70,32 @@ def make_rag_node(llm,with_docs = True):
-# def make_rag_papers_chain(llm):
-#     prompt = ChatPromptTemplate.from_template(papers_prompt_template)
-#     input_documents = {
-#         "context":lambda x : _combine_documents(x["docs"]),
-#         **pass_values(["question","language"])
-#     }
-#     chain = input_documents | prompt | llm | StrOutputParser()
-#     chain = rename_chain(chain,"answer")
-#     return chain
-# def make_illustration_chain(llm):
-#     prompt_with_images = ChatPromptTemplate.from_template(answer_prompt_images_template)
-#     input_description_images = {
-#         "images":lambda x : _combine_documents(get_image_docs(x["docs"])),
-#         **pass_values(["question","audience","language","answer"]),
-#     }
-#     illustration_chain = input_description_images | prompt_with_images | llm | StrOutputParser()
-#     return illustration_chain

 from climateqa.engine.chains.prompts import answer_prompt_template,answer_prompt_without_docs_template,answer_prompt_images_template
 from climateqa.engine.chains.prompts import papers_prompt_template
+from ..utils import rename_chain, pass_values
 DEFAULT_DOCUMENT_PROMPT = PromptTemplate.from_template(template="{page_content}")
+def make_rag_papers_chain(llm):
+    """Build the chain that answers a question from a list of papers.
+    The input mapping exposes `context` (the combined `docs`) together with the passed-through `question` and `language`; it is piped into the papers prompt, `llm` and a string output parser, and the result is renamed "answer".
+    Args:
+        llm: The model placed between the prompt and the output parser.
+    Returns:
+        The chain returned by `rename_chain(..., "answer")`.
+    """
+    passthrough = pass_values(["question","language"])
+    inputs = {"context": lambda x: _combine_documents(x["docs"]), **passthrough}
+    papers_prompt = ChatPromptTemplate.from_template(papers_prompt_template)
+    pipeline = inputs | papers_prompt | llm | StrOutputParser()
+    return rename_chain(pipeline,"answer")
+def make_illustration_chain(llm):
+    """Build the chain that runs the images prompt over the image documents.
+    The input mapping exposes `images` (the combined result of `get_image_docs` on `docs`) together with the passed-through `question`, `audience`, `language` and `answer`; it is piped into the images prompt, `llm` and a string output parser.
+    Args:
+        llm: The model placed between the prompt and the output parser.
+    Returns:
+        The composed chain.
+    """
+    passthrough = pass_values(["question","audience","language","answer"])
+    inputs = {"images": lambda x: _combine_documents(get_image_docs(x["docs"])), **passthrough}
+    images_prompt = ChatPromptTemplate.from_template(answer_prompt_images_template)
+    return inputs | images_prompt | llm | StrOutputParser()

climateqa/knowledge/openalex.py CHANGED Viewed

@@ -62,11 +62,10 @@ class OpenAlex():
         scores = reranker.rank(
             query,
-            df["content"].tolist(),
-            top_k = len(df),
         )
-        scores.sort(key = lambda x : x["corpus_id"])
-        scores = [x["score"] for x in scores]
         df["rerank_score"] = scores
         return df

         scores = reranker.rank(
             query,
+            df["content"].tolist()
         )
+        scores = sorted(scores.results, key = lambda x : x.document.doc_id)
+        scores = [x.score for x in scores]
         df["rerank_score"] = scores
         return df

front/utils.py CHANGED Viewed

@@ -108,6 +108,28 @@ def make_html_source(source,i):
     return card
 def make_html_figure_sources(source,i,img_str):
     meta = source.metadata
     content = source.page_content.strip()

     return card
+def make_html_df(df,i):
+    """Render the paper at row position `i` of `df` as an HTML card.
+    Args:
+        df: DataFrame with `title`, `abstract`, `doi` and `publication_year` columns.
+        i: Zero-based row position; the card is labelled "Doc {i+1}" and gets the id "doc{i}".
+    Returns:
+        The card markup as a string.
+    """
+    from html import escape  # local import: only this helper needs it
+    # Positional lookup: a frame that was filtered or sorted keeps its old index labels, so
+    # label-based df[col][i] would return the wrong row or raise KeyError.
+    row = df.iloc[i]
+    # The values come from an external catalogue; escape them so they cannot inject markup.
+    title = escape(str(row['title']))
+    content = escape(str(row['abstract']))
+    url = escape(str(row['doi']))
+    publication_date = row['publication_year']
+    card = f"""
+    <div class="card" id="doc{i}">
+        <div class="card-content">
+            <h2>Doc {i+1} - {title}</h2>
+            <p>{content}</p>
+        </div>
+        <div class="card-footer">
+            <span>{publication_date}</span>
+            <a href="{url}" target="_blank" class="pdf-link">
+                <span role="img" aria-label="Open paper">🔗</span>
+            </a>
+        </div>
+    </div>
+        """
+    return card
 def make_html_figure_sources(source,i,img_str):
     meta = source.metadata
     content = source.page_content.strip()