import os

import pandas as pd
import gradio as gr

from langchain.embeddings import OpenAIEmbeddings
from langchain.vectorstores import FAISS
from langchain.schema import Document
from langchain.memory import ConversationBufferMemory

from utils import make_html_source, make_pairs, get_llm, reset_textbox
from prompt import PROMPT_INTERPRATE_INTENTION, ANSWER_PROMPT


try:
    from dotenv import load_dotenv
    load_dotenv()
except Exception:
    pass


# Load the OpenAI API key from the environment
OPENAI_API_KEY = os.getenv("OPENAI_API_KEY")
assert OPENAI_API_KEY, "Please set your OpenAI API key"

embeddings = OpenAIEmbeddings(openai_api_key=OPENAI_API_KEY)

# Load the prebuilt FAISS index from disk (a trusted local file, hence
# allow_dangerous_deserialization=True)
new_vector_store = FAISS.load_local(
    "faiss_index", embeddings, allow_dangerous_deserialization=True
)

retriever = new_vector_store.as_retriever()
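
# If "faiss_index" has not been built yet, it could be created once from the
# meeting documents and saved, e.g. (a sketch, not part of the original app;
# assumes load_documents_meeting below covers the meetings to index):
#
#   docs = load_documents_meeting("94")
#   FAISS.from_documents(docs, embeddings).save_local("faiss_index")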

QUESTIONS = [
    "Give me 10 bullet points to summarize the key decisions of the 94th meeting.",
    "Summarize the policy decisions of the 94th meeting.",
    "Give me an example of a decision that applied a penalty to a country?",
    "Show me the decisions related to end users",
    "Give me a policy on MDI (Metered Dosed Inhalers)",
]


llm = get_llm()

memory = ConversationBufferMemory(
    return_messages=True, output_key="answer", input_key="question"
)
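
# Note: chat() below rebuilds this memory from the Gradio history on every
# turn, so it only ever holds the current conversation.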

def make_qa_chain():
    """Compose the answering chain: select inputs, fill ANSWER_PROMPT, call the LLM."""
    final_inputs = {
        "context": lambda x: x["context"],
        "question": lambda x: x["question"],
    }
    return final_inputs | ANSWER_PROMPT | llm
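
# A minimal sketch of calling the chain directly (the chat handler below
# streams it with astream_log instead):
#
#   chain = make_qa_chain()
#   answer = chain.invoke({"question": "...", "context": docs})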


def load_documents_meeting(meeting_number):
    """Load the decisions of one meeting from the Excel file and wrap them as Documents."""
    excel_file_path = "data/mfls.xlsx"
    df = pd.read_excel(excel_file_path)

    # Extract the meeting number from strings such as "94th meeting"
    df["meeting_number"] = df["Meeting"].apply(lambda x: x.split(" ")[0][:-2])
    df_meeting = df[df["meeting_number"] == meeting_number].copy()

    def combine_title_and_content(row):
        return f"{row['Meeting']} {row['Issues']} {row['Content']}"

    df_meeting["combined"] = df_meeting.apply(combine_title_and_content, axis=1)

    # Wrap each row as a Document carrying the metadata shown in the sources panel
    documents = [
        Document(
            page_content=row["combined"],
            metadata={
                "Issues": row["Issues"],
                "Title": row["Title"],
                "meeting_number": row["Meeting"].split(" ")[0][:-2],
                "Agencies": row["Agencies"],
                "project": row["Projects"],
            },
        )
        for _, row in df_meeting.iterrows()
    ]
    return documents


async def chat(
    query: str,
    history: list = [],
):
    """taking a query and a message history, use a pipeline (reformulation, retriever, answering) to yield a tuple of:
    (messages in gradio format, messages in langchain format, source documents)"""
    source_string = ""
    gradio_format = make_pairs([a.content for a in history]) + [(query, "")]
    qa_chain = make_qa_chain()
    
    # reset memory
    memory.clear()
    for message in history:
        memory.chat_memory.add_message(message)
    
    inputs = {"question": query}
    
    ## INTENT
    # Classify the query: either it targets a specific meeting, or it is a
    # general question to be answered from the vector index
    intent = await llm.abatch([PROMPT_INTERPRATE_INTENTION.format_prompt(query=query)])
    intent = intent[0].content
    print("intent", intent)

    ## RETRIEVER
    if intent.split(" ")[0] == "meeting":
        # Meeting-specific query: load every decision of that meeting
        meeting_number = intent.split(" ")[-1]
        sources = load_documents_meeting(meeting_number)
    else:
        # General query: retrieve the 5 most similar documents
        sources = new_vector_store.search(query, search_type="similarity", k=5)

    source_string = "\n\n".join([make_html_source(doc, i) for i, doc in enumerate(sources, 1)])

    ## RAG
    inputs_rag = {"question": query, "context": sources}
    
    result = qa_chain.astream_log(inputs_rag)

    reformulated_question_path_id = "/logs/ChatOpenAI/streamed_output_str/-"
    retriever_path_id =             "/logs/VectorStoreRetriever/final_output"
    final_answer_path_id =          "/streamed_output/-"
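
    # astream_log yields JSONPatch operations; each op's "path" identifies the
    # chain step that produced the value, matched against the path ids above.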
    
    async for op in result:
        op = op.ops[0]
        if op['path'] == reformulated_question_path_id:  # reformulated question
            new_token = op['value']  # str

        elif op['path'] == retriever_path_id:  # documents
            sources = op['value']['documents']  # List[Document]
            source_string = "\n\n".join([make_html_source(doc, i) for i, doc in enumerate(sources, 1)])

        elif op['path'] == final_answer_path_id:  # final answer
            new_token = op['value'].content  # str
            answer_yet = gradio_format[-1][1]
            gradio_format[-1] = (query, answer_yet + new_token)

        yield gradio_format, history, source_string

    memory.save_context(inputs, {"answer": gradio_format[-1][1]})
    yield gradio_format, memory.load_memory_variables({})["history"], source_string
    
    
### GRADIO UI
    
theme = gr.themes.Soft(
    primary_hue="sky",
    font=[gr.themes.GoogleFont("Poppins"), "ui-sans-serif", "system-ui", "sans-serif"],
)

demo_name = "UNEP Q&A"

with gr.Blocks(title=demo_name, theme=theme, css_paths=os.path.join(os.getcwd(), "style.css")) as demo:
 
    gr.Markdown(f"<h1><center>{demo_name}</center></h1>")

    with gr.Row():
        with gr.Column(scale=2):
            chatbot = gr.Chatbot(
                value=[("", "Hello! How can I help you today?")],
                elem_id="chatbot",
                label=f"{demo_name} chatbot",
                show_label=False,
            )
            state = gr.State([])
 
            with gr.Row():
                ask = gr.Textbox(
                    show_label=False,
                    placeholder="Input your question then press enter",
                )
        
        with gr.Column(scale=1, variant="panel"):
            with gr.Tabs() as tabs:
                with gr.TabItem("Examples", id= 0):   
                    example_hidden = gr.Textbox(visible = False)
                    examples_questions = gr.Examples(
                                            QUESTIONS,
                                            [example_hidden],
                                            run_on_click=False,
                                            elem_id=f"examples",
                                            api_name=f"examples",
                                        )
                with gr.TabItem("Sources", id= 1):    
                    gr.Markdown("### Sources")
                    sources_textbox = gr.HTML(show_label=False, elem_id="sources-textbox")
 
        ask.submit(lambda: gr.update(selected=1), outputs=[tabs]).then(
            fn=chat,
            inputs=[
                ask,
                state,
            ],
            outputs=[chatbot, state, sources_textbox],
        )
        example_hidden.change(lambda: gr.update(selected=1), outputs=[tabs]).then(
            fn=chat,
            inputs=[
                example_hidden,
                state,
            ],
            outputs=[chatbot, state, sources_textbox],
        )

    ask.submit(reset_textbox, [], [ask])

demo.queue()
demo.launch(
    ssr_mode=False
)