import os

import gradio as gr
import pandas as pd
from langchain.embeddings import OpenAIEmbeddings
from langchain.memory import ConversationBufferMemory
from langchain.schema import Document
from langchain.vectorstores import FAISS

from prompt import PROMPT_INTERPRATE_INTENTION, ANSWER_PROMPT
from utils import make_html_source, make_pairs, get_llm, reset_textbox
# Load environment variables from a local .env file when python-dotenv is available
try:
    from dotenv import load_dotenv

    load_dotenv()
except Exception:
    pass
# Read the OpenAI API key from the environment
OPENAI_API_KEY = os.getenv("OPENAI_API_KEY")
assert OPENAI_API_KEY, "Please set your OpenAI API key"
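# The embedding model must match the one used to build the persisted FAISS index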
embeddings = OpenAIEmbeddings(openai_api_key=OPENAI_API_KEY)
new_vector_store = FAISS.load_local(
    "faiss_index", embeddings, allow_dangerous_deserialization=True
)
retriever = new_vector_store.as_retriever()
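# Example questions offered in the "Examples" tab of the UI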
QUESTIONS = [
    "Give me 10 bullet points to summarize the key decisions of the 94th meeting.",
    "Summarize the policy decisions of the 94th meeting.",
    "Give me an example of a decision that applied a penalty to a country.",
    "Show me the decisions related to end users.",
    "Give me a policy on MDI (Metered Dose Inhalers).",
]
llm = get_llm()
memory = ConversationBufferMemory(
    return_messages=True, output_key="answer", input_key="question"
)
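# The QA chain pipes the question and the retrieved context into ANSWER_PROMPT
# and then into the LLM, using LCEL "|" composition.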
def make_qa_chain():
    final_inputs = {
        "context": lambda x: x["context"],
        "question": lambda x: x["question"],
    }
    return final_inputs | ANSWER_PROMPT | llm
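# When the detected intent targets a specific meeting, similarity search is
# bypassed and every decision of that meeting is loaded from the spreadsheet.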
def load_documents_meeting(meeting_number):
    """Load all decisions of a given meeting from the Excel file as Documents."""
    # Step 1: load the spreadsheet and keep only the requested meeting
    excel_file_path = "data/mfls.xlsx"
    df = pd.read_excel(excel_file_path)
    # "94th Meeting ..." -> "94"
    df["meeting_number"] = df["Meeting"].apply(lambda x: x.split(" ")[0][:-2])
    df_meeting = df[df["meeting_number"] == meeting_number].copy()

    # Step 2: combine meeting, issue and content into a single text field
    def combine_title_and_content(row):
        return f"{row['Meeting']} {row['Issues']} {row['Content']}"

    df_meeting["combined"] = df_meeting.apply(combine_title_and_content, axis=1)

    # Step 3: wrap each row in a Document carrying its metadata
    documents = [
        Document(
            page_content=row["combined"],
            metadata={
                "Issues": row["Issues"],
                "Title": row["Title"],
                "meeting_number": row["Meeting"].split(" ")[0][:-2],
                "Agencies": row["Agencies"],
                "project": row["Projects"],
            },
        )
        for _, row in df_meeting.iterrows()
    ]
    return documents
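# Main handler wired to the UI: streams partial answers to the chatbot as
# tokens arrive from the chain's run log.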
async def chat(
    query: str,
    history: list = [],
):
    """Given a query and a message history, run the pipeline
    (intent detection, retrieval, answering) and yield tuples of
    (messages in Gradio format, messages in LangChain format, source documents)."""
    source_string = ""
    gradio_format = make_pairs([a.content for a in history]) + [(query, "")]
    qa_chain = make_qa_chain()

    # Reset the memory and replay the history into it
    memory.clear()
    for message in history:
        memory.chat_memory.add_message(message)
    inputs = {"question": query}

    ## INTENT
    intent = await llm.abatch([PROMPT_INTERPRATE_INTENTION.format_prompt(query=query)])
    intent = intent[0].content
    print("intent", intent)

    ## RETRIEVER
    if intent.split(" ")[0] == "meeting":
        # Query about one specific meeting: load all of its decisions
        meeting_number = intent.split(" ")[-1]
        sources = load_documents_meeting(meeting_number)
    else:
        # Otherwise, similarity search over the whole index
        sources = new_vector_store.search(query, search_type="similarity", k=5)
    source_string = "\n\n".join([make_html_source(doc, i) for i, doc in enumerate(sources, 1)])

    ## RAG
    inputs_rag = {"question": query, "context": sources}
    result = qa_chain.astream_log(inputs_rag)

    # Paths of interest in the streamed run log
    reformulated_question_path_id = "/logs/ChatOpenAI/streamed_output_str/-"
    retriever_path_id = "/logs/VectorStoreRetriever/final_output"
    final_answer_path_id = "/streamed_output/-"

    async for op in result:
        op = op.ops[0]
        if op["path"] == reformulated_question_path_id:  # reformulated question
            new_token = op["value"]  # str
        elif op["path"] == retriever_path_id:  # documents
            sources = op["value"]["documents"]  # List[Document]
            source_string = "\n\n".join(
                [make_html_source(doc, i) for i, doc in enumerate(sources, 1)]
            )
        elif op["path"] == final_answer_path_id:  # final answer
            new_token = op["value"].content  # str
            answer_yet = gradio_format[-1][1]
            gradio_format[-1] = (query, answer_yet + new_token)
            yield gradio_format, history, source_string

    memory.save_context(inputs, {"answer": gradio_format[-1][1]})
    yield gradio_format, memory.load_memory_variables({})["history"], source_string
### GRADIO UI
theme = gr.themes.Soft(
    primary_hue="sky",
    font=[gr.themes.GoogleFont("Poppins"), "ui-sans-serif", "system-ui", "sans-serif"],
)
demo_name = "UNEP Q&A"
with gr.Blocks(title=f"{demo_name}", theme=theme, css_paths=os.getcwd() + "/style.css") as demo:
    gr.Markdown(f"<h1><center>{demo_name}</center></h1>")
    with gr.Row():
        with gr.Column(scale=2):
            chatbot = gr.Chatbot(
                value=[("", "Hello! How can I help you today?")],
                elem_id="chatbot",
                label=f"{demo_name} chatbot",
                show_label=False,
            )
            state = gr.State([])
            with gr.Row():
                ask = gr.Textbox(
                    show_label=False,
                    placeholder="Input your question then press enter",
                )
        with gr.Column(scale=1, variant="panel"):
            with gr.Tabs() as tabs:
                with gr.TabItem("Examples", id=0):
                    example_hidden = gr.Textbox(visible=False)
                    examples_questions = gr.Examples(
                        QUESTIONS,
                        [example_hidden],
                        run_on_click=False,
                        elem_id="examples",
                        api_name="examples",
                    )
                with gr.TabItem("Sources", id=1):
                    gr.Markdown("### Sources")
                    sources_textbox = gr.HTML(show_label=False, elem_id="sources-textbox")

    # On submit, switch to the Sources tab, then stream the answer
    ask.submit(lambda: gr.update(selected=1), outputs=[tabs]).then(
        fn=chat,
        inputs=[ask, state],
        outputs=[chatbot, state, sources_textbox],
    )
    # Clicking an example fills the hidden textbox, which triggers the same flow
    example_hidden.change(lambda: gr.update(selected=1), outputs=[tabs]).then(
        fn=chat,
        inputs=[example_hidden, state],
        outputs=[chatbot, state, sources_textbox],
    )
    ask.submit(reset_textbox, [], [ask])
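# Queueing lets the generator-based chat handler stream partial results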
demo.queue()
demo.launch(ssr_mode=False)