import os

import gradio as gr
import pandas as pd
from langchain.embeddings import OpenAIEmbeddings
from langchain.memory import ConversationBufferMemory
from langchain.schema import Document
from langchain.vectorstores import FAISS

from utils import make_html_source, make_pairs, get_llm, reset_textbox
from prompt import PROMPT_INTERPRATE_INTENTION, ANSWER_PROMPT
# Load environment variables from a .env file when python-dotenv is installed
try:
    from dotenv import load_dotenv

    load_dotenv()
except Exception:
    pass

# Read the OpenAI API key from the environment
OPENAI_API_KEY = os.getenv("OPENAI_API_KEY")
assert OPENAI_API_KEY, "Please set your OpenAI API key"

embeddings = OpenAIEmbeddings(openai_api_key=OPENAI_API_KEY)
# Load the pre-built FAISS index and set up the retriever, LLM and conversation memory
new_vector_store = FAISS.load_local(
    "faiss_index", embeddings, allow_dangerous_deserialization=True
)
retriever = new_vector_store.as_retriever()

llm = get_llm()
memory = ConversationBufferMemory(
    return_messages=True, output_key="answer", input_key="question"
)
def make_qa_chain():
    """Build the answering chain: pass the retrieved context and the question into the answer prompt, then the LLM."""
    final_inputs = {
        "context": lambda x: x["context"],
        "question": lambda x: x["question"],
    }
    return final_inputs | ANSWER_PROMPT | llm
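
# Minimal usage sketch of the chain on its own (assumes `docs` is a list of Documents
# that were already retrieved; the app streams the same chain via astream_log in chat()):
#
#     chain = make_qa_chain()
#     answer = chain.invoke({"question": "What was decided on this issue?", "context": docs})
#     print(answer.content)
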
def load_documents_meeting(meeting_number):
    """Load the meeting records from the Excel export and wrap the rows of the requested meeting as Documents."""
    # Step 1: Load the Excel data
    excel_file_path = "../data/mfls.xlsx"
    df = pd.read_excel(excel_file_path)
    df["meeting_number"] = df["Meeting"].apply(lambda x: x.split(" ")[0][:-2])
    df_meeting = df[df["meeting_number"] == meeting_number].copy()

    # Step 2: Combine the meeting, issue and content columns into one text field
    def combine_title_and_content(row):
        return f"{row['Meeting']} {row['Issues']} {row['Content']}"

    df_meeting["combined"] = df_meeting.apply(combine_title_and_content, axis=1)

    # Step 3: Wrap each row as a Document, keeping the useful columns as metadata
    documents = [
        Document(
            page_content=row["combined"],
            metadata={
                "Issues": row["Issues"],
                "Title": row["Title"],
                "meeting_number": row["Meeting"].split(" ")[0][:-2],
                "Agencies": row["Agencies"],
                "project": row["Projects"],
            },
        )
        for _, row in df_meeting.iterrows()
    ]
    return documents
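
# Usage sketch (hypothetical meeting number; a "Meeting" value such as "94th Meeting ..."
# yields "94" through the split/strip logic above):
#
#     docs_94 = load_documents_meeting("94")
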
async def chat(
    query: str,
    history: list = [],
):
    """Take a query and the message history, run the pipeline (intent detection, retrieval, answering),
    and yield tuples of (messages in Gradio format, messages in LangChain format, source documents)."""
    source_string = ""
    gradio_format = make_pairs([a.content for a in history]) + [(query, "")]
    qa_chain = make_qa_chain()

    # Reset the memory and replay the history into it
    memory.clear()
    for message in history:
        memory.chat_memory.add_message(message)
    inputs = {"question": query}

    ## INTENT
    intent = await llm.abatch([PROMPT_INTERPRATE_INTENTION.format_prompt(query=query)])
    intent = intent[0].content
    print("intent", intent)

    ## RETRIEVER
    if intent.split(" ")[0] == "meeting":
        meeting_number = intent.split(" ")[-1]
        sources = load_documents_meeting(meeting_number)
    else:
        sources = new_vector_store.search(query, search_type="similarity", k=5)
    source_string = "\n\n".join([make_html_source(doc, i) for i, doc in enumerate(sources, 1)])

    ## RAG
    inputs_rag = {"question": query, "context": sources}
    result = qa_chain.astream_log(inputs_rag)
    reformulated_question_path_id = "/logs/ChatOpenAI/streamed_output_str/-"
    retriever_path_id = "/logs/VectorStoreRetriever/final_output"
    final_answer_path_id = "/streamed_output/-"

    async for op in result:
        op = op.ops[0]
        # print(op["path"])
        if op["path"] == reformulated_question_path_id:  # reformulated question
            new_token = op["value"]  # str
        elif op["path"] == retriever_path_id:  # documents
            sources = op["value"]["documents"]  # List[Document]
            source_string = "\n\n".join([make_html_source(doc, i) for i, doc in enumerate(sources, 1)])
        elif op["path"] == final_answer_path_id:  # final answer
            new_token = op["value"].content  # str
            answer_yet = gradio_format[-1][1]
            gradio_format[-1] = (query, answer_yet + new_token)

        yield gradio_format, history, source_string

    memory.save_context(inputs, {"answer": gradio_format[-1][1]})
    yield gradio_format, memory.load_memory_variables({})["history"], source_string
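
# Running chat() outside Gradio (sketch; Gradio normally drives this async generator):
#
#     import asyncio
#
#     async def _demo():
#         async for messages, _history, sources in chat("What projects were approved?"):
#             pass
#         print(messages[-1][1])
#
#     asyncio.run(_demo())
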
### GRADIO UI
theme = gr.themes.Soft(
    primary_hue="sky",
    font=[gr.themes.GoogleFont("Poppins"), "ui-sans-serif", "system-ui", "sans-serif"],
)
demo_name = "UNEP Q&A"

with gr.Blocks(title=demo_name, theme=theme, css_paths=os.getcwd() + "/style.css") as demo:
    gr.Markdown(f"<h1><center>{demo_name}</center></h1>")

    with gr.Row():
        with gr.Column(scale=2):
            chatbot = gr.Chatbot(
                value=[("", "Hello! How can I help you today?")],
                elem_id="chatbot",
                label=f"{demo_name} chatbot",
                show_label=False,
            )
            state = gr.State([])

            with gr.Row():
                ask = gr.Textbox(
                    show_label=False,
                    placeholder="Input your question then press enter",
                )

        with gr.Column(scale=1, variant="panel"):
            gr.Markdown("### Sources")
            sources_textbox = gr.HTML(show_label=False)
    ask.submit(
        fn=chat,
        inputs=[
            ask,
            state,
        ],
        outputs=[chatbot, state, sources_textbox],
    )
    ask.submit(reset_textbox, [], [ask])

demo.queue()
demo.launch(
    ssr_mode=False
)