# app.py — Vidal pharmachemistry RAG assistant (Hugging Face Space)
import json
import os
import warnings
from typing import List
from operator import itemgetter
from dotenv import load_dotenv
from langfuse.callback import CallbackHandler
# Load .env first so the Langfuse credentials below can come from it.
# (load_dotenv is also called later in the file; a second call is harmless.)
load_dotenv()
# Langfuse tracing callback for chain invocations.
# SECURITY NOTE(review): these credentials were previously hard-coded in
# source and must be considered compromised — rotate them in the Langfuse
# dashboard. They are now read from the environment; the old literals are
# kept only as fallbacks so the app keeps working until rotation is done.
langfuse_handler = CallbackHandler(
    public_key=os.getenv("LANGFUSE_PUBLIC_KEY", "pk-lf-92e7e2eb-c5e0-4bbe-9c8d-1ebcb0f4f4cf"),
    secret_key=os.getenv("LANGFUSE_SECRET_KEY", "sk-lf-5229e3b7-8f01-4d6e-a858-72c3ecb57dd3"),
    host=os.getenv("LANGFUSE_HOST", "https://cloud.langfuse.com"),
)
import gradio as gr
from langchain_openai import OpenAIEmbeddings
from langchain_openai import ChatOpenAI
from langchain.docstore.document import Document
from langchain.prompts import PromptTemplate
from langchain.prompts import ChatPromptTemplate
from langchain.chains import RetrievalQA
from langchain_core.output_parsers import StrOutputParser, CommaSeparatedListOutputParser
from pydantic import BaseModel, Field
from langchain_core.runnables import RunnablePassthrough
from langchain_core.runnables import RunnableParallel
from langchain_community.vectorstores import FAISS
from langchain_community.document_transformers import EmbeddingsRedundantFilter
from langchain.retrievers import EnsembleRetriever
from langchain.retrievers.merger_retriever import MergerRetriever
from langchain.retrievers.document_compressors import DocumentCompressorPipeline
from langchain.retrievers import ContextualCompressionRetriever
#from langchain_cohere import CohereEmbeddings, CohereRerank
#from langchain_groq import ChatGroq
# Pull configuration from a local .env file (no-op when the file is absent).
load_dotenv()
# OpenAIEmbeddings/ChatOpenAI read OPENAI_API_KEY from the environment on
# their own; the original bare `os.getenv("OPENAI_API_KEY")` discarded its
# result. Make the intent explicit and warn early when the key is missing.
if not os.getenv("OPENAI_API_KEY"):
    warnings.warn("OPENAI_API_KEY is not set; OpenAI API calls will fail.")
# Embedding model used to load/query the FAISS indexes below.
embeddings = OpenAIEmbeddings(model="text-embedding-3-large")
def load_vectorstore(index_name, embeddings, k=20):
    """Load a local FAISS index and expose it as a top-k retriever.

    Deserialization is explicitly allowed because the index files are
    trusted local artifacts shipped with the app — TODO confirm they are
    never user-supplied.
    """
    store = FAISS.load_local(
        index_name,
        embeddings,
        allow_dangerous_deserialization=True,
    )
    return store.as_retriever(search_kwargs={"k": k})
# Names of the FAISS index directories to mount; each becomes a retriever.
retriever_names = ['large']
retrievers = {}
retrievers_docs = {}
for name in retriever_names:
    # The original passed f"{name}" — a pointless f-string; the index path
    # is just the name itself.
    retrievers[name] = load_vectorstore(name, embeddings)
    # Runnable: extract the "input" key from the chain payload, then retrieve.
    # NOTE(review): duplicated later as `retrieve_docs`, which is the one the
    # app actually uses; this dict appears unused and is kept for safety.
    retrievers_docs[name] = (lambda x: x["input"]) | retrievers[name]
def format_docs_with_id(docs: "List[Document]") -> str:
    """Render retrieved documents as a prompt-ready context string.

    Each document contributes its metadata mapping and page content; the
    entries are separated by blank lines and the whole result is prefixed
    with a blank-line separator (an empty `docs` yields just that prefix).

    Args:
        docs: retrieved documents exposing `.metadata` and `.page_content`.

    Returns:
        A single formatted context string.
    """
    formatted = [
        f"Metadata: {doc.metadata}\nContent: {doc.page_content}\n"
        for doc in docs
    ]
    return "\n\n" + "\n\n".join(formatted)
def prompt_fn():
    """Return the system-prompt template; contains a `{context}` placeholder."""
    instructions = (
        "You are an expert pharmachemist, answer the question based on the "
        "context. Do not answer anything not related to pharmachemistry."
    )
    return instructions + "\n\nHere is the context: " + "{context}"
# Chat model used to answer questions; temperature 0 for deterministic output.
llm = ChatOpenAI(temperature=0, model="gpt-4o")
# Per-index runnables: pull the "input" string out of the chain payload and
# feed it to the matching retriever. (Same shape as `retrievers_docs` above;
# this is the dict the `legal` chain actually consumes.)
retrieve_docs = {name: (lambda x: x["input"]) | retrievers[name] for name in retriever_names}
def legal(question):
    """Answer a pharmachemistry question via the RAG chain.

    Retrieves context from the "large" FAISS index, formats it into the
    system prompt, invokes the LLM (traced through Langfuse), and returns
    the answer text.

    Args:
        question: the user's question string.

    Returns:
        The LLM's answer content.

    Raises:
        ValueError: if the selected retriever name is not configured.
    """
    prompt = ChatPromptTemplate.from_messages([
        ("system", prompt_fn()),
        ("human", "{input}"),
    ])
    # Format retrieved docs into the prompt context, then call the model.
    rag_chain_from_docs = (
        RunnablePassthrough.assign(context=(lambda x: format_docs_with_id(x["context"])))
        | prompt
        | llm
    )
    name = "large"
    if name not in retrieve_docs:
        raise ValueError(f"Invalid typologie: {name}")
    # Build only the chain that is used — the original constructed a chain
    # for every retriever on each call, then always picked "large".
    chain = RunnablePassthrough.assign(context=retrieve_docs[name]).assign(
        answer=rag_chain_from_docs
    )
    result = chain.invoke(
        {"input": question}, config={"callbacks": [langfuse_handler]}
    )
    return result["answer"].content
# --- Gradio UI -------------------------------------------------------------
# Single question box wired to the RAG pipeline; commented-out prototype
# widgets from an earlier fragrance-search UI were removed.
with gr.Blocks() as demo:
    with gr.Row():
        # Free-text question (placeholder is French: "paracetamol side effects").
        input1 = gr.Textbox(label="Question", placeholder="effets indesirables du paracetamol")
    output1 = gr.Text(label="Reponse")
    btn = gr.Button("Submit")
    # On click, run the question through `legal` and show the answer.
    btn.click(legal, inputs=[input1], outputs=[output1])
demo.launch(debug=True)