import os
from typing import List

from dotenv import load_dotenv
from langfuse.callback import CallbackHandler

import gradio as gr
from langchain_openai import OpenAIEmbeddings, ChatOpenAI
from langchain.docstore.document import Document
from langchain.prompts import ChatPromptTemplate
from langchain_core.runnables import RunnablePassthrough
from langchain_community.vectorstores import FAISS
#from langchain_cohere import CohereEmbeddings, CohereRerank
#from langchain_groq import ChatGroq

# Langfuse tracing callback; keys are hardcoded here rather than loaded from the environment.
langfuse_handler = CallbackHandler(
    public_key="pk-lf-92e7e2eb-c5e0-4bbe-9c8d-1ebcb0f4f4cf",
    secret_key="sk-lf-5229e3b7-8f01-4d6e-a858-72c3ecb57dd3",
    host="https://cloud.langfuse.com",
)

load_dotenv()
#os.getenv("COHERE_API_KEY")
#os.getenv("GROQ_API_KEY")
os.getenv("OPENAI_API_KEY")

embeddings = OpenAIEmbeddings(model="text-embedding-3-large")
#embeddings_cohere = CohereEmbeddings(model="embed-multilingual-v3.0")


def load_vectorstore(index_name, embeddings, k=20):
    """Load a local FAISS index and expose it as a retriever returning the top-k documents."""
    return FAISS.load_local(
        index_name, embeddings, allow_dangerous_deserialization=True
    ).as_retriever(search_kwargs={"k": k})


retriever_names = ["large"]
retrievers = {}
for name in retriever_names:
    retrievers[name] = load_vectorstore(f"{name}", embeddings)


def format_docs_with_id(docs: List[Document]) -> str:
    """Format retrieved documents as a single string of metadata/content blocks."""
    formatted = [
        (
            f"Metadata: {doc.metadata}\n"
            f"Content: {doc.page_content}\n"
        )
        for doc in docs
    ]
    return "\n\n" + "\n\n".join(formatted)


def prompt_fn():
    return (
        "You are an expert pharmaceutical chemist; answer the question based on the context. "
        "Do not answer anything unrelated to pharmaceutical chemistry."
        "\n\nHere is the context: "
        "{context}"
    )
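# LCEL assembly used by legal() below: retrieve documents for the question, format them into the
# system prompt via format_docs_with_id, and have the chat model generate the answer.
# Every invocation is traced through the Langfuse callback handler configured above.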
"\n\nHere is the context: " "{context}" ) llm = ChatOpenAI(temperature=0, model="gpt-4o") retrieve_docs = {name: (lambda x: x["input"]) | retrievers[name] for name in retriever_names} def legal(question): prompt = ChatPromptTemplate.from_messages([ ("system", prompt_fn()), ("human", "{input}"), ]) rag_chain_from_docs = ( RunnablePassthrough.assign(context=(lambda x: format_docs_with_id(x["context"]))) | prompt | llm ) chains = { name: RunnablePassthrough.assign(context=retrieve_docs[name]).assign(answer=rag_chain_from_docs) for name in retriever_names } name = "large" if name not in chains: raise ValueError(f"Invalid typologie: {name}") #try: #result = chains[name].invoke({"input": question}) result = chains[name].invoke({"input": question}, config={"callbacks": [langfuse_handler]}) return result["answer"].content #result["answer"].articles, result["answer"].citations #except Exception as e: # return "Je ne sais pa#| "", "" with gr.Blocks() as demo: #gr.Markdown("## OnScent Fragrance Intelligent Library Search") #gr.Markdown("Developed by ScentGenie") #gr.Markdown("### Client Brief or Description") with gr.Row(): input1 = gr.Textbox(label="Question", placeholder="effets indesirables du paracetamol") #gr.Markdown("### Additional Criterias") #gr.Markdown("Criterias like Application area, RMC, Notes to exclude etc") #with gr.Row(): # input2 = gr.Textbox(label="Additional Criterias (can be left empy)", placeholder=" for hair products with RMC under 15$ and without vanilla note") #with gr.Row(): # input3 = gr.Dropdown(["Advanced", "Fast"], label="Mode", value="Advanced") #gr.Markdown("## Recommended Formulas") output1 = gr.Text(label="Reponse") #output2 = gr.Text(label="Documents Sources") #output3 = gr.Text(label="Documents IDs") btn = gr.Button("Submit") btn.click(legal, inputs=[input1], outputs=[output1]) demo.launch(debug=True)