"""RAG fitness-coach pipeline: index PDF folders into Chroma, answer with Mistral.

Importing this module has side effects: it loads the ``.env`` file, logs in to
the Hugging Face Hub, embeds the PDFs found under ``data/pdf/<task>/`` into
Chroma vector stores and builds ``rag_chain``, which is invoked with a question
string and yields the model's answer as a string.
"""
import os

# Set before the remaining third-party imports run (presumably so the
# tokenizers library sees it when it is first loaded).
os.environ['TOKENIZERS_PARALLELISM'] = 'true'

from dotenv import load_dotenv

load_dotenv()  # load .env api keys
mistral_api_key = os.getenv("MISTRAL_API_KEY")
# Report only whether the key is configured: printing the key itself would
# leak the secret into stdout and any captured logs.
print("mistral_api_key", "set" if mistral_api_key else "missing")

from langchain_community.document_loaders import PyPDFLoader
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain_community.vectorstores import Chroma
from langchain_mistralai import MistralAIEmbeddings
from langchain import hub
from langchain_core.output_parsers import StrOutputParser
from langchain_core.runnables import RunnablePassthrough
from typing import Literal
from langchain_core.prompts import ChatPromptTemplate
from langchain_mistralai import ChatMistralAI
from pathlib import Path
from langchain.retrievers import (
    MergerRetriever,
)
from huggingface_hub import login

login(token=os.getenv("HUGGING_FACE_TOKEN"))


def load_chunk_persist_pdf(task: str) -> Chroma:
    """Load the PDFs of one task folder, chunk them and persist them in Chroma.

    Args:
        task: Name of the sub-folder of ``data/pdf/`` (relative to the current
            working directory) whose ``.pdf`` files are indexed. It is also
            used as the Chroma collection name, so it must be a name Chroma
            accepts for a collection.

    Returns:
        The Chroma vector store built from the chunked documents of that
        folder, persisted under ``data/chroma_store/``.

    Raises:
        FileNotFoundError: If ``data/pdf/<task>`` does not exist.
    """
    data_dir = Path.cwd() / "data"
    pdf_dir = data_dir / "pdf" / task
    store_dir = data_dir / "chroma_store"

    documents = []
    # Sorted so the chunk order does not depend on directory listing order.
    for pdf_path in sorted(pdf_dir.iterdir()):
        if pdf_path.name.endswith('.pdf'):
            documents.extend(PyPDFLoader(str(pdf_path)).load())

    text_splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=10)
    chunked_documents = text_splitter.split_documents(documents)

    store_dir.mkdir(parents=True, exist_ok=True)
    vectorstore = Chroma.from_documents(
        documents=chunked_documents,
        embedding=MistralAIEmbeddings(),
        # One collection per task. All tasks share the same persist directory,
        # so without a distinct name every call would write into the same
        # default collection and each retriever would search every topic.
        collection_name=task,
        persist_directory=str(store_dir),
    )
    # NOTE(review): every import of this module embeds the PDFs again; unless
    # stable ids are supplied this likely appends duplicates to the persisted
    # collection on each run -- confirm and consider reusing an existing store.
    vectorstore.persist()
    return vectorstore


personal_info_vectorstore = load_chunk_persist_pdf("personal_info")
zero2hero_vectorstore = load_chunk_persist_pdf("zero2hero")
bodyweight_vectorstore = load_chunk_persist_pdf("bodyweight")
nutrition_vectorstore = load_chunk_persist_pdf("nutrition")
workout_vectorstore = load_chunk_persist_pdf("workout")

zero2hero_retriever = zero2hero_vectorstore.as_retriever()
nutrition_retriever = nutrition_vectorstore.as_retriever()
bodyweight_retriever = bodyweight_vectorstore.as_retriever()
workout_retriever = workout_vectorstore.as_retriever()
personal_info_retriever = personal_info_vectorstore.as_retriever()

llm = ChatMistralAI(model="mistral-large-latest", mistral_api_key=mistral_api_key, temperature=0)

# The prompt wording is runtime text and is kept exactly as written.
prompt = ChatPromptTemplate.from_template(
    """
    You are a professional AI coach specialized in fitness, bodybuilding and nutrition.
    You must adapt to the user according to personal informations in the context. A
    You are gentle and motivative.
    Use the following pieces of retrieved context to answer the question.
    If you don't know the answer, use your common knowledge.
    Use three sentences maximum and keep the answer concise.
    If the user asks you a full program workout, structure your response in this way (this is an example):
    - First workout : Lower body (1 hour)
    1. Barbelle squat / 4 sets of 8 reps / 2'30 recovery
    2. Lunges / 4 sets of 10 reps / 2'recovery
    3. etc
    - Second workout .... and so on.
    Question: {question}
    Context: {context}
    Answer:
    """,
)


def format_docs(docs):
    """Join the ``page_content`` of each document, separated by a blank line."""
    return "\n\n".join(doc.page_content for doc in docs)


# Query all topic retrievers and merge their results into one context.
retriever = MergerRetriever(
    retrievers=[
        zero2hero_retriever,
        bodyweight_retriever,
        nutrition_retriever,
        workout_retriever,
        personal_info_retriever,
    ]
)

# The incoming question feeds both the retriever (to build the context) and
# the prompt's {question} slot; the model reply is parsed into a plain string.
rag_chain = (
    {"context": retriever | format_docs, "question": RunnablePassthrough()}
    | prompt
    | llm
    | StrOutputParser()
)

# Example usage:
# print(rag_chain.invoke("WHi I'm Susan. Can you make a fitness program for me please?"))
# print(rag_chain.invoke("I am a 45 years old woman and I have to loose weight for the summer. Provide me with a fitness program, and a nutrition program"))