Spaces:
Sleeping
Sleeping
import os

# Set ahead of the remaining imports, exactly as in the original ordering.
# NOTE(review): presumably this controls Hugging Face tokenizers threading - confirm.
os.environ['TOKENIZERS_PARALLELISM'] = 'true'

from dotenv import load_dotenv

load_dotenv()  # load .env api keys

# Read once at import time; passed explicitly to the chat model further down.
mistral_api_key = os.getenv("MISTRAL_API_KEY")

# Report only whether the key is configured. Printing the key itself would
# copy the secret into stdout and any captured logs.
print("mistral_api_key", "set" if mistral_api_key else "missing")
from langchain_community.document_loaders import PyPDFLoader | |
from langchain.text_splitter import RecursiveCharacterTextSplitter | |
from langchain_community.vectorstores import Chroma | |
from langchain_mistralai import MistralAIEmbeddings | |
from langchain import hub | |
from langchain_core.output_parsers import StrOutputParser | |
from langchain_core.runnables import RunnablePassthrough | |
from typing import Literal | |
from langchain_core.prompts import ChatPromptTemplate | |
from langchain_mistralai import ChatMistralAI | |
from pathlib import Path | |
from langchain.retrievers import ( | |
MergerRetriever, | |
) | |
from huggingface_hub import login | |
# Authenticate with the Hugging Face Hub using the token stored in the
# HUGGING_FACE_TOKEN environment variable (None is passed when it is unset).
login(token=os.environ.get("HUGGING_FACE_TOKEN"))
def load_chunk_persist_pdf(task) -> Chroma:
    """Build a persisted Chroma vector store from the PDFs of one task.

    Every file whose name ends with ``.pdf`` in ``data/pdf/<task>`` (relative
    to the current working directory) is loaded with ``PyPDFLoader``, split
    with ``RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=10)``,
    embedded with ``MistralAIEmbeddings`` and stored under
    ``data/chroma_store``.

    Args:
        task: Name of the sub-folder of ``data/pdf`` to index. It is also
            used as the name of the Chroma collection for this task.

    Returns:
        The ``Chroma`` vector store built from the chunks of this task.
    """
    base_dir = Path.cwd()
    pdf_folder_path = base_dir / "data" / "pdf" / task

    documents = []
    # Sorted so the indexing order does not depend on the order in which the
    # filesystem happens to list the directory.
    for file_name in sorted(os.listdir(pdf_folder_path)):
        if not file_name.endswith('.pdf'):
            continue
        loader = PyPDFLoader(str(pdf_folder_path / file_name))
        documents.extend(loader.load())

    text_splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=10)
    chunked_documents = text_splitter.split_documents(documents)

    persist_directory = base_dir / "data" / "chroma_store"
    persist_directory.mkdir(parents=True, exist_ok=True)

    vectorstore = Chroma.from_documents(
        documents=chunked_documents,
        embedding=MistralAIEmbeddings(),
        # One collection per task. All tasks share the same persist
        # directory, so without an explicit name they would all be written to
        # the default collection and every retriever would search the
        # documents of every task.
        # NOTE(review): Chroma restricts collection names (length/characters);
        # confirm any new task name is a valid collection name.
        collection_name=task,
        persist_directory=str(persist_directory),
    )
    vectorstore.persist()
    return vectorstore
# Index every topic folder and expose each store as a retriever. The stores
# are built in the same order as before, because each call loads and embeds
# that topic's PDFs.
personal_info_vectorstore = load_chunk_persist_pdf("personal_info")
personal_info_retriever = personal_info_vectorstore.as_retriever()

zero2hero_vectorstore = load_chunk_persist_pdf("zero2hero")
zero2hero_retriever = zero2hero_vectorstore.as_retriever()

bodyweight_vectorstore = load_chunk_persist_pdf("bodyweight")
bodyweight_retriever = bodyweight_vectorstore.as_retriever()

nutrition_vectorstore = load_chunk_persist_pdf("nutrition")
nutrition_retriever = nutrition_vectorstore.as_retriever()

workout_vectorstore = load_chunk_persist_pdf("workout")
workout_retriever = workout_vectorstore.as_retriever()

# Chat model used to generate the answers; temperature is fixed at 0.
llm = ChatMistralAI(
    model="mistral-large-latest",
    mistral_api_key=mistral_api_key,
    temperature=0,
)
# Prompt template for the coaching chain. It has two placeholders,
# {question} and {context}.
# NOTE(review): some wording below ("informations", the stray "A",
# "Barbelle") and the three-sentence limit next to the full-program layout
# look unintended. The text is sent to the model as-is, so it is reproduced
# unchanged here - confirm with the author before editing it.
prompt = ChatPromptTemplate.from_template(
    template="""
You are a professional AI coach specialized in fitness, bodybuilding and nutrition.
You must adapt to the user according to personal informations in the context. A You are gentle and motivative.
Use the following pieces of retrieved context to answer the question.
If you don't know the answer, use your common knowledge.
Use three sentences maximum and keep the answer concise.
If the user asks you a full program workout, structure your response in this way (this is an example):
- First workout : Lower body (1 hour)
1. Barbelle squat / 4 sets of 8 reps / 2'30 recovery
2. Lunges / 4 sets of 10 reps / 2'recovery
3. etc
- Second workout .... and so on.
Question: {question}
Context: {context}
Answer:
""",
)
def format_docs(docs):
    """Join the ``page_content`` of every document into one string.

    Args:
        docs: Iterable of objects exposing a ``page_content`` string.

    Returns:
        The page contents in input order, separated by a blank line
        (``"\\n\\n"``); an empty string when ``docs`` is empty.
    """
    separator = "\n\n"
    page_texts = [document.page_content for document in docs]
    return separator.join(page_texts)
# Single retriever that queries all five topic retrievers.
retriever = MergerRetriever(
    retrievers=[
        zero2hero_retriever,
        bodyweight_retriever,
        nutrition_retriever,
        workout_retriever,
        personal_info_retriever,
    ]
)

# Chain: the input is passed through unchanged as "question", the retrieved
# documents are flattened to text by format_docs as "context", then the
# prompt is filled in, sent to the model and parsed to a plain string.
rag_chain = (
    {
        "context": retriever | format_docs,
        "question": RunnablePassthrough(),
    }
    | prompt
    | llm
    | StrOutputParser()
)
# print(rag_chain.invoke("Hi, I'm Susan. Can you make a fitness program for me please?"))
# print(rag_chain.invoke("I am a 45-year-old woman and I have to lose weight for the summer. Provide me with a fitness program, and a nutrition program"))