File size: 3,887 Bytes
3e299e4
 
025e412
 
3e299e4
 
85b06be
025e412
7f184fa
3e299e4
8ee218a
3e299e4
7f184fa
8ee218a
 
3e299e4
8ee218a
3e299e4
9a30a8c
8ee218a
 
 
85b06be
 
 
ed250fe
9a30a8c
8ee218a
7f184fa
 
 
 
 
 
 
 
025e412
7f184fa
 
ed250fe
9a30a8c
7f184fa
 
 
 
ed250fe
8ee218a
 
 
 
 
 
 
 
ed250fe
3e299e4
7f184fa
 
9a30a8c
 
 
ed250fe
9a30a8c
8ee218a
9a30a8c
c104abf
 
 
 
 
 
9a30a8c
 
 
 
 
 
 
7f184fa
 
 
 
 
ed250fe
9a30a8c
7f184fa
 
 
 
 
 
 
9a30a8c
3e299e4
c104abf
8ee218a
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
import os
# Set before the third-party imports below so the flag is already present in the
# environment when they load (presumably consumed by the Hugging Face
# tokenizers library — TODO confirm).
os.environ['TOKENIZERS_PARALLELISM'] = 'true'
from dotenv import load_dotenv
# Load variables from a local .env file into the process environment; the keys
# fetched with os.getenv() further down are expected to come from there.
load_dotenv()

# Read the Mistral API key from the environment (None when it is not defined).
mistral_api_key = os.getenv("MISTRAL_API_KEY")
# Report only whether the key is present: echoing the value itself would leak
# the secret into stdout and into any captured logs.
print("mistral_api_key", "set" if mistral_api_key else "missing")

from langchain_community.document_loaders import PyPDFLoader
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain_community.vectorstores import Chroma
from langchain_mistralai import MistralAIEmbeddings
from langchain import hub
from langchain_core.output_parsers import StrOutputParser
from langchain_core.runnables import RunnablePassthrough
from typing import Literal
from langchain_core.prompts import ChatPromptTemplate
from langchain_mistralai import ChatMistralAI
from pathlib import Path
from langchain.retrievers import (
    MergerRetriever,
)
from huggingface_hub import login
# Authenticate against the Hugging Face Hub with the token taken from the
# HUGGING_FACE_TOKEN environment variable (None is passed when it is unset).
login(token=os.environ.get("HUGGING_FACE_TOKEN"))

def load_chunk_persist_pdf(task: str) -> Chroma:
    """Index every PDF under ``data/pdf/<task>`` into a persisted Chroma collection.

    The PDFs are loaded with ``PyPDFLoader``, split with
    ``RecursiveCharacterTextSplitter`` (``chunk_size=1000``,
    ``chunk_overlap=10``), embedded with ``MistralAIEmbeddings`` and stored in
    ``data/chroma_store/`` below the current working directory.

    Each task gets its own collection named after ``task``. Without an explicit
    ``collection_name`` every call writes to Chroma's default collection in the
    shared persist directory, so all the returned stores (and the retrievers
    built from them) would search the same mixed set of documents.

    Args:
        task: Name of the sub-folder of ``data/pdf`` that holds the PDFs. It is
            also used as the collection name, so it must be a name Chroma
            accepts for a collection.

    Returns:
        The Chroma vector store holding the chunked documents for ``task``.

    Raises:
        FileNotFoundError: If ``data/pdf/<task>`` does not exist.
    """
    pdf_folder_path = Path.cwd() / "data" / "pdf" / task
    persist_directory = Path.cwd() / "data" / "chroma_store"

    # Directory listing order is arbitrary; sort so ingestion is reproducible.
    # The extension check is case-insensitive so "report.PDF" is picked up too.
    pdf_paths = sorted(
        path
        for path in pdf_folder_path.iterdir()
        if path.name.lower().endswith(".pdf")
    )

    documents = []
    for pdf_path in pdf_paths:
        documents.extend(PyPDFLoader(str(pdf_path)).load())

    text_splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=10)
    chunked_documents = text_splitter.split_documents(documents)

    persist_directory.mkdir(parents=True, exist_ok=True)
    vectorstore = Chroma.from_documents(
        documents=chunked_documents,
        embedding=MistralAIEmbeddings(),
        # One collection per task keeps the topic stores isolated from each other.
        collection_name=task,
        persist_directory=str(persist_directory),
    )
    # NOTE(review): recent Chroma releases reportedly persist automatically;
    # the explicit call is kept so older setups still flush to disk — confirm.
    vectorstore.persist()
    return vectorstore

# Build one vector store per topic folder under data/pdf, in this order.
(
    personal_info_vectorstore,
    zero2hero_vectorstore,
    bodyweight_vectorstore,
    nutrition_vectorstore,
    workout_vectorstore,
) = (
    load_chunk_persist_pdf(topic)
    for topic in ("personal_info", "zero2hero", "bodyweight", "nutrition", "workout")
)

# Expose each store through a retriever with default settings.
(
    zero2hero_retriever,
    nutrition_retriever,
    bodyweight_retriever,
    workout_retriever,
    personal_info_retriever,
) = (
    store.as_retriever()
    for store in (
        zero2hero_vectorstore,
        nutrition_vectorstore,
        bodyweight_vectorstore,
        workout_vectorstore,
        personal_info_vectorstore,
    )
)

# Chat model used to generate the final answer; temperature is pinned to 0.
llm = ChatMistralAI(
    model="mistral-large-latest",
    temperature=0,
    mistral_api_key=mistral_api_key,
)

# Prompt template for the coaching assistant. It takes two variables:
# {question} (the user's message) and {context} (the retrieved document text).
# NOTE(review): the template text contains a few typos ("informations",
# a stray "A" before "You are gentle", "Barbelle") and asks for "three sentences
# maximum" while also describing a multi-workout program layout. These are sent
# to the model verbatim; they are left unchanged here — confirm whether the
# wording should be revised.
prompt = ChatPromptTemplate.from_template(
    """
    You are a professional AI coach specialized in fitness, bodybuilding and nutrition.
    You must adapt to the user according to personal informations in the context. A You are gentle and motivative.
    Use the following pieces of retrieved context to answer the question.
    If you don't know the answer, use your common knowledge.
    Use three sentences maximum and keep the answer concise.
    If the user asks you a full program workout, structure your response in this way (this is an example):
    - First workout : Lower body (1 hour)
    1. Barbelle squat / 4 sets of 8 reps / 2'30 recovery
    2. Lunges / 4 sets of 10 reps / 2'recovery
    3. etc
    - Second workout  .... and so on.

    Question: {question} 

    Context: {context} 

    Answer:
    """, 
)

def format_docs(docs):
    """Join the ``page_content`` of each document, separated by a blank line."""
    page_texts = [document.page_content for document in docs]
    return "\n\n".join(page_texts)
    
# Single retriever that queries all five topic retrievers.
retriever = MergerRetriever(
    retrievers=[
        zero2hero_retriever,
        bodyweight_retriever,
        nutrition_retriever,
        workout_retriever,
        personal_info_retriever,
    ]
)

# Pipeline: the input is passed through unchanged as "question" and, in
# parallel, sent to the retriever whose documents are flattened into "context";
# both feed the prompt, then the chat model, then a plain-string parser.
rag_chain = (
    {
        "context": retriever | format_docs,
        "question": RunnablePassthrough(),
    }
    | prompt
    | llm
    | StrOutputParser()
)



# print(rag_chain.invoke("Hi I'm Susan. Can you make a fitness program for me please?"))

# print(rag_chain.invoke("I am a 45-year-old woman and I have to lose weight for the summer. Provide me with a fitness program, and a nutrition program"))