import os

os.environ['TOKENIZERS_PARALLELISM'] = 'true'

from dotenv import load_dotenv

load_dotenv()  # load API keys from the .env file
mistral_api_key = os.getenv("MISTRAL_API_KEY")
print("mistral_api_key", mistral_api_key)

import pprint
from pathlib import Path
from typing import Any, Dict, Literal

import pandas as pd
from huggingface_hub import login
from langchain import hub
from langchain.output_parsers import PandasDataFrameOutputParser
from langchain.retrievers import MergerRetriever
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain_community.document_loaders import PyPDFLoader
from langchain_community.vectorstores import Chroma
from langchain_core.output_parsers import StrOutputParser
from langchain_core.prompts import PromptTemplate
from langchain_core.runnables import RunnablePassthrough
from langchain_mistralai import ChatMistralAI, MistralAIEmbeddings

login(token=os.getenv("HUGGING_FACE_TOKEN"))


def load_chunk_persist_pdf(task) -> Chroma:
    """Load every PDF in data/pdf/<task>, split it into chunks, embed the chunks with
    Mistral embeddings, and persist them in a local Chroma store."""
    pdf_folder_path = os.path.join(os.getcwd(), Path(f"data/pdf/{task}"))
    documents = []
    for file in os.listdir(pdf_folder_path):
        if file.endswith('.pdf'):
            pdf_path = os.path.join(pdf_folder_path, file)
            loader = PyPDFLoader(pdf_path)
            documents.extend(loader.load())
    text_splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=10)
    chunked_documents = text_splitter.split_documents(documents)
    os.makedirs("data/chroma_store/", exist_ok=True)
    vectorstore = Chroma.from_documents(
        documents=chunked_documents,
        embedding=MistralAIEmbeddings(),
        persist_directory=os.path.join(os.getcwd(), Path("data/chroma_store/")),
    )
    vectorstore.persist()
    return vectorstore


# Example of the tabular format we ultimately want the generated program to follow.
df = pd.DataFrame(
    {
        "exercise": ["Squat", "Bench Press", "Lunges", "Pull ups"],
        "sets": [4, 4, 3, 3],
        "repetitions": [10, 8, 8, 8],
        "rest": ["2:30", "2:00", "1:30", "2:00"],
    }
)

# parser = PandasDataFrameOutputParser(dataframe=df)

# Build one vector store (and retriever) per document collection.
# personal_info_vectorstore = load_chunk_persist_pdf("personal_info")
# zero2hero_vectorstore = load_chunk_persist_pdf("zero2hero")
# bodyweight_vectorstore = load_chunk_persist_pdf("bodyweight")
# nutrition_vectorstore = load_chunk_persist_pdf("nutrition")
# workout_vectorstore = load_chunk_persist_pdf("workout")
# zero2hero_retriever = zero2hero_vectorstore.as_retriever()
# nutrition_retriever = nutrition_vectorstore.as_retriever()
# bodyweight_retriever = bodyweight_vectorstore.as_retriever()
# workout_retriever = workout_vectorstore.as_retriever()
# personal_info_retriever = personal_info_vectorstore.as_retriever()

llm = ChatMistralAI(model="mistral-large-latest", mistral_api_key=mistral_api_key, temperature=0)

# prompt = PromptTemplate(
#     template="""
#     You are a professional AI coach specialized in building fitness plans and full workout programs.
#     You must adapt to the user according to the personal information in the context. You are gentle and motivating.
#     Use the following pieces of retrieved context to answer the user's query.
#     Context: {context}
#     \n{format_instructions}\n{question}\n
#     """,
#     input_variables=["question", "context"],
#     partial_variables={"format_instructions": parser.get_format_instructions()},
# )

# def format_docs(docs):
#     return "\n\n".join(doc.page_content for doc in docs)

# def format_parser_output(parser_output: Dict[str, Any]) -> None:
#     for key in parser_output.keys():
#         parser_output[key] = parser_output[key].to_dict()
#     return pprint.PrettyPrinter(width=4, compact=True).pprint(parser_output)

# retriever = MergerRetriever(retrievers=[zero2hero_retriever, bodyweight_retriever, nutrition_retriever, workout_retriever, personal_info_retriever])

# chain = (
#     {"context": zero2hero_retriever | format_docs, "question": RunnablePassthrough()}
#     | prompt
#     | llm
#     | parser
# )
# # chain = prompt | llm | parser
# format_parser_output(chain.invoke("Build me a full body workout plan for summer body."))

from typing import List

from langchain_core.output_parsers import JsonOutputParser
from pydantic import BaseModel, Field


class Exercise(BaseModel):
    exercice: str = Field(description="Name of the exercise")
    nombre_series: int = Field(description="Number of sets for the exercise")
    nombre_repetitions: int = Field(description="Number of repetitions for the exercise")
    temps_repos: str = Field(description="Rest time between sets")


class MusculationProgram(BaseModel):
    exercises: List[Exercise]


# Define the query used to request a musculation (strength training) program.
musculation_query = "Provide a musculation program with exercises, number of sets, number of repetitions, and rest time between sets."

# Set up a JSON parser and inject its format instructions into the prompt template.
parser = JsonOutputParser(pydantic_object=MusculationProgram)

prompt = PromptTemplate(
    template="Answer the user query.\n{format_instructions}\n{query}\n",
    input_variables=["query"],
    partial_variables={"format_instructions": parser.get_format_instructions()},
)

# Chain the prompt, the language model, and the JSON parser together.
workout_chain = prompt | llm | parser
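
# --- Example usage (a minimal sketch added for illustration, not part of the original script) ---
# Invoking the chain sends the query to Mistral and parses the JSON answer into a plain
# dict shaped like MusculationProgram, i.e. {"exercises": [{"exercice": ..., ...}, ...]}.
# The DataFrame conversion is only an assumption for convenient display, mirroring the
# example `df` defined earlier; the `program` and `program_df` names are hypothetical.
if __name__ == "__main__":
    program = workout_chain.invoke({"query": musculation_query})
    program_df = pd.DataFrame(program["exercises"])
    print(program_df)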