import os
import sys

from langchain.docstore.document import Document
from langchain_community.document_loaders import PyPDFLoader

from rag import Rag

# Default folder scanned for PDF files (relative to the current working directory).
pdf_folder_path = 'files'


def get_documents_from_path(pdf_folder_path: str = pdf_folder_path) -> list:
    """Load every PDF directly inside *pdf_folder_path* into ``Document`` objects.

    Each document returned by ``PyPDFLoader.load()`` is re-wrapped in a new
    ``Document`` that keeps the original ``page_content`` but replaces the
    metadata with a single ``"source"`` key holding the PDF's file name
    without its extension.

    Args:
        pdf_folder_path: Folder to scan. Only its immediate entries are
            considered; sub-folders are not descended into.

    Returns:
        A list of ``Document`` objects, grouped by file in sorted file-name
        order. Empty when the folder contains no PDF files.

    Raises:
        OSError: If the folder cannot be listed (e.g. it does not exist).
    """
    documents = []
    # os.listdir() returns entries in arbitrary order; sorting keeps the
    # ingestion order reproducible across runs and platforms.
    for pdf_file in sorted(os.listdir(pdf_folder_path)):
        full_path = os.path.join(pdf_folder_path, pdf_file)
        # Match the extension case-insensitively (".PDF" is common) and skip
        # anything that is not a regular file, such as a directory whose
        # name happens to end in ".pdf".
        if not pdf_file.lower().endswith('.pdf') or not os.path.isfile(full_path):
            continue
        file_name_without_extension = os.path.splitext(pdf_file)[0]
        documents.extend(
            Document(
                page_content=doc.page_content,
                metadata={"source": file_name_without_extension},
            )
            for doc in PyPDFLoader(full_path).load()
        )
    return documents


if __name__ == "__main__":
    try:
        rag_llm = Rag()
        documents = get_documents_from_path()
        # NOTE(review): presumably embeds and persists the documents; the
        # Rag class is defined in another module -- confirm there.
        rag_llm.storeDocumentsInVectorstore(documents)
        print("Store PDFS Completed")
    except Exception as e:
        # Top-level boundary: report on stderr and exit non-zero so callers
        # (shell scripts, CI) can tell that the ingestion failed.
        print(f"Store PDFS failed: {e}", file=sys.stderr)
        sys.exit(1)