from langchain_openai import OpenAIEmbeddings
from langchain_chroma import Chroma
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.docstore.document import Document
import os

from config import PERSIST_DIRECTORY


def process_safety_with_chroma(text):
    """
    Embeds and stores the given text in a persistent ChromaDB vector store.

    If PERSIST_DIRECTORY already exists, the previously persisted store is
    loaded and the input text is ignored; otherwise the text is chunked,
    embedded, and persisted.

    Args:
        text (str): Text to be embedded and stored.

    Returns:
        Chroma: The Chroma vector store object.
    """
    if os.path.exists(PERSIST_DIRECTORY):
        # Reuse the existing persisted collection rather than re-embedding.
        vector_store = Chroma(
            persist_directory=PERSIST_DIRECTORY,
            embedding_function=OpenAIEmbeddings(),
        )
    else:
        # Split the raw text into overlapping chunks sized for embedding.
        text_splitter = RecursiveCharacterTextSplitter(chunk_size=2000, chunk_overlap=100)
        text_chunks = text_splitter.split_text(text)
        documents = [
            Document(page_content=chunk, metadata={"source": f"chunk_{i}"})
            for i, chunk in enumerate(text_chunks)
        ]
        embeddings = OpenAIEmbeddings()
        # Embed the chunks and persist the collection to disk.
        vector_store = Chroma.from_documents(
            documents, embeddings, persist_directory=PERSIST_DIRECTORY
        )
    return vector_store
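

# --- Usage sketch (illustrative, not part of the original module) ---
# Assumes OPENAI_API_KEY is set in the environment and that config.PERSIST_DIRECTORY
# points at a writable path. The sample text and query below are placeholders.
if __name__ == "__main__":
    sample_text = "Lockout/tagout procedures must be completed before servicing equipment."
    store = process_safety_with_chroma(sample_text)

    # Quick similarity check against whatever was just embedded (or previously persisted).
    results = store.similarity_search("When is lockout/tagout required?", k=2)
    for doc in results:
        print(doc.metadata.get("source"), "->", doc.page_content[:80])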