from typing import List, Optional

from langchain.schema import Document
from langchain_community.embeddings import HuggingFaceEmbeddings
from langchain_community.vectorstores import FAISS


class Retrieval:
    """Embed documents with a HuggingFace model and search them via FAISS.

    Usage: construct the object, call ``create_vector_store`` with the
    documents to index, then call ``search`` as often as needed.
    """

    def __init__(self, model_name: str, max_model_tokens: int = 384) -> None:
        """Set up the HuggingFace embedding model.

        No vector store exists yet after construction; call
        ``create_vector_store`` before ``search``.

        Parameters:
            model_name: Name of the HuggingFace model used for embeddings.
            max_model_tokens: Token limit handed to the encoder together
                with ``truncation=True``. Defaults to 384.
        """
        self.model_name = model_name
        # NOTE(review): encode_kwargs is forwarded to the underlying model's
        # encode call; whether "max_length"/"truncation" are honoured there
        # depends on the installed sentence-transformers version -- confirm.
        self.embeddings = HuggingFaceEmbeddings(
            model_name=model_name,
            encode_kwargs={"max_length": max_model_tokens, "truncation": True},
        )
        # Both are populated by create_vector_store(). Initialising them here
        # lets search() detect a missing index instead of failing with an
        # AttributeError on an attribute that was never set.
        self.chunks: Optional[List[Document]] = None
        self.vectorstore: Optional[FAISS] = None

    def create_vector_store(self, chunks: List[Document]) -> None:
        """Build a new FAISS vector store from ``chunks``.

        Any previously built store on this instance is replaced.

        Parameters:
            chunks: Documents to embed and index. Must not be empty.

        Raises:
            ValueError: If ``chunks`` is empty.
        """
        # Fail early with a clear message rather than somewhere inside the
        # vector-store construction.
        if not chunks:
            raise ValueError(
                "Cannot create a vector store from an empty list of chunks."
            )
        self.chunks = chunks
        self.vectorstore = FAISS.from_documents(self.chunks, self.embeddings)

    def search(self, query: str, k: int = 10) -> List[Document]:
        """Return the ``k`` documents most similar to ``query``.

        Parameters:
            query: Text to search for.
            k: Number of documents to request. Defaults to 10.

        Returns:
            The documents returned by the vector store's similarity search.

        Raises:
            RuntimeError: If ``create_vector_store`` has not been called yet.
        """
        if self.vectorstore is None:
            raise RuntimeError(
                "Vector store has not been created; "
                "call create_vector_store() before search()."
            )
        return self.vectorstore.similarity_search(query, k=k)