|
from langchain_community.embeddings import HuggingFaceEmbeddings |
|
from langchain_community.vectorstores import FAISS |
|
from langchain.schema import Document |
|
from typing import List |
|
|
|
|
|
class Retrieval:
    """Embed document chunks with a HuggingFace model and search them via FAISS.

    Usage order: construct the object, call ``create_vector_store`` with the
    chunks to index, then call ``search`` with a query.
    """

    def __init__(self, model_name, max_model_tokens=384):
        """
        Initialize Retrieval class with HuggingFace embeddings.

        The FAISS vector store is not built here; call
        ``create_vector_store`` before ``search``.

        Parameters:
            model_name (str): The name of the HuggingFace model to use for embeddings.
            max_model_tokens (int, optional): Value forwarded as ``max_length``
                in the embeddings' ``encode_kwargs``. Defaults to 384.

        Returns:
            None
        """
        self.model_name = model_name

        # Start in a well-defined "nothing indexed yet" state so that calling
        # search() too early produces a clear error instead of AttributeError.
        self.chunks = []
        self.vectorstore = None

        # NOTE(review): "max_length"/"truncation" are tokenizer-style options;
        # confirm that the installed embeddings backend accepts them through
        # encode_kwargs (some versions may reject or ignore unknown keys).
        self.embeddings = HuggingFaceEmbeddings(
            model_name=model_name,
            encode_kwargs={"max_length": max_model_tokens, "truncation": True},
        )

    def create_vector_store(self, chunks: List[Document]):
        """
        Create a new vector store for similarity search.

        Any previously built store is replaced.

        Parameters:
            chunks (List[Document]): The documents to embed and index.

        Returns:
            None

        Raises:
            ValueError: If ``chunks`` is empty, since an index cannot be
                built from zero documents.
        """
        if not chunks:
            raise ValueError(
                "Cannot create a vector store from an empty list of chunks."
            )

        self.chunks = chunks
        self.vectorstore = FAISS.from_documents(self.chunks, self.embeddings)

    def search(self, query, k=10) -> List[Document]:
        """
        Search the top matching documents for a query.

        Parameters:
            query (str): The query text to search for.
            k (int, optional): Number of documents to request. Defaults to 10.

        Returns:
            List[Document]: The result of the store's ``similarity_search``.

        Raises:
            RuntimeError: If ``create_vector_store`` has not been called yet.
        """
        # getattr keeps this safe even for instances created without __init__.
        store = getattr(self, "vectorstore", None)
        if store is None:
            raise RuntimeError(
                "Vector store has not been created; "
                "call create_vector_store() first."
            )

        return store.similarity_search(query, k=k)
|
|