File size: 1,323 Bytes
cd6cddb
 
 
 
 
69992ee
cd6cddb
69992ee
24412da
 
 
 
 
 
 
 
 
 
cd6cddb
69992ee
 
 
 
cd6cddb
 
24412da
cd6cddb
 
 
 
69992ee
24412da
cd6cddb
 
 
69992ee
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
from langchain_community.embeddings import HuggingFaceEmbeddings
from langchain_community.vectorstores import FAISS
from langchain.schema import Document
from typing import List


class Retrieval:
    """Embed document chunks with a HuggingFace model and search them via FAISS.

    Intended call order: construct the object, call ``create_vector_store``
    with the chunks to index, then call ``search`` with a query.
    """

    def __init__(self, model_name, max_model_tokens=384):
        """
        Initialize Retrieval class with HuggingFace embeddings.

        No vector store exists yet after construction; call
        ``create_vector_store`` before ``search``.

        Parameters:
        model_name (str): The name of the HuggingFace model to use for embeddings.
        max_model_tokens (int, optional): Value passed as ``max_length`` in the
            embedding ``encode_kwargs``. Defaults to 384.

        Returns:
        None
        """
        self.model_name = model_name
        self.max_model_tokens = max_model_tokens
        # Filled in by create_vector_store(); None means "nothing indexed yet".
        self.chunks = None
        self.vectorstore = None
        # NOTE(review): these kwargs are forwarded unchanged to the embedding
        # backend; whether it honours "max_length"/"truncation" depends on the
        # installed library versions -- confirm against the pinned versions.
        self.embeddings = HuggingFaceEmbeddings(
            model_name=model_name,
            encode_kwargs={"max_length": max_model_tokens, "truncation": True},
        )

    def create_vector_store(self, chunks: List[Document]) -> None:
        """
        Create a new FAISS vector store for similarity search.

        Any previously created store is replaced, but only after the new one
        has been built successfully.

        Parameters:
        chunks (List[Document]): The documents to embed and index.

        Raises:
        ValueError: If ``chunks`` is empty.
        """
        if not chunks:
            # Fail early with a clear message instead of an obscure error
            # from inside the index builder.
            raise ValueError(
                "Cannot create a vector store from an empty list of chunks"
            )
        # Build first, assign afterwards, so a failed build never leaves
        # self.chunks and self.vectorstore out of sync.
        vectorstore = FAISS.from_documents(chunks, self.embeddings)
        self.chunks = chunks
        self.vectorstore = vectorstore

    def search(self, query, k=10) -> List[Document]:
        """
        Search the vector store for the documents most similar to a query.

        Parameters:
        query (str): The query text.
        k (int, optional): Number of documents to request. Defaults to 10.

        Returns:
        List[Document]: The result of the store's ``similarity_search``.

        Raises:
        RuntimeError: If ``create_vector_store`` has not been called yet.
        """
        # getattr keeps the error message clear even for instances that were
        # created without running __init__.
        vectorstore = getattr(self, "vectorstore", None)
        if vectorstore is None:
            raise RuntimeError(
                "Vector store has not been created; "
                "call create_vector_store() first"
            )
        return vectorstore.similarity_search(query, k=k)