File size: 862 Bytes
cd6cddb
 
 
 
 
69992ee
cd6cddb
69992ee
cd6cddb
69992ee
 
 
 
cd6cddb
 
 
 
 
 
 
69992ee
cd6cddb
 
 
69992ee
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
from langchain_community.embeddings import HuggingFaceEmbeddings
from langchain_community.vectorstores import FAISS
from langchain.schema import Document
from typing import List


class Retrieval:
    """Embed document chunks and run similarity search over a FAISS index.

    Typical usage is to construct the object with an embedding model name,
    call :meth:`create_vector_store` once with the chunks to index, and then
    call :meth:`search` any number of times.
    """

    def __init__(self, model_name: str, max_model_tokens: int = 384) -> None:
        """Create the embedding model wrapper.

        Args:
            model_name: Name passed to ``HuggingFaceEmbeddings`` as
                ``model_name``.
            max_model_tokens: Value passed as ``max_length`` in the
                embedding ``encode_kwargs``; truncation is enabled.
        """
        self.model_name = model_name
        # NOTE(review): encode_kwargs are forwarded by HuggingFaceEmbeddings
        # to the underlying model's encode call -- confirm the installed
        # sentence-transformers version accepts "max_length"/"truncation".
        self.embeddings = HuggingFaceEmbeddings(
            model_name=model_name,
            encode_kwargs={"max_length": max_model_tokens, "truncation": True},
        )
        # Populated by create_vector_store(); None means "no index built yet"
        # so search() can fail with a clear message instead of AttributeError.
        self.chunks = None
        self.vectorstore = None

    def create_vector_store(self, chunks: List[Document]) -> None:
        """Build the FAISS vector store from ``chunks``.

        Any previously built store on this instance is replaced.

        Args:
            chunks: Documents to embed and index. Must not be empty.

        Raises:
            ValueError: If ``chunks`` is empty or ``None``.
        """
        # Fail early: an index cannot be built from zero documents, and the
        # error raised further down the stack would not name the real cause.
        if not chunks:
            raise ValueError(
                "create_vector_store() requires at least one document chunk."
            )

        self.chunks = chunks
        self.vectorstore = FAISS.from_documents(self.chunks, self.embeddings)

    def search(self, query: str, k: int = 10) -> List[Document]:
        """Return the ``k`` chunks most similar to ``query``.

        Args:
            query: Text to search for.
            k: Number of results requested from the vector store.

        Returns:
            The documents returned by the store's ``similarity_search``.

        Raises:
            RuntimeError: If :meth:`create_vector_store` has not been called.
        """
        if self.vectorstore is None:
            raise RuntimeError(
                "Vector store has not been created; "
                "call create_vector_store() before search()."
            )

        # Retrieve the top-k similar chunks.
        return self.vectorstore.similarity_search(query, k=k)