File size: 1,323 Bytes
cd6cddb 69992ee cd6cddb 69992ee 24412da cd6cddb 69992ee cd6cddb 24412da cd6cddb 69992ee 24412da cd6cddb 69992ee |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 |
from langchain_community.embeddings import HuggingFaceEmbeddings
from langchain_community.vectorstores import FAISS
from langchain.schema import Document
from typing import List
class Retrieval:
    """Embed document chunks with a HuggingFace model and search them via FAISS.

    Typical use: construct with a model name, call ``create_vector_store``
    once with the chunks to index, then call ``search`` for each query.
    """

    def __init__(self, model_name: str, max_model_tokens: int = 384) -> None:
        """
        Set up the HuggingFace embedding model used for indexing and search.

        No vector store is built here; call ``create_vector_store`` for that.

        Parameters:
        model_name (str): The name of the HuggingFace model to use for embeddings.
        max_model_tokens (int, optional): Value forwarded as ``max_length`` in
            the encoder keyword arguments. Defaults to 384.
        Returns:
        None
        """
        self.model_name = model_name
        # NOTE(review): ``max_length`` / ``truncation`` are forwarded verbatim
        # as encode kwargs; whether the underlying encoder honors them depends
        # on the installed sentence-transformers version -- TODO confirm.
        self.embeddings = HuggingFaceEmbeddings(
            model_name=model_name,
            encode_kwargs={"max_length": max_model_tokens, "truncation": True},
        )

    def create_vector_store(self, chunks: List[Document]) -> None:
        """
        Build a new FAISS vector store from ``chunks`` for similarity search.

        Any store previously built on this instance is replaced.

        Parameters:
        chunks (List[Document]): Documents to embed and index.
        Returns:
        None
        """
        self.chunks = chunks
        # Embeds every chunk with self.embeddings and indexes the vectors.
        self.vectorstore = FAISS.from_documents(chunks, self.embeddings)

    def search(self, query: str, k: int = 10) -> List[Document]:
        """
        Return the ``k`` indexed chunks most similar to ``query``.

        Parameters:
        query (str): Text to search for.
        k (int, optional): Number of documents to request. Defaults to 10.
        Returns:
        List[Document]: Result of the vector store's similarity search.
        Raises:
        AttributeError: If ``create_vector_store`` has not been called yet.
        """
        store = getattr(self, "vectorstore", None)
        if store is None:
            # Same exception type an unset attribute would produce, so existing
            # callers keep working, but with a message that says what to do.
            raise AttributeError(
                "Retrieval has no vector store yet; call create_vector_store() "
                "before search()."
            )
        return store.similarity_search(query, k)
|