Spaces:

agoyal496
/

AskMyPDF

Sleeping

AskMyPDF / utils /retrieval.py

Documentation

24412da 29 days ago

1.32 kB

	from langchain_community.embeddings import HuggingFaceEmbeddings
	from langchain_community.vectorstores import FAISS
	from langchain.schema import Document
	from typing import List


	class Retrieval:
	    """Embed document chunks and run similarity search over them.

	    Wraps a ``HuggingFaceEmbeddings`` model and a FAISS vector store. The
	    store is not built at construction time: call ``create_vector_store``
	    with the chunks to index before calling ``search``.
	    """

	    def __init__(self, model_name: str, max_model_tokens: int = 384) -> None:
	        """Create the embedding model; no vector store exists yet.

	        Parameters:
	            model_name (str): The name of the HuggingFace model to use for
	                embeddings.
	            max_model_tokens (int, optional): Sent as ``max_length`` (together
	                with ``truncation=True``) in the embedding model's
	                ``encode_kwargs``. Defaults to 384.
	        """
	        self.model_name = model_name
	        # NOTE(review): encode_kwargs are forwarded to the underlying model's
	        # encode call; confirm the installed sentence-transformers version
	        # accepts `max_length` / `truncation` there.
	        self.embeddings = HuggingFaceEmbeddings(
	            model_name=model_name,
	            encode_kwargs={"max_length": max_model_tokens, "truncation": True},
	        )
	        # Defined up front so that search() before create_vector_store() can
	        # fail with a clear message instead of a bare AttributeError.
	        self.chunks: "List[Document]" = []
	        self.vectorstore = None

	    def create_vector_store(self, chunks: "List[Document]") -> None:
	        """Build a new FAISS vector store from ``chunks``.

	        Any previously built store is replaced.

	        Parameters:
	            chunks (List[Document]): The documents to embed and index.

	        Raises:
	            ValueError: If ``chunks`` is empty.
	        """
	        # Reject empty input here: there is nothing to index, and letting it
	        # reach the FAISS builder is expected to fail with a far less helpful
	        # error.
	        if not chunks:
	            raise ValueError(
	                "Cannot create a vector store from an empty list of chunks."
	            )
	        self.chunks = chunks
	        self.vectorstore = FAISS.from_documents(self.chunks, self.embeddings)

	    def search(self, query: str, k: int = 10) -> "List[Document]":
	        """Return the ``k`` chunks most similar to ``query``.

	        Parameters:
	            query (str): The text to search for.
	            k (int, optional): Number of results requested from the vector
	                store. Defaults to 10.

	        Returns:
	            List[Document]: Whatever the vector store's ``similarity_search``
	            returns for ``query`` and ``k``.

	        Raises:
	            RuntimeError: If ``create_vector_store`` has not been called yet.
	        """
	        if self.vectorstore is None:
	            raise RuntimeError(
	                "Vector store has not been created; "
	                "call create_vector_store() before search()."
	            )
	        return self.vectorstore.similarity_search(query, k=k)