added retrieval
Browse files- utils/retrieval.py +22 -0
utils/retrieval.py
ADDED
@@ -0,0 +1,22 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
from langchain_community.embeddings import HuggingFaceEmbeddings
|
2 |
+
from langchain_community.vectorstores import FAISS
|
3 |
+
from langchain.schema import Document
|
4 |
+
from typing import List
|
5 |
+
|
6 |
+
class Retrieval:
    """Embed document chunks and run similarity search over a FAISS store.

    Typical use: construct with an embedding model name, call
    ``create_vector_store`` once with the chunks to index, then call
    ``search`` as many times as needed.
    """

    def __init__(self, model_name: str):
        """Create the embedding backend for ``model_name``.

        Args:
            model_name: Model identifier forwarded unchanged to
                ``HuggingFaceEmbeddings(model_name=...)``.
        """
        self.model_name = model_name
        self.embeddings = HuggingFaceEmbeddings(model_name=model_name)
        # Both are filled in by create_vector_store(). Defining them here
        # lets search() detect the "nothing indexed yet" state explicitly
        # instead of failing with an AttributeError.
        self.chunks = None
        self.vectorstore = None

    def create_vector_store(self, chunks: List["Document"]):
        """Index ``chunks`` in a new FAISS vector store.

        Any previously built store is replaced.

        Args:
            chunks: Documents to embed and index; must not be empty.

        Raises:
            ValueError: If ``chunks`` is empty, since there would be nothing
                to index. Existing state is left untouched in that case.
        """
        if not chunks:
            raise ValueError(
                "Cannot create a vector store from an empty list of chunks."
            )

        self.chunks = chunks
        self.vectorstore = FAISS.from_documents(self.chunks, self.embeddings)

    def search(self, query, k=10) -> List["Document"]:
        """Return the ``k`` chunks most similar to ``query``.

        Args:
            query: Query text handed to the store's ``similarity_search``.
            k: Maximum number of results to request (defaults to 10).

        Returns:
            Whatever the store's ``similarity_search`` returns for
            ``query`` and ``k``.

        Raises:
            RuntimeError: If ``create_vector_store`` has not been called yet.
        """
        if self.vectorstore is None:
            raise RuntimeError(
                "Vector store has not been created; "
                "call create_vector_store() before search()."
            )

        return self.vectorstore.similarity_search(query, k=k)
|