timeki's picture
add dora graph recommandation
6b43c86
raw
history blame
1.67 kB
from langchain_core.retrievers import BaseRetriever
from langchain_core.documents.base import Document
from langchain_core.vectorstores import VectorStore
from langchain_core.callbacks.manager import CallbackManagerForRetrieverRun
from typing import List
class GraphRetriever(BaseRetriever):
vectorstore:VectorStore
sources:list = ["OWID"] # plus tard ajouter OurWorldInData # faudra integrate avec l'autre retriever
threshold:float = 0.5
k_total:int = 10
def _get_relevant_documents(
self, query: str, *, run_manager: CallbackManagerForRetrieverRun
) -> List[Document]:
# Check if all elements in the list are IEA or OWID
assert isinstance(self.sources,list)
assert self.sources
assert any([x in ["OWID"] for x in self.sources])
# Prepare base search kwargs
filters = {}
filters["source"] = {"$in": self.sources}
docs = self.vectorstore.similarity_search_with_score(query=query, filter=filters, k=self.k_total)
# Filter if scores are below threshold
docs = [x for x in docs if x[1] > self.threshold]
# Remove duplicate documents
unique_docs = []
seen_docs = []
for i, doc in enumerate(docs):
if doc[0].page_content not in seen_docs:
unique_docs.append(doc)
seen_docs.append(doc[0].page_content)
# Add score to metadata
results = []
for i,(doc,score) in enumerate(unique_docs):
doc.metadata["similarity_score"] = score
doc.metadata["content"] = doc.page_content
results.append(doc)
return results