# import sys
# import os
# from contextlib import contextmanager
# from ..reranker import rerank_docs
# from ...knowledge.retriever import ClimateQARetriever
# def divide_into_parts(target, parts):
#     # Base value for each part
#     base = target // parts
#     # Remainder to distribute
#     remainder = target % parts
#     # List to hold the result
#     result = []
#     for i in range(parts):
#         if i < remainder:
#             # These parts get base value + 1
#             result.append(base + 1)
#         else:
#             # The rest get the base value
#             result.append(base)
#     return result
# @contextmanager
# def suppress_output():
#     # Open a null device
#     with open(os.devnull, 'w') as devnull:
#         # Store the original stdout and stderr
#         old_stdout = sys.stdout
#         old_stderr = sys.stderr
#         # Redirect stdout and stderr to the null device
#         sys.stdout = devnull
#         sys.stderr = devnull
#         try:
#             yield
#         finally:
#             # Restore stdout and stderr
#             sys.stdout = old_stdout
#             sys.stderr = old_stderr
# def make_retriever_node(vectorstore,reranker,rerank_by_question=True, k_final=15, k_before_reranking=100, k_summary=5):
#     def retrieve_documents(state):
#         POSSIBLE_SOURCES = ["IPCC","IPBES","IPOS"] # ,"OpenAlex"]
#         questions = state["questions"]
#         # Use sources from the user input or from the LLM detection
#         if "sources_input" not in state or state["sources_input"] is None:
#             sources_input = ["auto"]
#         else:
#             sources_input = state["sources_input"]
#         auto_mode = "auto" in sources_input
#         # There are several options to get the final top k
#         # Option 1 - Get 100 documents by question and rerank by question
#         # Option 2 - Get 100/n documents by question and rerank the total
#         if rerank_by_question:
#             k_by_question = divide_into_parts(k_final,len(questions))
#         docs = []
#         for i,q in enumerate(questions):
#             sources = q["sources"]
#             question = q["question"]
#             # If auto mode, we use the sources detected by the LLM
#             if auto_mode:
#                 sources = [x for x in sources if x in POSSIBLE_SOURCES]
#             # Otherwise, we use the config
#             else:
#                 sources = sources_input
#             # Search the document store using the retriever
#             # Configure high top k for further reranking step
#             retriever = ClimateQARetriever(
#                 vectorstore=vectorstore,
#                 sources = sources,
#                 # reports = ias_reports,
#                 min_size = 200,
#                 k_summary = k_summary,
#                 k_total = k_before_reranking,
#                 threshold = 0.5,
#             )
#             docs_question = retriever.get_relevant_documents(question)
#             # Rerank
#             if reranker is not None:
#                 with suppress_output():
#                     docs_question = rerank_docs(reranker,docs_question,question)
#             else:
#                 # Add a default reranking score
#                 for doc in docs_question:
#                     doc.metadata["reranking_score"] = doc.metadata["similarity_score"]
#             # If rerank by question we select the top documents for each question
#             if rerank_by_question:
#                 docs_question = docs_question[:k_by_question[i]]
#             # Add sources used in the metadata
#             for doc in docs_question:
#                 doc.metadata["sources_used"] = sources
#             # Add to the list of docs
#             docs.extend(docs_question)
#         # Sorting the list in descending order by rerank_score
#         # Then select the top k
#         docs = sorted(docs, key=lambda x: x.metadata["reranking_score"], reverse=True)
#         docs = docs[:k_final]
#         new_state = {"documents":docs}
#         return new_state
#     return retrieve_documents