# NOTE: this entire module is commented out; it defines nothing at import time.

# import sys
# import os
# from contextlib import contextmanager

# from ..reranker import rerank_docs
# from ...knowledge.retriever import ClimateQARetriever


# def divide_into_parts(target, parts):
#     # Base value for each part
#     base = target // parts
#     # Remainder to distribute
#     remainder = target % parts
#     # List to hold the result
#     result = []

#     for i in range(parts):
#         if i < remainder:
#             # These parts get base value + 1
#             result.append(base + 1)
#         else:
#             # The rest get the base value
#             result.append(base)

#     return result


# @contextmanager
# def suppress_output():
#     # Open a null device
#     with open(os.devnull, 'w') as devnull:
#         # Store the original stdout and stderr
#         old_stdout = sys.stdout
#         old_stderr = sys.stderr
#         # Redirect stdout and stderr to the null device
#         sys.stdout = devnull
#         sys.stderr = devnull
#         try:
#             yield
#         finally:
#             # Restore stdout and stderr
#             sys.stdout = old_stdout
#             sys.stderr = old_stderr


# def make_retriever_node(vectorstore,reranker,rerank_by_question=True, k_final=15, k_before_reranking=100, k_summary=5):

#     def retrieve_documents(state):

#         POSSIBLE_SOURCES = ["IPCC","IPBES","IPOS"] # ,"OpenAlex"]
#         questions = state["questions"]

#         # Use sources from the user input or from the LLM detection
#         if "sources_input" not in state or state["sources_input"] is None:
#             sources_input = ["auto"]
#         else:
#             sources_input = state["sources_input"]
#         auto_mode = "auto" in sources_input

#         # There are several options to get the final top k
#         # Option 1 - Get 100 documents by question and rerank by question
#         # Option 2 - Get 100/n documents by question and rerank the total
#         if rerank_by_question:
#             k_by_question = divide_into_parts(k_final,len(questions))

#         docs = []

#         for i,q in enumerate(questions):

#             sources = q["sources"]
#             question = q["question"]

#             # If auto mode, we use the sources detected by the LLM
#             if auto_mode:
#                 sources = [x for x in sources if x in POSSIBLE_SOURCES]

#             # Otherwise, we use the config
#             else:
#                 sources = sources_input

#             # Search the document store using the retriever
#             # Configure high top k for further reranking step
#             retriever = ClimateQARetriever(
#                 vectorstore=vectorstore,
#                 sources = sources,
#                 # reports = ias_reports,
#                 min_size = 200,
#                 k_summary = k_summary,
#                 k_total = k_before_reranking,
#                 threshold = 0.5,
#             )
#             docs_question = retriever.get_relevant_documents(question)

#             # Rerank
#             if reranker is not None:
#                 with suppress_output():
#                     docs_question = rerank_docs(reranker,docs_question,question)
#             else:
#                 # Add a default reranking score
#                 for doc in docs_question:
#                     doc.metadata["reranking_score"] = doc.metadata["similarity_score"]

#             # If rerank by question we select the top documents for each question
#             if rerank_by_question:
#                 docs_question = docs_question[:k_by_question[i]]

#             # Add sources used in the metadata
#             for doc in docs_question:
#                 doc.metadata["sources_used"] = sources

#             # Add to the list of docs
#             docs.extend(docs_question)

#         # Sorting the list in descending order by rerank_score
#         # Then select the top k
#         docs = sorted(docs, key=lambda x: x.metadata["reranking_score"], reverse=True)
#         docs = docs[:k_final]

#         new_state = {"documents":docs}
#         return new_state

#     return retrieve_documents