Spaces:
Paused
Paused
#####################################################
### DOCUMENT PROCESSOR [AGENT]
#####################################################
### Jonathan Wang
# ABOUT:
# This creates an app to chat with PDFs.
# This is the AGENT
# which handles complex questions about the PDF.
#####################################################
### TODO Board:
# https://docs.llamaindex.ai/en/stable/examples/agent/agent_runner/agent_runner_rag_controllable/#setup-human-in-the-loop-chat
# Investigate ObjectIndex and retrievers? https://docs.llamaindex.ai/en/stable/examples/agent/multi_document_agents/
# https://docs.llamaindex.ai/en/stable/module_guides/storing/chat_stores/
#####################################################
### IMPORTS | |
from typing import List | |
from streamlit import session_state as ss | |
from llama_index.core.settings import Settings | |
from llama_index.core.tools import QueryEngineTool, ToolMetadata | |
from llama_index.core.query_engine import SubQuestionQueryEngine | |
# Own Modules | |
from full_doc import FullDocument | |
##################################################### | |
### CODE | |
# Names of the FullDocument attributes that may be wrapped as query-engine tools.
ALLOWED_DOCUMENT_TOOLS = ['engine', 'subquestion_engine']
# All tools the agent may use; currently an alias of (the same list object as)
# the document tools.
ALLOWED_TOOLS = ALLOWED_DOCUMENT_TOOLS
def _build_tool_from_fulldoc(fulldoc: FullDocument, tool_name: str) -> QueryEngineTool:
    """Given a Full Document, build a QueryEngineTool from the specified engine.

    Args:
        fulldoc (FullDocument): The FullDocument (doc + query engines).
        tool_name (str): The engine to use. Matched case-insensitively
            against ``ALLOWED_DOCUMENT_TOOLS``.

    Returns:
        QueryEngineTool: A query engine wrapper around the tool.

    Raises:
        ValueError: If ``tool_name`` is not an allowed document tool, or if
            the matching attribute on ``fulldoc`` is missing or ``None``.
    """
    # Normalise once so that validation, attribute lookup, the description
    # branch and the tool's name all use the same spelling.
    tool_name = tool_name.lower()
    if tool_name not in ALLOWED_DOCUMENT_TOOLS:
        raise ValueError(f"`tool_name` must be one of {ALLOWED_DOCUMENT_TOOLS}")

    query_engine = getattr(fulldoc, tool_name, None)
    if query_engine is None:
        raise ValueError(f"`{tool_name}` must be created from the document first.")

    # Build Tool
    tool_description = ''
    if tool_name == 'engine':
        tool_description += 'A tool that answers simple questions about the following document:\n' + fulldoc.summary_oneline
    elif tool_name == 'subquestion_engine':
        tool_description += 'A tool that answers complex questions about the following document:\n' + fulldoc.summary_oneline

    return QueryEngineTool(
        query_engine=query_engine,
        metadata=ToolMetadata(
            name=tool_name,
            description=tool_description,
        ),
    )
def doclist_to_agent(doclist: List[FullDocument], fulldoc_tools_to_use: List[str]=['engine']) -> SubQuestionQueryEngine:  # ReActAgent:
    """Build a SubQuestionQueryEngine over the query engines of several documents.

    One tool is built per (tool name, document) pair, grouped by tool name in
    the order the names were requested.

    Args:
        doclist (List[FullDocument]): The documents to expose to the engine.
        fulldoc_tools_to_use (List[str]): Names of the FullDocument engines to
            wrap as tools. Names not in ``ALLOWED_DOCUMENT_TOOLS`` are skipped
            with a warning; duplicates are ignored. The default list is only
            read, never mutated.

    Returns:
        SubQuestionQueryEngine: The engine built from all document tools.

    Raises:
        ValueError: Propagated from ``_build_tool_from_fulldoc`` when a
            requested engine has not been created on a document.
    """
    import warnings  # local import: keeps this block self-contained

    # De-duplicate while preserving the caller's order, so the resulting tool
    # order is deterministic (a set intersection is not).
    requested_tools = list(dict.fromkeys(fulldoc_tools_to_use))

    # Remove any tools that are not in the allowed list
    tools_to_use = [name for name in requested_tools if name in ALLOWED_DOCUMENT_TOOLS]
    removed_tools = [name for name in requested_tools if name not in ALLOWED_DOCUMENT_TOOLS]
    if removed_tools:
        warnings.warn(
            f"Tools {removed_tools} are not in the allowed list of tools. Skipping...",
            stacklevel=2,
        )

    # Agent Tools
    # NOTE(review): every document's tool is given the same name (the tool
    # name itself). Confirm SubQuestionQueryEngine copes with duplicate tool
    # names when more than one document is supplied.
    agent_tools = [
        _build_tool_from_fulldoc(doc, tool)
        for tool in tools_to_use
        for doc in doclist
    ]

    # Agent
    # agent = ReActAgent.from_tools(
    agent = SubQuestionQueryEngine.from_defaults(
        # tools=agent_tools,
        query_engine_tools=agent_tools,
        llm=Settings.llm or ss.llm,
        verbose=True,
        # max_iterations=5
    )
    return agent