mlara's picture
first commit
f4b691b
raw
history blame
2.34 kB
from aimakerspace.text_utils import TextFileLoader, CharacterTextSplitter
from aimakerspace.vectordatabase import VectorDatabase
from aimakerspace.openai_utils.prompts import (
UserRolePrompt,
SystemRolePrompt,
AssistantRolePrompt,
)
from aimakerspace.openai_utils.chatmodel import ChatOpenAI
import asyncio
import nest_asyncio
nest_asyncio.apply()
# Paths of the text files that _split_documents() loads and chunks before
# they are indexed in the vector database.
TEXT_DOCUMENTS = [
"data/KingLear.txt",
]
# System-role template for retrieval-augmented question answering.
# The {context} placeholder is filled via create_message(context=...) with
# the retrieved passages; the model is told to reply "I don't know" when the
# context does not cover the query.
RAQA_PROMPT_TEMPLATE = """
Use the provided context to answer the user's query.
You may not answer the user's query unless there is specific context in the following text.
If you do not know the answer, or cannot answer, please respond with "I don't know".
Context:
{context}
"""
# Prompt object built once at import time and reused for every query.
raqa_prompt = SystemRolePrompt(RAQA_PROMPT_TEMPLATE)
# User-role template; {user_query} is filled via
# create_message(user_query=...) with the question exactly as asked.
USER_PROMPT_TEMPLATE = """
User Query:
{user_query}
"""
# Prompt object built once at import time and reused for every query.
user_prompt = UserRolePrompt(USER_PROMPT_TEMPLATE)
class RetrievalAugmentedQAPipeline:
    """Answer a user query with a chat model grounded in retrieved passages.

    The pipeline looks up the passages most similar to the query in a vector
    store, places them into the system prompt, and sends the system and user
    messages to the chat model.
    """

    def __init__(self, llm: "ChatOpenAI", vector_db_retriever: "VectorDatabase") -> None:
        """Store the collaborators used by ``run_pipeline``.

        Args:
            llm: Chat client; only its ``run(messages)`` method is used here.
            vector_db_retriever: Store queried through
                ``search_by_text(query, k=...)``.
        """
        # The annotations are quoted class names on purpose. The previous
        # annotation ``ChatOpenAI()`` was a call expression, and annotation
        # expressions are evaluated when the ``def`` statement runs, so a
        # throwaway client was constructed as a side effect of importing
        # this module.
        self.llm = llm
        self.vector_db_retriever = vector_db_retriever

    @staticmethod
    def _build_context(context_list) -> str:
        """Join the first item of every search hit, one hit per line.

        Each hit is indexed with ``[0]`` to obtain its text (presumably hits
        are ``(text, score)`` pairs -- confirm against
        ``VectorDatabase.search_by_text``). Every text is followed by a
        newline; an empty hit list yields an empty string.
        """
        return "".join(hit[0] + "\n" for hit in context_list)

    def run_pipeline(self, user_query: str, k: int = 4) -> str:
        """Retrieve context for ``user_query`` and ask the chat model.

        Args:
            user_query: The question, inserted verbatim into the user prompt.
            k: Number of passages requested from the vector store. Defaults
                to 4, the value that used to be hard-coded.

        Returns:
            Whatever ``llm.run`` returns for the two-message conversation
            (annotated as ``str`` by the original code).
        """
        context_list = self.vector_db_retriever.search_by_text(user_query, k=k)
        context_prompt = self._build_context(context_list)

        formatted_system_prompt = raqa_prompt.create_message(context=context_prompt)
        formatted_user_prompt = user_prompt.create_message(user_query=user_query)

        return self.llm.run([formatted_system_prompt, formatted_user_prompt])
def _split_documents():
    """Load every file in ``TEXT_DOCUMENTS`` and return all of its chunks.

    Each path is read with a ``TextFileLoader`` and the loaded texts are cut
    up by a ``CharacterTextSplitter`` created with its default settings. The
    chunks of all files are collected, in file order, into one flat list.
    """
    chunks = []
    for path in TEXT_DOCUMENTS:
        # Read the file first, then split what was loaded.
        loaded_texts = TextFileLoader(path).load_documents()
        chunks += CharacterTextSplitter().split_texts(loaded_texts)
    return chunks
def _build_vector_db():
    """Create a ``VectorDatabase`` and populate it from the split documents.

    The asynchronous ``abuild_from_list`` coroutine is driven to completion
    with ``asyncio.run`` and its result is returned (presumably the populated
    database -- confirm against ``VectorDatabase.abuild_from_list``).
    """
    database = VectorDatabase()
    chunks = _split_documents()
    build_coroutine = database.abuild_from_list(chunks)
    return asyncio.run(build_coroutine)
def retrieval_augmented_qa_pipeline(client):
    """Build a ready-to-use ``RetrievalAugmentedQAPipeline``.

    A fresh vector database is built from the configured documents and
    paired with ``client``, which is passed to the pipeline as its ``llm``.
    """
    retriever = _build_vector_db()
    return RetrievalAugmentedQAPipeline(llm=client, vector_db_retriever=retriever)