Spaces:

mlara
/

llm-cohort3-week-1-2

Sleeping

App Files Files Community

llm-cohort3-week-1-2 / rag.py

mlara

first commit

f4b691b about 1 year ago

raw

history blame

2.34 kB

	from aimakerspace.text_utils import TextFileLoader, CharacterTextSplitter
	from aimakerspace.vectordatabase import VectorDatabase
	from aimakerspace.openai_utils.prompts import (
	UserRolePrompt,
	SystemRolePrompt,
	AssistantRolePrompt,
	)
	from aimakerspace.openai_utils.chatmodel import ChatOpenAI

	import asyncio
	import nest_asyncio
	nest_asyncio.apply()


	# Paths of the text files that _split_documents() loads and splits.
	TEXT_DOCUMENTS = [
	"data/KingLear.txt",
	]

	# System-prompt template. It tells the model to answer only from the
	# supplied context and to reply "I don't know" otherwise. The {context}
	# placeholder is filled through raqa_prompt.create_message(context=...).
	RAQA_PROMPT_TEMPLATE = """
	Use the provided context to answer the user's query.

	You may not answer the user's query unless there is specific context in the following text.

	If you do not know the answer, or cannot answer, please respond with "I don't know".

	Context:
	{context}
	"""

	# Prompt object wrapping the template above in the system role.
	raqa_prompt = SystemRolePrompt(RAQA_PROMPT_TEMPLATE)

	# User-prompt template. The {user_query} placeholder is filled through
	# user_prompt.create_message(user_query=...).
	USER_PROMPT_TEMPLATE = """
	User Query:
	{user_query}
	"""

	# Prompt object wrapping the template above in the user role.
	user_prompt = UserRolePrompt(USER_PROMPT_TEMPLATE)

	class RetrievalAugmentedQAPipeline:
	    """Answer user queries with a chat model grounded in retrieved context.

	    For each query the pipeline searches the vector database for related
	    entries, places them in the system prompt, and sends the system and
	    user messages to the chat model.
	    """

	    # The annotations are quoted so they are not evaluated when the function
	    # is defined. An annotation written as a call expression (``ChatOpenAI()``)
	    # would construct a client every time this class body executes, i.e. on
	    # import of the module.
	    def __init__(self, llm: "ChatOpenAI", vector_db_retriever: "VectorDatabase") -> None:
	        """Store the collaborators used by ``run_pipeline``.

	        Args:
	            llm: Chat model; only its ``run(messages)`` method is called here.
	            vector_db_retriever: Store queried through ``search_by_text``.
	        """
	        self.llm = llm
	        self.vector_db_retriever = vector_db_retriever

	    def run_pipeline(self, user_query: str) -> str:
	        """Retrieve context for ``user_query`` and ask the chat model.

	        Args:
	            user_query: The question to answer.

	        Returns:
	            Whatever ``self.llm.run`` returns for the system + user messages.
	        """
	        # Ask the retriever for the 4 best matches for the query.
	        context_list = self.vector_db_retriever.search_by_text(user_query, k=4)

	        # Element [0] of every hit is used as its text (presumably the hits
	        # are (text, score) pairs -- confirm against VectorDatabase). Each
	        # text is terminated by a newline, including the last one.
	        context_prompt = "".join(context[0] + "\n" for context in context_list)

	        formatted_system_prompt = raqa_prompt.create_message(context=context_prompt)
	        formatted_user_prompt = user_prompt.create_message(user_query=user_query)

	        return self.llm.run([formatted_system_prompt, formatted_user_prompt])

	def _split_documents():
	    """Load every file in TEXT_DOCUMENTS and return all of its splits.

	    Each path is read with ``TextFileLoader(...).load_documents()`` and the
	    loaded texts are passed to ``CharacterTextSplitter().split_texts``. The
	    results for all files are collected, in order, into one flat list.
	    """
	    chunks = []
	    for path in TEXT_DOCUMENTS:
	        loaded_texts = TextFileLoader(path).load_documents()
	        # A fresh splitter with default settings is used for every file.
	        chunks += CharacterTextSplitter().split_texts(loaded_texts)
	    return chunks

	def _build_vector_db():
	    """Create a VectorDatabase and populate it from the split documents.

	    Returns:
	        The value produced by awaiting ``abuild_from_list`` on a new
	        ``VectorDatabase`` with the output of ``_split_documents()``.
	    """
	    database = VectorDatabase()
	    chunks = _split_documents()
	    # abuild_from_list is awaited to completion via asyncio.run.
	    build_coro = database.abuild_from_list(chunks)
	    return asyncio.run(build_coro)

	def retrieval_augmented_qa_pipeline(client):
	    """Build a ready-to-use RetrievalAugmentedQAPipeline around ``client``.

	    Args:
	        client: Object handed to the pipeline as its ``llm``.

	    Returns:
	        A pipeline whose retriever is the database from ``_build_vector_db()``.
	    """
	    retriever = _build_vector_db()
	    return RetrievalAugmentedQAPipeline(llm=client, vector_db_retriever=retriever)