import logging

from llama_index.core import ServiceContext, set_global_service_context, PromptTemplate
from llama_index.core.base.embeddings.base import BaseEmbedding
from llama_index.core.base.llms.base import BaseLLM
from llama_index.core.base.llms.generic_utils import messages_to_history_str
from llama_index.core.base.llms.types import ChatMessage, MessageRole
from llama_index.core.chat_engine.types import ChatMode
from llama_index.embeddings.mistralai import MistralAIEmbedding
from llama_index.embeddings.openai import OpenAIEmbedding
from llama_index.llms.mistralai import MistralAI
from llama_index.llms.openai import OpenAI

llm: BaseLLM
embed_model: BaseEmbedding

logger = logging.getLogger("agent_logger")


# TODO why is my system prompt being ignored?
def set_llm(model, key, temperature):
    """Configure the global LLM and matching embedding model for the given model name."""
    global llm
    global embed_model
    logger.info(f'Setting up LLM with {model} and associated embedding model...')
    if "gpt" in model:
        llm = OpenAI(api_key=key, temperature=temperature, model=model)
        embed_model = OpenAIEmbedding(api_key=key)
    elif "mistral" in model:
        llm = MistralAI(api_key=key, model=model, temperature=temperature, safe_mode=True)
        embed_model = MistralAIEmbedding(api_key=key)
    else:
        # Unrecognised model name: fall back to a default OpenAI model
        llm = OpenAI(api_key=key, model="gpt-3.5-turbo", temperature=0)
        embed_model = OpenAIEmbedding(api_key=key)
    # deprecated call, should migrate
    service_context = ServiceContext.from_defaults(llm=llm, embed_model=embed_model)
    set_global_service_context(service_context)
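

# Hedged migration sketch for the deprecated ServiceContext call above: in
# llama_index >= 0.10 the documented replacement is the global Settings object,
# so the last two lines of set_llm could become something like:
#
#   from llama_index.core import Settings
#   Settings.llm = llm
#   Settings.embed_model = embed_model
#
# Left as a comment here so the existing behaviour is unchanged.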


def get_llm():
    return llm


def generate_query_response(index, message):
    """Stream a RAG response to a single query, yielding the accumulated text so far."""
    string_output = ""
    logger.info("Creating query engine with index...")
    query_engine = index.as_query_engine(streaming=True, chat_mode=ChatMode.CONDENSE_QUESTION)
    logger.info(f'Input user message: {message}')
    response = query_engine.query(f"Write a comprehensive but concise response to this query, if conversation history "
                                  f"is irrelevant to this query, ignore conversation history: \n '{message}'")
    response_text = []
    for text in response.response_gen:
        response_text.append(text)
        string_output = ''.join(response_text)
        yield string_output
    logger.info(f'Assistant response: {string_output}')


def generate_chat_response_with_history(message, history):
    """Stream a plain LLM chat response (no retrieval), yielding the accumulated text so far."""
    string_output = ""
    messages = collect_history(message, history)
    response = llm.stream_chat(messages)
    response_text = []
    for text in response:
        response_text.append(text.delta)
        string_output = ''.join(response_text)
        yield string_output
    logger.info(f'Assistant response: {string_output}')


def generate_chat_response_with_history_rag_return_response(index, message, history):
    """Return the streaming chat engine response for a RAG query with conversation history."""
    logger.info("Generating chat response with history and rag...")
    message = (f"Write a comprehensive but concise response to this query, if conversation history is irrelevant to "
               f"this query, ignore conversation history: \n '{message}'")
    messages = collect_history(message, history)
    logger.info("Creating query engine with index...")
    query_engine = index.as_chat_engine(chat_mode=ChatMode.CONDENSE_QUESTION, streaming=True)
    # stream_chat expects the latest message as a string and the earlier turns as chat_history
    return query_engine.stream_chat(message, chat_history=messages[:-1])


def generate_chat_response_with_history_rag_yield_string(index, message, history):
    """Stream a RAG chat response with conversation history, yielding the accumulated text so far."""
    logger.info("Generating chat response with history and rag...")
    message = (f"Write a comprehensive but concise response to this query, if conversation history is irrelevant to "
               f"this query, ignore conversation history: \n '{message}'")
    string_output = ""
    messages = collect_history(message, history)
    logger.info("Creating query engine with index...")
    query_engine = index.as_chat_engine(chat_mode=ChatMode.CONDENSE_QUESTION, streaming=True)
    # stream_chat expects the latest message as a string and the earlier turns as chat_history
    response = query_engine.stream_chat(message, chat_history=messages[:-1])
    response_text = []
    for text in response.response_gen:
        response_text.append(text)
        string_output = ''.join(response_text)
        yield string_output
    logger.info(f'Assistant response: {string_output}')


def is_greeting(message):
    """Ask the LLM whether the user message is a greeting."""
    response = llm.complete(
        f'Is the user message a greeting? Answer True or False only. For example: \n User message: "Hello" \n '
        f'Assistant response: True \n User message: "Where do pears grow?" \n Assistant response: False \n '
        f'User message: "{message}"')
    if any(x in response.text.lower() for x in ["true", "yes", "is a greeting"]):
        return True
    return False


def is_closing(message):
    # TODO
    return False


def is_search_query(message):
    """Ask the LLM whether the user message is a request for factual information."""
    response = llm.complete(
        f'Is the user message a request for factual information? Answer True or False only. For example: \n User '
        f'message: "Where do watermelons grow?" \n Assistant response: True \n User message: "Do you like watermelons?" '
        f'\n Assistant response: False \n User message: "Hello" \n Assistant response: False \n User message: "My code '
        f'is not working. How do I implement logging correctly in python?" \n Assistant response: True \n User '
        f'message: "{message}"')
    if any(x in response.text.lower() for x in ["true", "yes", "is a request"]):
        logger.info(f'Message: {message} is a request...')
        return True
    return False


def collect_history(message, history):
    """Convert (user, assistant) message pairs into ChatMessages and append the new user message."""
    logger.info(f'Input user message: {message}')

    def message_generator():
        messages = []
        logger.info("Fetching message history...")
        for message_pair in history:
            if message_pair[0] is not None:
                messages.append(ChatMessage(role=MessageRole.USER, content=message_pair[0]))
            if message_pair[1] is not None:
                messages.append(ChatMessage(role=MessageRole.ASSISTANT, content=message_pair[1]))
        logger.info(f'{len(messages)} messages in message history...')
        return messages

    messages = message_generator()
    messages.append(ChatMessage(role=MessageRole.USER, content=message))
    return messages


def google_question(message, history):
    """Condense the message and chat history into a standalone web search query."""
    DEFAULT_TEMPLATE = """\
Given a conversation (between Human and Assistant) and a follow up message from Human, \
rewrite the message to be a standalone web engine search query that captures all relevant context \
from the conversation in keywords if the previous conversation is relevant to the new message.

<Chat History>
{chat_history}

<Follow Up Message>
{question}

<Standalone question>
"""
    condense_question_prompt = PromptTemplate(DEFAULT_TEMPLATE)
    messages = collect_history(message, history)
    chat_history_str = messages_to_history_str(messages)
    question = llm.predict(condense_question_prompt, question=message, chat_history=chat_history_str)
    return question


def condense_context(context):
    logger.info("Condensing input text with LLM complete...")
    response = llm.complete(f'Rewrite the input to be a concise summary that captures '
                            f'all relevant context from the original text. \n'
                            f'Original Text: {context}')
    return response.text
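

# Illustrative usage sketch (assumptions: an API key is available and an index has
# been built elsewhere, e.g. via VectorStoreIndex.from_documents). Kept as comments
# since this module is intended to be imported by the app rather than run directly:
#
#   set_llm("gpt-3.5-turbo", api_key, temperature=0)
#   history = []  # list of (user_message, assistant_message) pairs
#   for partial in generate_chat_response_with_history("Hello!", history):
#       print(partial)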