Spaces:
Sleeping
Sleeping
File size: 6,631 Bytes
6855b1e |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 |
import logging
from llama_index.core import ServiceContext, set_global_service_context, PromptTemplate
from llama_index.core.base.embeddings.base import BaseEmbedding
from llama_index.core.base.llms.base import BaseLLM
from llama_index.core.base.llms.generic_utils import messages_to_history_str
from llama_index.core.base.llms.types import ChatMessage, MessageRole
from llama_index.core.chat_engine.types import ChatMode
from llama_index.embeddings.mistralai import MistralAIEmbedding
from llama_index.embeddings.openai import OpenAIEmbedding
from llama_index.llms.mistralai import MistralAI
from llama_index.llms.openai import OpenAI
llm: BaseLLM
embed_model: BaseEmbedding
logger = logging.getLogger("agent_logger")
# TODO why is my system prompt being ignored?
def set_llm(model, key, temperature):
global llm
global embed_model
logger.info(f'Setting up LLM with {model} and associated embedding model...')
if "gpt" in model:
llm = OpenAI(api_key=key, temperature=temperature, model=model)
embed_model = OpenAIEmbedding(api_key=key)
elif "mistral" in model:
llm = MistralAI(api_key=key, model=model, temperature=temperature, safe_mode=True)
embed_model = MistralAIEmbedding(api_key=key)
else:
llm = OpenAI(api_key=key, model="gpt-3.5-turbo", temperature=0)
embed_model = OpenAIEmbedding(api_key=key)
# deprecated call should migrate
service_context = ServiceContext.from_defaults(llm=llm, embed_model=embed_model)
set_global_service_context(service_context)
def get_llm():
return llm
def generate_query_response(index, message):
string_output = ""
logger.info("Creating query engine with index...")
query_engine = index.as_query_engine(streaming=True, chat_mode=ChatMode.CONDENSE_QUESTION)
logger.info(f'Input user message: {message}')
response = query_engine.query(message)
response_text = []
for text in response.response_gen:
response_text.append(text)
string_output = ''.join(response_text)
yield string_output
logger.info(f'Assistant response: {string_output}')
def generate_chat_response_with_history(message, history):
string_output = ""
messages = collect_history(message, history)
response = llm.stream_chat(messages)
response_text = []
for text in response:
response_text.append(text.delta)
string_output = ''.join(response_text)
yield string_output
logger.info(f'Assistant response: {string_output}')
def generate_chat_response_with_history_rag_return_response(index, message, history):
logger.info("Generating chat response with history and rag...")
messages = collect_history(message, history)
logger.info("Creating query engine with index...")
query_engine = index.as_chat_engine(chat_mode=ChatMode.CONDENSE_QUESTION, streaming=True)
return query_engine.stream_chat(messages)
def generate_chat_response_with_history_rag_yield_string(index, message, history):
logger.info("Generating chat response with history and rag...")
string_output = ""
messages = collect_history(message, history)
logger.info("Creating query engine with index...")
query_engine = index.as_chat_engine(chat_mode=ChatMode.CONDENSE_QUESTION, streaming=True)
response = query_engine.stream_chat(messages)
response_text = []
for text in response.response_gen:
response_text.append(text)
string_output = ''.join(response_text)
yield string_output
logger.info(f'Assistant response: {string_output}')
def is_greeting(message):
response = llm.complete(
f'Is the user message a greeting? Answer True or False only. For example: \n User message: "Hello" \n '
f'Assistant response: True \n User message "Where do pears grow?" Assistant response: False \n. User message: "{message}"')
if any(x in response.text.lower() for x in ["true", "yes", "is a greeting"]):
return True
return False
def is_closing(message):
# TODO
return False
def is_search_query(message):
response = llm.complete(
f'Is the user message a request for factual information? Answer True or False only. For example: \n User '
f'message: "Where do watermelons grow?" \n Assistant response: True \n User message "Do you like watermelons?" '
f'Assistant response: False \n. User message: "Hello" \n Assistant response: False \n User message: "My code '
f'is not working. How do I implement logging correctly in python?" \n Assistant response: True \n User '
f'message: "{message}"')
if any(x in response.text.lower() for x in ["true", "yes", "is a request"]):
logger.info(f'Message: {message} is a request...')
return True
return False
def collect_history(message, history):
logger.info(f'Input user message: {message}')
def message_generator():
messages = []
logger.info("Fetching message history...")
for message_pair in history:
if message_pair[0] is not None:
messages.append(ChatMessage(role=MessageRole.USER, content=message_pair[0]))
if message_pair[1] is not None:
messages.append(ChatMessage(role=MessageRole.ASSISTANT, content=message_pair[1]))
logger.info(f'{len(messages)} messages in message history...')
return messages
messages = message_generator()
messages.append(ChatMessage(role=MessageRole.USER, content=message))
return messages
def condense_question(message, history):
DEFAULT_TEMPLATE = """\
Given a conversation (between Human and Assistant) and a follow up message from Human, \
rewrite the message to be a standalone question that captures all relevant context \
from the conversation.
<Chat History>
{chat_history}
<Follow Up Message>
{question}
<Standalone question>
"""
condense_question_prompt = PromptTemplate(DEFAULT_TEMPLATE)
messages = collect_history(message, history)
chat_history_str = messages_to_history_str(messages)
question = llm.predict(condense_question_prompt, question=message, chat_history=chat_history_str)
return question
def condense_context(context):
logger.info("Condensing input text with LLM complete...")
response = llm.complete(f'Rewrite the input to be a concise summary that captures '
f'all relevant context from the original text. \n'
f'Original Text: {context}')
return response.text
|