Spaces:

isl-research
/

KCE_NVIDIA

Sleeping

App Files Files Community

KCE_NVIDIA / app.py

teddyllm

Update app.py

ad24004 verified 4 months ago

raw

history blame contribute delete

5.03 kB

	import streamlit as st
	import os

	from langchain_community.vectorstores import FAISS
	from langchain_nvidia_ai_endpoints import NVIDIAEmbeddings
	from langchain.retrievers import ContextualCompressionRetriever
	from langchain.retrievers.document_compressors import FlashrankRerank
	from langchain_core.output_parsers import StrOutputParser
	from langchain_core.runnables import RunnablePassthrough
	from langchain_core.prompts import ChatPromptTemplate
	from langchain_nvidia_ai_endpoints import ChatNVIDIA


	def format_docs(docs):
	    """Join the text of retrieved documents into a single context string.

	    Args:
	        docs: Iterable of objects exposing a ``page_content`` string
	            attribute.

	    Returns:
	        The ``page_content`` of every document, in order, separated by a
	        blank line. An empty string when ``docs`` is empty.
	    """
	    # Debug trace of what was retrieved; written to the process's stdout.
	    print("-------- Documents ------------")
	    print(docs)
	    return "\n\n".join(doc.page_content for doc in docs)


	@st.cache_resource
	def _load_retrieval_stack():
	    """Build the embedding client, FAISS index and retrievers once.

	    Streamlit re-executes this script from the top on every user
	    interaction. Caching the result keeps the index from being re-read
	    from disk and the reranker from being re-created on each rerun.

	    Returns:
	        Tuple ``(embeddings, db, retriever, compressor,
	        compression_retriever)``.
	    """
	    embeddings = NVIDIAEmbeddings(model="nvidia/nv-embedqa-mistral-7b-v2")
	    # SECURITY: allow_dangerous_deserialization=True opts in to loading
	    # serialized (pickled) index data, which can execute code on load.
	    # Only acceptable while "faiss_index" is produced and shipped by this
	    # project itself; never point it at untrusted files.
	    db = FAISS.load_local(
	        "faiss_index", embeddings, allow_dangerous_deserialization=True
	    )
	    retriever = db.as_retriever()
	    compressor = FlashrankRerank()
	    compression_retriever = ContextualCompressionRetriever(
	        base_compressor=compressor, base_retriever=retriever
	    )
	    return embeddings, db, retriever, compressor, compression_retriever


	# Same module-level names as before, now backed by the cached objects.
	embeddings, db, retriever, compressor, compression_retriever = _load_retrieval_stack()

	# Text of the disclaimer; rendered inside a collapsible panel below the title.
	disclaimer_text = """
	We appreciate your engagement with our chatbot! We hope this chatbot can help you with the questions you have regarding with the KCE company.
	This chatbot is a demonstration preview. While the system is designed to provide helpful and informative responses by retrieving and generating relevant information, it is important to note the following:
	1. Potential for Inaccuracies: The chatbot may sometimes produce incorrect or misleading information. The responses generated by the LLM are based on patterns in the data it has been trained on and the information retrieved, which might not always be accurate or up-to-date.
	2. Hallucinations: The LLM might generate responses that seem plausible but are entirely fabricated. These "hallucinations" are a known limitation of current LLM technology and can occur despite the retrieval mechanism.\n
	By interacting with this chatbot, you acknowledge and accept these limitations and agree to use the information provided responsibly.
	"""

	# Page heading first, then the disclaimer panel.
	st.title("KCE Chatbot")
	disclaimer_panel = st.expander("Disclaimer", icon="ℹ️")
	with disclaimer_panel:
	    st.info(disclaimer_text)

	# Maps the label shown in the sidebar to the model identifier sent to the API.
	# Each label is the identifier without its "-instruct" / "-it" suffix, if any.
	models_dict = {
	    "meta/llama-3.1-405b": "meta/llama-3.1-405b-instruct",
	    "meta/llama-3.1-70b": "meta/llama-3.1-70b-instruct",
	    # Label previously read "meta/llama3.1-8b" (missing hyphen).
	    "meta/llama-3.1-8b": "meta/llama-3.1-8b-instruct",
	    "google/gemma-2-27b": "google/gemma-2-27b-it",
	    "google/gemma-7b": "google/gemma-7b",
	    "microsoft/phi-3-mini-128k": "microsoft/phi-3-mini-128k-instruct",
	    "microsoft/phi-3-medium-4k": "microsoft/phi-3-medium-4k-instruct",
	}


	# Sidebar model picker: offers every label in models_dict and echoes the
	# current choice underneath the widget.
	model_labels = tuple(models_dict)
	model = st.sidebar.selectbox("Choose model", model_labels, label_visibility="visible")
	st.sidebar.write(f"Selected model: {model}")



	def response_generator(message):
	    """Stream the assistant's answer to ``message`` through the RAG chain.

	    The chain retrieves context for the question, fills the chat prompt,
	    calls the chat model currently selected in the sidebar and parses the
	    output to plain text.

	    Args:
	        message: The user's question. It is used both as the retrieval
	            query and as the ``{question}`` prompt variable.

	    Yields:
	        Successive text chunks of the answer as the chain produces them
	        (each chunk is a delta, not the accumulated text).
	    """
	    # Built per call so a new sidebar selection applies to the next question.
	    llm = ChatNVIDIA(model=models_dict[model])
	    chat_prompt = ChatPromptTemplate.from_messages([
	        (
	            "system",
	            # Adjacent literals are concatenated as-is, so every sentence
	            # carries its own trailing space.
	            "You are a KCE chatbot, and you are assisting customers with the inquiries about the company. "
	            "Answer the questions with the provided context. Do not include based on the context or based on the documents in your answer. "
	            "Remember that your job is to represent KCE company. "
	            "Please say you do not know if you do not know or cannot find the information needed."
	            "\n Question: {question} \nContext: {context}",
	        ),
	        ("user", "{question}"),
	    ])

	    # NOTE(review): the module also builds `compression_retriever`
	    # (FlashrankRerank), but this chain queries the plain `retriever`.
	    # Confirm whether reranking was meant to be used here.
	    rag_chain = (
	        {"context": retriever | format_docs, "question": RunnablePassthrough()}
	        | chat_prompt
	        | llm
	        | StrOutputParser()
	    )
	    yield from rag_chain.stream(message)


	# Create the transcript the first time it is missing from the session state.
	if "messages" not in st.session_state:
	    st.session_state["messages"] = []
	chat_log = st.session_state["messages"]

	# Re-draw every stored turn so earlier messages stay visible after a rerun.
	for entry in chat_log:
	    bubble = st.chat_message(entry["role"])
	    with bubble:
	        st.markdown(entry["content"])

	# Handle a newly submitted question, if any.
	user_question = st.chat_input("Please type your question here")
	if user_question:
	    # Record and show the user's turn.
	    chat_log.append({"role": "user", "content": user_question})
	    with st.chat_message("user"):
	        st.markdown(user_question)

	    # Stream the answer into the assistant bubble, then record the full text.
	    with st.chat_message("assistant"):
	        response = st.write_stream(response_generator(user_question))
	    chat_log.append({"role": "assistant", "content": response})