"""Retrieval-augmented Gradio chatbot over a production PDF.

Pipeline: load a PDF, split it into overlapping chunks, embed the chunks
with a BGE model into a persisted Chroma store, and answer questions with a
Together-hosted Llama-2 chat model through a LangChain ``RetrievalQA`` chain.
A Gradio ``Blocks`` UI exposes the chain as a chat window.

Configuration (environment variables):
    TOGETHER_API_KEY  -- required; Together API key.
    EMBEDDING_DEVICE  -- optional; device passed to the embedding model
                         (defaults to ``cuda``).
"""

import os
import textwrap

import gradio as gr
import together
from langchain.chains import RetrievalQA
from langchain.document_loaders import DirectoryLoader, PyPDFLoader, TextLoader
from langchain.embeddings import HuggingFaceBgeEmbeddings
from langchain.llms import Together
from langchain.prompts import PromptTemplate
from langchain.schema import prompt
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.vectorstores import Chroma

# --- Credentials -----------------------------------------------------------
# The key must never be committed to source control; read it from the
# environment instead. NOTE(review): a key was previously hard-coded in this
# file -- treat it as compromised and revoke it.
TOGETHER_API_KEY = os.environ.get("TOGETHER_API_KEY")
if not TOGETHER_API_KEY:
    raise RuntimeError(
        "Set the TOGETHER_API_KEY environment variable before running this script."
    )
together.api_key = TOGETHER_API_KEY

# --- LLM -------------------------------------------------------------------
MODEL_NAME = "togethercomputer/llama-2-7b-chat"

# List available models and descriptions, then start the one we use.
models = together.Models.list()
together.Models.start(MODEL_NAME)

llm = Together(
    model=MODEL_NAME,
    temperature=0.7,
    max_tokens=128,
    top_k=1,
    together_api_key=TOGETHER_API_KEY,
)

# --- Document loading and chunking -----------------------------------------
loader = PyPDFLoader('/Production-Table - Sheet1 (2).pdf')
documents = loader.load()

# Split the documents into overlapping chunks for retrieval.
text_splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=200)
texts = text_splitter.split_documents(documents)

# --- Embeddings and vector store -------------------------------------------
model_name = "BAAI/bge-base-en"
encode_kwargs = {'normalize_embeddings': True}  # set True to compute cosine similarity

model_norm = HuggingFaceBgeEmbeddings(
    model_name=model_name,
    # Defaults to 'cuda' as before; override for machines without a GPU.
    model_kwargs={'device': os.environ.get("EMBEDDING_DEVICE", "cuda")},
    encode_kwargs=encode_kwargs,
)

# Embed and store the texts.
# Supplying a persist_directory will store the embeddings on disk.
persist_directory = 'db'
embedding = model_norm

vectordb = Chroma.from_documents(
    documents=texts,
    embedding=embedding,
    persist_directory=persist_directory,
)
retriever = vectordb.as_retriever(search_kwargs={"k": 5})

# --- Prompt ----------------------------------------------------------------
# Default Llama-2 chat prompt delimiters.
B_INST, E_INST = "[INST]", "[/INST]"
B_SYS, E_SYS = "<<SYS>>\n", "\n<</SYS>>\n\n"

DEFAULT_SYSTEM_PROMPT = (
    "You are a helpful, respectful and honest assistant of a production company. "
    "You should honestly answer the user's query using the knowledge of the "
    "company's production documents uploaded. \n"
    "If a question does not make any sense, or is not factually coherent, "
    "explain why instead of answering something not correct. "
    "If you don't know the answer to a question, please don't share false information."
)


def get_prompt(instruction: str, new_system_prompt: str = DEFAULT_SYSTEM_PROMPT) -> str:
    """Wrap *instruction* and a system prompt in the Llama-2 chat delimiters.

    Args:
        instruction: The user-turn text (may contain template placeholders).
        new_system_prompt: System prompt placed between the system markers.

    Returns:
        ``B_INST + B_SYS + new_system_prompt + E_SYS + instruction + E_INST``.
    """
    system_section = B_SYS + new_system_prompt + E_SYS
    return B_INST + system_section + instruction + E_INST


sys_prompt = (
    "You are a helpful, respectful and honest assistant of a production company. "
    "You should honestly answer the user's query using the knowledge of the "
    "company's production documents uploaded. "
    "If a question does not make any sense, or is not factually coherent, "
    "explain why instead of answering something not correct. "
    "If you don't know the answer to a question, please don't share false information."
)

# Real newlines here; the earlier template contained literal "/n" sequences.
instruction = """CONTEXT:\n\n {context}\n

Question: {question}"""

prompt_template = get_prompt(instruction, sys_prompt)

llama_prompt = PromptTemplate(
    template=prompt_template,
    input_variables=["context", "question"],
)

# Hand the custom prompt to the "stuff" chain; without this the prompt built
# above is never used.
chain_type_kwargs = {"prompt": llama_prompt}

# Create the chain to answer questions.
qa_chain = RetrievalQA.from_chain_type(
    llm=llm,
    chain_type="stuff",
    retriever=retriever,
    chain_type_kwargs=chain_type_kwargs,
    return_source_documents=True,
)


# --- Output helpers --------------------------------------------------------
def wrap_text_preserve_newlines(text: str, width: int = 110) -> str:
    """Wrap each line of *text* to *width* columns, keeping existing newlines.

    Args:
        text: Text to wrap.
        width: Maximum line width passed to ``textwrap.fill``.

    Returns:
        The text with every original line wrapped individually and re-joined
        with newline characters.
    """
    return '\n'.join(textwrap.fill(line, width=width) for line in text.split('\n'))


def process_llm_response(llm_response) -> None:
    """Print the wrapped answer followed by the source of each cited document.

    Args:
        llm_response: Mapping with a ``'result'`` string and a
            ``'source_documents'`` iterable whose items expose
            ``metadata['source']``.
    """
    print(wrap_text_preserve_newlines(llm_response['result']))
    print('\n\nSources:')
    for source in llm_response["source_documents"]:
        print(source.metadata['source'])


# --- Gradio UI -------------------------------------------------------------
with gr.Blocks() as demo:
    chatbot = gr.Chatbot()
    msg = gr.Textbox()
    clear = gr.Button("Clear")

    def user(user_message, history):
        """Append the user's message to the history and clear the textbox."""
        return "", history + [[user_message, None]]

    def bot(history):
        """Answer the latest user message and store the reply in the history."""
        question = history[-1][0]
        print("Question: ", question)
        bot_message = wrap_text_preserve_newlines(qa_chain(question)['result'])
        print("Response: ", bot_message)
        history[-1][1] = bot_message
        return history

    # First record the user turn (unqueued), then generate the bot reply.
    msg.submit(user, [msg, chatbot], [msg, chatbot], queue=False).then(
        bot, chatbot, chatbot
    )
    clear.click(lambda: None, None, chatbot, queue=False)

demo.queue()
demo.launch(debug=True)