Spaces:
Runtime error
Runtime error
Sathyapriyaa
committed on
Commit
•
58cb744
1
Parent(s):
6185eca
Upload 3 files
Browse files- Production-Table - Sheet1 (2).pdf +0 -0
- app.py +142 -0
- requirements.txt +9 -0
Production-Table - Sheet1 (2).pdf
ADDED
Binary file (32.5 kB). View file
|
|
app.py
ADDED
@@ -0,0 +1,142 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import os

import together

# Read the Together API key from the environment instead of committing it to
# the repository. On Hugging Face Spaces, define TOGETHER_API_KEY as a secret.
# NOTE(review): any key that was previously committed to this repository
# should be treated as leaked and revoked.
TOGETHER_API_KEY = os.environ.get("TOGETHER_API_KEY")
if not TOGETHER_API_KEY:
    raise RuntimeError(
        "TOGETHER_API_KEY is not set; export it (or add it as a Space secret) "
        "before starting the app."
    )

# set your API key
together.api_key = TOGETHER_API_KEY

# list available models and descriptions
models = together.Models.list()

together.Models.start("togethercomputer/llama-2-7b-chat")

from langchain.llms import Together


# LangChain wrapper around the same hosted model; it reuses the key read above.
llm = Together(
    model="togethercomputer/llama-2-7b-chat",
    temperature=0.7,
    max_tokens=128,
    top_k=1,
    together_api_key=TOGETHER_API_KEY,
)
|
21 |
+
|
22 |
+
from pathlib import Path

from langchain.vectorstores import Chroma
from langchain.text_splitter import RecursiveCharacterTextSplitter

from langchain.chains import RetrievalQA
from langchain.document_loaders import TextLoader
from langchain.document_loaders import PyPDFLoader
from langchain.document_loaders import DirectoryLoader

# The PDF is uploaded alongside app.py, so resolve it relative to this file.
# The previous absolute path ('/Production-Table ...') pointed at the
# filesystem root, where the file does not exist.
PDF_PATH = Path(__file__).resolve().parent / "Production-Table - Sheet1 (2).pdf"
loader = PyPDFLoader(str(PDF_PATH))

documents = loader.load()

# Split the loaded pages into overlapping chunks (1000 characters each,
# 200 characters of overlap) before embedding them.
text_splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=200)
texts = text_splitter.split_documents(documents)
|
36 |
+
from langchain.embeddings import HuggingFaceBgeEmbeddings

# torch is pulled in by sentence_transformers; it is used here only to detect
# whether a GPU is present.
import torch

model_name = "BAAI/bge-base-en"
encode_kwargs = {'normalize_embeddings': True}  # set True to compute cosine similarity

# Hard-coding 'cuda' fails on CPU-only hardware, so fall back to the CPU when
# no CUDA device is available.
embedding_device = 'cuda' if torch.cuda.is_available() else 'cpu'

model_norm = HuggingFaceBgeEmbeddings(
    model_name=model_name,
    model_kwargs={'device': embedding_device},
    encode_kwargs=encode_kwargs,
)
|
46 |
+
|
47 |
+
|
48 |
+
# Embed and store the texts
|
49 |
+
# Supplying a persist_directory will store the embeddings on disk
|
50 |
+
|
51 |
+
# Directory handed to Chroma as its persist_directory.
persist_directory = 'db'

# The BGE embedding model built earlier in this file is the embedding function.
embedding = model_norm

# Build the Chroma vector store from the split document chunks.
vectordb = Chroma.from_documents(
    documents=texts,
    embedding=embedding,
    persist_directory=persist_directory,
)

# Expose the store as a retriever configured with k=5 results per query.
retriever = vectordb.as_retriever(search_kwargs={"k": 5})
|
61 |
+
## Default LLaMA-2 prompt style
|
62 |
+
# Delimiters of the LLaMA-2 chat prompt format.
B_INST = "[INST]"
E_INST = "[/INST]"
B_SYS = "<<SYS>>\n"
E_SYS = "\n<</SYS>>\n\n"

# System prompt used when the caller does not supply one.
DEFAULT_SYSTEM_PROMPT = (
    "You are a helpful, respectful and honest assistant of a production company. You should honestly answer the user's query using the knowledge of the company's production documents uploaded.\n"
    "\n"
    "If a question does not make any sense, or is not factually coherent, explain why instead of answering something not correct. If you don't know the answer to a question, please don't share false information."
)


def get_prompt(instruction, new_system_prompt=DEFAULT_SYSTEM_PROMPT):
    """Build a LLaMA-2 style prompt string.

    The system prompt is wrapped in the ``<<SYS>>`` markers, followed by the
    instruction, and the whole text is enclosed in ``[INST]`` / ``[/INST]``.

    Args:
        instruction: Instruction text placed after the system section.
        new_system_prompt: System prompt text; defaults to
            ``DEFAULT_SYSTEM_PROMPT``.

    Returns:
        The assembled prompt string.
    """
    pieces = (B_INST, B_SYS, new_system_prompt, E_SYS, instruction, E_INST)
    return "".join(pieces)
|
73 |
+
|
74 |
+
# The system prompt for this app is the module-level default; reuse it rather
# than keeping a second copy of the same text.
sys_prompt = DEFAULT_SYSTEM_PROMPT

# Template body filled in by the chain: the retrieved context followed by the
# user's question. Real newlines are used here; the previous text contained
# the literal characters "/n" instead.
instruction = "CONTEXT:\n\n{context}\n\nQuestion: {question}"

from langchain.prompts import PromptTemplate
prompt_template = get_prompt(instruction, sys_prompt)

llama_prompt = PromptTemplate(
    template=prompt_template, input_variables=["context", "question"]
)
from langchain.schema import prompt

# Pass the LLaMA-2 formatted prompt to the "stuff" chain. This name was
# previously referenced without being defined, which raised NameError when
# the script ran.
chain_type_kwargs = {"prompt": llama_prompt}

# create the chain to answer questions
qa_chain = RetrievalQA.from_chain_type(
    llm=llm,
    chain_type="stuff",
    retriever=retriever,
    chain_type_kwargs=chain_type_kwargs,
    return_source_documents=True,
)
|
96 |
+
## Cite sources
|
97 |
+
|
98 |
+
import textwrap
|
99 |
+
|
100 |
+
def wrap_text_preserve_newlines(text, width=110):
    """Wrap ``text`` to ``width`` columns while keeping its existing newlines.

    The text is split on ``'\\n'``, each piece is wrapped on its own with
    ``textwrap.fill``, and the pieces are joined back with ``'\\n'``.

    Args:
        text: The string to wrap.
        width: Maximum line width passed to ``textwrap.fill``.

    Returns:
        The wrapped string.
    """
    return '\n'.join(
        textwrap.fill(segment, width=width) for segment in text.split('\n')
    )
|
111 |
+
|
112 |
+
def process_llm_response(llm_response):
    """Print the chain's answer followed by the source of each document.

    Args:
        llm_response: Mapping with a ``'result'`` string and a
            ``"source_documents"`` iterable whose items expose
            ``metadata['source']``.
    """
    answer = wrap_text_preserve_newlines(llm_response['result'])
    print(answer)
    print('\n\nSources:')
    for document in llm_response["source_documents"]:
        origin = document.metadata['source']
        print(origin)
|
117 |
+
import gradio as gr
|
118 |
+
|
119 |
+
with gr.Blocks() as demo:
    chatbot = gr.Chatbot()
    msg = gr.Textbox()
    clear = gr.Button("Clear")

    def user(user_message, history):
        """Append the submitted message to the history as an unanswered turn.

        Returns an empty string (sent to the textbox output) and the history
        extended with ``[user_message, None]``.
        """
        pending_turn = [user_message, None]
        return "", history + [pending_turn]

    def bot(history):
        """Answer the most recent question and store the reply in the history."""
        question = history[-1][0]
        print("Question: ", question)
        chain_output = qa_chain(question)
        bot_message = wrap_text_preserve_newlines(chain_output['result'])
        print("Response: ", bot_message)
        history[-1][1] = bot_message
        return history

    # First record the user's turn (unqueued), then let the bot fill it in.
    submit_event = msg.submit(user, [msg, chatbot], [msg, chatbot], queue=False)
    submit_event.then(bot, chatbot, chatbot)
    # The Clear button sends None to the chatbot component.
    clear.click(lambda: None, None, chatbot, queue=False)

# NOTE(review): the original indentation was lost; queue/launch are assumed to
# sit outside the Blocks context — confirm.
demo.queue()
demo.launch(debug=True)
|
requirements.txt
ADDED
@@ -0,0 +1,9 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
langchain
|
2 |
+
huggingface_hub
|
3 |
+
tiktoken
|
4 |
+
chromadb
|
5 |
+
PyPDF2
|
6 |
+
pypdf
|
7 |
+
sentence_transformers
|
8 |
+
together
|
9 |
+
gradio
|