Commit 291f7b1 by Harrison Chase (1 parent: 1637fcf)
Commit message: cr

Files changed:
- app.py +102 -0
- cli_app.py +17 -0
- ingest_data.py +23 -0
- query_data.py +34 -0
- requirements.txt +5 -0
- vectorstore.pkl +0 -0
app.py
ADDED
@@ -0,0 +1,102 @@
import os
from typing import List, Optional, Tuple

import gradio as gr
import pickle
from query_data import get_chain
from threading import Lock

# Load the FAISS index built by ingest_data.py.
with open("vectorstore.pkl", "rb") as f:
    vectorstore = pickle.load(f)


def set_openai_api_key(api_key: str):
    """Set the api key and return chain.
    If no api_key, then None is returned.
    """
    if api_key:
        os.environ["OPENAI_API_KEY"] = api_key
        chain = get_chain(vectorstore)
        # Clear the key from the shared environment once the chain holds it.
        os.environ["OPENAI_API_KEY"] = ""
        return chain


class ChatWrapper:

    def __init__(self):
        self.lock = Lock()

    def __call__(
        self, api_key: str, inp: str, history: Optional[List[Tuple[str, str]]], chain
    ):
        """Execute the chat functionality."""
        self.lock.acquire()
        try:
            history = history or []
            # If chain is None, that is because no API key was provided.
            if chain is None:
                history.append((inp, "Please paste your OpenAI key to use"))
                return history, history
            # Set OpenAI key
            import openai

            openai.api_key = api_key
            # Run chain and append input.
            output = chain({"question": inp, "chat_history": history})["answer"]
            history.append((inp, output))
        except Exception as e:
            raise e
        finally:
            self.lock.release()
        return history, history


chat = ChatWrapper()

block = gr.Blocks(css=".gradio-container {background-color: lightgray}")

with block:
    with gr.Row():
        gr.Markdown("<h3><center>Chat-Your-Data (State-of-the-Union)</center></h3>")

        openai_api_key_textbox = gr.Textbox(
            placeholder="Paste your OpenAI API key (sk-...)",
            show_label=False,
            lines=1,
            type="password",
        )

    chatbot = gr.Chatbot()

    with gr.Row():
        message = gr.Textbox(
            label="What's your question?",
            placeholder="Ask questions about the most recent state of the union",
            lines=1,
        )
        submit = gr.Button(value="Send", variant="secondary").style(full_width=False)

    gr.Examples(
        examples=[
            "What did the president say about Ketanji Brown Jackson",
            "Did he mention Stephen Breyer?",
            "What was his stance on Ukraine",
        ],
        inputs=message,
    )

    gr.HTML("Demo application of a LangChain chain.")

    gr.HTML(
        "<center>Powered by <a href='https://github.com/hwchase17/langchain'>LangChain 🦜️🔗</a></center>"
    )

    # Per-session state: the chat history and the user's chain instance.
    state = gr.State()
    agent_state = gr.State()

    submit.click(chat, inputs=[openai_api_key_textbox, message, state, agent_state], outputs=[chatbot, state])
    message.submit(chat, inputs=[openai_api_key_textbox, message, state, agent_state], outputs=[chatbot, state])

    openai_api_key_textbox.change(
        set_openai_api_key,
        inputs=[openai_api_key_textbox],
        outputs=[agent_state],
    )

block.launch(debug=True)
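For a quick sanity check of this wiring, both callbacks can be driven by hand in a session where app.py's definitions (everything above block.launch) have been evaluated. A minimal sketch, not part of the commit, assuming OPENAI_API_KEY is set in the environment:

import os

key = os.environ["OPENAI_API_KEY"]
chain = set_openai_api_key(key)  # what the textbox.change handler stores in agent_state
history, _ = chat(key, "Did he mention Stephen Breyer?", None, chain)
print(history[-1][1])  # the chain's answer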
cli_app.py
ADDED
@@ -0,0 +1,17 @@
import pickle
from query_data import get_chain


if __name__ == "__main__":
    with open("vectorstore.pkl", "rb") as f:
        vectorstore = pickle.load(f)
    qa_chain = get_chain(vectorstore)
    chat_history = []
    print("Chat with your docs!")
    while True:
        print("Human:")
        question = input()
        result = qa_chain({"question": question, "chat_history": chat_history})
        chat_history.append((question, result["answer"]))
        print("AI:")
        print(result["answer"])
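One possible refinement of the REPL loop (a sketch, not part of the commit): catch Ctrl-C and Ctrl-D so the session ends cleanly instead of with a traceback.

try:
    while True:
        question = input("Human:\n")
        result = qa_chain({"question": question, "chat_history": chat_history})
        chat_history.append((question, result["answer"]))
        print("AI:")
        print(result["answer"])
except (KeyboardInterrupt, EOFError):
    # Exit gracefully when input is interrupted or closed.
    print("\nBye!")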
ingest_data.py
ADDED
@@ -0,0 +1,23 @@
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.document_loaders import UnstructuredFileLoader
from langchain.vectorstores.faiss import FAISS
from langchain.embeddings import OpenAIEmbeddings
import pickle

# Load Data
loader = UnstructuredFileLoader("state_of_the_union.txt")
raw_documents = loader.load()

# Split text
text_splitter = RecursiveCharacterTextSplitter()
documents = text_splitter.split_documents(raw_documents)


# Load Data to vectorstore
embeddings = OpenAIEmbeddings()
vectorstore = FAISS.from_documents(documents, embeddings)


# Save vectorstore
with open("vectorstore.pkl", "wb") as f:
    pickle.dump(vectorstore, f)
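A quick way to verify that the pickled index round-trips (a sketch, not part of the commit; assumes ingest_data.py has already been run so vectorstore.pkl exists):

import pickle

with open("vectorstore.pkl", "rb") as f:
    store = pickle.load(f)

# Retrieve the two chunks closest to a test query.
for doc in store.similarity_search("What was said about Ukraine?", k=2):
    print(doc.page_content[:200])
    print("---")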
query_data.py
ADDED
@@ -0,0 +1,34 @@
from langchain.prompts.prompt import PromptTemplate
from langchain.llms import OpenAI
from langchain.chains import ChatVectorDBChain

_template = """Given the following conversation and a follow up question, rephrase the follow up question to be a standalone question.
You can assume the question is about the most recent state of the union address.

Chat History:
{chat_history}
Follow Up Input: {question}
Standalone question:"""
CONDENSE_QUESTION_PROMPT = PromptTemplate.from_template(_template)

template = """You are an AI assistant for answering questions about the most recent state of the union address.
You are given the following extracted parts of a long document and a question. Provide a conversational answer.
If you don't know the answer, just say "Hmm, I'm not sure." Don't try to make up an answer.
If the question is not about the most recent state of the union, politely inform them that you are tuned to only answer questions about the most recent state of the union.
Question: {question}
=========
{context}
=========
Answer in Markdown:"""
QA_PROMPT = PromptTemplate(template=template, input_variables=["question", "context"])


def get_chain(vectorstore):
    llm = OpenAI(temperature=0)
    qa_chain = ChatVectorDBChain.from_llm(
        llm,
        vectorstore,
        qa_prompt=QA_PROMPT,
        condense_question_prompt=CONDENSE_QUESTION_PROMPT,
    )
    return qa_chain
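The two prompts divide the work: CONDENSE_QUESTION_PROMPT rewrites a follow-up into a standalone question using the chat history, and QA_PROMPT answers that question over the retrieved chunks. A minimal two-turn sketch (not part of the commit; assumes vectorstore.pkl exists and OPENAI_API_KEY is set):

import pickle
from query_data import get_chain

with open("vectorstore.pkl", "rb") as f:
    vectorstore = pickle.load(f)

chain = get_chain(vectorstore)
q1 = "What did the president say about Ukraine?"
r1 = chain({"question": q1, "chat_history": []})
# The follow-up only makes sense with history; the chain first condenses
# it into a standalone question before retrieval and answering.
r2 = chain({"question": "Did he announce any sanctions?",
            "chat_history": [(q1, r1["answer"])]})
print(r2["answer"])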
requirements.txt
ADDED
@@ -0,0 +1,5 @@
langchain
openai
unstructured
faiss-cpu
gradio
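The dependencies are unpinned, so `pip install -r requirements.txt` installs whatever versions are current; pinning exact versions would make the Space build reproducible, though the versions used here are not recorded in the commit.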
vectorstore.pkl
ADDED
Binary file (103 kB).