# Gradio app for chatting with uploaded documents using LangChain, FAISS, and OpenAI.
import os
import openai

os.environ["TOKENIZERS_PARALLELISM"] = "false"
os.environ["OPENAI_API_KEY"]  # raises a KeyError early if the OpenAI key is not set
def save_docs(docs):
    """Copy the uploaded files into a fresh docs/ directory."""
    import shutil
    import os

    output_dir = "/home/user/app/docs/"
    # Recreate the upload directory so each upload starts from a clean slate.
    if os.path.exists(output_dir):
        shutil.rmtree(output_dir)
    os.makedirs(output_dir)
    for doc in docs:
        shutil.copy(doc.name, output_dir)
    return "Successful!"
def process_docs():
    """Load every supported document type, split it into chunks, and build a FAISS index."""
    from langchain.document_loaders import PyPDFLoader
    from langchain.document_loaders import DirectoryLoader
    from langchain.document_loaders import TextLoader
    from langchain.document_loaders import Docx2txtLoader
    from langchain.document_loaders.csv_loader import CSVLoader
    from langchain.document_loaders import UnstructuredExcelLoader
    from langchain.vectorstores import FAISS
    from langchain.embeddings.openai import OpenAIEmbeddings
    from langchain.text_splitter import RecursiveCharacterTextSplitter

    # One loader per supported file type, all reading from the upload directory.
    loader1 = DirectoryLoader('/home/user/app/docs/', glob="./*.pdf", loader_cls=PyPDFLoader)
    document1 = loader1.load()
    loader2 = DirectoryLoader('/home/user/app/docs/', glob="./*.txt", loader_cls=TextLoader)
    document2 = loader2.load()
    loader3 = DirectoryLoader('/home/user/app/docs/', glob="./*.docx", loader_cls=Docx2txtLoader)
    document3 = loader3.load()
    loader4 = DirectoryLoader('/home/user/app/docs/', glob="./*.csv", loader_cls=CSVLoader)
    document4 = loader4.load()
    loader5 = DirectoryLoader('/home/user/app/docs/', glob="./*.xlsx", loader_cls=UnstructuredExcelLoader)
    document5 = loader5.load()

    # Merge everything into a single list of documents.
    document1.extend(document2)
    document1.extend(document3)
    document1.extend(document4)
    document1.extend(document5)

    # Split into overlapping chunks so each piece stays within the chosen chunk size.
    text_splitter = RecursiveCharacterTextSplitter(
        chunk_size=1000,
        chunk_overlap=200,
        length_function=len,
    )
    docs = text_splitter.split_documents(document1)

    # Embed the chunks and persist the FAISS index to disk for later retrieval.
    embeddings = OpenAIEmbeddings()
    docs_db = FAISS.from_documents(docs, embeddings)
    docs_db.save_local("/home/user/app/docs_db/")
    return "Successful!"
# The conversational agent is shared across requests and set by create_agent().
agent = None

def create_agent():
    """Build a ConversationChain with summary-buffer memory on top of gpt-3.5-turbo-16k."""
    from langchain.chat_models import ChatOpenAI
    from langchain.chains.conversation.memory import ConversationSummaryBufferMemory
    from langchain.chains import ConversationChain
    global agent
    llm = ChatOpenAI(model_name='gpt-3.5-turbo-16k')
    memory = ConversationSummaryBufferMemory(llm=llm, max_token_limit=1000)
    agent = ConversationChain(llm=llm, memory=memory, verbose=True)
    return "Successful!"
def formatted_response(docs, question, response, state):
    """Append the answer plus its source documents (and page numbers) to the chat state."""
    formatted_output = response + "\n\nSources"
    for doc in docs:
        source_info = doc.metadata.get('source', 'Unknown source')
        page_info = doc.metadata.get('page', None)
        doc_name = source_info.split('/')[-1].strip()
        if page_info is not None:
            formatted_output += f"\n{doc_name}\tpage no {page_info}"
        else:
            formatted_output += f"\n{doc_name}"
    state.append((question, formatted_output))
    return state, state
def search_docs(prompt, question, state):
    """Retrieve the most similar chunks from the FAISS index and have the agent answer."""
    from langchain.embeddings.openai import OpenAIEmbeddings
    from langchain.vectorstores import FAISS
    from langchain.callbacks import get_openai_callback
    global agent
    state = state or []

    # Load the persisted index and fetch the chunks most relevant to the question.
    embeddings = OpenAIEmbeddings()
    docs_db = FAISS.load_local("/home/user/app/docs_db/", embeddings)
    docs = docs_db.similarity_search(question)

    # Combine the custom prompt, the question, and the retrieved chunks into one input.
    prompt += "\n\n" + question + "\n\n" + str(docs)

    with get_openai_callback() as cb:
        response = agent.predict(input=prompt)
        print(cb)  # log token usage for this call
    return formatted_response(docs, question, response, state)
import gradio as gr

css = """
.col{
    max-width: 75%;
    margin: 0 auto;
    display: flex;
    flex-direction: column;
    justify-content: center;
    align-items: center;
}
"""

with gr.Blocks(css=css) as demo:
    gr.Markdown("## <center>All in One Document Chatting App</center>")

    with gr.Tab("Chat With Your Documents"):
        with gr.Column(elem_classes="col"):

            with gr.Tab("Upload and Process Documents"):
                with gr.Column():
                    docs_upload_input = gr.Files(label="Upload File(s)")
                    docs_upload_button = gr.Button("Upload")
                    docs_upload_output = gr.Textbox(label="Output")
                    docs_process_button = gr.Button("Process")
                    docs_process_output = gr.Textbox(label="Output")
                    create_agent_button = gr.Button("Create Agent")
                    create_agent_output = gr.Textbox(label="Output")
                    gr.ClearButton([docs_upload_input, docs_upload_output, docs_process_output, create_agent_output])

            with gr.Tab("Query Documents"):
                with gr.Column():
                    docs_prompt_input = gr.Textbox(label="Custom Prompt")
                    docs_chatbot = gr.Chatbot(label="Chats")
                    docs_state = gr.State()
                    docs_search_input = gr.Textbox(label="Question")
                    docs_search_button = gr.Button("Search")
                    gr.ClearButton([docs_prompt_input, docs_search_input])

    #########################################################################################################

    docs_upload_button.click(save_docs, inputs=docs_upload_input, outputs=docs_upload_output)
    docs_process_button.click(process_docs, inputs=None, outputs=docs_process_output)
    create_agent_button.click(create_agent, inputs=None, outputs=create_agent_output)
    docs_search_button.click(search_docs, inputs=[docs_prompt_input, docs_search_input, docs_state], outputs=[docs_chatbot, docs_state])

    #########################################################################################################

demo.queue()
demo.launch()