import os
import openai

# Silence the HuggingFace tokenizers fork-warning noise.
os.environ["TOKENIZERS_PARALLELISM"] = "false"
# Fail fast at import time if the OpenAI key is not configured:
# a bare lookup raises KeyError when the variable is missing.
os.environ["OPENAI_API_KEY"]


def save_docs(docs):
    """Copy the uploaded files into the working docs directory.

    Any previous contents of /home/user/app/docs/ are removed first, so
    the directory always mirrors exactly the files passed in.

    Parameters:
        docs: iterable of uploaded-file objects exposing a ``.name`` path
            (presumably Gradio file uploads — confirm against caller).

    Returns:
        The literal status string "Successful!".
    """
    import shutil

    output_dir = "/home/user/app/docs/"
    # Wipe the previous upload set, then recreate an empty directory.
    # (The original checked os.path.exists twice; after rmtree the
    # directory is guaranteed absent, so a plain makedirs suffices.)
    if os.path.exists(output_dir):
        shutil.rmtree(output_dir)
    os.makedirs(output_dir)
    for doc in docs:
        shutil.copy(doc.name, output_dir)
    return "Successful!"


def process_docs():
    """Build and persist the FAISS vector index for the uploaded docs.

    Loads every supported document from /home/user/app/docs/, splits the
    text into overlapping chunks, embeds the chunks with OpenAI, and
    saves the resulting FAISS index to /home/user/app/docs_db/.

    Supported extensions: .pdf, .txt, .docx, .csv, .xlsx.

    Returns:
        The literal status string "Successful!".
    """
    from langchain.document_loaders import (
        DirectoryLoader,
        Docx2txtLoader,
        PyPDFLoader,
        TextLoader,
        UnstructuredExcelLoader,
    )
    from langchain.document_loaders.csv_loader import CSVLoader
    from langchain.text_splitter import RecursiveCharacterTextSplitter
    from langchain.vectorstores import FAISS
    from langchain_openai import OpenAIEmbeddings

    docs_dir = "/home/user/app/docs/"
    # One (glob, loader) pair per supported file type; replaces five
    # near-identical copies of the DirectoryLoader boilerplate.
    loader_specs = [
        ("./*.pdf", PyPDFLoader),
        ("./*.txt", TextLoader),
        ("./*.docx", Docx2txtLoader),
        ("./*.csv", CSVLoader),
        ("./*.xlsx", UnstructuredExcelLoader),
    ]
    documents = []
    for glob_pattern, loader_cls in loader_specs:
        loader = DirectoryLoader(
            docs_dir, glob=glob_pattern, loader_cls=loader_cls
        )
        documents.extend(loader.load())

    # 1000-char chunks with 200-char overlap keep context across
    # chunk boundaries for retrieval.
    text_splitter = RecursiveCharacterTextSplitter(
        chunk_size=1000, chunk_overlap=200, length_function=len
    )
    docs = text_splitter.split_documents(documents)

    embeddings = OpenAIEmbeddings()
    docs_db = FAISS.from_documents(docs, embeddings)
    docs_db.save_local("/home/user/app/docs_db/")
    return "Successful!"
global agent def create_agent(): from langchain_openai import ChatOpenAI from langchain.chains.conversation.memory import ConversationSummaryBufferMemory from langchain.chains import ConversationChain global agent llm = ChatOpenAI(model_name="gpt-3.5-turbo-16k") memory = ConversationSummaryBufferMemory(llm=llm, max_token_limit=1000) agent = ConversationChain(llm=llm, memory=memory, verbose=True) return "Successful!" def formatted_response(docs, question, response, state): formatted_output = response + "\n\nSources" for i, doc in enumerate(docs): source_info = doc.metadata.get("source", "Unknown source") page_info = doc.metadata.get("page", None) doc_name = source_info.split("/")[-1].strip() if page_info is not None: formatted_output += f"\n{doc_name}\tpage no {page_info}" else: formatted_output += f"\n{doc_name}" state.append((question, formatted_output)) return state, state def search_docs(prompt, question, state): from langchain_openai import OpenAIEmbeddings from langchain.vectorstores import FAISS from langchain.callbacks import get_openai_callback global agent agent = agent state = state or [] embeddings = OpenAIEmbeddings() docs_db = FAISS.load_local( "/home/user/app/docs_db/", embeddings, allow_dangerous_deserialization=True ) docs = docs_db.similarity_search(question) prompt += "\n\n" prompt += question prompt += "\n\n" prompt += str(docs) with get_openai_callback() as cb: response = agent.predict(input=prompt) print(cb) return formatted_response(docs, question, response, state) import gradio as gr css = """ .col{ max-width: 75%; margin: 0 auto; display: flex; flex-direction: column; justify-content: center; align-items: center; } """ with gr.Blocks(css=css) as demo: gr.Markdown("##