# AI Medical Assistant — Gradio app: upload documents, build a FAISS index
# over them, and chat with a LangChain conversation agent grounded in the
# retrieved chunks.
import os

import openai

# Silence HF tokenizers' fork-related parallelism warnings/deadlocks.
os.environ["TOKENIZERS_PARALLELISM"] = "false"

# Fail fast at startup: this lookup raises KeyError immediately if the
# OpenAI key is missing, instead of failing later inside a request.
os.environ["OPENAI_API_KEY"]
def save_docs(docs, output_dir="/home/user/app/docs/"):
    """Copy uploaded files into *output_dir*, wiping any previous contents.

    Args:
        docs: iterable of uploaded file objects; each must expose a ``.name``
            attribute holding its on-disk path (the shape gradio's ``Files``
            component produces).
        output_dir: destination directory, recreated from scratch each call.
            Defaults to the app's document folder for backward compatibility.

    Returns:
        A status string for display in the UI.
    """
    import os
    import shutil

    # Start from a clean slate so stale documents never leak into the index.
    # (The original re-checked existence right after rmtree — always false,
    # so the makedirs can be unconditional.)
    if os.path.exists(output_dir):
        shutil.rmtree(output_dir)
    os.makedirs(output_dir)

    for doc in docs:
        shutil.copy(doc.name, output_dir)
    return "Successful!"
def process_docs():
    """Index every supported document under /home/user/app/docs/.

    Loads .pdf, .txt, .docx, .csv and .xlsx files, splits them into
    overlapping chunks, embeds the chunks with OpenAI embeddings, and
    persists a FAISS index to /home/user/app/docs_db/.

    Returns:
        A status string for display in the UI.
    """
    from langchain.document_loaders import (
        DirectoryLoader,
        Docx2txtLoader,
        PyPDFLoader,
        TextLoader,
        UnstructuredExcelLoader,
    )
    from langchain.document_loaders.csv_loader import CSVLoader
    from langchain.text_splitter import RecursiveCharacterTextSplitter
    from langchain.vectorstores import FAISS
    from langchain_openai import OpenAIEmbeddings

    source_dir = "/home/user/app/docs/"
    # One (glob, loader) pair per supported file type — replaces five
    # copy-pasted loader stanzas.
    loader_specs = [
        ("./*.pdf", PyPDFLoader),
        ("./*.txt", TextLoader),
        ("./*.docx", Docx2txtLoader),
        ("./*.csv", CSVLoader),
        ("./*.xlsx", UnstructuredExcelLoader),
    ]
    documents = []
    for glob_pattern, loader_cls in loader_specs:
        loader = DirectoryLoader(source_dir, glob=glob_pattern, loader_cls=loader_cls)
        documents.extend(loader.load())

    # 1000-char chunks with 200-char overlap preserve context across cuts.
    text_splitter = RecursiveCharacterTextSplitter(
        chunk_size=1000, chunk_overlap=200, length_function=len
    )
    docs = text_splitter.split_documents(documents)

    docs_db = FAISS.from_documents(docs, OpenAIEmbeddings())
    docs_db.save_local("/home/user/app/docs_db/")
    return "Successful!"
# Shared conversation agent, created lazily by create_agent() and read by
# search_docs(). Initialized explicitly — the original bare ``global agent``
# statement at module level is a no-op in Python.
agent = None


def create_agent():
    """Create the module-level ConversationChain agent backed by GPT-3.5.

    Uses a summarizing buffer memory capped at 1000 tokens so long chats
    stay inside the model's context window.

    Returns:
        A status string for display in the UI.
    """
    from langchain.chains import ConversationChain
    from langchain.chains.conversation.memory import ConversationSummaryBufferMemory
    from langchain_openai import ChatOpenAI

    global agent
    llm = ChatOpenAI(model_name="gpt-3.5-turbo-16k")
    memory = ConversationSummaryBufferMemory(llm=llm, max_token_limit=1000)
    agent = ConversationChain(llm=llm, memory=memory, verbose=True)
    return "Successful!"
def formatted_response(docs, question, response, state):
    """Append source attributions to *response* and record the chat turn.

    Args:
        docs: retrieved documents; each exposes ``.metadata`` with optional
            ``source`` (file path) and ``page`` keys.
        question: the user's question text.
        response: the agent's raw answer text.
        state: mutable chat history — a list of (question, answer) tuples.

    Returns:
        ``(state, state)`` — the same list twice, matching gradio's
        ``outputs=[chatbot, state]`` wiring.
    """
    formatted_output = response + "\n\nSources"
    for doc in docs:  # original enumerated but never used the index
        source_info = doc.metadata.get("source", "Unknown source")
        page_info = doc.metadata.get("page", None)
        # Show just the file name, not the full server-side path.
        doc_name = source_info.split("/")[-1].strip()
        if page_info is not None:
            formatted_output += f"\n{doc_name}\tpage no {page_info}"
        else:
            formatted_output += f"\n{doc_name}"
    state.append((question, formatted_output))
    return state, state
def search_docs(prompt, question, state):
    """Retrieve chunks relevant to *question* and answer via the agent.

    Args:
        prompt: custom prompt text from the UI, prepended to the query.
        question: the user's question.
        state: chat history list, or None/empty on the first call.

    Returns:
        ``(state, state)`` with the new turn appended (see
        formatted_response).
    """
    from langchain.callbacks import get_openai_callback
    from langchain.vectorstores import FAISS
    from langchain_openai import OpenAIEmbeddings

    # Set by create_agent(); NameError here means the agent wasn't created.
    # (The original's ``agent = agent`` self-assignment was a no-op.)
    global agent
    state = state or []

    embeddings = OpenAIEmbeddings()
    # allow_dangerous_deserialization is acceptable here: the index is
    # produced locally by process_docs(), so unpickling it is trusted.
    docs_db = FAISS.load_local(
        "/home/user/app/docs_db/", embeddings, allow_dangerous_deserialization=True
    )
    docs = docs_db.similarity_search(question)

    # Final prompt = custom prompt + question + retrieved context.
    full_prompt = prompt + "\n\n" + question + "\n\n" + str(docs)

    with get_openai_callback() as cb:
        response = agent.predict(input=full_prompt)
        print(cb)  # token/cost usage for this call
    return formatted_response(docs, question, response, state)
import gradio as gr

# Centers the main column and stacks its children vertically.
css = """
.col{
max-width: 75%;
margin: 0 auto;
display: flex;
flex-direction: column;
justify-content: center;
align-items: center;
}
"""

with gr.Blocks(css=css) as demo:
    gr.Markdown("## <center>Your AI Medical Assistant</center>")
    with gr.Tab("Your AI Medical Assistant"):
        with gr.Column(elem_classes="col"):
            # --- Tab 1: upload files, build the index, create the agent ---
            with gr.Tab("Upload and Process Documents"):
                with gr.Column():
                    docs_upload_input = gr.Files(label="Upload File(s)")
                    docs_upload_button = gr.Button("Upload")
                    docs_upload_output = gr.Textbox(label="Output")
                    docs_process_button = gr.Button("Process")
                    docs_process_output = gr.Textbox(label="Output")
                    create_agent_button = gr.Button("Create Agent")
                    create_agent_output = gr.Textbox(label="Output")
                    gr.ClearButton(
                        [
                            docs_upload_input,
                            docs_upload_output,
                            docs_process_output,
                            create_agent_output,
                        ]
                    )
            # --- Tab 2: ask questions against the indexed documents ---
            with gr.Tab("Query Documents"):
                with gr.Column():
                    docs_prompt_input = gr.Textbox(label="Custom Prompt")
                    docs_chatbot = gr.Chatbot(label="Chats")
                    docs_state = gr.State()
                    docs_search_input = gr.Textbox(label="Question")
                    docs_search_button = gr.Button("Search")
                    gr.ClearButton([docs_prompt_input, docs_search_input])

            # --- Event wiring ---
            docs_upload_button.click(
                save_docs, inputs=docs_upload_input, outputs=docs_upload_output
            )
            docs_process_button.click(
                process_docs, inputs=None, outputs=docs_process_output
            )
            create_agent_button.click(
                create_agent, inputs=None, outputs=create_agent_output
            )
            docs_search_button.click(
                search_docs,
                inputs=[docs_prompt_input, docs_search_input, docs_state],
                outputs=[docs_chatbot, docs_state],
            )

# Queueing serializes requests so the single shared agent isn't re-entered.
demo.queue()
demo.launch()