Spaces:
Sleeping
Sleeping
File size: 4,975 Bytes
3dff4cb e9840df 3dff4cb e9840df 6814430 e9840df 3dff4cb e9840df 6814430 e9840df 446dbbb 6814430 e9840df 6814430 3dff4cb 6814430 e9840df 6814430 e9840df 6814430 e9840df 6814430 e9840df be312e0 6814430 446dbbb 571b70a |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 |
import os
import gradio as gr
from langchain.document_loaders import PDFMinerLoader,CSVLoader ,UnstructuredWordDocumentLoader,TextLoader
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.embeddings import SentenceTransformerEmbeddings
from langchain.vectorstores import FAISS
from langchain import HuggingFaceHub
DEVICE = 'cpu '
FILE_EXT = ['pdf','text','csv','word','wav']
def loading_file():
return "Loading..."
def get_openai_chat_model(API_key):
try:
from langchain.llms import OpenAI
except ImportError as err:
raise "{}, unable to load openAI. Please install openai and add OPENAIAPI_KEY"
os.environ["OPENAI_API_KEY"] = API_key
llm = OpenAI()
return llm
def process_documents(documents,data_chunk=1000,chunk_overlap=50):
text_splitter = RecursiveCharacterTextSplitter(chunk_size=data_chunk, chunk_overlap=chunk_overlap)
texts = text_splitter.split_documents(documents[0])
return texts
def get_hugging_face_model(model_id,API_key,temperature=0.1):
chat_llm = HuggingFaceHub(huggingfacehub_api_token=API_key,
repo_id=model_id,
model_kwargs={"temperature": temperature, "max_new_tokens": 2048})
return chat_llm
def chat_application(llm_model,key):
if llm_model == 'HuggingFace':
llm = get_hugging_face_model(model_id='tiiuae/falcon-7b-instruct',API_key=key)
else:
llm_model = get_openai_chat_model(API_key=key)
def document_loader(file_data,doc_type='pdf',key=None):
embedding_model = SentenceTransformerEmbeddings(model_name='all-mpnet-base-v2',model_kwargs={"device": DEVICE})
document = None
if doc_type == 'pdf':
document = process_pdf_document(document_file_name=file_data)
elif doc_type == 'text':
document = process_text_document(document_file_name=file_data)
elif doc_type == 'csv':
document = process_csv_document(document_file_name=file_data)
elif doc_type == 'word':
document = process_word_document(document_file_name=file_data)
if document:
texts = process_documents(documents=document)
global vectordb
vectordb = FAISS.from_documents(documents=texts, embedding= embedding_model)
else:
return "Error in loading Documents "
return "Document loaded - Embeddings ready "
def process_text_document(document_file_name):
loader = TextLoader(document_file_name)
document = loader.load()
return document
def process_csv_document(document_file_name):
loader = CSVLoader(file_path=document_file_name)
document = loader.load()
return document
def process_word_document(document_file_name):
loader = UnstructuredWordDocumentLoader(file_path=document_file_name)
document = loader.load()
return document
def process_pdf_document(document_file_name):
loader = PDFMinerLoader(document_file_name)
document = loader.load()[0]
return document
css="""
#col-container {max-width: 700px; margin-left: auto; margin-right: auto;}
"""
title = """
<div style="text-align: center;max-width: 700px;">
<h1>Chat with Data • OpenAI/HuggingFace</h1>
<p style="text-align: center;">Upload a file from your computer, click the "Load data to LangChain" button, <br />
when everything is ready, you can start asking questions about the data you uploaded ;) <br />
This version is just for QA retrival so it will not use chat history, and uses Hugging face as LLM,
so you don't need any key</p>
</div>
"""
with gr.Blocks(css=css) as demo:
with gr.Column(elem_id="col-container"):
gr.HTML(title)
with gr.Column():
with gr.Box():
LLM_option = gr.Dropdown(['HuggingFace','OpenAI'],label='Large Language Model Selection',info='LLM Service')
API_key = gr.Textbox(label="Add {} API key".format(LLM_option), type="password")
with gr.Column():
with gr.row():
file_extension = gr.Dropdown(FILE_EXT, label="File Extensions", info="Select your files extensions!")
pdf_doc = gr.File(label="Upload File to start QA", file_types=FILE_EXT, type="file")
with gr.Row():
load_pdf = gr.Button("Load file to langchain")
langchain_status = gr.Textbox(label="Status", placeholder="", interactive=True)
chatbot = gr.Chatbot()
question = gr.Textbox(label="Question", placeholder="Type your question and hit Enter")
submit_button = gr.Button("Send Message")
load_pdf.click(loading_file, None, langchain_status, queue=False)
load_pdf.click(document_loader, inputs=[pdf_doc,file_extension,API_key], outputs=[langchain_status], queue=False)
# question.submit(add_text, [chatbot, question], [chatbot, question]).then(
# bot, chatbot, chatbot
# )
demo.launch() |