|
import gradio as gr |
|
from langchain.vectorstores import Chroma |
|
from langchain.embeddings.openai import OpenAIEmbeddings |
|
from langchain.schema import Document |
|
from langchain.chat_models import ChatOpenAI |
|
from langchain.schema import SystemMessage, HumanMessage |
|
from PyPDF2 import PdfReader |
|
import os |
|
|
|
|
|
def pdf_to_documents(pdf_file):
    """Extract text from an uploaded PDF and wrap each page in a Document.

    Args:
        pdf_file: Uploaded file object with a ``.name`` attribute pointing
            at the PDF on disk (as supplied by gradio's File input).

    Returns:
        list[Document]: One Document per page with extractable text. The
        original 1-based page number is stored in
        ``metadata["page_number"]`` (skipped empty pages do not shift it).

    Raises:
        ValueError: If the PDF cannot be read or contains no extractable text.
    """
    try:
        reader = PdfReader(pdf_file.name)
        documents = []
        for idx, page in enumerate(reader.pages):
            # extract_text() is expensive: call it exactly once per page
            # (the original called it twice — once to filter, once to keep).
            text = (page.extract_text() or "").strip()
            # Skip blank/whitespace-only pages entirely instead of
            # producing Documents with empty content.
            if text:
                documents.append(
                    Document(page_content=text, metadata={"page_number": idx + 1})
                )
    except Exception as e:
        # Chain the original exception so the root cause is preserved.
        raise ValueError(f"Failed to process the PDF: {str(e)}") from e

    # Raised OUTSIDE the try block so this message is not re-caught and
    # re-wrapped into "Failed to process the PDF: ..." by the handler above.
    if not documents:
        raise ValueError("The uploaded PDF is empty or could not be processed.")
    return documents
|
|
|
|
|
def initialize_vectorstore(documents, api_key):
    """Build an in-memory Chroma vector store over the given documents.

    The API key is exported through the environment rather than passed to
    the client directly, so every downstream OpenAI-backed component
    (embeddings here, the chat model later) picks it up automatically.
    """
    os.environ["OPENAI_API_KEY"] = api_key
    return Chroma.from_documents(documents, embedding=OpenAIEmbeddings())
|
|
|
|
|
def rag_from_pdf(question, pdf_file, api_key):
    """Answer a question via retrieval-augmented generation over a PDF.

    Args:
        question: The user's natural-language question.
        pdf_file: The uploaded PDF file object (from gradio's File input).
        api_key: The user's OpenAI API key.

    Returns:
        str: The model's answer, or a human-readable validation/error
        message (errors are returned as text rather than raised, so the
        UI always shows something).
    """
    # Gradio can hand over None for untouched inputs, so guard before
    # calling .strip() — the original raised AttributeError on None.
    if not question or not question.strip():
        return "Please enter a question."
    if not pdf_file:
        return "Please upload a valid PDF file."
    if not api_key or not api_key.strip():
        return "Please enter your OpenAI API key."

    try:
        documents = pdf_to_documents(pdf_file)

        # The store is rebuilt for every question; fine for small PDFs.
        # NOTE(review): consider caching per-file for larger workloads.
        vectorstore = initialize_vectorstore(documents, api_key.strip())
        retriever = vectorstore.as_retriever(
            search_type="similarity", search_kwargs={"k": 3}
        )

        # Pull the top-k most similar pages and join them into one context.
        retrieved_docs = retriever.get_relevant_documents(question)
        context = "\n".join(doc.page_content for doc in retrieved_docs)

        if not context.strip():
            return "No relevant information found in the document to answer the question."

        llm = ChatOpenAI(model="gpt-3.5-turbo")

        messages = [
            SystemMessage(content="You are a helpful assistant answering questions based on the provided PDF document."),
            HumanMessage(content=f"Question: {question}\n\nContext: {context}"),
        ]

        response = llm(messages=messages)
        return response.content.strip()
    except Exception as e:
        # Surface any failure (PDF parsing, API errors) to the UI as text.
        return f"An error occurred: {str(e)}"
|
|
|
|
|
# --- Gradio UI: wires the RAG pipeline to a simple question/answer form ---
with gr.Blocks() as app:
    gr.Markdown("## Smart FAQ Bot - Ask Questions from Your PDF File")

    # Inputs — creation order determines on-screen order, so keep it.
    api_key_input = gr.Textbox(
        label="Enter your OpenAI API Key",
        type="password",
        placeholder="sk-...",
    )
    pdf_file_input = gr.File(
        label="Upload your PDF document",
        file_types=[".pdf"],
    )
    question_input = gr.Textbox(
        label="Ask a question related to the PDF",
        placeholder="Type your question here...",
    )

    # Output area and the button that triggers the pipeline.
    rag_output = gr.Textbox(
        label="Generated Answer",
        lines=10,
        placeholder="Your answer will appear here...",
    )
    rag_button = gr.Button("Ask Question")
    rag_button.click(
        rag_from_pdf,
        inputs=[question_input, pdf_file_input, api_key_input],
        outputs=rag_output,
    )

app.launch()
|
|