# app.py — Smart FAQ Bot: Gradio app for RAG question answering over an uploaded PDF.
import gradio as gr
from langchain.vectorstores import Chroma
from langchain.embeddings.openai import OpenAIEmbeddings
from langchain.schema import Document
from langchain.chat_models import ChatOpenAI
from langchain.schema import SystemMessage, HumanMessage
from PyPDF2 import PdfReader
import os
# Function to process the uploaded PDF and convert it to documents
def pdf_to_documents(pdf_file):
    """Extract text from an uploaded PDF and wrap each non-empty page in a Document.

    Args:
        pdf_file: Uploaded-file object exposing a ``.name`` path attribute
            (as provided by Gradio's File component — TODO confirm shape).

    Returns:
        list[Document]: One Document per page that has text after stripping,
        with a 1-based index (among kept pages) in metadata["page_number"].

    Raises:
        ValueError: If the PDF yields no text or cannot be read at all.
    """
    try:
        reader = PdfReader(pdf_file.name)
        # Extract each page's text exactly once (the original called
        # extract_text() twice per page), and drop pages that are empty
        # after stripping (the original's pre-strip filter let
        # whitespace-only pages through as empty Documents).
        texts = [text.strip() for page in reader.pages if (text := page.extract_text())]
        documents = [
            Document(page_content=text, metadata={"page_number": idx + 1})
            for idx, text in enumerate(t for t in texts if t)
        ]
        if not documents:
            # Caught below and re-wrapped, matching the original behavior.
            raise ValueError("The uploaded PDF is empty or could not be processed.")
        return documents
    except Exception as e:
        # Surface any failure as a single ValueError the caller can display.
        raise ValueError(f"Failed to process the PDF: {str(e)}") from e
# Initialize vector store
def initialize_vectorstore(documents, api_key):
    """Embed *documents* with OpenAI embeddings and load them into a Chroma store.

    Side effect: exports *api_key* via the OPENAI_API_KEY environment
    variable, which OpenAIEmbeddings reads implicitly.
    """
    os.environ["OPENAI_API_KEY"] = api_key
    embedding_model = OpenAIEmbeddings()
    return Chroma.from_documents(documents, embedding=embedding_model)
# RAG retrieval and LLM chain for FAQ Bot
def rag_from_pdf(question, pdf_file, api_key):
    """Answer *question* from *pdf_file* using retrieval-augmented generation.

    Validates the three inputs, indexes the PDF into a Chroma vector store,
    retrieves the top-3 most similar chunks, and asks gpt-3.5-turbo to answer
    grounded on that context. All failures are returned as user-facing
    strings so the Gradio UI can display them instead of crashing.
    """
    # Guard clauses: report the first missing input as a friendly message.
    if not question.strip():
        return "Please enter a question."
    if not pdf_file:
        return "Please upload a valid PDF file."
    if not api_key.strip():
        return "Please enter your OpenAI API key."

    try:
        # Rebuild the vector index from the uploaded PDF on every call.
        docs = pdf_to_documents(pdf_file)
        store = initialize_vectorstore(docs, api_key)
        retriever = store.as_retriever(search_type="similarity", search_kwargs={"k": 3})

        # Concatenate the retrieved page texts into a single context string.
        hits = retriever.get_relevant_documents(question)
        context = "\n".join(doc.page_content for doc in hits)
        if not context.strip():
            return "No relevant information found in the document to answer the question."

        # Ask the chat model, grounding it on the retrieved context.
        chat = ChatOpenAI(model="gpt-3.5-turbo")
        prompt = [
            SystemMessage(content="You are a helpful assistant answering questions based on the provided PDF document."),
            HumanMessage(content=f"Question: {question}\n\nContext: {context}"),
        ]
        reply = chat(messages=prompt)
        return reply.content.strip()
    except Exception as e:
        return f"An error occurred: {str(e)}"
# Gradio interface
with gr.Blocks() as app:
    gr.Markdown("## Smart FAQ Bot - Ask Questions from Your PDF File")

    # Credential and document inputs.
    api_key_input = gr.Textbox(label="Enter your OpenAI API Key", type="password", placeholder="sk-...")
    pdf_file_input = gr.File(label="Upload your PDF document", file_types=[".pdf"])

    # Question input and answer output.
    question_input = gr.Textbox(label="Ask a question related to the PDF", placeholder="Type your question here...")
    rag_output = gr.Textbox(label="Generated Answer", lines=10, placeholder="Your answer will appear here...")

    # Wire the button to the RAG pipeline.
    rag_button = gr.Button("Ask Question")
    rag_button.click(rag_from_pdf, inputs=[question_input, pdf_file_input, api_key_input], outputs=rag_output)

# Launch Gradio app
app.launch()