# DexterSptizu's picture
# Update app.py
# b77cac2 verified
import gradio as gr
from langchain_chroma import Chroma
from langchain_openai import OpenAIEmbeddings
from langchain_core.documents import Document
from langchain_openai import ChatOpenAI
from langchain_core.prompts import ChatPromptTemplate
from langchain_core.runnables import RunnablePassthrough
from PyPDF2 import PdfReader
import os
# Function to process the uploaded PDF and convert it to documents
def pdf_to_documents(pdf_file):
    """Convert an uploaded PDF into one LangChain Document per page.

    Args:
        pdf_file: Uploaded file object from Gradio; its ``.name`` attribute
            is the path of the temporary file on disk.

    Returns:
        list[Document]: one Document per page, each carrying a 1-based
        ``page_number`` in its metadata.
    """
    reader = PdfReader(pdf_file.name)
    documents = []
    for idx, page in enumerate(reader.pages):
        # extract_text() may return None for image-only/scanned pages;
        # coerce to "" so Document construction cannot fail.
        text = page.extract_text() or ""
        documents.append(
            Document(page_content=text, metadata={"page_number": idx + 1})
        )
    return documents
# Initialize vector store
def initialize_vectorstore(documents, api_key):
    """Embed *documents* with OpenAI and load them into a Chroma vector store.

    Note: the key is exported through the OPENAI_API_KEY environment
    variable so that later ``ChatOpenAI()`` construction also picks it up.

    Args:
        documents: list of LangChain Documents to index.
        api_key: OpenAI API key supplied by the user.

    Returns:
        A Chroma vector store populated with the embedded documents.
    """
    os.environ["OPENAI_API_KEY"] = api_key
    return Chroma.from_documents(documents, embedding=OpenAIEmbeddings())
# RAG retrieval and LLM chain
def rag_from_pdf(question, pdf_file, api_key):
    """Answer *question* from the content of *pdf_file* via a RAG chain.

    Builds a fresh vector store from the PDF, retrieves the two most
    similar chunks, and asks gpt-3.5-turbo to answer using only that
    context.

    Args:
        question: The user's question.
        pdf_file: Uploaded PDF file object from Gradio.
        api_key: OpenAI API key supplied by the user.

    Returns:
        str: the model's answer, or a human-readable message when an
        input is missing.
    """
    # Guard clauses: give the user a readable message instead of a traceback.
    if pdf_file is None:
        return "Please upload a PDF document first."
    if not api_key:
        return "Please enter your OpenAI API key."
    if not question or not question.strip():
        return "Please enter a question."

    documents = pdf_to_documents(pdf_file)
    vectorstore = initialize_vectorstore(documents, api_key)
    # Top-2 most similar chunks keeps the prompt compact while still
    # giving the model enough grounding context.
    retriever = vectorstore.as_retriever(search_type="similarity", search_kwargs={"k": 2})

    # Initialize the LLM (reads OPENAI_API_KEY set by initialize_vectorstore).
    llm = ChatOpenAI(model="gpt-3.5-turbo")

    # Prompt template combining retrieved context with the question.
    prompt_template = """
Answer this question using the provided context only.
{question}
Context:
{context}
"""
    prompt = ChatPromptTemplate.from_messages([("human", prompt_template)])

    # Retriever fills {context}; the raw question passes through to {question}.
    rag_chain = {"context": retriever, "question": RunnablePassthrough()} | prompt | llm

    response = rag_chain.invoke(question)
    return response.content
# Gradio interface
# Assemble the Gradio UI: credentials + inputs on top, answer box below.
with gr.Blocks() as app:
    gr.Markdown("## PDF-based Question Answering with RAG")

    # User-supplied credential and source document.
    api_key_input = gr.Textbox(label="Enter your OpenAI API Key", type="password")
    pdf_file_input = gr.File(label="Upload your PDF document")

    # Question in, generated answer out.
    question_input = gr.Textbox(label="Ask a question related to the PDF")
    rag_output = gr.Textbox(label="Generated Response", lines=10)

    # Wire the button to the RAG pipeline.
    rag_button = gr.Button("Ask Question")
    rag_button.click(
        rag_from_pdf,
        inputs=[question_input, pdf_file_input, api_key_input],
        outputs=rag_output,
    )

# Start the web server.
app.launch()