File size: 2,656 Bytes
c647394
b77cac2
 
 
 
 
 
c647394
 
 
 
 
b77cac2
 
 
 
c647394
 
 
 
 
 
 
 
b77cac2
c647394
b77cac2
 
 
 
 
 
 
 
 
 
 
c647394
b77cac2
 
 
 
 
 
 
 
 
 
 
 
 
 
c647394
 
 
b77cac2
 
c647394
b77cac2
c647394
 
b77cac2
c647394
 
b77cac2
c647394
 
b77cac2
c647394
 
 
 
 
 
 
 
b77cac2
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
import gradio as gr
from langchain_chroma import Chroma
from langchain_openai import OpenAIEmbeddings
from langchain_core.documents import Document
from langchain_openai import ChatOpenAI
from langchain_core.prompts import ChatPromptTemplate
from langchain_core.runnables import RunnablePassthrough
from PyPDF2 import PdfReader
import os

# Function to process the uploaded PDF and convert it to documents
def pdf_to_documents(pdf_file):
    """Read an uploaded PDF and return one LangChain Document per page.

    Args:
        pdf_file: Gradio file object; its ``.name`` attribute is the path
            to the uploaded temp file.

    Returns:
        list[Document]: one Document per page, with a 1-based
        ``page_number`` in the metadata.
    """
    reader = PdfReader(pdf_file.name)
    documents = []
    for idx, page in enumerate(reader.pages):
        # extract_text() may return None or "" for image-only/scanned
        # pages; coerce to "" so Document never gets a None page_content
        # (which would crash the embedding step downstream).
        text = page.extract_text() or ""
        documents.append(
            Document(page_content=text, metadata={"page_number": idx + 1})
        )
    return documents

# Initialize vector store
def initialize_vectorstore(documents, api_key):
    """Embed *documents* with OpenAI and load them into an in-memory Chroma store.

    Args:
        documents: list of LangChain ``Document`` objects to index.
        api_key: OpenAI API key supplied by the user at request time.

    Returns:
        Chroma: a vector store ready to be used as a retriever.
    """
    # The env var is still set because ChatOpenAI elsewhere in this file
    # reads its key from the environment; note this is process-wide state,
    # so in a shared deployment the last submitted key wins.
    os.environ["OPENAI_API_KEY"] = api_key
    # Also pass the key explicitly so the embeddings client does not
    # depend on ambient environment state.
    embeddings = OpenAIEmbeddings(api_key=api_key)
    vectorstore = Chroma.from_documents(documents, embedding=embeddings)
    return vectorstore

# RAG retrieval and LLM chain
def rag_from_pdf(question, pdf_file, api_key):
    """Answer *question* using retrieval-augmented generation over *pdf_file*.

    Builds a fresh vector store from the PDF on every call (simple but
    re-embeds the whole document per question), retrieves the most
    relevant pages, and asks the LLM to answer from that context only.

    Args:
        question: the user's natural-language question.
        pdf_file: Gradio file object for the uploaded PDF.
        api_key: OpenAI API key used for both embeddings and the chat model.

    Returns:
        str: the model's answer text.
    """
    documents = pdf_to_documents(pdf_file)
    vectorstore = initialize_vectorstore(documents, api_key)

    # Retrieve top 2 relevant sections
    retriever = vectorstore.as_retriever(search_type="similarity", search_kwargs={"k": 2})

    # Initialize the LLM
    llm = ChatOpenAI(model="gpt-3.5-turbo")

    # Create a prompt template for combining context and question
    prompt_template = """
    Answer this question using the provided context only.

    {question}

    Context:
    {context}
    """

    prompt = ChatPromptTemplate.from_messages([("human", prompt_template)])

    def _format_docs(docs):
        # Join the retrieved page texts into plain prose. Without this the
        # raw list[Document] would be interpolated into the prompt as a
        # Python repr of Document objects, polluting the context.
        return "\n\n".join(doc.page_content for doc in docs)

    # LCEL auto-wraps the plain function in a RunnableLambda, so the
    # retriever output is formatted before it reaches the prompt.
    rag_chain = (
        {"context": retriever | _format_docs, "question": RunnablePassthrough()}
        | prompt
        | llm
    )

    # Perform retrieval and return LLM's answer
    response = rag_chain.invoke(question)
    return response.content

# Gradio interface: collect the key, the PDF, and a question; show the answer.
with gr.Blocks() as app:
    gr.Markdown("## PDF-based Question Answering with RAG")

    # Inputs: API key (masked), PDF upload, and the user's question.
    openai_key_box = gr.Textbox(label="Enter your OpenAI API Key", type="password")
    uploaded_pdf = gr.File(label="Upload your PDF document")
    question_box = gr.Textbox(label="Ask a question related to the PDF")

    # Output area for the generated answer.
    answer_box = gr.Textbox(label="Generated Response", lines=10)

    # Clicking the button runs the full RAG pipeline over the upload.
    ask_button = gr.Button("Ask Question")
    ask_button.click(
        rag_from_pdf,
        inputs=[question_box, uploaded_pdf, openai_key_box],
        outputs=answer_box,
    )

# Start the web UI.
app.launch()