Spaces:

DexterSptizu
/

langchain-RAG-pdf

Running

App Files Files Community

DexterSptizu committed on Oct 20, 2024

Commit

c647394

verified ·

1 Parent(s): 0bc46ee

Create app.py

Browse files

Files changed (1) hide show

app.py +77 -0

app.py ADDED Viewed

	@@ -0,0 +1,77 @@

+import gradio as gr
+from langchain_chroma import Chroma
+from langchain_openai import OpenAIEmbeddings
+from langchain_core.documents import Document
+from langchain_openai import ChatOpenAI
+from langchain_core.prompts import ChatPromptTemplate
+from langchain_core.runnables import RunnablePassthrough
+from PyPDF2 import PdfReader
+import os
+# Function to process the uploaded PDF and convert it to documents
+def pdf_to_documents(pdf_file):
+    reader = PdfReader(pdf_file.name)
+    pages = [page.extract_text() for page in reader.pages]
+    documents = [Document(page_content=page, metadata={"page_number": idx + 1}) for idx, page in enumerate(pages)]
+    return documents
+# Initialize vector store
+def initialize_vectorstore(documents, api_key):
+    os.environ["OPENAI_API_KEY"] = api_key
+    embeddings = OpenAIEmbeddings()
+    vectorstore = Chroma.from_documents(documents, embedding=embeddings)
+    return vectorstore
+# RAG retrieval and LLM chain
+def rag_from_pdf(question, pdf_file, api_key):
+    documents = pdf_to_documents(pdf_file)
+    vectorstore = initialize_vectorstore(documents, api_key)
+    retriever = vectorstore.as_retriever(search_type="similarity", search_kwargs={"k": 2})  # Retrieve top 2 relevant sections
+    # Initialize the LLM
+    llm = ChatOpenAI(model="gpt-3.5-turbo")
+    # Create a prompt template for combining context and question
+    prompt_template = """
+    Answer this question using the provided context only.
+    {question}
+    Context:
+    {context}
+    """
+    prompt = ChatPromptTemplate.from_messages([("human", prompt_template)])
+    # Create a RAG chain combining retriever and LLM
+    rag_chain = {"context": retriever, "question": RunnablePassthrough()} | prompt | llm
+    # Perform retrieval and return LLM's answer
+    response = rag_chain.invoke(question)
+    return response.content
+# Gradio interface
+with gr.Blocks() as app:
+    gr.Markdown("## PDF-based Question Answering with RAG")
+    # Input for OpenAI API Key
+    api_key_input = gr.Textbox(label="Enter your OpenAI API Key", type="password")
+    # File upload for the PDF
+    pdf_file_input = gr.File(label="Upload your PDF document")
+    # Question input
+    question_input = gr.Textbox(label="Ask a question related to the PDF")
+    # Output for the RAG response
+    rag_output = gr.Textbox(label="Generated Response", lines=10)
+    # Button to run RAG chain
+    rag_button = gr.Button("Ask Question")
+    # Functionality for the RAG chain
+    rag_button.click(rag_from_pdf, inputs=[question_input, pdf_file_input, api_key_input], outputs=rag_output)
+# Launch Gradio app
+app.launch()