Spaces:

DexterSptizu
/

langchain-RAG-pdf

Running

App Files Community

DexterSptizu committed 2 days ago

Commit

b77cac2

•

1 Parent(s): 77ea126

Update app.py

Browse files

Files changed (1) hide show

app.py +43 -61

app.py CHANGED Viewed

@@ -1,23 +1,19 @@
 import gradio as gr
-from langchain.vectorstores import Chroma
-from langchain.embeddings.openai import OpenAIEmbeddings
-from langchain.schema import Document
-from langchain.chat_models import ChatOpenAI
-from langchain.prompts.chat import ChatPromptTemplate
 from PyPDF2 import PdfReader
 import os
 # Function to process the uploaded PDF and convert it to documents
 def pdf_to_documents(pdf_file):
-    try:
-        reader = PdfReader(pdf_file.name)
-        pages = [page.extract_text().strip() for page in reader.pages if page.extract_text()]
-        documents = [Document(page_content=page, metadata={"page_number": idx + 1}) for idx, page in enumerate(pages)]
-        if not documents:
-            raise ValueError("The uploaded PDF is empty or could not be processed.")
-        return documents
-    except Exception as e:
-        raise ValueError(f"Failed to process the PDF: {str(e)}")
 # Initialize vector store
 def initialize_vectorstore(documents, api_key):
@@ -26,64 +22,50 @@ def initialize_vectorstore(documents, api_key):
     vectorstore = Chroma.from_documents(documents, embedding=embeddings)
     return vectorstore
-# RAG retrieval and LLM chain for FAQ Bot
 def rag_from_pdf(question, pdf_file, api_key):
-    if not question.strip():
-        return "Please enter a question."
-    if not pdf_file:
-        return "Please upload a valid PDF file."
-    if not api_key.strip():
-        return "Please enter your OpenAI API key."
-    try:
-        # Process the PDF into documents
-        documents = pdf_to_documents(pdf_file)
-        # Initialize vectorstore
-        vectorstore = initialize_vectorstore(documents, api_key)
-        retriever = vectorstore.as_retriever(search_type="similarity", search_kwargs={"k": 3})
-        # Initialize the LLM
-        llm = ChatOpenAI(model="gpt-3.5-turbo")
-        # Create a prompt template for combining context and question
-        prompt_template = """
-        You are a helpful assistant answering questions based on the provided PDF document.
-        Only use the given context to answer the question.
-        Question: {question}
-        Context: {context}
-        """
-        prompt = ChatPromptTemplate.from_template(prompt_template)
-        # Retrieve relevant documents
-        retrieved_docs = retriever.get_relevant_documents(question)
-        context = "\n".join([doc.page_content for doc in retrieved_docs])
-        # Generate response using the LLM
-        if not context.strip():
-            return "No relevant information found in the document to answer the question."
-        formatted_prompt = prompt.format(question=question, context=context)
-        response = llm(completion=formatted_prompt)
-        return response.strip()
-    except Exception as e:
-        return f"An error occurred: {str(e)}"
 # Gradio interface
 with gr.Blocks() as app:
-    gr.Markdown("## Smart FAQ Bot - Ask Questions from Your PDF File")
     # Input for OpenAI API Key
-    api_key_input = gr.Textbox(label="Enter your OpenAI API Key", type="password", placeholder="sk-...")
     # File upload for the PDF
-    pdf_file_input = gr.File(label="Upload your PDF document", file_types=[".pdf"])
     # Question input
-    question_input = gr.Textbox(label="Ask a question related to the PDF", placeholder="Type your question here...")
     # Output for the RAG response
-    rag_output = gr.Textbox(label="Generated Answer", lines=10, placeholder="Your answer will appear here...")
     # Button to run RAG chain
     rag_button = gr.Button("Ask Question")
@@ -92,4 +74,4 @@ with gr.Blocks() as app:
     rag_button.click(rag_from_pdf, inputs=[question_input, pdf_file_input, api_key_input], outputs=rag_output)
 # Launch Gradio app
-app.launch()

 import gradio as gr
+from langchain_chroma import Chroma
+from langchain_openai import OpenAIEmbeddings
+from langchain_core.documents import Document
+from langchain_openai import ChatOpenAI
+from langchain_core.prompts import ChatPromptTemplate
+from langchain_core.runnables import RunnablePassthrough
 from PyPDF2 import PdfReader
 import os
 # Function to process the uploaded PDF and convert it to documents
 def pdf_to_documents(pdf_file):
+    reader = PdfReader(pdf_file.name)
+    pages = [page.extract_text() for page in reader.pages]
+    documents = [Document(page_content=page, metadata={"page_number": idx + 1}) for idx, page in enumerate(pages)]
+    return documents
 # Initialize vector store
 def initialize_vectorstore(documents, api_key):
     vectorstore = Chroma.from_documents(documents, embedding=embeddings)
     return vectorstore
+# RAG retrieval and LLM chain
 def rag_from_pdf(question, pdf_file, api_key):
+    documents = pdf_to_documents(pdf_file)
+    vectorstore = initialize_vectorstore(documents, api_key)
+    retriever = vectorstore.as_retriever(search_type="similarity", search_kwargs={"k": 2})  # Retrieve top 2 relevant sections
+    # Initialize the LLM
+    llm = ChatOpenAI(model="gpt-3.5-turbo")
+    # Create a prompt template for combining context and question
+    prompt_template = """
+    Answer this question using the provided context only.
+    {question}
+    Context:
+    {context}
+    """
+    prompt = ChatPromptTemplate.from_messages([("human", prompt_template)])
+    # Create a RAG chain combining retriever and LLM
+    rag_chain = {"context": retriever, "question": RunnablePassthrough()} | prompt | llm
+    # Perform retrieval and return LLM's answer
+    response = rag_chain.invoke(question)
+    return response.content
 # Gradio interface
 with gr.Blocks() as app:
+    gr.Markdown("## PDF-based Question Answering with RAG")
     # Input for OpenAI API Key
+    api_key_input = gr.Textbox(label="Enter your OpenAI API Key", type="password")
     # File upload for the PDF
+    pdf_file_input = gr.File(label="Upload your PDF document")
     # Question input
+    question_input = gr.Textbox(label="Ask a question related to the PDF")
     # Output for the RAG response
+    rag_output = gr.Textbox(label="Generated Response", lines=10)
     # Button to run RAG chain
     rag_button = gr.Button("Ask Question")
     rag_button.click(rag_from_pdf, inputs=[question_input, pdf_file_input, api_key_input], outputs=rag_output)
 # Launch Gradio app
+app.launch()