Spaces:

DexterSptizu
/

langchain-RAG-pdf

Running

App Files Community

DexterSptizu committed 2 days ago

Commit

77ea126

•

1 Parent(s): 9105364

Update app.py

Browse files

Files changed (1) hide show

app.py +60 -42

app.py CHANGED Viewed

@@ -1,19 +1,23 @@
 import gradio as gr
-from langchain_chroma import Chroma
-from langchain_openai import OpenAIEmbeddings
-from langchain_core.documents import Document
-from langchain_openai import ChatOpenAI
-from langchain_core.prompts import ChatPromptTemplate
-from langchain_core.runnables import RunnablePassthrough
 from PyPDF2 import PdfReader
 import os
 # Function to process the uploaded PDF and convert it to documents
 def pdf_to_documents(pdf_file):
-    reader = PdfReader(pdf_file.name)
-    pages = [page.extract_text() for page in reader.pages]
-    documents = [Document(page_content=page, metadata={"page_number": idx + 1}) for idx, page in enumerate(pages)]
-    return documents
 # Initialize vector store
 def initialize_vectorstore(documents, api_key):
@@ -22,50 +26,64 @@ def initialize_vectorstore(documents, api_key):
     vectorstore = Chroma.from_documents(documents, embedding=embeddings)
     return vectorstore
-# RAG retrieval and LLM chain
 def rag_from_pdf(question, pdf_file, api_key):
-    documents = pdf_to_documents(pdf_file)
-    vectorstore = initialize_vectorstore(documents, api_key)
-    retriever = vectorstore.as_retriever(search_type="similarity", search_kwargs={"k": 2})  # Retrieve top 2 relevant sections
-    # Initialize the LLM
-    llm = ChatOpenAI(model="gpt-3.5-turbo")
-    # Create a prompt template for combining context and question
-    prompt_template = """
-    Answer this question using the provided context only.
-    {question}
-    Context:
-    {context}
-    """
-    prompt = ChatPromptTemplate.from_messages([("human", prompt_template)])
-    # Create a RAG chain combining retriever and LLM
-    rag_chain = {"context": retriever, "question": RunnablePassthrough()} | prompt | llm
-    # Perform retrieval and return LLM's answer
-    response = rag_chain.invoke(question)
-    return response.content
 # Gradio interface
 with gr.Blocks() as app:
-    gr.Markdown("## PDF-based Question Answering with RAG")
     # Input for OpenAI API Key
-    api_key_input = gr.Textbox(label="Enter your OpenAI API Key", type="password")
     # File upload for the PDF
-    pdf_file_input = gr.File(label="Upload your PDF document")
     # Question input
-    question_input = gr.Textbox(label="Ask a question related to the PDF")
     # Output for the RAG response
-    rag_output = gr.Textbox(label="Generated Response", lines=10)
     # Button to run RAG chain
     rag_button = gr.Button("Ask Question")

 import gradio as gr
+from langchain.vectorstores import Chroma
+from langchain.embeddings.openai import OpenAIEmbeddings
+from langchain.schema import Document
+from langchain.chat_models import ChatOpenAI
+from langchain.prompts.chat import ChatPromptTemplate
 from PyPDF2 import PdfReader
 import os
 # Function to process the uploaded PDF and convert it to documents
 def pdf_to_documents(pdf_file):
     """Convert an uploaded PDF into one ``Document`` per page that has text.

     Args:
         pdf_file: Upload object whose ``name`` attribute is the path handed
             to ``PdfReader``.

     Returns:
         A list of ``Document`` objects. ``page_content`` is the stripped page
         text and ``metadata["page_number"]`` is the page's 1-based position
         in the PDF itself, so skipped blank pages do not shift the numbering.

     Raises:
         ValueError: If the PDF cannot be read or parsed, or if no page
             yields any text.
     """
     try:
         reader = PdfReader(pdf_file.name)
         # Extract every page exactly once (extraction is the expensive step)
         # and number pages before filtering so the numbers stay accurate.
         page_texts = [
             (number, (page.extract_text() or "").strip())
             for number, page in enumerate(reader.pages, start=1)
         ]
     except Exception as e:
         raise ValueError(f"Failed to process the PDF: {str(e)}") from e

     # Filter after stripping so whitespace-only pages are dropped as well.
     documents = [
         Document(page_content=text, metadata={"page_number": number})
         for number, text in page_texts
         if text
     ]
     # Raised outside the try block so this message is not re-wrapped by the
     # generic "Failed to process" handler above.
     if not documents:
         raise ValueError("The uploaded PDF is empty or could not be processed.")
     return documents
 # Initialize vector store
 def initialize_vectorstore(documents, api_key):
     vectorstore = Chroma.from_documents(documents, embedding=embeddings)
     return vectorstore
+# RAG retrieval and LLM chain for FAQ Bot
 def rag_from_pdf(question, pdf_file, api_key):
     """Answer a question about an uploaded PDF using retrieval plus an LLM.

     The PDF is split into per-page documents, indexed in a vector store, the
     three most similar pages are retrieved for the question, and the chat
     model is asked to answer from that context only.

     Args:
         question: The user's question text.
         pdf_file: The uploaded PDF, as accepted by ``pdf_to_documents``.
         api_key: OpenAI API key used for the vector store and the chat model.

     Returns:
         The model's answer as a string. Validation problems and runtime
         failures are also returned as human-readable strings rather than
         raised.
     """
     # Guard against None as well as blank strings; these checks run outside
     # the try block, so an AttributeError here would escape to the caller.
     if not question or not question.strip():
         return "Please enter a question."
     if not pdf_file:
         return "Please upload a valid PDF file."
     if not api_key or not api_key.strip():
         return "Please enter your OpenAI API key."
     try:
         # Process the PDF into documents and index them
         documents = pdf_to_documents(pdf_file)
         vectorstore = initialize_vectorstore(documents, api_key)
         retriever = vectorstore.as_retriever(search_type="similarity", search_kwargs={"k": 3})
         # Retrieve relevant documents
         retrieved_docs = retriever.get_relevant_documents(question)
         context = "\n".join(doc.page_content for doc in retrieved_docs)
         if not context.strip():
             return "No relevant information found in the document to answer the question."
         # Create a prompt template for combining context and question
         prompt_template = """
         You are a helpful assistant answering questions based on the provided PDF document.
         Only use the given context to answer the question.
         Question: {question}
         Context: {context}
         """
         prompt = ChatPromptTemplate.from_template(prompt_template)
         # Pass the key explicitly so the model does not depend on the
         # OPENAI_API_KEY environment variable having been set elsewhere.
         llm = ChatOpenAI(model="gpt-3.5-turbo", openai_api_key=api_key)
         # Chat models take a list of messages and return a message object;
         # the answer text lives in its `content` attribute.
         messages = prompt.format_messages(question=question, context=context)
         response = llm.invoke(messages)
         return response.content.strip()
     except Exception as e:
         return f"An error occurred: {str(e)}"
 # Gradio interface
 with gr.Blocks() as app:
+    gr.Markdown("## Smart FAQ Bot - Ask Questions from Your PDF File")
     # Input for OpenAI API Key
+    api_key_input = gr.Textbox(label="Enter your OpenAI API Key", type="password", placeholder="sk-...")
     # File upload for the PDF
+    pdf_file_input = gr.File(label="Upload your PDF document", file_types=[".pdf"])
     # Question input
+    question_input = gr.Textbox(label="Ask a question related to the PDF", placeholder="Type your question here...")
     # Output for the RAG response
+    rag_output = gr.Textbox(label="Generated Answer", lines=10, placeholder="Your answer will appear here...")
     # Button to run RAG chain
     rag_button = gr.Button("Ask Question")