|
import gradio as gr |
|
from langchain.vectorstores import Chroma |
|
from langchain.embeddings.openai import OpenAIEmbeddings |
|
from langchain.schema import Document |
|
from langchain.chat_models import ChatOpenAI |
|
from langchain.schema import SystemMessage, HumanMessage |
|
from PyPDF2 import PdfReader |
|
import os |
|
|
|
|
|
def pdf_to_documents(pdf_file):
    """Extract text from an uploaded PDF and wrap each page in a Document.

    Args:
        pdf_file: Uploaded file object with a ``.name`` attribute pointing
            at the PDF on disk (as supplied by gradio's File input).

    Returns:
        list[Document]: One Document per page with extractable text. The
        original 1-based page number is stored in
        ``metadata["page_number"]`` (skipped empty pages do not shift it).

    Raises:
        ValueError: If the PDF cannot be read or contains no extractable text.
    """
    try:
        reader = PdfReader(pdf_file.name)
        documents = []
        for idx, page in enumerate(reader.pages):
            # extract_text() is expensive: call it exactly once per page
            # (the original called it twice — once to filter, once to keep).
            text = (page.extract_text() or "").strip()
            # Skip blank/whitespace-only pages entirely instead of
            # producing Documents with empty content.
            if text:
                documents.append(
                    Document(page_content=text, metadata={"page_number": idx + 1})
                )
    except Exception as e:
        # Chain the original exception so the root cause is preserved.
        raise ValueError(f"Failed to process the PDF: {str(e)}") from e

    # Raised OUTSIDE the try block so this message is not re-caught and
    # re-wrapped into "Failed to process the PDF: ..." by the handler above.
    if not documents:
        raise ValueError("The uploaded PDF is empty or could not be processed.")
    return documents
|
|
|
|
|
def initialize_vectorstore(documents, api_key):
    """Build an in-memory Chroma vector store over the given documents.

    The API key is exported through the environment rather than passed to
    the client directly, so every downstream OpenAI-backed component
    (embeddings here, the chat model later) picks it up automatically.
    """
    os.environ["OPENAI_API_KEY"] = api_key
    return Chroma.from_documents(documents, embedding=OpenAIEmbeddings())
|
|
|
|
|
def rag_from_pdf(question, pdf_file, api_key):
    """Answer a question via retrieval-augmented generation over a PDF.

    Args:
        question: The user's natural-language question.
        pdf_file: The uploaded PDF file object (from gradio's File input).
        api_key: The user's OpenAI API key.

    Returns:
        str: The model's answer, or a human-readable validation/error
        message (errors are returned as text rather than raised, so the
        UI always shows something).
    """
    # Gradio can hand over None for untouched inputs, so guard before
    # calling .strip() — the original raised AttributeError on None.
    if not question or not question.strip():
        return "Please enter a question."
    if not pdf_file:
        return "Please upload a valid PDF file."
    if not api_key or not api_key.strip():
        return "Please enter your OpenAI API key."

    try:
        documents = pdf_to_documents(pdf_file)

        # The store is rebuilt for every question; fine for small PDFs.
        # NOTE(review): consider caching per-file for larger workloads.
        vectorstore = initialize_vectorstore(documents, api_key.strip())
        retriever = vectorstore.as_retriever(
            search_type="similarity", search_kwargs={"k": 3}
        )

        # Pull the top-k most similar pages and join them into one context.
        retrieved_docs = retriever.get_relevant_documents(question)
        context = "\n".join(doc.page_content for doc in retrieved_docs)

        if not context.strip():
            return "No relevant information found in the document to answer the question."

        llm = ChatOpenAI(model="gpt-3.5-turbo")

        messages = [
            SystemMessage(content="You are a helpful assistant answering questions based on the provided PDF document."),
            HumanMessage(content=f"Question: {question}\n\nContext: {context}"),
        ]

        response = llm(messages=messages)
        return response.content.strip()
    except Exception as e:
        # Surface any failure (PDF parsing, API errors) to the UI as text.
        return f"An error occurred: {str(e)}"
|
|
|
|
|
# --- Gradio UI: wires the RAG pipeline to a simple question/answer form ---
with gr.Blocks() as app:
    gr.Markdown("## Smart FAQ Bot - Ask Questions from Your PDF File")

    # Inputs — creation order determines on-screen order, so keep it.
    api_key_input = gr.Textbox(
        label="Enter your OpenAI API Key",
        type="password",
        placeholder="sk-...",
    )
    pdf_file_input = gr.File(
        label="Upload your PDF document",
        file_types=[".pdf"],
    )
    question_input = gr.Textbox(
        label="Ask a question related to the PDF",
        placeholder="Type your question here...",
    )

    # Output area and the button that triggers the pipeline.
    rag_output = gr.Textbox(
        label="Generated Answer",
        lines=10,
        placeholder="Your answer will appear here...",
    )
    rag_button = gr.Button("Ask Question")
    rag_button.click(
        rag_from_pdf,
        inputs=[question_input, pdf_file_input, api_key_input],
        outputs=rag_output,
    )

app.launch()
|
|