"""Smart FAQ Bot: a Gradio app that answers questions about an uploaded PDF
using retrieval-augmented generation (RAG) over a Chroma vector store."""

import os

import gradio as gr
from langchain.chat_models import ChatOpenAI
from langchain.embeddings.openai import OpenAIEmbeddings
from langchain.schema import Document, HumanMessage, SystemMessage
from langchain.vectorstores import Chroma
from PyPDF2 import PdfReader


def pdf_to_documents(pdf_file):
    """Convert an uploaded PDF into a list of LangChain ``Document`` objects.

    Args:
        pdf_file: Gradio file wrapper; ``pdf_file.name`` is the path on disk.

    Returns:
        One ``Document`` per page that has extractable text, carrying the
        page's real 1-based number in ``metadata["page_number"]``.

    Raises:
        ValueError: if the PDF cannot be read or contains no extractable text.
    """
    try:
        reader = PdfReader(pdf_file.name)
        documents = []
        # Enumerate the actual pages so page_number reflects the true PDF
        # page, even when some pages yield no text.  extract_text() is
        # called exactly once per page (the original called it twice).
        for page_no, page in enumerate(reader.pages, start=1):
            text = page.extract_text()
            # Skip pages whose text is empty or whitespace-only; a bare
            # truthiness check would let "  \n" through as an empty doc.
            if text and text.strip():
                documents.append(
                    Document(
                        page_content=text.strip(),
                        metadata={"page_number": page_no},
                    )
                )
    except Exception as e:
        # Chain the cause so the original traceback is preserved.
        raise ValueError(f"Failed to process the PDF: {str(e)}") from e
    # Raised outside the try so it is not re-wrapped by the handler above.
    if not documents:
        raise ValueError("The uploaded PDF is empty or could not be processed.")
    return documents


def initialize_vectorstore(documents, api_key):
    """Build an in-memory Chroma vector store over *documents*.

    The key is exported via ``OPENAI_API_KEY`` (rather than passed only to
    the embeddings client) because ``ChatOpenAI``, created later in
    ``rag_from_pdf``, also reads it from the environment.
    """
    os.environ["OPENAI_API_KEY"] = api_key
    embeddings = OpenAIEmbeddings()
    return Chroma.from_documents(documents, embedding=embeddings)


def rag_from_pdf(question, pdf_file, api_key):
    """Answer *question* from the uploaded PDF via retrieval + GPT-3.5.

    Args:
        question: The user's question text.
        pdf_file: Gradio file wrapper for the uploaded PDF.
        api_key: The user's OpenAI API key.

    Returns:
        The model's answer, or a human-readable error/validation message.
    """
    # None-safe guards: Gradio normally passes "", but a None would have
    # crashed the original `.strip()` calls with AttributeError.
    if not question or not question.strip():
        return "Please enter a question."
    if not pdf_file:
        return "Please upload a valid PDF file."
    if not api_key or not api_key.strip():
        return "Please enter your OpenAI API key."
    try:
        documents = pdf_to_documents(pdf_file)
        # NOTE(review): the vector store (and all embeddings) is rebuilt on
        # every question; caching it per uploaded file would avoid repeated
        # embedding cost for follow-up questions.
        vectorstore = initialize_vectorstore(documents, api_key)
        retriever = vectorstore.as_retriever(
            search_type="similarity", search_kwargs={"k": 3}
        )

        # Retrieve the top-3 most similar chunks as context.
        retrieved_docs = retriever.get_relevant_documents(question)
        context = "\n".join(doc.page_content for doc in retrieved_docs)
        if not context.strip():
            return "No relevant information found in the document to answer the question."

        # Generate the answer grounded in the retrieved context.
        llm = ChatOpenAI(model="gpt-3.5-turbo")
        messages = [
            SystemMessage(
                content="You are a helpful assistant answering questions based on the provided PDF document."
            ),
            HumanMessage(content=f"Question: {question}\n\nContext: {context}"),
        ]
        response = llm(messages=messages)
        return response.content.strip()
    except Exception as e:
        # Surface any failure (bad key, network, parsing) to the UI textbox.
        return f"An error occurred: {str(e)}"


# ---------------------------------------------------------------------------
# Gradio interface
# ---------------------------------------------------------------------------
with gr.Blocks() as app:
    gr.Markdown("## Smart FAQ Bot - Ask Questions from Your PDF File")

    # Input for OpenAI API key (masked).
    api_key_input = gr.Textbox(
        label="Enter your OpenAI API Key", type="password", placeholder="sk-..."
    )

    # File upload for the PDF.
    pdf_file_input = gr.File(label="Upload your PDF document", file_types=[".pdf"])

    # Question input.
    question_input = gr.Textbox(
        label="Ask a question related to the PDF",
        placeholder="Type your question here...",
    )

    # Output for the generated answer.
    rag_output = gr.Textbox(
        label="Generated Answer",
        lines=10,
        placeholder="Your answer will appear here...",
    )

    # Wire the button to the RAG pipeline.
    rag_button = gr.Button("Ask Question")
    rag_button.click(
        rag_from_pdf,
        inputs=[question_input, pdf_file_input, api_key_input],
        outputs=rag_output,
    )

# Guarded so importing this module (e.g. for tests) does not start a server.
if __name__ == "__main__":
    app.launch()