import os

import gradio as gr
from langchain_chroma import Chroma
from langchain_core.documents import Document
from langchain_core.prompts import ChatPromptTemplate
from langchain_core.runnables import RunnablePassthrough
from langchain_openai import ChatOpenAI, OpenAIEmbeddings
from PyPDF2 import PdfReader


def pdf_to_documents(pdf_file):
    """Convert an uploaded PDF into one LangChain Document per page.

    Args:
        pdf_file: Uploaded file object (Gradio file wrapper) exposing a
            ``.name`` attribute with the on-disk path of the PDF.

    Returns:
        list[Document]: one Document per page, carrying 1-based
        ``page_number`` metadata.
    """
    reader = PdfReader(pdf_file.name)
    documents = []
    for idx, page in enumerate(reader.pages):
        # extract_text() returns None for image-only pages; fall back to ""
        # so Document construction and embedding never see None.
        text = page.extract_text() or ""
        documents.append(
            Document(page_content=text, metadata={"page_number": idx + 1})
        )
    return documents


def initialize_vectorstore(documents, api_key):
    """Embed the documents and build an in-memory Chroma vector store.

    Args:
        documents: list of Documents to index.
        api_key: OpenAI API key. Exported via OPENAI_API_KEY so that both
            the embedding model created here and the chat model created
            later in rag_from_pdf pick it up.

    Returns:
        Chroma: the populated vector store.
    """
    # NOTE: mutating the process environment is how the key reaches every
    # OpenAI client in this app -- keep this before any client is built.
    os.environ["OPENAI_API_KEY"] = api_key
    embeddings = OpenAIEmbeddings()
    return Chroma.from_documents(documents, embedding=embeddings)


def rag_from_pdf(question, pdf_file, api_key):
    """Answer a question about the uploaded PDF via a simple RAG chain.

    The PDF is (re-)indexed on every call, the top-2 most similar page
    chunks are retrieved, and the LLM is told to answer from that context
    only.

    Args:
        question: the user's question.
        pdf_file: uploaded PDF (Gradio file wrapper).
        api_key: OpenAI API key.

    Returns:
        str: the model's answer text.
    """
    documents = pdf_to_documents(pdf_file)
    vectorstore = initialize_vectorstore(documents, api_key)
    # Retrieve top 2 relevant sections.
    retriever = vectorstore.as_retriever(
        search_type="similarity", search_kwargs={"k": 2}
    )

    # Initialize the LLM.
    llm = ChatOpenAI(model="gpt-3.5-turbo")

    # Prompt template combining retrieved context with the user's question.
    # (String content preserved exactly, including the embedded newline.)
    prompt_template = """ Answer this question using the provided context only. 
{question} Context: {context} """
    prompt = ChatPromptTemplate.from_messages([("human", prompt_template)])

    # LCEL chain: the retriever fills {context}; the raw question string is
    # passed through unchanged to fill {question}.
    rag_chain = (
        {"context": retriever, "question": RunnablePassthrough()}
        | prompt
        | llm
    )

    response = rag_chain.invoke(question)
    return response.content


# ---- Gradio interface ------------------------------------------------------
with gr.Blocks() as app:
    gr.Markdown("## PDF-based Question Answering with RAG")

    # Input for the OpenAI API key (masked).
    api_key_input = gr.Textbox(label="Enter your OpenAI API Key", type="password")
    # File upload for the PDF.
    pdf_file_input = gr.File(label="Upload your PDF document")
    # Question input.
    question_input = gr.Textbox(label="Ask a question related to the PDF")
    # Output for the RAG response.
    rag_output = gr.Textbox(label="Generated Response", lines=10)

    # Button wiring: run the RAG chain on click.
    rag_button = gr.Button("Ask Question")
    rag_button.click(
        rag_from_pdf,
        inputs=[question_input, pdf_file_input, api_key_input],
        outputs=rag_output,
    )

# Launch only when executed as a script, not on import.
if __name__ == "__main__":
    app.launch()