Spaces:
Sleeping
Sleeping
| import gradio as gr | |
| from langchain_chroma import Chroma | |
| from langchain_openai import OpenAIEmbeddings | |
| from langchain_core.documents import Document | |
| from langchain_openai import ChatOpenAI | |
| from langchain_core.prompts import ChatPromptTemplate | |
| from langchain_core.runnables import RunnablePassthrough | |
| from PyPDF2 import PdfReader | |
| import os | |
# Function to process the uploaded PDF and convert it to documents
def pdf_to_documents(pdf_file):
    """Convert an uploaded PDF into one ``Document`` per page that has text.

    Args:
        pdf_file: Either a filesystem path, or an object exposing the path
            through a ``.name`` attribute (such as an upload wrapper).

    Returns:
        A list of ``Document`` objects. Each carries the extracted page text
        as ``page_content`` and the 1-based page index in
        ``metadata["page_number"]``. Pages without extractable text are
        omitted, so the list may be shorter than the PDF (or empty).

    Raises:
        ValueError: If ``pdf_file`` is ``None`` (nothing was uploaded).
    """
    if pdf_file is None:
        raise ValueError("No PDF file was provided.")

    # Accept a plain path as well as an object that stores its path in `.name`.
    pdf_path = getattr(pdf_file, "name", pdf_file)
    reader = PdfReader(pdf_path)

    documents = []
    for page_number, page in enumerate(reader.pages, start=1):
        # Text extraction can yield None/"" (e.g. scanned, image-only pages).
        text = page.extract_text() or ""
        if not text.strip():
            # Nothing to embed for this page; page numbers of the remaining
            # pages are unaffected because they come from enumerate().
            # NOTE(review): empty inputs are believed to be rejected by the
            # embeddings endpoint -- confirm.
            continue
        documents.append(
            Document(page_content=text, metadata={"page_number": page_number})
        )
    return documents
# Initialize vector store
def initialize_vectorstore(documents, api_key):
    """Embed *documents* with OpenAI and index them in a fresh Chroma collection.

    Args:
        documents: ``Document`` objects to embed and store.
        api_key: OpenAI API key used for the embedding requests.

    Returns:
        A ``Chroma`` vector store that contains only ``documents``.

    Side effects:
        Sets the ``OPENAI_API_KEY`` environment variable for the whole
        process (kept for backward compatibility, see the note below).
    """
    import uuid  # local import: only needed to build a unique collection name

    # Still exported because existing callers may build other OpenAI clients
    # without an explicit key and rely on this variable being set.
    # NOTE(review): this is process-wide state; on a shared deployment the
    # most recently submitted key is visible to every request. Drop this line
    # once no caller depends on it.
    os.environ["OPENAI_API_KEY"] = api_key

    # Pass the key explicitly so a concurrent request that overwrites the
    # environment variable cannot change which key this client uses.
    embeddings = OpenAIEmbeddings(api_key=api_key)

    # Without an explicit name every call writes into the same default
    # collection of the in-process Chroma client, so pages from previously
    # indexed PDFs would be retrieved as well. A unique name isolates each call.
    return Chroma.from_documents(
        documents,
        embedding=embeddings,
        collection_name=f"pdf-{uuid.uuid4().hex}",
    )
# RAG retrieval and LLM chain
def rag_from_pdf(question, pdf_file, api_key):
    """Answer *question* from an uploaded PDF using retrieval-augmented generation.

    The PDF is split into per-page documents, embedded into a vector store,
    the most similar pages are retrieved for the question, and their text is
    passed together with the question to the chat model.

    Args:
        question: The user's question about the PDF.
        pdf_file: The uploaded PDF, in whatever form ``pdf_to_documents``
            accepts.
        api_key: OpenAI API key used for embeddings and the chat model.

    Returns:
        The model's answer as a string.

    Raises:
        gr.Error: If the API key, the PDF or the question is missing, or if
            no pages could be obtained from the PDF. Gradio shows the message
            to the user.
    """
    question = (question or "").strip()
    api_key = (api_key or "").strip()
    if not api_key:
        raise gr.Error("Please enter your OpenAI API key.")
    if pdf_file is None:
        raise gr.Error("Please upload a PDF document.")
    if not question:
        raise gr.Error("Please enter a question.")

    documents = pdf_to_documents(pdf_file)
    if not documents:
        raise gr.Error("No text could be read from the uploaded PDF.")

    vectorstore = initialize_vectorstore(documents, api_key)
    try:
        # Retrieve the top 2 relevant pages, or fewer when the PDF is shorter.
        top_k = min(2, len(documents))
        retriever = vectorstore.as_retriever(
            search_type="similarity", search_kwargs={"k": top_k}
        )

        # Pass the key explicitly instead of relying on process-wide state.
        llm = ChatOpenAI(model="gpt-3.5-turbo", api_key=api_key)

        # Prompt that combines the question with the retrieved context.
        prompt_template = (
            "\n"
            "Answer this question using the provided context only.\n"
            "{question}\n"
            "Context:\n"
            "{context}\n"
        )
        prompt = ChatPromptTemplate.from_messages([("human", prompt_template)])

        def format_docs(docs):
            # Feed the model the page text itself rather than the repr of a
            # list of Document objects.
            return "\n\n".join(doc.page_content for doc in docs)

        rag_chain = (
            {"context": retriever | format_docs, "question": RunnablePassthrough()}
            | prompt
            | llm
        )
        return rag_chain.invoke(question).content
    finally:
        # The index is only needed for this one question; release it so
        # collections do not pile up (or get reused) across requests.
        vectorstore.delete_collection()
# Gradio interface: collects the API key, the PDF and a question, then shows
# the answer produced by rag_from_pdf.
with gr.Blocks() as app:
    gr.Markdown("## PDF-based Question Answering with RAG")

    # OpenAI API key (masked while typing).
    api_key_input = gr.Textbox(
        label="Enter your OpenAI API Key",
        type="password",
    )

    # PDF upload.
    pdf_file_input = gr.File(label="Upload your PDF document")

    # Question about the uploaded PDF.
    question_input = gr.Textbox(label="Ask a question related to the PDF")

    # Answer area.
    rag_output = gr.Textbox(label="Generated Response", lines=10)

    # Trigger button; the input order matches rag_from_pdf's parameters
    # (question, pdf_file, api_key).
    rag_button = gr.Button("Ask Question")
    rag_button.click(
        fn=rag_from_pdf,
        inputs=[question_input, pdf_file_input, api_key_input],
        outputs=rag_output,
    )

# Start the Gradio app.
app.launch()