Spaces:

DexterSptizu
/

langchain-RAG-pdf

Sleeping

App Files Files Community

langchain-RAG-pdf / app.py

DexterSptizu

Update app.py

b77cac2 verified about 1 year ago

raw

history blame

2.66 kB

	import gradio as gr
	from langchain_chroma import Chroma
	from langchain_openai import OpenAIEmbeddings
	from langchain_core.documents import Document
	from langchain_openai import ChatOpenAI
	from langchain_core.prompts import ChatPromptTemplate
	from langchain_core.runnables import RunnablePassthrough
	from PyPDF2 import PdfReader
	import os

	# Function to process the uploaded PDF and convert it to documents
	def pdf_to_documents(pdf_file):
	    """Convert a PDF into one ``Document`` per page that contains text.

	    Args:
	        pdf_file: Either a filesystem path (``str`` / ``os.PathLike``) or an
	            upload object whose ``name`` attribute holds the path of the PDF.

	    Returns:
	        A list of ``Document`` objects. Each one holds the text of a single
	        page and its 1-based ``page_number`` in the metadata. Pages that
	        yield no text are left out, so page numbers may have gaps.
	    """
	    # Plain paths are used directly; anything else is expected to expose the
	    # on-disk location through ``.name``, as the original code assumed.
	    if isinstance(pdf_file, (str, os.PathLike)):
	        pdf_path = pdf_file
	    else:
	        pdf_path = pdf_file.name

	    reader = PdfReader(pdf_path)

	    documents = []
	    for page_number, page in enumerate(reader.pages, start=1):
	        # Text extraction can come back empty (e.g. pages without a text
	        # layer); such pages carry nothing worth embedding.
	        text = page.extract_text() or ""
	        if not text.strip():
	            continue
	        documents.append(
	            Document(page_content=text, metadata={"page_number": page_number})
	        )
	    return documents

	# Initialize vector store
	def initialize_vectorstore(documents, api_key):
	    """Embed ``documents`` with OpenAI and index them in a fresh Chroma collection.

	    Args:
	        documents: Non-empty list of ``Document`` objects to index.
	        api_key: OpenAI API key used for the embedding requests.

	    Returns:
	        A ``Chroma`` vector store containing only ``documents``.

	    Raises:
	        ValueError: If ``documents`` is empty or ``api_key`` is blank.
	    """
	    import uuid

	    if not documents:
	        raise ValueError("No documents to index: the PDF produced no text.")
	    if not api_key or not api_key.strip():
	        raise ValueError("An OpenAI API key is required.")

	    # NOTE: this is a process-wide side effect. It is kept because code that
	    # runs after this call may build OpenAI clients without passing a key.
	    os.environ["OPENAI_API_KEY"] = api_key
	    embeddings = OpenAIEmbeddings()

	    # Without an explicit name every call targets Chroma's default collection,
	    # so text indexed by earlier calls in the same process could be returned
	    # by later searches. A unique name keeps each upload isolated.
	    collection_name = f"pdf-{uuid.uuid4().hex}"
	    return Chroma.from_documents(
	        documents,
	        embedding=embeddings,
	        collection_name=collection_name,
	    )

	# RAG retrieval and LLM chain
	def rag_from_pdf(question, pdf_file, api_key):
	    """Answer ``question`` using only text retrieved from the given PDF.

	    The PDF is split into per-page documents, embedded into a vector store,
	    and the two most similar pages are passed to the chat model as context.

	    Args:
	        question: The user's question about the PDF.
	        pdf_file: The uploaded PDF, as accepted by ``pdf_to_documents``.
	        api_key: OpenAI API key used for embeddings and the chat model.

	    Returns:
	        The text content of the model's reply.

	    Raises:
	        ValueError: If the PDF, the API key, or the question is missing.
	    """
	    # Fail early with a clear message instead of an obscure error deep
	    # inside PDF parsing or the OpenAI client.
	    if pdf_file is None:
	        raise ValueError("Please upload a PDF document.")
	    if not api_key or not api_key.strip():
	        raise ValueError("Please enter your OpenAI API key.")
	    if not question or not question.strip():
	        raise ValueError("Please enter a question.")
	    api_key = api_key.strip()  # tolerate whitespace from copy/paste

	    documents = pdf_to_documents(pdf_file)
	    vectorstore = initialize_vectorstore(documents, api_key)

	    # Retrieve top 2 relevant sections
	    retriever = vectorstore.as_retriever(
	        search_type="similarity", search_kwargs={"k": 2}
	    )

	    def format_docs(docs):
	        # Hand the model plain page text rather than the repr of a list of
	        # Document objects.
	        return "\n\n".join(doc.page_content for doc in docs)

	    # Pass the key explicitly rather than relying on an environment
	    # variable having been set elsewhere.
	    llm = ChatOpenAI(model="gpt-3.5-turbo", api_key=api_key)

	    # Prompt that combines the retrieved context with the question
	    prompt_template = """
	Answer this question using the provided context only.

	{question}

	Context:
	{context}
	"""

	    prompt = ChatPromptTemplate.from_messages([("human", prompt_template)])

	    # The question is sent to the retriever (for context) and passed through
	    # unchanged (for the prompt); the filled prompt then goes to the LLM.
	    rag_chain = (
	        {"context": retriever | format_docs, "question": RunnablePassthrough()}
	        | prompt
	        | llm
	    )

	    response = rag_chain.invoke(question)
	    return response.content

	# Gradio interface
	with gr.Blocks() as app:
	    gr.Markdown("## PDF-based Question Answering with RAG")

	    # Inputs, in display order: API key (masked), PDF upload, question.
	    api_key_input = gr.Textbox(label="Enter your OpenAI API Key", type="password")
	    pdf_file_input = gr.File(label="Upload your PDF document")
	    question_input = gr.Textbox(label="Ask a question related to the PDF")

	    # Where the model's answer is shown.
	    rag_output = gr.Textbox(label="Generated Response", lines=10)

	    # Clicking the button runs the RAG pipeline on the three inputs and
	    # writes the returned text into the output box.
	    rag_button = gr.Button("Ask Question")
	    rag_button.click(
	        fn=rag_from_pdf,
	        inputs=[question_input, pdf_file_input, api_key_input],
	        outputs=rag_output,
	    )

	# Start the web server for the interface defined above.
	app.launch()