DexterSptizu committed
Commit c647394 · verified · 1 Parent(s): 0bc46ee

Create app.py

Files changed (1): app.py (+77, -0)
app.py ADDED
@@ -0,0 +1,77 @@
+ import gradio as gr
+ from langchain_chroma import Chroma
+ from langchain_openai import OpenAIEmbeddings
+ from langchain_core.documents import Document
+ from langchain_openai import ChatOpenAI
+ from langchain_core.prompts import ChatPromptTemplate
+ from langchain_core.runnables import RunnablePassthrough
+ from PyPDF2 import PdfReader
+ import os
+
+ # Process the uploaded PDF into one Document per page
+ def pdf_to_documents(pdf_file):
+     reader = PdfReader(pdf_file if isinstance(pdf_file, str) else pdf_file.name)  # gr.File may pass a path string or a tempfile-like object
+     pages = [page.extract_text() or "" for page in reader.pages]  # extract_text() can return None on image-only pages
+     documents = [Document(page_content=page, metadata={"page_number": idx + 1}) for idx, page in enumerate(pages)]
+     return documents
+
+ # Initialize the Chroma vector store from the page documents
+ def initialize_vectorstore(documents, api_key):
+     os.environ["OPENAI_API_KEY"] = api_key
+     embeddings = OpenAIEmbeddings()
+     vectorstore = Chroma.from_documents(documents, embedding=embeddings)
+     return vectorstore
+
+ # RAG retrieval and LLM chain
+ def rag_from_pdf(question, pdf_file, api_key):
+     documents = pdf_to_documents(pdf_file)
+     vectorstore = initialize_vectorstore(documents, api_key)
+
+     retriever = vectorstore.as_retriever(search_type="similarity", search_kwargs={"k": 2})  # Retrieve the top 2 relevant sections
+
+     # Initialize the LLM
+     llm = ChatOpenAI(model="gpt-3.5-turbo")
+
+     # Prompt template combining the retrieved context and the question
+     prompt_template = """
+     Answer this question using the provided context only.
+
+     {question}
+
+     Context:
+     {context}
+     """
+
+     prompt = ChatPromptTemplate.from_messages([("human", prompt_template)])
+
+     # RAG chain: the dict feeds retrieved documents into {context} and passes the question through unchanged
+     rag_chain = {"context": retriever, "question": RunnablePassthrough()} | prompt | llm
+
+     # Perform retrieval and return the LLM's answer
+     response = rag_chain.invoke(question)
+     return response.content
+
+ # Gradio interface
+ with gr.Blocks() as app:
+     gr.Markdown("## PDF-based Question Answering with RAG")
+
+     # Input for the OpenAI API key
+     api_key_input = gr.Textbox(label="Enter your OpenAI API Key", type="password")
+
+     # File upload for the PDF
+     pdf_file_input = gr.File(label="Upload your PDF document")
+
+     # Question input
+     question_input = gr.Textbox(label="Ask a question related to the PDF")
+
+     # Output for the RAG response
+     rag_output = gr.Textbox(label="Generated Response", lines=10)
+
+     # Button to run the RAG chain
+     rag_button = gr.Button("Ask Question")
+
+     # Wire the button to the RAG chain
+     rag_button.click(rag_from_pdf, inputs=[question_input, pdf_file_input, api_key_input], outputs=rag_output)
+
+ # Launch the Gradio app
+ app.launch()
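
Note: the imports above imply a handful of dependencies that this commit does not pin; a minimal install sketch, assuming the current package names as published on PyPI:

pip install gradio langchain-chroma langchain-openai langchain-core PyPDF2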