Rahatara committed on
Commit
1391e84
1 Parent(s): 8168016

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +200 -16
app.py CHANGED
@@ -1,21 +1,205 @@
1
- from interface import create_demo
2
- from pdfchatbot import PDFChatBot
3
 
4
- # Create Gradio interface
5
- demo, chat_history, show_img, txt, submit_button, uploaded_pdf = create_demo()
6
 
7
- # Create PDFChatBot instance
8
- pdf_chatbot = PDFChatBot()
9
 
10
- # Set up event handlers
11
- with demo:
12
- # Event handler for uploading a PDF
13
- uploaded_pdf.upload(pdf_chatbot.render_file, inputs=[uploaded_pdf], outputs=[show_img])
 
 
14
 
15
- # Event handler for submitting text and generating response
16
- submit_button.click(pdf_chatbot.add_text, inputs=[chat_history, txt], outputs=[chat_history], queue=False).\
17
- success(pdf_chatbot.generate_response, inputs=[chat_history, txt, uploaded_pdf], outputs=[chat_history, txt]).\
18
- success(pdf_chatbot.render_file, inputs=[uploaded_pdf], outputs=[show_img])
19
 
20
- if __name__ == "__main__":
21
- demo.launch()
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ Based on an implementation by Sunil Kumar Dash:
3
 
4
+ MIT License
 
5
 
6
+ Copyright (c) 2023 Sunil Kumar Dash
 
7
 
8
+ Permission is hereby granted, free of charge, to any person obtaining a copy
9
+ of this software and associated documentation files (the "Software"), to deal
10
+ in the Software without restriction, including without limitation the rights
11
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
12
+ copies of the Software, and to permit persons to whom the Software is
13
+ furnished to do so, subject to the following conditions:
14
 
15
+ The above copyright notice and this permission notice shall be included in all
16
+ copies or substantial portions of the Software.
 
 
17
 
18
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
19
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
20
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
21
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
22
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
23
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
24
+ SOFTWARE.
25
+ """
26
+
27
+ from typing import Any
28
+ import gradio as gr
29
+ from langchain_openai import OpenAIEmbeddings
30
+ from langchain_community.vectorstores import Chroma
31
+
32
+ from langchain.chains import ConversationalRetrievalChain
33
+ from langchain_openai import ChatOpenAI
34
+
35
+ from langchain_community.document_loaders import PyMuPDFLoader
36
+
37
+ import fitz
38
+ from PIL import Image
39
+ import os
40
+ import re
41
+ import uuid
42
+
43
# Two ready-made textbox configurations for the API-key field:
# ``enable_box`` is the editable variant returned by enable_api_box(),
# ``disable_box`` is the read-only confirmation returned by set_apikey().
enable_box = gr.Textbox(
    value=None,
    placeholder="Upload your OpenAI API key",
    interactive=True,
)
disable_box = gr.Textbox(
    value="OpenAI API key is set",
    interactive=False,
)
47
+
48
+
49
def set_apikey(api_key: str):
    """Store the submitted OpenAI API key on the shared ``app`` object.

    Args:
        api_key: Text submitted from the API-key textbox.

    Returns:
        The module-level ``disable_box`` textbox (non-interactive, showing
        "OpenAI API key is set").
    """
    print("API Key set")
    app.OPENAI_API_KEY = api_key
    return disable_box
53
+
54
+
55
def enable_api_box():
    """Return the module-level editable ``enable_box`` textbox."""
    return enable_box
57
+
58
+
59
def add_text(history, text: str):
    """Append the user's message to the chat history as a new, unanswered turn.

    Args:
        history: Existing chat turns as ``[user, bot]`` pairs. ``None`` is
            treated as an empty history.
        text: The message typed by the user.

    Returns:
        A new list containing the previous turns followed by ``[text, ""]``.
        The list passed in is not modified.

    Raises:
        gr.Error: If ``text`` is empty or contains only whitespace.
    """
    if not text or not text.strip():
        raise gr.Error("enter text")
    # The new turn is a list, not a tuple, so that get_response can extend
    # the (initially empty) bot reply in place while streaming.
    return list(history or []) + [[text, ""]]
64
+
65
+
66
class my_app:
    """Mutable state for the PDF chat: API key, retrieval chain and history.

    Calling an instance with an uploaded file returns the retrieval chain,
    building it the first time and reusing it until ``count`` is reset to 0.
    """

    def __init__(self, OPENAI_API_KEY: str = None) -> None:
        # Key passed to the OpenAI embeddings and chat models in build_chain.
        self.OPENAI_API_KEY: str = OPENAI_API_KEY
        # Retrieval chain for the current document; created by __call__.
        self.chain = None
        # (question, answer) pairs handed back to the chain on each query.
        self.chat_history: list = []
        # Page index of the most recent source document.
        self.N: int = 0
        # 0 means no chain has been built yet for the current document.
        self.count: int = 0

    def __call__(self, file) -> Any:
        """Return the retrieval chain, building it from ``file`` on first use.

        Args:
            file: Uploaded PDF (a path string or an object with ``.name``).

        Returns:
            The cached chain; ``file`` is ignored once a chain exists.
        """
        if self.count == 0:
            self.chain = self.build_chain(file)
            self.count += 1
        return self.chain

    @staticmethod
    def _collection_name(file_name: str) -> str:
        """Derive a Chroma-safe, unique collection name from a file name."""
        # Chroma validates collection names (limited length and character
        # set), so reduce the stem to lowercase alphanumerics and hyphens.
        stem = os.path.splitext(file_name)[0].lower()
        slug = re.sub(r"[^a-z0-9]+", "-", stem).strip("-") or "pdf"
        # A short random suffix keeps collections from separate uploads
        # distinct even when the file names match.
        return f"{slug[:40].rstrip('-')}-{uuid.uuid4().hex[:8]}"

    def process_file(self, file):
        """Load the PDF into documents and return them with its base name.

        Args:
            file: Uploaded PDF (a path string or an object with ``.name``).

        Returns:
            A ``(documents, file_name)`` tuple, where ``documents`` is the
            result of ``PyMuPDFLoader(...).load()`` and ``file_name`` is the
            final path component.
        """
        # Accept both upload objects exposing ``.name`` and plain path strings.
        path = getattr(file, "name", file)
        documents = PyMuPDFLoader(path).load()
        return documents, os.path.basename(path)

    def build_chain(self, file):
        """Index the PDF in Chroma and build a conversational retrieval chain.

        Args:
            file: Uploaded PDF (a path string or an object with ``.name``).

        Returns:
            A ``ConversationalRetrievalChain`` that retrieves one document
            per query and returns its source documents with the answer.
        """
        documents, file_name = self.process_file(file)
        # Load embeddings model
        embeddings = OpenAIEmbeddings(openai_api_key=self.OPENAI_API_KEY)
        pdfsearch = Chroma.from_documents(
            documents,
            embeddings,
            collection_name=self._collection_name(file_name),
        )
        chain = ConversationalRetrievalChain.from_llm(
            ChatOpenAI(temperature=0.0, openai_api_key=self.OPENAI_API_KEY),
            retriever=pdfsearch.as_retriever(search_kwargs={"k": 1}),
            return_source_documents=True,
        )
        return chain
107
+
108
+
109
def get_response(history, query, file):
    """Answer ``query`` about the uploaded PDF and stream the reply into the chat.

    Args:
        history: Chat turns; the last one is the pending turn whose bot
            reply is filled in here.
        query: The user's question.
        file: The uploaded PDF as delivered by the upload button.

    Yields:
        ``(history, "")`` after each character of the answer is appended to
        the last turn. The empty string is the value for the second output.

    Raises:
        gr.Error: If no PDF has been uploaded.
    """
    if not file:
        raise gr.Error(message="Upload a PDF")
    chain = app(file)
    result = chain(
        {"question": query, "chat_history": app.chat_history},
        return_only_outputs=True,
    )
    answer = result["answer"]
    app.chat_history += [(query, answer)]

    # Record the page of the top source document. Read it through the
    # document's ``metadata`` mapping rather than relying on field order,
    # and fall back to the first page when nothing was retrieved.
    sources = result.get("source_documents") or []
    app.N = sources[0].metadata.get("page", 0) if sources else 0

    # Rebuild the last turn on every step instead of mutating it in place,
    # so this works whether the turn arrived as a list or a tuple.
    user_text = history[-1][0]
    shown = history[-1][1] or ""
    if not answer:
        # Still emit one update so the outputs are refreshed.
        yield history, ""
    for char in answer:
        shown += char
        history[-1] = [user_text, shown]
        yield history, ""
121
+
122
+
123
def render_file(file):
    """Render page ``app.N`` of the uploaded PDF as a PIL image.

    Args:
        file: Uploaded PDF (a path string or an object with ``.name``).

    Returns:
        A ``PIL.Image.Image`` in RGB mode rendered at 150 DPI.
    """
    # Accept both upload objects exposing ``.name`` and plain path strings.
    path = getattr(file, "name", file)
    # The context manager closes the document once the page is rasterised.
    with fitz.open(path) as doc:
        page = doc[app.N]
        # Rasterise the page at 150 DPI and wrap the raw RGB samples.
        pix = page.get_pixmap(dpi=150)
        image = Image.frombytes("RGB", (pix.width, pix.height), pix.samples)
    return image
130
+
131
+
132
def purge_chat_and_render_first(file):
    """Reset the chat state for a new upload and render the PDF's first page.

    Args:
        file: Uploaded PDF (a path string or an object with ``.name``).

    Returns:
        A ``(image, [])`` tuple: the first page as an RGB ``PIL.Image.Image``
        rendered at 150 DPI, and an empty list as the new chat value.
    """
    print("purge_chat_and_render_first")
    # Purges the previous chat session so that the bot has no concept of
    # previous documents; count = 0 makes the next query rebuild the chain.
    app.chat_history = []
    app.count = 0
    # Drop the page index left over from the previous document as well.
    app.N = 0

    # Accept both upload objects exposing ``.name`` and plain path strings.
    path = getattr(file, "name", file)
    # The context manager closes the document once the page is rasterised.
    with fitz.open(path) as doc:
        page = doc[0]
        # Rasterise the first page at 150 DPI and wrap the raw RGB samples.
        pix = page.get_pixmap(dpi=150)
        image = Image.frombytes("RGB", (pix.width, pix.height), pix.samples)
    return image, []
145
+
146
# Module-level state object read and written by the handlers above.
# NOTE(review): being module-level, it is presumably shared by every browser
# session served by this process (API key and chat history included) — confirm.
app = my_app()

# NOTE(review): container nesting below is reconstructed from a listing that
# had lost its indentation — confirm the intended layout.
with gr.Blocks() as demo:
    with gr.Column():
        # API-key entry.
        with gr.Row():
            with gr.Column(scale=1):
                api_key = gr.Textbox(
                    placeholder="Enter OpenAI API key and hit <RETURN>",
                    show_label=False,
                    interactive=True,
                )

        with gr.Row():
            # Left side: conversation and question input.
            with gr.Column(scale=2):
                with gr.Row():
                    chatbot = gr.Chatbot(value=[], elem_id="chatbot")
                with gr.Row():
                    txt = gr.Textbox(
                        show_label=False,
                        placeholder="Enter text and press submit",
                        scale=2,
                    )
                    submit_btn = gr.Button("submit", scale=1)

            # Right side: page preview and upload control.
            with gr.Column(scale=1):
                with gr.Row():
                    show_img = gr.Image(label="Upload PDF")
                with gr.Row():
                    btn = gr.UploadButton("📁 upload a PDF", file_types=[".pdf"])

    # Submitting the key stores it and swaps the textbox for the read-only one.
    api_key.submit(fn=set_apikey, inputs=[api_key], outputs=[api_key])

    # A new upload clears the chat and previews the first page.
    btn.upload(
        fn=purge_chat_and_render_first,
        inputs=[btn],
        outputs=[show_img, chatbot],
    )

    # Submit pipeline: add the question, then answer it, then show the
    # source page. Each ``success`` step is chained onto the previous one.
    question_added = submit_btn.click(
        fn=add_text,
        inputs=[chatbot, txt],
        outputs=[chatbot],
        queue=False,
    )
    answered = question_added.success(
        fn=get_response,
        inputs=[chatbot, txt, btn],
        outputs=[chatbot, txt],
    )
    answered.success(fn=render_file, inputs=[btn], outputs=[show_img])

demo.queue()
demo.launch()