Rahatara committed on
Commit
60eae25
1 Parent(s): c73d996

Create app.py

Browse files
Files changed (1) hide show
  1. app.py +191 -0
app.py ADDED
@@ -0,0 +1,191 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from typing import Any, List, Tuple
2
+ import gradio as gr
3
+ from langchain_openai import OpenAIEmbeddings
4
+ from langchain_community.vectorstores import Chroma
5
+ from langchain.chains import ConversationalRetrievalChain
6
+ from langchain_openai import ChatOpenAI
7
+ from langchain_community.document_loaders import PyMuPDFLoader
8
+ import fitz
9
+ from PIL import Image
10
+ import os
11
+ import re
12
+ import openai
13
+
14
# Credentials must never be committed to source control: read the OpenAI key
# from the environment. Any key that was previously hardcoded on this line has
# been published with the commit and should be revoked.
openai.api_key = os.environ.get("OPENAI_API_KEY")
15
+
16
+ # Load the saved PDF and prepare the chain
17
class MyApp:
    """Hold the loaded PDF documents and the retrieval chain built from them.

    ``process_file`` loads a PDF into ``documents``; ``build_chain`` indexes
    those documents in Chroma and creates the conversational retrieval chain.
    """

    def __init__(self) -> None:
        self.OPENAI_API_KEY: str = openai.api_key
        self.chain = None  # set by build_chain
        self.chat_history: list = []  # (question, answer) tuples passed to the chain
        self.documents = None  # result of PyMuPDFLoader.load()
        self.file_name = None  # base name of the processed PDF; Chroma collection name

    def __call__(self, file: str) -> "ConversationalRetrievalChain":
        """Return the retrieval chain, building it first if none exists yet.

        Args:
            file: Forwarded to ``build_chain`` when a chain has to be built.
        """
        if self.chain is None:
            # build_chain stores the chain on self and returns a status
            # string, so its return value must not be assigned to self.chain.
            self.build_chain(file)
        return self.chain

    @staticmethod
    def _derive_file_name(path: str) -> str:
        """Return the final path component of *path*."""
        return os.path.basename(path)

    def process_file(self, file) -> "Image.Image":
        """Load the PDF behind *file* and return an image of its first page.

        Args:
            file: Object exposing the PDF path as ``file.name``.

        Returns:
            The first page rendered at 150 dpi as an RGB PIL image.
        """
        loader = PyMuPDFLoader(file.name)
        self.documents = loader.load()
        # Derive the name from the path string; the file object itself is not
        # a valid argument for os.path.basename.
        self.file_name = self._derive_file_name(file.name)
        # Close the document once the page has been rasterised.
        with fitz.open(file.name) as doc:
            pix = doc[0].get_pixmap(dpi=150)
            return Image.frombytes("RGB", (pix.width, pix.height), pix.samples)

    def build_chain(self, file) -> str:
        """Index the loaded documents and create the retrieval chain.

        Args:
            file: Unused; accepted so the method can be wired directly to a
                Gradio event that passes the uploaded file.

        Returns:
            A status message for display in the UI.

        Raises:
            gr.Error: If no documents have been loaded with ``process_file``.
        """
        if self.documents is None:
            raise gr.Error("Process a PDF before building the vector database")
        embeddings = OpenAIEmbeddings(openai_api_key=self.OPENAI_API_KEY)
        pdfsearch = Chroma.from_documents(
            self.documents,
            embeddings,
            collection_name=self.file_name,
        )
        self.chain = ConversationalRetrievalChain.from_llm(
            ChatOpenAI(temperature=0.0, openai_api_key=self.OPENAI_API_KEY),
            # k=1: only the single best-matching chunk is retrieved.
            retriever=pdfsearch.as_retriever(search_kwargs={"k": 1}),
            return_source_documents=True,
        )
        return "Vector database built successfully!"
58
+
59
def get_response(history, query, file):
    """Answer *query* with the retrieval chain and fill in the pending chat turn.

    Args:
        history: Chatbot history whose last entry is the pending
            (question, reply) pair; its reply is replaced with the answer.
        query: The user's question.
        file: The uploaded PDF; only checked for presence and forwarded to
            ``app`` when the chain has to be built.

    Returns:
        A ``(history, text)`` tuple. On success ``text`` lists the retrieved
        source passages with 1-based page numbers; on failure ``history`` is
        returned unchanged and ``text`` carries the fallback message and error.

    Raises:
        gr.Error: If no file was supplied.
    """
    if not file:
        raise gr.Error(message="Upload a PDF")
    chain = app(file)
    try:
        result = chain.invoke(
            {"question": query, "chat_history": app.chat_history}
        )
        answer = result["answer"]
        # Format the sources and update the visible history before touching
        # app.chat_history, so a failure here cannot record the same query
        # twice (once with the answer, once with the fallback).
        source_texts_str = "\n\n".join(
            f"Page {doc.metadata['page'] + 1}: {doc.page_content}"
            for doc in result["source_documents"]
        )
        history[-1] = (history[-1][0], answer)
    except Exception as e:
        app.chat_history.append((query, "I have no information about it. Feed me knowledge, please!"))
        return history, f"I have no information about it. Feed me knowledge, please! Error: {str(e)}"
    app.chat_history.append((query, answer))
    return history, source_texts_str
78
+
79
def render_file(file) -> Image.Image:
    """Render the first page of the PDF at ``file.name`` as an RGB image.

    Args:
        file: Object exposing the PDF path as ``file.name``.

    Returns:
        The first page rasterised at 150 dpi.
    """
    # Use the document as a context manager so it is closed after rendering.
    with fitz.open(file.name) as doc:
        pix = doc[0].get_pixmap(dpi=150)
        return Image.frombytes("RGB", (pix.width, pix.height), pix.samples)
85
+
86
def purge_chat_and_render_first(file) -> Tuple[Image.Image, list]:
    """Reset the stored chat history and render the first page of a PDF.

    Args:
        file: Object exposing the PDF path as ``file.name``.

    Returns:
        The first page rasterised at 150 dpi and an empty list for the
        chatbot component.
    """
    app.chat_history = []
    # Use the document as a context manager so it is closed after rendering.
    with fitz.open(file.name) as doc:
        pix = doc[0].get_pixmap(dpi=150)
        image = Image.frombytes("RGB", (pix.width, pix.height), pix.samples)
    return image, []
93
+
94
def refresh_chat():
    """Drop the stored conversation and return an empty chatbot history."""
    app.chat_history = list()
    return list()
97
+
98
app = MyApp()

# Pre-process the saved PDF file and build its retrieval chain at import time.
saved_file_path = "track_training.pdf"
# A single handle, closed on exit, replaces two handles that were never closed.
with open(saved_file_path, "rb") as saved_file:
    app.process_file(saved_file)
    app.build_chain(saved_file)
104
+
105
def add_text(history, text):
    """Append the user's message to the chatbot history as a pending turn.

    NOTE(review): the Submit handlers referenced ``add_text`` but no
    definition existed in this file. This implementation is inferred from
    ``get_response``, which fills in the reply of ``history[-1]`` — confirm
    it matches the intended behaviour.

    Args:
        history: Current chatbot history (may be empty or None).
        text: The message typed by the user.

    Returns:
        A new history list ending with ``(text, "")``.

    Raises:
        gr.Error: If *text* is empty, so the chained response step is skipped.
    """
    if not text:
        raise gr.Error("Enter text")
    return list(history or []) + [(text, "")]


# NOTE(review): the Row/Column nesting below is reconstructed; the original
# indentation was not recoverable, so confirm the layout renders as intended.
with gr.Blocks() as demo:
    # Tab 1: upload a PDF, process it, build the index, then chat with it.
    with gr.Tab("Inst RAG"):
        with gr.Column():
            with gr.Row():
                btn = gr.UploadButton("📁 Upload a PDF", file_types=[".pdf"])
                show_img = gr.Image(label="Uploaded PDF")
            process_btn = gr.Button("Process PDF")
            show_img_processed = gr.Image(label="Processed PDF")
            process_status = gr.Textbox(label="Processing Status", interactive=False)
            build_vector_btn = gr.Button("Build Vector Database")
            status_text = gr.Textbox(label="Status", value="", interactive=False)
            with gr.Row():
                chatbot = gr.Chatbot(elem_id="chatbot")
                txt = gr.Textbox(
                    show_label=False,
                    placeholder="Enter text and press submit",
                    scale=2
                )
                submit_btn = gr.Button("Submit", scale=1)
                refresh_btn = gr.Button("Refresh Chat", scale=1)
            source_texts_output = gr.Textbox(label="Source Texts", interactive=False)

        # Uploading a new file clears the chat and previews its first page.
        btn.upload(
            fn=purge_chat_and_render_first,
            inputs=[btn],
            outputs=[show_img, chatbot],
        )

        process_btn.click(
            fn=lambda file: (app.process_file(file), "Processing complete!"),
            inputs=[btn],
            outputs=[show_img_processed, process_status],
        )

        build_vector_btn.click(
            fn=app.build_chain,
            inputs=[btn],
            outputs=[status_text],
        )

        # First show the user's message, then (only if that succeeded) answer it.
        submit_btn.click(
            fn=add_text,
            inputs=[chatbot, txt],
            outputs=[chatbot],
            queue=False,
        ).success(
            fn=get_response, inputs=[chatbot, txt, btn], outputs=[chatbot, source_texts_output]
        )

        refresh_btn.click(
            fn=refresh_chat,
            inputs=[],
            outputs=[chatbot],
        )

    # Tab 2: chat against the PDF that was pre-processed at startup.
    with gr.Tab("Current RAG"):
        with gr.Column():
            chatbot_current = gr.Chatbot(elem_id="chatbot_current")
            txt_current = gr.Textbox(
                show_label=False,
                placeholder="Enter text and press submit",
                scale=2
            )
            submit_btn_current = gr.Button("Submit", scale=1)
            refresh_btn_current = gr.Button("Refresh Chat", scale=1)
            source_texts_output_current = gr.Textbox(label="Source Texts", interactive=False)

        def get_response_current(history, query):
            """Answer *query* against the PDF pre-processed at startup."""
            # get_response only checks that the file argument is truthy and
            # forwards it to app, so pass the path instead of opening a new
            # file handle (never closed) on every request.
            return get_response(history, query, saved_file_path)

        submit_btn_current.click(
            fn=add_text,
            inputs=[chatbot_current, txt_current],
            outputs=[chatbot_current],
            queue=False,
        ).success(
            fn=get_response_current, inputs=[chatbot_current, txt_current], outputs=[chatbot_current, source_texts_output_current]
        )

        refresh_btn_current.click(
            fn=refresh_chat,
            inputs=[],
            outputs=[chatbot_current],
        )

demo.queue()
demo.launch()