Rahatara committed
Commit 999a37b
1 Parent(s): 662ff87

Delete instaragworking.py

Files changed (1)
  1. instaragworking.py +0 -165
instaragworking.py DELETED
@@ -1,165 +0,0 @@
import os
import re
from typing import List, Tuple

import fitz
import gradio as gr
import openai
from langchain.chains import ConversationalRetrievalChain
from langchain_community.document_loaders import PyMuPDFLoader
from langchain_community.vectorstores import Chroma
from langchain_openai import ChatOpenAI, OpenAIEmbeddings
from PIL import Image

# Read the API key from the environment rather than hard-coding a secret in the source.
openai.api_key = os.getenv("OPENAI_API_KEY")

def add_text(history: List[Tuple[str, str]], text: str) -> List[Tuple[str, str]]:
    if not text:
        raise gr.Error("Enter text")
    history.append((text, ""))
    return history

class MyApp:
    def __init__(self) -> None:
        self.OPENAI_API_KEY: str = openai.api_key
        self.chain = None
        self.chat_history: list = []
        self.documents = None
        self.file_name = None

    def __call__(self, file: str) -> ConversationalRetrievalChain:
        if self.chain is None:
            self.chain = self.build_chain(file)
        return self.chain

    def process_file(self, file) -> Image.Image:
        loader = PyMuPDFLoader(file.name)
        self.documents = loader.load()
        # Use the uploaded file's base name as the Chroma collection name.
        match = re.search(r"/([^/]+)$", file.name)
        self.file_name = match.group(1) if match else os.path.basename(file.name)
        # Render the first page as a preview image.
        doc = fitz.open(file.name)
        page = doc[0]
        pix = page.get_pixmap(dpi=150)
        image = Image.frombytes("RGB", [pix.width, pix.height], pix.samples)
        return image

    def build_chain(self, file) -> str:
        if self.documents is None:
            raise gr.Error("Process the PDF first")
        embeddings = OpenAIEmbeddings(openai_api_key=self.OPENAI_API_KEY)
        pdfsearch = Chroma.from_documents(
            self.documents,
            embeddings,
            collection_name=self.file_name,
        )
        self.chain = ConversationalRetrievalChain.from_llm(
            ChatOpenAI(temperature=0.0, openai_api_key=self.OPENAI_API_KEY),
            retriever=pdfsearch.as_retriever(search_kwargs={"k": 1}),
            return_source_documents=True,
        )
        return "Vector database built successfully!"

def get_response(history, query, file):
    if not file:
        raise gr.Error(message="Upload a PDF")
    chain = app(file)
    try:
        result = chain.invoke(
            {"question": query, "chat_history": app.chat_history}
        )
        app.chat_history.append((query, result["answer"]))
        source_texts = [
            f"Page {doc.metadata['page'] + 1}: {doc.page_content}"
            for doc in result["source_documents"]
        ]
        # Fill in the answer for the pending chat turn appended by add_text.
        history[-1] = (history[-1][0], result["answer"])
        return history, "\n\n".join(source_texts)
    except Exception:
        app.chat_history.append((query, "I have no information about it. Feed me knowledge, please!"))
        return history, "I have no information about it. Feed me knowledge, please!"

def render_file(file) -> Image.Image:
    doc = fitz.open(file.name)
    page = doc[0]
    pix = page.get_pixmap(dpi=150)
    image = Image.frombytes("RGB", [pix.width, pix.height], pix.samples)
    return image


def purge_chat_and_render_first(file) -> Tuple[Image.Image, list]:
    app.chat_history = []
    return render_file(file), []


def refresh_chat():
    app.chat_history = []
    return []

app = MyApp()

with gr.Blocks() as demo:
    with gr.Tab("Step 1: Upload PDF"):
        btn = gr.UploadButton("📁 Upload a PDF", file_types=[".pdf"])
        show_img = gr.Image(label="Uploaded PDF")

    with gr.Tab("Step 2: Process File"):
        process_btn = gr.Button("Process PDF")
        show_img_processed = gr.Image(label="Processed PDF")
        process_status = gr.Textbox(label="Processing Status", interactive=False)

    with gr.Tab("Step 3: Build Vector Database"):
        build_vector_btn = gr.Button("Build Vector Database")
        status_text = gr.Textbox(label="Status", value="", interactive=False)

    with gr.Tab("Step 4: Ask Questions"):
        chatbot = gr.Chatbot(elem_id="chatbot")
        txt = gr.Textbox(
            show_label=False,
            placeholder="Enter text and press submit",
            scale=2,
        )
        submit_btn = gr.Button("Submit", scale=1)
        refresh_btn = gr.Button("Refresh Chat", scale=1)
        source_texts_output = gr.Textbox(label="Source Texts", interactive=False)

    btn.upload(
        fn=purge_chat_and_render_first,
        inputs=[btn],
        outputs=[show_img, chatbot],
    )

    process_btn.click(
        fn=lambda file: (app.process_file(file), "Processing complete!"),
        inputs=[btn],
        outputs=[show_img_processed, process_status],
    )

    build_vector_btn.click(
        fn=app.build_chain,
        inputs=[btn],
        outputs=[status_text],
    )

    submit_btn.click(
        fn=add_text,
        inputs=[chatbot, txt],
        outputs=[chatbot],
        queue=False,
    ).success(
        fn=get_response,
        inputs=[chatbot, txt, btn],
        outputs=[chatbot, source_texts_output],
    )

    refresh_btn.click(
        fn=refresh_chat,
        inputs=[],
        outputs=[chatbot],
    )

demo.queue()
demo.launch()
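
For reference, a minimal sketch of the retrieval pattern the deleted file implemented, stripped of the Gradio UI. It assumes OPENAI_API_KEY is exported in the environment, that langchain, langchain-openai, langchain-community, chromadb, and pymupdf are installed, and that "example.pdf" is a hypothetical local path:

import os

from langchain.chains import ConversationalRetrievalChain
from langchain_community.document_loaders import PyMuPDFLoader
from langchain_community.vectorstores import Chroma
from langchain_openai import ChatOpenAI, OpenAIEmbeddings

api_key = os.environ["OPENAI_API_KEY"]      # assumed to be set; never hard-code keys
docs = PyMuPDFLoader("example.pdf").load()  # hypothetical path; one Document per PDF page
store = Chroma.from_documents(docs, OpenAIEmbeddings(openai_api_key=api_key))
chain = ConversationalRetrievalChain.from_llm(
    ChatOpenAI(temperature=0.0, openai_api_key=api_key),
    retriever=store.as_retriever(search_kwargs={"k": 1}),  # fetch only the closest page
    return_source_documents=True,
)
result = chain.invoke({"question": "What is this document about?", "chat_history": []})
print(result["answer"])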