Rahatara committed on
Commit
061f74e
1 Parent(s): 27523b3

Delete rag_pre_trained.py

Browse files
Files changed (1) hide show
  1. rag_pre_trained.py +0 -153
rag_pre_trained.py DELETED
@@ -1,153 +0,0 @@
1
-
2
- from typing import Any
3
- import gradio as gr
4
- from langchain_openai import OpenAIEmbeddings
5
- from langchain_community.vectorstores import Chroma
6
-
7
- from langchain.chains import ConversationalRetrievalChain
8
- from langchain_openai import ChatOpenAI
9
-
10
- from langchain_community.document_loaders import PyMuPDFLoader
11
-
12
- import fitz
13
- from PIL import Image
14
- import os
15
- import re
16
- import openai
17
-
18
# Read the OpenAI credential from the environment instead of embedding it in
# source control. When the variable is unset this is None and the OpenAI
# clients will report the missing key when they are first used.
# NOTE(review): the key that used to be hard-coded on this line has been
# published in the repository history and should be revoked.
openai.api_key = os.environ.get("OPENAI_API_KEY")
19
-
20
-
21
def add_text(history, text: str):
    """Append the user's message to the chat history as a new, unanswered turn.

    Args:
        history: Existing chatbot turns (a list of ``[user, bot]`` pairs), or
            ``None``/empty when the conversation has not started yet.
        text: The message typed by the user.

    Returns:
        A new history list with ``[text, ""]`` appended; the list passed in
        is not modified.

    Raises:
        gr.Error: If ``text`` is empty or contains only whitespace.
    """
    if not text or not text.strip():
        raise gr.Error("Enter text")
    # The new turn is a list rather than a tuple because the answer is later
    # streamed into it in place (``history[-1][-1] += char``), which an
    # immutable tuple would reject.
    return list(history or []) + [[text, ""]]
26
-
27
-
28
class MyApp:
    """Holds the retrieval chain and chat state shared by the Gradio callbacks.

    Calling an instance with an uploaded PDF returns the conversational
    retrieval chain for it, building the chain only when ``count`` is 0.
    """

    def __init__(self) -> None:
        # API key captured from the openai module at construction time.
        self.OPENAI_API_KEY: str = openai.api_key
        # Conversational retrieval chain for the current PDF; built lazily.
        self.chain = None
        # (question, answer) pairs passed back to the chain as context.
        self.chat_history: list = []
        # Page index of the most recently retrieved source document.
        self.N: int = 0
        # 0 means the chain must be (re)built on the next call.
        self.count: int = 0

    def __call__(self, file: Any) -> Any:
        """Return the chain for ``file``, building it first if none is cached.

        Args:
            file: The uploaded PDF, either a path string or an object whose
                ``name`` attribute is the path.

        Returns:
            The conversational retrieval chain.
        """
        if self.count == 0:
            self.chain = self.build_chain(file)
            self.count += 1
        return self.chain

    @staticmethod
    def _file_path(file: Any) -> str:
        """Return the filesystem path for a path-like or file-like upload."""
        if isinstance(file, (str, os.PathLike)):
            return os.fspath(file)
        return file.name

    @staticmethod
    def _collection_name(file_name: str) -> str:
        """Turn a file name into a name Chroma accepts for a collection.

        Chroma requires 3-63 characters, alphanumeric at both ends, and only
        alphanumerics, underscores or hyphens in between; ordinary file names
        (spaces, parentheses, ...) frequently break those rules.
        """
        cleaned = re.sub(r"[^A-Za-z0-9_-]+", "-", file_name).strip("-_")
        cleaned = cleaned[:63].rstrip("-_")
        if len(cleaned) < 3:
            cleaned = f"pdf-{cleaned}" if cleaned else "pdf"
        return cleaned

    def process_file(self, file: Any):
        """Load the PDF and return its documents together with its base name.

        Args:
            file: The uploaded PDF (path string or object with ``name``).

        Returns:
            A ``(documents, file_name)`` tuple, where ``documents`` is what
            ``PyMuPDFLoader.load()`` produced and ``file_name`` is the last
            component of the path.
        """
        path = self._file_path(file)
        documents = PyMuPDFLoader(path).load()
        # os.path.basename handles the platform's separators and paths that
        # contain no separator at all.
        return documents, os.path.basename(path)

    def build_chain(self, file: Any):
        """Embed the PDF into a Chroma store and build the retrieval chain.

        Args:
            file: The uploaded PDF (path string or object with ``name``).

        Returns:
            A ``ConversationalRetrievalChain`` that retrieves the single best
            matching document (``k=1``) and returns its sources.
        """
        documents, file_name = self.process_file(file)
        # Load embeddings model
        embeddings = OpenAIEmbeddings(openai_api_key=self.OPENAI_API_KEY)
        pdfsearch = Chroma.from_documents(
            documents,
            embeddings,
            collection_name=self._collection_name(file_name),
        )
        chain = ConversationalRetrievalChain.from_llm(
            ChatOpenAI(temperature=0.0, openai_api_key=self.OPENAI_API_KEY),
            retriever=pdfsearch.as_retriever(search_kwargs={"k": 1}),
            return_source_documents=True,
        )
        return chain
69
-
70
-
71
def get_response(history, query, file):
    """Answer ``query`` against the uploaded PDF and stream the reply into the chat.

    Args:
        history: Chatbot turns; the last turn is the one being answered.
        query: The user's question.
        file: The uploaded PDF, as delivered by the upload button.

    Yields:
        ``(history, "")`` after each character of the answer is appended to
        the last turn; the empty string is the new textbox value.

    Raises:
        gr.Error: If no PDF has been uploaded.
    """
    if not file:
        raise gr.Error(message="Upload a PDF")
    chain = app(file)
    result = chain(
        {"question": query, "chat_history": app.chat_history},
        return_only_outputs=True,
    )
    answer = result["answer"]
    app.chat_history += [(query, answer)]

    sources = result["source_documents"]
    if sources:
        # Read the page through the document's ``metadata`` attribute rather
        # than relying on the order in which the model's fields iterate.
        # Keep the previous page when the metadata carries no page number.
        app.N = sources[0].metadata.get("page", app.N)

    # The last turn may arrive as an immutable tuple; make it a list so the
    # answer can be appended in place.
    history[-1] = list(history[-1])
    for char in answer:
        history[-1][-1] += char
        yield history, ""
83
-
84
-
85
def render_file(file):
    """Render page ``app.N`` of the uploaded PDF as a PIL image.

    Args:
        file: The uploaded PDF, either a path string or an object whose
            ``name`` attribute is the path.

    Returns:
        An RGB ``PIL.Image`` of the page rendered at 150 DPI.
    """
    path = file if isinstance(file, str) else file.name
    # The context manager closes the document once the page has been
    # rasterised, so the file handle is not leaked on every question.
    with fitz.open(path) as doc:
        page = doc[app.N]
        # Render the page as an image with a resolution of 150 DPI
        pix = page.get_pixmap(dpi=150)
        image = Image.frombytes("RGB", (pix.width, pix.height), pix.samples)
    return image
92
-
93
-
94
def purge_chat_and_render_first(file):
    """Reset the chat session for a newly uploaded PDF and render its first page.

    Args:
        file: The uploaded PDF, either a path string or an object whose
            ``name`` attribute is the path.

    Returns:
        A ``(image, [])`` tuple: the first page as an RGB ``PIL.Image``
        rendered at 150 DPI, and an empty chat history.
    """
    print("purge_chat_and_render_first")
    # Purges the previous chat session so that the bot has no concept of previous documents
    app.chat_history = []
    # count == 0 makes the next question rebuild the chain for this document.
    app.count = 0
    # Drop the page index remembered from the previous document.
    app.N = 0

    path = file if isinstance(file, str) else file.name
    # Use PyMuPDF to render the first page of the uploaded document; the
    # context manager closes the document afterwards.
    with fitz.open(path) as doc:
        page = doc[0]
        # Render the page as an image with a resolution of 150 DPI
        pix = page.get_pixmap(dpi=150)
        image = Image.frombytes("RGB", (pix.width, pix.height), pix.samples)
    return image, []
107
-
108
-
109
# Single shared application state used by every callback in this module.
app = MyApp()

# NOTE(review): the layout nesting below was reconstructed from a diff view
# that had lost its indentation — confirm against the original file.
with gr.Blocks() as demo:
    with gr.Column():
        with gr.Row():
            # Left column: the conversation and the question input.
            with gr.Column(scale=2):
                with gr.Row():
                    chatbot = gr.Chatbot(value=[], elem_id="chatbot")
                with gr.Row():
                    txt = gr.Textbox(
                        show_label=False,
                        placeholder="Enter text and press submit",
                        scale=2
                    )
                    submit_btn = gr.Button("Submit", scale=1)

            # Right column: rendered PDF page and the upload button.
            with gr.Column(scale=1):
                with gr.Row():
                    show_img = gr.Image(label="Upload PDF")
                with gr.Row():
                    btn = gr.UploadButton("📁 Upload a PDF", file_types=[".pdf"])

    # A new upload resets the chat state and shows the first page.
    btn.upload(
        fn=purge_chat_and_render_first,
        inputs=[btn],
        outputs=[show_img, chatbot],
    )

    # Submit runs three steps in order, each only if the previous succeeded:
    # 1. add_text appends the user's turn (not queued),
    # 2. get_response streams the answer and sets the textbox to "",
    # 3. render_file shows the page stored in app.N.
    submit_btn.click(
        fn=add_text,
        inputs=[chatbot, txt],
        outputs=[
            chatbot,
        ],
        queue=False,
    ).success(
        fn=get_response, inputs=[chatbot, txt, btn], outputs=[chatbot, txt]
    ).success(
        fn=render_file, inputs=[btn], outputs=[show_img]
    )

# Enable the request queue before launching the app.
demo.queue()
demo.launch()
152
-
153
-