RAHMAN00700
committed on
Commit
•
dea6e74
1
Parent(s):
19d7250
Update app.py
Browse files
app.py
CHANGED
@@ -8,6 +8,7 @@ import yaml
|
|
8 |
from bs4 import BeautifulSoup
|
9 |
from pptx import Presentation
|
10 |
from docx import Document
|
|
|
11 |
|
12 |
from langchain.document_loaders import PyPDFLoader, TextLoader
|
13 |
from langchain.indexes import VectorstoreIndexCreator
|
@@ -22,9 +23,12 @@ from ibm_watson_machine_learning.foundation_models.extensions.langchain import W
|
|
22 |
from ibm_watson_machine_learning.metanames import GenTextParamsMetaNames as GenParams
|
23 |
from ibm_watson_machine_learning.foundation_models.utils.enums import DecodingMethods
|
24 |
|
25 |
-
#
|
|
|
|
|
|
|
26 |
index = None
|
27 |
-
rag_chain = None
|
28 |
|
29 |
# Custom loader for DOCX files
|
30 |
class DocxLoader:
|
@@ -49,16 +53,11 @@ class PptxLoader:
|
|
49 |
# Custom loader for additional file types
|
50 |
def load_csv(file_path):
|
51 |
df = pd.read_csv(file_path)
|
52 |
-
|
53 |
-
st.write("Large dataset detected, displaying data in pages.")
|
54 |
-
page_size = 100 # Define the number of rows per page
|
55 |
page_number = st.number_input("Page number", min_value=1, max_value=(len(df) // page_size) + 1, step=1, value=1)
|
56 |
-
|
57 |
start_index = (page_number - 1) * page_size
|
58 |
end_index = start_index + page_size
|
59 |
-
|
60 |
-
|
61 |
-
st.dataframe(paginated_data) # Display paginated data
|
62 |
return df.to_string(index=False)
|
63 |
|
64 |
def load_json(file_path):
|
@@ -85,6 +84,7 @@ def load_html(file_path):
|
|
85 |
@st.cache_resource
|
86 |
def load_file(file_name, file_type):
|
87 |
loaders = []
|
|
|
88 |
|
89 |
if file_type == "pdf":
|
90 |
loaders = [PyPDFLoader(file_name)]
|
@@ -110,17 +110,20 @@ def load_file(file_name, file_type):
|
|
110 |
st.error("Unsupported file type.")
|
111 |
return None
|
112 |
|
113 |
-
|
114 |
-
|
115 |
-
|
116 |
-
|
117 |
-
|
118 |
-
|
119 |
-
|
120 |
-
|
121 |
-
|
122 |
-
|
123 |
-
|
|
|
|
|
|
|
124 |
|
125 |
# Watsonx API setup
|
126 |
watsonx_api_key = os.getenv("WATSONX_API_KEY")
|
@@ -158,14 +161,12 @@ with st.sidebar:
|
|
158 |
}
|
159 |
st.info("Upload a file to use RAG")
|
160 |
uploaded_file = st.file_uploader("Upload file", accept_multiple_files=False, type=["pdf", "docx", "txt", "pptx", "csv", "json", "xml", "yaml", "html"])
|
161 |
-
|
162 |
if uploaded_file is not None:
|
163 |
bytes_data = uploaded_file.read()
|
164 |
st.write("Filename:", uploaded_file.name)
|
165 |
-
|
166 |
with open(uploaded_file.name, 'wb') as f:
|
167 |
f.write(bytes_data)
|
168 |
-
|
169 |
file_type = uploaded_file.name.split('.')[-1].lower()
|
170 |
index = load_file(uploaded_file.name, file_type)
|
171 |
|
@@ -211,10 +212,4 @@ prompt = st.chat_input("Ask your question here", disabled=False if chain else Tr
|
|
211 |
if prompt:
|
212 |
st.chat_message("user").markdown(prompt)
|
213 |
if rag_chain:
|
214 |
-
|
215 |
-
else:
|
216 |
-
response_text = chain.run(question=prompt, context="").strip()
|
217 |
-
|
218 |
-
st.session_state.messages.append({'role': 'User', 'content': prompt})
|
219 |
-
st.chat_message("assistant").markdown(response_text)
|
220 |
-
st.session_state.messages.append({'role': 'Assistant', 'content': response_text})
|
|
|
8 |
from bs4 import BeautifulSoup
|
9 |
from pptx import Presentation
|
10 |
from docx import Document
|
11 |
+
from dotenv import load_dotenv
|
12 |
|
13 |
from langchain.document_loaders import PyPDFLoader, TextLoader
|
14 |
from langchain.indexes import VectorstoreIndexCreator
|
|
|
23 |
from ibm_watson_machine_learning.metanames import GenTextParamsMetaNames as GenParams
|
24 |
from ibm_watson_machine_learning.foundation_models.utils.enums import DecodingMethods
|
25 |
|
26 |
+
# Load environment variables from .env file
|
27 |
+
load_dotenv()
|
28 |
+
|
29 |
+
# Initialize index and chain to None
|
30 |
index = None
|
31 |
+
rag_chain = None
|
32 |
|
33 |
# Custom loader for DOCX files
|
34 |
class DocxLoader:
|
|
|
53 |
# Custom loader for additional file types
|
54 |
def load_csv(file_path):
|
55 |
df = pd.read_csv(file_path)
|
56 |
+
page_size = 100
|
|
|
|
|
57 |
page_number = st.number_input("Page number", min_value=1, max_value=(len(df) // page_size) + 1, step=1, value=1)
|
|
|
58 |
start_index = (page_number - 1) * page_size
|
59 |
end_index = start_index + page_size
|
60 |
+
st.dataframe(df.iloc[start_index:end_index])
|
|
|
|
|
61 |
return df.to_string(index=False)
|
62 |
|
63 |
def load_json(file_path):
|
|
|
84 |
@st.cache_resource
|
85 |
def load_file(file_name, file_type):
|
86 |
loaders = []
|
87 |
+
text = None
|
88 |
|
89 |
if file_type == "pdf":
|
90 |
loaders = [PyPDFLoader(file_name)]
|
|
|
110 |
st.error("Unsupported file type.")
|
111 |
return None
|
112 |
|
113 |
+
if text:
|
114 |
+
with tempfile.NamedTemporaryFile(delete=False, suffix=".txt") as temp_file:
|
115 |
+
temp_file.write(text.encode("utf-8"))
|
116 |
+
temp_file_path = temp_file.name
|
117 |
+
loaders = [TextLoader(temp_file_path)]
|
118 |
+
|
119 |
+
if loaders:
|
120 |
+
index = VectorstoreIndexCreator(
|
121 |
+
embedding=HuggingFaceEmbeddings(model_name="all-MiniLM-L12-v2"),
|
122 |
+
text_splitter=RecursiveCharacterTextSplitter(chunk_size=450, chunk_overlap=50)
|
123 |
+
).from_loaders(loaders)
|
124 |
+
st.success("Index created successfully!")
|
125 |
+
return index
|
126 |
+
return None
|
127 |
|
128 |
# Watsonx API setup
|
129 |
watsonx_api_key = os.getenv("WATSONX_API_KEY")
|
|
|
161 |
}
|
162 |
st.info("Upload a file to use RAG")
|
163 |
uploaded_file = st.file_uploader("Upload file", accept_multiple_files=False, type=["pdf", "docx", "txt", "pptx", "csv", "json", "xml", "yaml", "html"])
|
164 |
+
|
165 |
if uploaded_file is not None:
|
166 |
bytes_data = uploaded_file.read()
|
167 |
st.write("Filename:", uploaded_file.name)
|
|
|
168 |
with open(uploaded_file.name, 'wb') as f:
|
169 |
f.write(bytes_data)
|
|
|
170 |
file_type = uploaded_file.name.split('.')[-1].lower()
|
171 |
index = load_file(uploaded_file.name, file_type)
|
172 |
|
|
|
212 |
if prompt:
|
213 |
st.chat_message("user").markdown(prompt)
|
214 |
if rag_chain:
|
215 |
+
response
|
|
|
|
|
|
|
|
|
|
|
|