RAHMAN00700 committed on
Commit
dea6e74
1 Parent(s): 19d7250

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +25 -30
app.py CHANGED
@@ -8,6 +8,7 @@ import yaml
8
  from bs4 import BeautifulSoup
9
  from pptx import Presentation
10
  from docx import Document
 
11
 
12
  from langchain.document_loaders import PyPDFLoader, TextLoader
13
  from langchain.indexes import VectorstoreIndexCreator
@@ -22,9 +23,12 @@ from ibm_watson_machine_learning.foundation_models.extensions.langchain import W
22
  from ibm_watson_machine_learning.metanames import GenTextParamsMetaNames as GenParams
23
  from ibm_watson_machine_learning.foundation_models.utils.enums import DecodingMethods
24
 
25
- # Initialize index to None
 
 
 
26
  index = None
27
- rag_chain = None # Initialize rag_chain as None by default
28
 
29
  # Custom loader for DOCX files
30
  class DocxLoader:
@@ -49,16 +53,11 @@ class PptxLoader:
49
  # Custom loader for additional file types
50
  def load_csv(file_path):
51
  df = pd.read_csv(file_path)
52
- # Adding pagination for large CSV data
53
- st.write("Large dataset detected, displaying data in pages.")
54
- page_size = 100 # Define the number of rows per page
55
  page_number = st.number_input("Page number", min_value=1, max_value=(len(df) // page_size) + 1, step=1, value=1)
56
-
57
  start_index = (page_number - 1) * page_size
58
  end_index = start_index + page_size
59
- paginated_data = df.iloc[start_index:end_index]
60
-
61
- st.dataframe(paginated_data) # Display paginated data
62
  return df.to_string(index=False)
63
 
64
  def load_json(file_path):
@@ -85,6 +84,7 @@ def load_html(file_path):
85
  @st.cache_resource
86
  def load_file(file_name, file_type):
87
  loaders = []
 
88
 
89
  if file_type == "pdf":
90
  loaders = [PyPDFLoader(file_name)]
@@ -110,17 +110,20 @@ def load_file(file_name, file_type):
110
  st.error("Unsupported file type.")
111
  return None
112
 
113
- # Use TextLoader for intermediate text files from custom loaders
114
- with tempfile.NamedTemporaryFile(delete=False, suffix=".txt") as temp_file:
115
- temp_file.write(text.encode("utf-8"))
116
- temp_file_path = temp_file.name
117
- loaders = [TextLoader(temp_file_path)]
118
-
119
- index = VectorstoreIndexCreator(
120
- embedding=HuggingFaceEmbeddings(model_name="all-MiniLM-L12-v2"),
121
- text_splitter=RecursiveCharacterTextSplitter(chunk_size=450, chunk_overlap=50)
122
- ).from_loaders(loaders)
123
- return index
 
 
 
124
 
125
  # Watsonx API setup
126
  watsonx_api_key = os.getenv("WATSONX_API_KEY")
@@ -158,14 +161,12 @@ with st.sidebar:
158
  }
159
  st.info("Upload a file to use RAG")
160
  uploaded_file = st.file_uploader("Upload file", accept_multiple_files=False, type=["pdf", "docx", "txt", "pptx", "csv", "json", "xml", "yaml", "html"])
161
-
162
  if uploaded_file is not None:
163
  bytes_data = uploaded_file.read()
164
  st.write("Filename:", uploaded_file.name)
165
-
166
  with open(uploaded_file.name, 'wb') as f:
167
  f.write(bytes_data)
168
-
169
  file_type = uploaded_file.name.split('.')[-1].lower()
170
  index = load_file(uploaded_file.name, file_type)
171
 
@@ -211,10 +212,4 @@ prompt = st.chat_input("Ask your question here", disabled=False if chain else Tr
211
  if prompt:
212
  st.chat_message("user").markdown(prompt)
213
  if rag_chain:
214
- response_text = rag_chain.run(prompt).strip()
215
- else:
216
- response_text = chain.run(question=prompt, context="").strip()
217
-
218
- st.session_state.messages.append({'role': 'User', 'content': prompt})
219
- st.chat_message("assistant").markdown(response_text)
220
- st.session_state.messages.append({'role': 'Assistant', 'content': response_text})
 
8
  from bs4 import BeautifulSoup
9
  from pptx import Presentation
10
  from docx import Document
11
+ from dotenv import load_dotenv
12
 
13
  from langchain.document_loaders import PyPDFLoader, TextLoader
14
  from langchain.indexes import VectorstoreIndexCreator
 
23
  from ibm_watson_machine_learning.metanames import GenTextParamsMetaNames as GenParams
24
  from ibm_watson_machine_learning.foundation_models.utils.enums import DecodingMethods
25
 
26
+ # Load environment variables from .env file
27
+ load_dotenv()
28
+
29
+ # Initialize index and chain to None
30
  index = None
31
+ rag_chain = None
32
 
33
  # Custom loader for DOCX files
34
  class DocxLoader:
 
53
  # Custom loader for additional file types
54
  def load_csv(file_path):
55
  df = pd.read_csv(file_path)
56
+ page_size = 100
 
 
57
  page_number = st.number_input("Page number", min_value=1, max_value=(len(df) // page_size) + 1, step=1, value=1)
 
58
  start_index = (page_number - 1) * page_size
59
  end_index = start_index + page_size
60
+ st.dataframe(df.iloc[start_index:end_index])
 
 
61
  return df.to_string(index=False)
62
 
63
  def load_json(file_path):
 
84
  @st.cache_resource
85
  def load_file(file_name, file_type):
86
  loaders = []
87
+ text = None
88
 
89
  if file_type == "pdf":
90
  loaders = [PyPDFLoader(file_name)]
 
110
  st.error("Unsupported file type.")
111
  return None
112
 
113
+ if text:
114
+ with tempfile.NamedTemporaryFile(delete=False, suffix=".txt") as temp_file:
115
+ temp_file.write(text.encode("utf-8"))
116
+ temp_file_path = temp_file.name
117
+ loaders = [TextLoader(temp_file_path)]
118
+
119
+ if loaders:
120
+ index = VectorstoreIndexCreator(
121
+ embedding=HuggingFaceEmbeddings(model_name="all-MiniLM-L12-v2"),
122
+ text_splitter=RecursiveCharacterTextSplitter(chunk_size=450, chunk_overlap=50)
123
+ ).from_loaders(loaders)
124
+ st.success("Index created successfully!")
125
+ return index
126
+ return None
127
 
128
  # Watsonx API setup
129
  watsonx_api_key = os.getenv("WATSONX_API_KEY")
 
161
  }
162
  st.info("Upload a file to use RAG")
163
  uploaded_file = st.file_uploader("Upload file", accept_multiple_files=False, type=["pdf", "docx", "txt", "pptx", "csv", "json", "xml", "yaml", "html"])
164
+
165
  if uploaded_file is not None:
166
  bytes_data = uploaded_file.read()
167
  st.write("Filename:", uploaded_file.name)
 
168
  with open(uploaded_file.name, 'wb') as f:
169
  f.write(bytes_data)
 
170
  file_type = uploaded_file.name.split('.')[-1].lower()
171
  index = load_file(uploaded_file.name, file_type)
172
 
 
212
  if prompt:
213
  st.chat_message("user").markdown(prompt)
214
  if rag_chain:
215
+ response