Files changed (1) hide show
  1. app.py +137 -71
app.py CHANGED
@@ -14,6 +14,14 @@ from streamlit_extras.switch_page_button import switch_page
14
  import json
15
  import pandas as pd
16
  from st_aggrid import AgGrid, GridOptionsBuilder, GridUpdateMode, DataReturnMode
 
 
 
 
 
 
 
 
17
  # ---------------------- Configuration ----------------------
18
  st.set_page_config(page_title="Building Regulations Chatbot", layout="wide", initial_sidebar_state="expanded")
19
  # Load environment variables from .env file
@@ -39,6 +47,7 @@ if 'thread_id' not in st.session_state:
39
  if 'file_ids' not in st.session_state:
40
  st.session_state.file_ids = []
41
 
 
42
  # ---------------------- Helper Functions ----------------------
43
 
44
  def get_vector_stores():
@@ -48,6 +57,7 @@ def get_vector_stores():
48
  except Exception as e:
49
  return f"Error retrieving vector stores: {str(e)}"
50
 
 
51
  def fetch_pdfs(city_code):
52
  url = f"http://91.203.213.50:5000/oereblex/{city_code}"
53
  response = requests.get(url)
@@ -59,6 +69,7 @@ def fetch_pdfs(city_code):
59
  st.error(f"Failed to fetch PDFs for city code {city_code}")
60
  return None
61
 
 
62
  def download_pdf(url, doc_title):
63
  # Add 'https://' scheme if it's missing
64
  if not url.startswith(('http://', 'https://')):
@@ -87,6 +98,7 @@ def download_pdf(url, doc_title):
87
  st.error(f"Failed to download PDF from {url}. Error: {str(e)}")
88
  return None
89
 
 
90
  # Helper function to upload file to OpenAI
91
  def upload_file_to_openai(file_path):
92
  try:
@@ -99,6 +111,7 @@ def upload_file_to_openai(file_path):
99
  st.error(f"Failed to upload file {file_path}. Error: {str(e)}")
100
  return None
101
 
 
102
  def create_assistant():
103
  assistant = client.beta.assistants.create(
104
  name="Building Regulations Assistant",
@@ -109,15 +122,53 @@ def create_assistant():
109
  st.session_state.assistant_id = assistant.id
110
  return assistant.id
111
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
112
  def chat_with_assistant(file_ids, user_message):
113
  print("----- Starting chat_with_assistant -----")
114
  print("Received file_ids:", file_ids)
115
  print("Received user_message:", user_message)
116
-
117
  # Create attachments for each file_id
118
  attachments = [{"file_id": file_id, "tools": [{"type": "file_search"}]} for file_id in file_ids]
119
  print("Attachments created:", attachments)
120
-
121
  if st.session_state.thread_id is None:
122
  print("No existing thread_id found. Creating a new thread.")
123
  thread = client.beta.threads.create(
@@ -133,7 +184,6 @@ def chat_with_assistant(file_ids, user_message):
133
  print("New thread created with id:", st.session_state.thread_id)
134
  else:
135
  print(f"Existing thread_id found: {st.session_state.thread_id}. Adding message to the thread.")
136
- # Add a message to the existing thread without updating thread_id
137
  message = client.beta.threads.messages.create(
138
  thread_id=st.session_state.thread_id,
139
  role="user",
@@ -141,27 +191,14 @@ def chat_with_assistant(file_ids, user_message):
141
  attachments=attachments
142
  )
143
  print("Message added to thread with id:", message.id)
144
- # Do NOT update st.session_state.thread_id here
145
-
146
- # Retrieve the thread object using the thread_id
147
  try:
148
  thread = client.beta.threads.retrieve(thread_id=st.session_state.thread_id)
149
  print("Retrieved thread:", thread)
150
  except Exception as e:
151
  print(f"Error retrieving thread with id {st.session_state.thread_id}: {e}")
152
  return "An error occurred while processing your request.", []
153
-
154
- # Debugging tool resources
155
- try:
156
- tool_resources = thread.tool_resources.file_search
157
- print("Thread tool resources (file_search):", tool_resources)
158
- except AttributeError:
159
- print("No tool_resources.file_search found in thread.")
160
-
161
- print("Assistant ID:", st.session_state.assistant_id)
162
- print("Thread ID:", thread.id)
163
-
164
- # Create and poll the run
165
  try:
166
  run = client.beta.threads.runs.create_and_poll(
167
  thread_id=thread.id, assistant_id=st.session_state.assistant_id
@@ -170,46 +207,57 @@ def chat_with_assistant(file_ids, user_message):
170
  except Exception as e:
171
  print("Error during run creation and polling:", e)
172
  return "An error occurred while processing your request.", []
173
-
174
- # Retrieve messages
175
  try:
176
  messages = list(client.beta.threads.messages.list(thread_id=thread.id, run_id=run.id))
177
  print("Retrieved messages:", messages)
178
  except Exception as e:
179
  print("Error retrieving messages:", e)
180
  return "An error occurred while retrieving messages.", []
181
-
182
  # Process the first message content
183
  if messages and messages[0].content:
184
  message_content = messages[0].content[0].text
185
  print("Raw message content:", message_content)
186
-
187
  annotations = message_content.annotations
188
- print("Annotations found:", annotations)
189
-
190
  citations = []
 
 
 
191
  for index, annotation in enumerate(annotations):
192
- print(f"Processing annotation {index}: {annotation.text}")
193
  message_content.value = message_content.value.replace(annotation.text, f"[{index}]")
194
  if file_citation := getattr(annotation, "file_citation", None):
195
  try:
196
  cited_file = client.files.retrieve(file_citation.file_id)
197
  citation_entry = f"[{index}] {cited_file.filename}"
198
- citations.append(citation_entry)
199
- print(f"Citation added: {citation_entry}")
 
200
  except Exception as e:
201
  print(f"Error retrieving cited file for annotation {index}: {e}")
202
-
203
- print("Final message content after replacements:", message_content.value)
204
- print("All citations:", citations)
205
- print("----- Ending chat_with_assistant -----")
206
- return message_content.value, citations
 
 
 
 
 
 
 
 
 
 
 
 
 
207
  else:
208
- print("No messages or content found in the retrieved messages.")
209
  return "No response received from the assistant.", []
210
 
211
 
212
-
213
  # ---------------------- Streamlit App ----------------------
214
 
215
  # ---------------------- Custom CSS Injection ----------------------
@@ -221,40 +269,50 @@ st.markdown("""
221
  .chat-container {
222
  display: flex;
223
  flex-direction: column;
 
224
  }
225
 
226
  /* Style for individual chat messages */
227
  .chat-message {
228
- margin-bottom: 20px; /* Increased space between messages */
229
  }
230
 
231
  /* Style for user messages */
232
  .chat-message.user > div:first-child {
233
  color: #1E90FF; /* Dodger Blue for "You" */
234
- font-size: 1.2em;
235
- margin-bottom: 5px;
236
  }
237
 
238
  /* Style for assistant messages */
239
  .chat-message.assistant > div:first-child {
240
  color: #32CD32; /* Lime Green for "Assistant" */
241
- font-size: 1.2em;
242
- margin-bottom: 5px;
243
  }
244
 
245
  /* Style for the message content */
246
  .message-content {
247
- /* Removed the background color to maintain original background */
248
- padding: 10px;
249
- border-radius: 5px;
250
- /* Optionally, you can set a semi-transparent background or match it with your theme */
251
- /* background-color: rgba(241, 241, 241, 0.8); */
252
  }
253
 
254
- /* Optional: Add more spacing between messages */
255
- .chat-message.user, .chat-message.assistant {
256
- padding-top: 10px;
257
- padding-bottom: 10px;
 
 
 
 
 
 
 
 
 
 
 
258
  }
259
  </style>
260
  """, unsafe_allow_html=True)
@@ -316,18 +374,18 @@ if page == "Home":
316
  if submit and user_input.strip() != "":
317
  # Add user message to chat history
318
  st.session_state.chat_history.append({"role": "user", "content": user_input})
319
- print("chat history:", st.session_state.chat_history)
320
  if not st.session_state.file_ids:
321
  st.error("Please process PDFs first.")
322
  else:
323
  with st.spinner("Generating response..."):
324
  try:
325
  response, citations = chat_with_assistant(st.session_state.file_ids, user_input)
326
- # Add assistant response to chat history
327
- print("response:", response)
328
- print("citations:", citations)
329
- st.session_state.chat_history.append({"role": "assistant", "content": response+"\n\n"+"\n".join(citations)})
330
- print("chat history:", st.session_state.chat_history)
331
  except Exception as e:
332
  st.error(f"Error generating response: {str(e)}")
333
 
@@ -360,10 +418,10 @@ elif page == "Documents":
360
 
361
  if 'available_pdfs' in st.session_state:
362
  st.write(f"Total PDFs: {len(st.session_state.available_pdfs)}")
363
-
364
  # Create a DataFrame from the available PDFs
365
  df = pd.DataFrame(st.session_state.available_pdfs)
366
-
367
  # Select and rename only the specified columns
368
  df = df[['municipality', 'abbreviation', 'doc_title', 'file_title', 'file_href', 'enactment_date', 'prio']]
369
  df = df.rename(columns={
@@ -375,10 +433,10 @@ elif page == "Documents":
375
  "enactment_date": "Enactment Date",
376
  "prio": "Prio"
377
  })
378
-
379
  # Add a checkbox column to the DataFrame at the beginning
380
  df.insert(0, "Select", False)
381
-
382
  # Configure grid options
383
  gb = GridOptionsBuilder.from_dataframe(df)
384
  gb.configure_default_column(enablePivot=True, enableValue=True, enableRowGroup=True)
@@ -400,10 +458,10 @@ elif page == "Documents":
400
 
401
  # Get the selected rows
402
  selected_rows = grid_response['selected_rows']
403
-
404
  # Debug: Print the structure of selected_rows
405
  st.write("Debug - Selected Rows Structure:", selected_rows)
406
-
407
  if st.button("Process Selected PDFs"):
408
  if len(selected_rows) > 0: # Check if there are any selected rows
409
  # Convert selected_rows to a DataFrame
@@ -411,14 +469,14 @@ elif page == "Documents":
411
  st.session_state.assistant_id = create_assistant()
412
  with st.spinner("Processing PDFs and creating/updating assistant..."):
413
  file_ids = []
414
-
415
  for _, pdf in st.session_state.selected_pdfs.iterrows():
416
  # Debug: Print each pdf item
417
  st.write("Debug - PDF item:", pdf)
418
-
419
  file_href = pdf['File Href']
420
  doc_title = pdf['Doc Title']
421
-
422
  # Pass doc_title to download_pdf
423
  file_name = download_pdf(file_href, doc_title)
424
  if file_name:
@@ -430,23 +488,31 @@ elif page == "Documents":
430
  st.warning(f"Failed to upload {doc_title}. Skipping this file.")
431
  else:
432
  st.warning(f"Failed to download {doc_title}. Skipping this file.")
433
-
434
  st.session_state.file_ids = file_ids
435
  st.success("PDFs processed successfully. You can now chat on the Home page.")
436
  else:
437
  st.warning("Select at least one PDF.")
438
-
439
- if st.button("Go to Home"):
440
- switch_page("Home")
441
 
442
  elif page == "Admin":
443
  st.title("Admin Panel")
444
  st.header("Vector Stores Information")
445
-
446
  vector_stores = get_vector_stores()
447
  json_vector_stores = json.dumps([vs.model_dump() for vs in vector_stores])
448
  st.write(json_vector_stores)
449
 
450
- # Add a button to go back to the main page
451
- if st.button("Back to Home"):
452
- switch_page("Home")
 
 
 
 
 
 
 
 
 
 
 
14
  import json
15
  import pandas as pd
16
  from st_aggrid import AgGrid, GridOptionsBuilder, GridUpdateMode, DataReturnMode
17
+ import time
18
+ import random
19
+ import aiohttp
20
+ import asyncio
21
+ from PyPDF2 import PdfWriter
22
+
23
+ load_dotenv()
24
+
25
  # ---------------------- Configuration ----------------------
26
  st.set_page_config(page_title="Building Regulations Chatbot", layout="wide", initial_sidebar_state="expanded")
27
  # Load environment variables from .env file
 
47
  if 'file_ids' not in st.session_state:
48
  st.session_state.file_ids = []
49
 
50
+
51
  # ---------------------- Helper Functions ----------------------
52
 
53
  def get_vector_stores():
 
57
  except Exception as e:
58
  return f"Error retrieving vector stores: {str(e)}"
59
 
60
+
61
  def fetch_pdfs(city_code):
62
  url = f"http://91.203.213.50:5000/oereblex/{city_code}"
63
  response = requests.get(url)
 
69
  st.error(f"Failed to fetch PDFs for city code {city_code}")
70
  return None
71
 
72
+
73
  def download_pdf(url, doc_title):
74
  # Add 'https://' scheme if it's missing
75
  if not url.startswith(('http://', 'https://')):
 
98
  st.error(f"Failed to download PDF from {url}. Error: {str(e)}")
99
  return None
100
 
101
+
102
  # Helper function to upload file to OpenAI
103
  def upload_file_to_openai(file_path):
104
  try:
 
111
  st.error(f"Failed to upload file {file_path}. Error: {str(e)}")
112
  return None
113
 
114
+
115
  def create_assistant():
116
  assistant = client.beta.assistants.create(
117
  name="Building Regulations Assistant",
 
122
  st.session_state.assistant_id = assistant.id
123
  return assistant.id
124
 
125
+
126
def format_response(response, citations):
    """Return the assistant answer as a Markdown document.

    The result always starts with a ``### Response`` heading followed by
    ``response``. When ``citations`` is non-empty, a blank line, a
    ``### Citations`` heading and one ``- <citation>`` bullet per entry are
    appended. Leading and trailing whitespace of the whole document is
    stripped.

    Args:
        response: Answer text to place under the "Response" heading.
        citations: Iterable of citation strings; may be empty or ``None``.

    Returns:
        The formatted Markdown string.
    """
    # Build the document from a list of lines rather than a triple-quoted
    # f-string: a backslash ("\n") inside an f-string replacement field is a
    # SyntaxError before Python 3.12, and a multi-line template would also
    # leak the source file's indentation into the rendered Markdown.
    lines = ["### Response", f"{response}"]
    if citations:
        lines.append("")  # blank line separating the answer from the citations
        lines.append("### Citations")
        lines.extend(f"- {citation}" for citation in citations)
    return "\n".join(lines).strip()
136
+
137
def response_generator(response, citations, *, word_delay=0.05, pause_delay=0.1):
    """Yield the formatted answer piece by piece to simulate streaming.

    The chunks, concatenated in order, form a ``### Response`` heading, the
    answer text, and (when ``citations`` is non-empty) a ``### Citations``
    heading followed by one ``- <citation>`` bullet per entry. The generator
    sleeps between chunks to pace the output.

    Args:
        response: Answer text to stream word by word.
        citations: Iterable of citation strings; may be empty or ``None``.
        word_delay: Seconds to sleep after an ordinary word and after each
            citation bullet.
        pause_delay: Seconds to sleep after each heading and after a word
            ending in ``.``, ``!``, ``?`` or ``:``.

    Yields:
        str: The next chunk of Markdown text.
    """
    # Local import so this block does not rely on `re` being imported at
    # module level.
    import re

    yield "### Response\n\n"
    time.sleep(pause_delay)

    # Each token is a word plus the whitespace that followed it in the
    # original text. Keeping that whitespace (instead of str.split(), which
    # discards it) preserves newlines, so paragraphs and list items render
    # the same way while streaming as they do in the final message.
    for token in re.findall(r"\S+\s*", response):
        yield token
        ends_clause = token.rstrip().endswith((".", "!", "?", ":"))
        # Slightly longer pause at punctuation for a more natural rhythm.
        time.sleep(pause_delay if ends_clause else word_delay)

    if citations:
        # Blank line before the heading so Markdown starts a new section.
        yield "\n\n### Citations\n\n"
        time.sleep(pause_delay)

        for citation in citations:
            yield f"- {citation}\n"
            time.sleep(word_delay)
162
+
163
  def chat_with_assistant(file_ids, user_message):
164
  print("----- Starting chat_with_assistant -----")
165
  print("Received file_ids:", file_ids)
166
  print("Received user_message:", user_message)
167
+
168
  # Create attachments for each file_id
169
  attachments = [{"file_id": file_id, "tools": [{"type": "file_search"}]} for file_id in file_ids]
170
  print("Attachments created:", attachments)
171
+
172
  if st.session_state.thread_id is None:
173
  print("No existing thread_id found. Creating a new thread.")
174
  thread = client.beta.threads.create(
 
184
  print("New thread created with id:", st.session_state.thread_id)
185
  else:
186
  print(f"Existing thread_id found: {st.session_state.thread_id}. Adding message to the thread.")
 
187
  message = client.beta.threads.messages.create(
188
  thread_id=st.session_state.thread_id,
189
  role="user",
 
191
  attachments=attachments
192
  )
193
  print("Message added to thread with id:", message.id)
194
+
 
 
195
  try:
196
  thread = client.beta.threads.retrieve(thread_id=st.session_state.thread_id)
197
  print("Retrieved thread:", thread)
198
  except Exception as e:
199
  print(f"Error retrieving thread with id {st.session_state.thread_id}: {e}")
200
  return "An error occurred while processing your request.", []
201
+
 
 
 
 
 
 
 
 
 
 
 
202
  try:
203
  run = client.beta.threads.runs.create_and_poll(
204
  thread_id=thread.id, assistant_id=st.session_state.assistant_id
 
207
  except Exception as e:
208
  print("Error during run creation and polling:", e)
209
  return "An error occurred while processing your request.", []
210
+
 
211
  try:
212
  messages = list(client.beta.threads.messages.list(thread_id=thread.id, run_id=run.id))
213
  print("Retrieved messages:", messages)
214
  except Exception as e:
215
  print("Error retrieving messages:", e)
216
  return "An error occurred while retrieving messages.", []
217
+
218
  # Process the first message content
219
  if messages and messages[0].content:
220
  message_content = messages[0].content[0].text
221
  print("Raw message content:", message_content)
222
+
223
  annotations = message_content.annotations
 
 
224
  citations = []
225
+ seen_citations = set()
226
+
227
+ # Process annotations and citations
228
  for index, annotation in enumerate(annotations):
 
229
  message_content.value = message_content.value.replace(annotation.text, f"[{index}]")
230
  if file_citation := getattr(annotation, "file_citation", None):
231
  try:
232
  cited_file = client.files.retrieve(file_citation.file_id)
233
  citation_entry = f"[{index}] {cited_file.filename}"
234
+ if citation_entry not in seen_citations:
235
+ citations.append(citation_entry)
236
+ seen_citations.add(citation_entry)
237
  except Exception as e:
238
  print(f"Error retrieving cited file for annotation {index}: {e}")
239
+
240
+ # Create a container for the response with proper styling
241
+ response_container = st.container()
242
+ with response_container:
243
+ message_placeholder = st.empty()
244
+ streaming_content = ""
245
+
246
+ # Stream the response with structure
247
+ for chunk in response_generator(message_content.value, citations):
248
+ streaming_content += chunk
249
+ # Use markdown for proper formatting during streaming
250
+ message_placeholder.markdown(streaming_content + "▌")
251
+
252
+ # Final formatted response
253
+ final_formatted_response = format_response(message_content.value, citations)
254
+ message_placeholder.markdown(final_formatted_response)
255
+
256
+ return final_formatted_response, citations
257
  else:
 
258
  return "No response received from the assistant.", []
259
 
260
 
 
261
  # ---------------------- Streamlit App ----------------------
262
 
263
  # ---------------------- Custom CSS Injection ----------------------
 
269
  .chat-container {
270
  display: flex;
271
  flex-direction: column;
272
+ gap: 1.5rem;
273
  }
274
 
275
  /* Style for individual chat messages */
276
  .chat-message {
277
+ margin-bottom: 1.5rem;
278
  }
279
 
280
  /* Style for user messages */
281
  .chat-message.user > div:first-child {
282
  color: #1E90FF; /* Dodger Blue for "You" */
283
+ font-weight: bold;
284
+ margin-bottom: 0.5rem;
285
  }
286
 
287
  /* Style for assistant messages */
288
  .chat-message.assistant > div:first-child {
289
  color: #32CD32; /* Lime Green for "Assistant" */
290
+ font-weight: bold;
291
+ margin-bottom: 0.5rem;
292
  }
293
 
294
  /* Style for the message content */
295
  .message-content {
296
+ padding: 1rem;
297
+ border-radius: 0.5rem;
298
+ line-height: 1.5;
 
 
299
  }
300
 
301
+ .message-content h3 {
302
+ color: #444;
303
+ margin-top: 1rem;
304
+ margin-bottom: 0.5rem;
305
+ font-size: 1.1rem;
306
+ }
307
+
308
+ .message-content ul {
309
+ margin-top: 0.5rem;
310
+ margin-bottom: 0.5rem;
311
+ padding-left: 1.5rem;
312
+ }
313
+
314
+ .message-content li {
315
+ margin-bottom: 0.25rem;
316
  }
317
  </style>
318
  """, unsafe_allow_html=True)
 
374
  if submit and user_input.strip() != "":
375
  # Add user message to chat history
376
  st.session_state.chat_history.append({"role": "user", "content": user_input})
377
+
378
  if not st.session_state.file_ids:
379
  st.error("Please process PDFs first.")
380
  else:
381
  with st.spinner("Generating response..."):
382
  try:
383
  response, citations = chat_with_assistant(st.session_state.file_ids, user_input)
384
+ # The response is already formatted, so we can add it directly to chat history
385
+ st.session_state.chat_history.append({
386
+ "role": "assistant",
387
+ "content": response
388
+ })
389
  except Exception as e:
390
  st.error(f"Error generating response: {str(e)}")
391
 
 
418
 
419
  if 'available_pdfs' in st.session_state:
420
  st.write(f"Total PDFs: {len(st.session_state.available_pdfs)}")
421
+
422
  # Create a DataFrame from the available PDFs
423
  df = pd.DataFrame(st.session_state.available_pdfs)
424
+
425
  # Select and rename only the specified columns
426
  df = df[['municipality', 'abbreviation', 'doc_title', 'file_title', 'file_href', 'enactment_date', 'prio']]
427
  df = df.rename(columns={
 
433
  "enactment_date": "Enactment Date",
434
  "prio": "Prio"
435
  })
436
+
437
  # Add a checkbox column to the DataFrame at the beginning
438
  df.insert(0, "Select", False)
439
+
440
  # Configure grid options
441
  gb = GridOptionsBuilder.from_dataframe(df)
442
  gb.configure_default_column(enablePivot=True, enableValue=True, enableRowGroup=True)
 
458
 
459
  # Get the selected rows
460
  selected_rows = grid_response['selected_rows']
461
+
462
  # Debug: Print the structure of selected_rows
463
  st.write("Debug - Selected Rows Structure:", selected_rows)
464
+
465
  if st.button("Process Selected PDFs"):
466
  if len(selected_rows) > 0: # Check if there are any selected rows
467
  # Convert selected_rows to a DataFrame
 
469
  st.session_state.assistant_id = create_assistant()
470
  with st.spinner("Processing PDFs and creating/updating assistant..."):
471
  file_ids = []
472
+
473
  for _, pdf in st.session_state.selected_pdfs.iterrows():
474
  # Debug: Print each pdf item
475
  st.write("Debug - PDF item:", pdf)
476
+
477
  file_href = pdf['File Href']
478
  doc_title = pdf['Doc Title']
479
+
480
  # Pass doc_title to download_pdf
481
  file_name = download_pdf(file_href, doc_title)
482
  if file_name:
 
488
  st.warning(f"Failed to upload {doc_title}. Skipping this file.")
489
  else:
490
  st.warning(f"Failed to download {doc_title}. Skipping this file.")
491
+
492
  st.session_state.file_ids = file_ids
493
  st.success("PDFs processed successfully. You can now chat on the Home page.")
494
  else:
495
  st.warning("Select at least one PDF.")
496
+
 
 
497
 
498
  elif page == "Admin":
499
  st.title("Admin Panel")
500
  st.header("Vector Stores Information")
501
+
502
  vector_stores = get_vector_stores()
503
  json_vector_stores = json.dumps([vs.model_dump() for vs in vector_stores])
504
  st.write(json_vector_stores)
505
 
506
+
507
+
508
+
509
+
510
+
511
+
512
+
513
+
514
+
515
+
516
+
517
+
518
+