Spaces:

poemsforaphrodite
/

Building_Regulations_Chatbot

Sleeping

App Files Files Community

Update app.py

by Jashan1 - opened 29 days ago

base: refs/heads/main

←

from: refs/pr/1

Discussion Files changed

+137

-71

Files changed (1) hide show

app.py +137 -71

app.py CHANGED Viewed

@@ -14,6 +14,14 @@ from streamlit_extras.switch_page_button import switch_page
 import json
 import pandas as pd
 from st_aggrid import AgGrid, GridOptionsBuilder, GridUpdateMode, DataReturnMode
 # ---------------------- Configuration ----------------------
 st.set_page_config(page_title="Building Regulations Chatbot", layout="wide", initial_sidebar_state="expanded")
 # Load environment variables from .env file
@@ -39,6 +47,7 @@ if 'thread_id' not in st.session_state:
 if 'file_ids' not in st.session_state:
     st.session_state.file_ids = []
 # ---------------------- Helper Functions ----------------------
 def get_vector_stores():
@@ -48,6 +57,7 @@ def get_vector_stores():
     except Exception as e:
         return f"Error retrieving vector stores: {str(e)}"
 def fetch_pdfs(city_code):
     url = f"http://91.203.213.50:5000/oereblex/{city_code}"
     response = requests.get(url)
@@ -59,6 +69,7 @@ def fetch_pdfs(city_code):
         st.error(f"Failed to fetch PDFs for city code {city_code}")
         return None
 def download_pdf(url, doc_title):
     # Add 'https://' scheme if it's missing
     if not url.startswith(('http://', 'https://')):
@@ -87,6 +98,7 @@ def download_pdf(url, doc_title):
         st.error(f"Failed to download PDF from {url}. Error: {str(e)}")
         return None
 # Helper function to upload file to OpenAI
 def upload_file_to_openai(file_path):
     try:
@@ -99,6 +111,7 @@ def upload_file_to_openai(file_path):
         st.error(f"Failed to upload file {file_path}. Error: {str(e)}")
         return None
 def create_assistant():
     assistant = client.beta.assistants.create(
         name="Building Regulations Assistant",
@@ -109,15 +122,53 @@ def create_assistant():
     st.session_state.assistant_id = assistant.id
     return assistant.id
 def chat_with_assistant(file_ids, user_message):
     print("----- Starting chat_with_assistant -----")
     print("Received file_ids:", file_ids)
     print("Received user_message:", user_message)
     # Create attachments for each file_id
     attachments = [{"file_id": file_id, "tools": [{"type": "file_search"}]} for file_id in file_ids]
     print("Attachments created:", attachments)
     if st.session_state.thread_id is None:
         print("No existing thread_id found. Creating a new thread.")
         thread = client.beta.threads.create(
@@ -133,7 +184,6 @@ def chat_with_assistant(file_ids, user_message):
         print("New thread created with id:", st.session_state.thread_id)
     else:
         print(f"Existing thread_id found: {st.session_state.thread_id}. Adding message to the thread.")
-        # Add a message to the existing thread without updating thread_id
         message = client.beta.threads.messages.create(
             thread_id=st.session_state.thread_id,
             role="user",
@@ -141,27 +191,14 @@ def chat_with_assistant(file_ids, user_message):
             attachments=attachments
         )
         print("Message added to thread with id:", message.id)
-        # Do NOT update st.session_state.thread_id here
-    # Retrieve the thread object using the thread_id
     try:
         thread = client.beta.threads.retrieve(thread_id=st.session_state.thread_id)
         print("Retrieved thread:", thread)
     except Exception as e:
         print(f"Error retrieving thread with id {st.session_state.thread_id}: {e}")
         return "An error occurred while processing your request.", []
-    # Debugging tool resources
-    try:
-        tool_resources = thread.tool_resources.file_search
-        print("Thread tool resources (file_search):", tool_resources)
-    except AttributeError:
-        print("No tool_resources.file_search found in thread.")
-    print("Assistant ID:", st.session_state.assistant_id)
-    print("Thread ID:", thread.id)
-    # Create and poll the run
     try:
         run = client.beta.threads.runs.create_and_poll(
             thread_id=thread.id, assistant_id=st.session_state.assistant_id
@@ -170,46 +207,57 @@ def chat_with_assistant(file_ids, user_message):
     except Exception as e:
         print("Error during run creation and polling:", e)
         return "An error occurred while processing your request.", []
-    # Retrieve messages
     try:
         messages = list(client.beta.threads.messages.list(thread_id=thread.id, run_id=run.id))
         print("Retrieved messages:", messages)
     except Exception as e:
         print("Error retrieving messages:", e)
         return "An error occurred while retrieving messages.", []
     # Process the first message content
     if messages and messages[0].content:
         message_content = messages[0].content[0].text
         print("Raw message content:", message_content)
         annotations = message_content.annotations
-        print("Annotations found:", annotations)
         citations = []
         for index, annotation in enumerate(annotations):
-            print(f"Processing annotation {index}: {annotation.text}")
             message_content.value = message_content.value.replace(annotation.text, f"[{index}]")
             if file_citation := getattr(annotation, "file_citation", None):
                 try:
                     cited_file = client.files.retrieve(file_citation.file_id)
                     citation_entry = f"[{index}] {cited_file.filename}"
-                    citations.append(citation_entry)
-                    print(f"Citation added: {citation_entry}")
                 except Exception as e:
                     print(f"Error retrieving cited file for annotation {index}: {e}")
-        print("Final message content after replacements:", message_content.value)
-        print("All citations:", citations)
-        print("----- Ending chat_with_assistant -----")
-        return message_content.value, citations
     else:
-        print("No messages or content found in the retrieved messages.")
         return "No response received from the assistant.", []
 # ---------------------- Streamlit App ----------------------
 # ---------------------- Custom CSS Injection ----------------------
@@ -221,40 +269,50 @@ st.markdown("""
     .chat-container {
         display: flex;
         flex-direction: column;
     }
     /* Style for individual chat messages */
     .chat-message {
-        margin-bottom: 20px; /* Increased space between messages */
     }
     /* Style for user messages */
     .chat-message.user > div:first-child {
         color: #1E90FF;  /* Dodger Blue for "You" */
-        font-size: 1.2em;
-        margin-bottom: 5px;
     }
     /* Style for assistant messages */
     .chat-message.assistant > div:first-child {
         color: #32CD32;  /* Lime Green for "Assistant" */
-        font-size: 1.2em;
-        margin-bottom: 5px;
     }
     /* Style for the message content */
     .message-content {
-        /* Removed the background color to maintain original background */
-        padding: 10px;
-        border-radius: 5px;
-        /* Optionally, you can set a semi-transparent background or match it with your theme */
-        /* background-color: rgba(241, 241, 241, 0.8); */
     }
-    /* Optional: Add more spacing between messages */
-    .chat-message.user, .chat-message.assistant {
-        padding-top: 10px;
-        padding-bottom: 10px;
     }
     </style>
     """, unsafe_allow_html=True)
@@ -316,18 +374,18 @@ if page == "Home":
     if submit and user_input.strip() != "":
         # Add user message to chat history
         st.session_state.chat_history.append({"role": "user", "content": user_input})
-        print("chat history:", st.session_state.chat_history)
         if not st.session_state.file_ids:
             st.error("Please process PDFs first.")
         else:
             with st.spinner("Generating response..."):
                 try:
                     response, citations = chat_with_assistant(st.session_state.file_ids, user_input)
-                    # Add assistant response to chat history
-                    print("response:", response)
-                    print("citations:", citations)
-                    st.session_state.chat_history.append({"role": "assistant", "content": response+"\n\n"+"\n".join(citations)})
-                    print("chat history:", st.session_state.chat_history)
                 except Exception as e:
                     st.error(f"Error generating response: {str(e)}")
@@ -360,10 +418,10 @@ elif page == "Documents":
     if 'available_pdfs' in st.session_state:
         st.write(f"Total PDFs: {len(st.session_state.available_pdfs)}")
         # Create a DataFrame from the available PDFs
         df = pd.DataFrame(st.session_state.available_pdfs)
         # Select and rename only the specified columns
         df = df[['municipality', 'abbreviation', 'doc_title', 'file_title', 'file_href', 'enactment_date', 'prio']]
         df = df.rename(columns={
@@ -375,10 +433,10 @@ elif page == "Documents":
             "enactment_date": "Enactment Date",
             "prio": "Prio"
         })
         # Add a checkbox column to the DataFrame at the beginning
         df.insert(0, "Select", False)
         # Configure grid options
         gb = GridOptionsBuilder.from_dataframe(df)
         gb.configure_default_column(enablePivot=True, enableValue=True, enableRowGroup=True)
@@ -400,10 +458,10 @@ elif page == "Documents":
         # Get the selected rows
         selected_rows = grid_response['selected_rows']
         # Debug: Print the structure of selected_rows
         st.write("Debug - Selected Rows Structure:", selected_rows)
         if st.button("Process Selected PDFs"):
             if len(selected_rows) > 0:  # Check if there are any selected rows
                 # Convert selected_rows to a DataFrame
@@ -411,14 +469,14 @@ elif page == "Documents":
                 st.session_state.assistant_id = create_assistant()
                 with st.spinner("Processing PDFs and creating/updating assistant..."):
                     file_ids = []
                     for _, pdf in st.session_state.selected_pdfs.iterrows():
                         # Debug: Print each pdf item
                         st.write("Debug - PDF item:", pdf)
                         file_href = pdf['File Href']
                         doc_title = pdf['Doc Title']
                         # Pass doc_title to download_pdf
                         file_name = download_pdf(file_href, doc_title)
                         if file_name:
@@ -430,23 +488,31 @@ elif page == "Documents":
                                 st.warning(f"Failed to upload {doc_title}. Skipping this file.")
                         else:
                             st.warning(f"Failed to download {doc_title}. Skipping this file.")
                     st.session_state.file_ids = file_ids
                 st.success("PDFs processed successfully. You can now chat on the Home page.")
             else:
                 st.warning("Select at least one PDF.")
-    if st.button("Go to Home"):
-        switch_page("Home")
 elif page == "Admin":
     st.title("Admin Panel")
     st.header("Vector Stores Information")
     vector_stores = get_vector_stores()
     json_vector_stores = json.dumps([vs.model_dump() for vs in vector_stores])
     st.write(json_vector_stores)
-    # Add a button to go back to the main page
-    if st.button("Back to Home"):
-        switch_page("Home")

 import json
 import pandas as pd
 from st_aggrid import AgGrid, GridOptionsBuilder, GridUpdateMode, DataReturnMode
+import time
+import random
+import aiohttp
+import asyncio
+from PyPDF2 import PdfWriter
+load_dotenv()
 # ---------------------- Configuration ----------------------
 st.set_page_config(page_title="Building Regulations Chatbot", layout="wide", initial_sidebar_state="expanded")
 # Load environment variables from .env file
 if 'file_ids' not in st.session_state:
     st.session_state.file_ids = []
 # ---------------------- Helper Functions ----------------------
 def get_vector_stores():
     except Exception as e:
         return f"Error retrieving vector stores: {str(e)}"
 def fetch_pdfs(city_code):
     url = f"http://91.203.213.50:5000/oereblex/{city_code}"
     response = requests.get(url)
         st.error(f"Failed to fetch PDFs for city code {city_code}")
         return None
 def download_pdf(url, doc_title):
     # Add 'https://' scheme if it's missing
     if not url.startswith(('http://', 'https://')):
         st.error(f"Failed to download PDF from {url}. Error: {str(e)}")
         return None
 # Helper function to upload file to OpenAI
 def upload_file_to_openai(file_path):
     try:
         st.error(f"Failed to upload file {file_path}. Error: {str(e)}")
         return None
 def create_assistant():
     assistant = client.beta.assistants.create(
         name="Building Regulations Assistant",
     st.session_state.assistant_id = assistant.id
     return assistant.id
def format_response(response, citations):
    """Return the assistant reply as a markdown string.

    The result starts with a ``### Response`` heading followed by
    *response*. When *citations* is non-empty, a ``### Citations`` heading
    and one ``- <citation>`` bullet per entry are appended. Leading and
    trailing whitespace of the whole result is stripped.

    Args:
        response: The reply text to place under the response heading.
        citations: Iterable of citation strings; may be empty or ``None``.

    Returns:
        The formatted markdown text.
    """
    # Assembled with str.join instead of a nested f-string: a backslash
    # ("\n") inside an f-string replacement field is a SyntaxError on
    # Python versions before 3.12.
    lines = ["### Response", f"{response}"]
    if citations:
        lines.append("### Citations")
        lines.extend(f"- {citation}" for citation in citations)
    return "\n".join(lines).strip()
def response_generator(response, citations, *, word_delay=0.05, pause_delay=0.1):
    """Yield the reply in small chunks to simulate a streamed answer.

    Chunks are produced in this order: a ``### Response`` heading, each
    whitespace-separated word of *response* followed by a single space,
    and, when *citations* is non-empty, a ``### Citations`` heading
    followed by one ``- <citation>`` line per entry. The generator sleeps
    after every chunk.

    Args:
        response: The reply text; it is split on whitespace, so runs of
            spaces and newlines are collapsed to single spaces.
        citations: Iterable of citation strings; may be empty or ``None``.
        word_delay: Seconds to sleep after an ordinary word and after each
            citation line.
        pause_delay: Seconds to sleep after a heading and after a word
            ending in ``.``, ``!``, ``?`` or ``:``.

    Yields:
        The next string chunk to append to the displayed text.
    """
    sentence_endings = ('.', '!', '?', ':')

    yield "### Response\n\n"
    time.sleep(pause_delay)

    for word in response.split():
        yield word + " "
        # Pause longer after punctuation so the stream reads more naturally.
        time.sleep(pause_delay if word.endswith(sentence_endings) else word_delay)

    if citations:
        # Blank line before the heading separates it from the reply text.
        yield "\n\n### Citations\n\n"
        time.sleep(pause_delay)
        for citation in citations:
            yield f"- {citation}\n"
            time.sleep(word_delay)
 def chat_with_assistant(file_ids, user_message):
     print("----- Starting chat_with_assistant -----")
     print("Received file_ids:", file_ids)
     print("Received user_message:", user_message)
     # Create attachments for each file_id
     attachments = [{"file_id": file_id, "tools": [{"type": "file_search"}]} for file_id in file_ids]
     print("Attachments created:", attachments)
     if st.session_state.thread_id is None:
         print("No existing thread_id found. Creating a new thread.")
         thread = client.beta.threads.create(
         print("New thread created with id:", st.session_state.thread_id)
     else:
         print(f"Existing thread_id found: {st.session_state.thread_id}. Adding message to the thread.")
         message = client.beta.threads.messages.create(
             thread_id=st.session_state.thread_id,
             role="user",
             attachments=attachments
         )
         print("Message added to thread with id:", message.id)
     try:
         thread = client.beta.threads.retrieve(thread_id=st.session_state.thread_id)
         print("Retrieved thread:", thread)
     except Exception as e:
         print(f"Error retrieving thread with id {st.session_state.thread_id}: {e}")
         return "An error occurred while processing your request.", []
     try:
         run = client.beta.threads.runs.create_and_poll(
             thread_id=thread.id, assistant_id=st.session_state.assistant_id
     except Exception as e:
         print("Error during run creation and polling:", e)
         return "An error occurred while processing your request.", []
     try:
         messages = list(client.beta.threads.messages.list(thread_id=thread.id, run_id=run.id))
         print("Retrieved messages:", messages)
     except Exception as e:
         print("Error retrieving messages:", e)
         return "An error occurred while retrieving messages.", []
     # Process the first message content
     if messages and messages[0].content:
         message_content = messages[0].content[0].text
         print("Raw message content:", message_content)
         annotations = message_content.annotations
         citations = []
+        seen_citations = set()
+        # Process annotations and citations
         for index, annotation in enumerate(annotations):
             message_content.value = message_content.value.replace(annotation.text, f"[{index}]")
             if file_citation := getattr(annotation, "file_citation", None):
                 try:
                     cited_file = client.files.retrieve(file_citation.file_id)
                     citation_entry = f"[{index}] {cited_file.filename}"
+                    if citation_entry not in seen_citations:
+                        citations.append(citation_entry)
+                        seen_citations.add(citation_entry)
                 except Exception as e:
                     print(f"Error retrieving cited file for annotation {index}: {e}")
+        # Create a container for the response with proper styling
+        response_container = st.container()
+        with response_container:
+            message_placeholder = st.empty()
+            streaming_content = ""
+            # Stream the response with structure
+            for chunk in response_generator(message_content.value, citations):
+                streaming_content += chunk
+                # Use markdown for proper formatting during streaming
+                message_placeholder.markdown(streaming_content + "▌")
+            # Final formatted response
+            final_formatted_response = format_response(message_content.value, citations)
+            message_placeholder.markdown(final_formatted_response)
+            return final_formatted_response, citations
     else:
         return "No response received from the assistant.", []
 # ---------------------- Streamlit App ----------------------
 # ---------------------- Custom CSS Injection ----------------------
     .chat-container {
         display: flex;
         flex-direction: column;
+        gap: 1.5rem;
     }
     /* Style for individual chat messages */
     .chat-message {
+        margin-bottom: 1.5rem;
     }
     /* Style for user messages */
     .chat-message.user > div:first-child {
         color: #1E90FF;  /* Dodger Blue for "You" */
+        font-weight: bold;
+        margin-bottom: 0.5rem;
     }
     /* Style for assistant messages */
     .chat-message.assistant > div:first-child {
         color: #32CD32;  /* Lime Green for "Assistant" */
+        font-weight: bold;
+        margin-bottom: 0.5rem;
     }
     /* Style for the message content */
     .message-content {
+        padding: 1rem;
+        border-radius: 0.5rem;
+        line-height: 1.5;
     }
+    .message-content h3 {
+        color: #444;
+        margin-top: 1rem;
+        margin-bottom: 0.5rem;
+        font-size: 1.1rem;
+    }
+    .message-content ul {
+        margin-top: 0.5rem;
+        margin-bottom: 0.5rem;
+        padding-left: 1.5rem;
+    }
+    .message-content li {
+        margin-bottom: 0.25rem;
     }
     </style>
     """, unsafe_allow_html=True)
     if submit and user_input.strip() != "":
         # Add user message to chat history
         st.session_state.chat_history.append({"role": "user", "content": user_input})
         if not st.session_state.file_ids:
             st.error("Please process PDFs first.")
         else:
             with st.spinner("Generating response..."):
                 try:
                     response, citations = chat_with_assistant(st.session_state.file_ids, user_input)
+                    # The response is already formatted, so we can add it directly to chat history
+                    st.session_state.chat_history.append({
+                        "role": "assistant",
+                        "content": response
+                    })
                 except Exception as e:
                     st.error(f"Error generating response: {str(e)}")
     if 'available_pdfs' in st.session_state:
         st.write(f"Total PDFs: {len(st.session_state.available_pdfs)}")
         # Create a DataFrame from the available PDFs
         df = pd.DataFrame(st.session_state.available_pdfs)
         # Select and rename only the specified columns
         df = df[['municipality', 'abbreviation', 'doc_title', 'file_title', 'file_href', 'enactment_date', 'prio']]
         df = df.rename(columns={
             "enactment_date": "Enactment Date",
             "prio": "Prio"
         })
         # Add a checkbox column to the DataFrame at the beginning
         df.insert(0, "Select", False)
         # Configure grid options
         gb = GridOptionsBuilder.from_dataframe(df)
         gb.configure_default_column(enablePivot=True, enableValue=True, enableRowGroup=True)
         # Get the selected rows
         selected_rows = grid_response['selected_rows']
         # Debug: Print the structure of selected_rows
         st.write("Debug - Selected Rows Structure:", selected_rows)
         if st.button("Process Selected PDFs"):
             if len(selected_rows) > 0:  # Check if there are any selected rows
                 # Convert selected_rows to a DataFrame
                 st.session_state.assistant_id = create_assistant()
                 with st.spinner("Processing PDFs and creating/updating assistant..."):
                     file_ids = []
                     for _, pdf in st.session_state.selected_pdfs.iterrows():
                         # Debug: Print each pdf item
                         st.write("Debug - PDF item:", pdf)
                         file_href = pdf['File Href']
                         doc_title = pdf['Doc Title']
                         # Pass doc_title to download_pdf
                         file_name = download_pdf(file_href, doc_title)
                         if file_name:
                                 st.warning(f"Failed to upload {doc_title}. Skipping this file.")
                         else:
                             st.warning(f"Failed to download {doc_title}. Skipping this file.")
                     st.session_state.file_ids = file_ids
                 st.success("PDFs processed successfully. You can now chat on the Home page.")
             else:
                 st.warning("Select at least one PDF.")
 elif page == "Admin":
     st.title("Admin Panel")
     st.header("Vector Stores Information")
     vector_stores = get_vector_stores()
     json_vector_stores = json.dumps([vs.model_dump() for vs in vector_stores])
     st.write(json_vector_stores)