Spaces: Runtime error
acecalisto3 committed • Commit deaafee
1 Parent(s): ab65de3
Update app.py

app.py CHANGED
Before: the affected sections of app.py prior to this commit (removed lines are marked with "-"; a bare "-" is a removed line whose text was not captured).

@@ -53,6 +53,7 @@ STOP_THREADS = False # Flag to stop scraping threads
 def get_db_connection():
     """
     Establishes and returns a MySQL database connection using environment variables.
     """
     try:
         connection = mysql.connector.connect(

@@ -66,11 +67,11 @@ def get_db_connection():
         return connection
     except mysql.connector.Error as err:
         if err.errno == errorcode.ER_ACCESS_DENIED_ERROR:
-            logging.
         elif err.errno == errorcode.ER_BAD_DB_ERROR:
-            logging.
         else:
-            logging.error
         return None

 # Initialize Database

@@ -80,7 +81,7 @@ def initialize_database():
     """
     connection = get_db_connection()
     if connection is None:
-        logging.
         return

     cursor = connection.cursor()

@@ -124,7 +125,7 @@ def monitor_urls(
     selector: str = None,
 ):
     """
-    Monitors the specified URLs for changes and logs any detected changes to the database.
     """
     global HISTORY, STOP_THREADS
     previous_hashes = {url: "" for url in urls}

@@ -171,21 +172,28 @@ def monitor_urls(
                 date_time_str = datetime.datetime.now().strftime("%Y-%m-%d %H:%M:%S")
                 HISTORY.append(f"Change detected at {url} on {date_time_str}")

-                #
                 connection = get_db_connection()
                 if connection:
-
-
-
-
-
-
-
-
-
-
                 else:
-

         except (
             NoSuchElementException,

@@ -199,6 +207,33 @@ def monitor_urls(
         driver.quit()
     logging.info("ChromeDriver session ended.")

 # Function to create WebDriver
 def create_driver(options: Options) -> webdriver.Chrome:
     """

@@ -296,21 +331,28 @@ def start_scraping(
                 if initial_hash:
                     HISTORY.append(f"Initial observation at {url}: {initial_hash}")

-                    #
                     connection = get_db_connection()
                     if connection:
-
-
-
-
-
-
-
-
-
-
                     else:
-

         except Exception as e:
             HISTORY.append(f"Error accessing {url}: {e}")

@@ -339,91 +381,157 @@ def stop_scraping() -> str:
     logging.info("Scraping stop signal sent.")
     return "Scraping has been stopped."

-# Function to display CSV content from MySQL
 def display_csv(storage_location: str, url: str) -> str:
     """
-    Fetches and returns the scraped data for a given URL from the MySQL database.
     """
     try:
         connection = get_db_connection()
-        if
-
-
-
-
-
-

-
-

-        df = pd.DataFrame(results)
-        cursor.close()
-        connection.close()
-        return df.to_string(index=False)
     except Exception as e:
         logging.error(f"Error fetching data for {url}: {e}")
         return f"Error fetching data for {url}: {e}"

-# Function to generate RSS feed from MySQL data
 def generate_rss_feed(storage_location: str, url: str) -> str:
     """
-    Generates an RSS feed for the latest changes detected on a given URL from the MySQL database.
     """
     try:
         connection = get_db_connection()
-
-

-
-
-
-

-
-

-
-
-

-
-
-        title.text = f"RSS Feed for {urlparse(url).hostname}"

-
-

-
-

-
-
-        item = ET.SubElement(channel, "item")

-
-

-
-

-
-

-
-
-
-

-        # Generate the XML string
-        rss_feed = ET.tostring(rss, encoding="utf-8", method="xml")
-        return rss_feed.decode("utf-8")
     except Exception as e:
         logging.error(f"Error generating RSS feed for {url}: {e}")
         return f"Error generating RSS feed for {url}: {e}"
-    finally:
-        cursor.close()
-        connection.close()

 # Function to load the Mistral model
 def load_model():

@@ -500,20 +608,62 @@ def filter_data(column: str, words: list) -> str:
     Saves the filtered data to a new CSV file.
     """
     try:
-
-
             return "No CSV files found to filter."

         df = pd.read_csv(latest_csv)
-
-
-

         if filtered_df.empty:
             return f"No records found with words {words} in column '{column}'."

         # Save the filtered data to a new CSV
-        filtered_csv = latest_csv.replace(".csv", "
         filtered_df.to_csv(filtered_csv, index=False)
         logging.info(f"Data filtered on column '{column}' for words {words}.")
         return f"Data filtered and saved to {filtered_csv}."

@@ -527,16 +677,56 @@ def sort_data(column: str, order: str) -> str:
     Saves the sorted data to a new CSV file.
     """
    try:
-
-
             return "No CSV files found to sort."

         df = pd.read_csv(latest_csv)
         ascending = True if order.lower() == "ascending" else False
         sorted_df = df.sort_values(by=column, ascending=ascending)

         # Save the sorted data to a new CSV
-        sorted_csv = latest_csv.replace(".csv", f"_sorted_{order.lower()}.csv")
         sorted_df.to_csv(sorted_csv, index=False)
         logging.info(f"Data sorted on column '{column}' in {order} order.")
         return f"Data sorted and saved to {sorted_csv}."

@@ -549,12 +739,40 @@ def export_csv(filename: str) -> str:
     Exports the latest scraped data to a specified CSV filename.
     """
     try:
-
-
             return "No CSV files found to export."

-
         df = pd.read_csv(latest_csv)
         df.to_csv(export_path, index=False)
         logging.info(f"Data exported to {export_path}.")
         return f"Data exported to {export_path}."

@@ -564,50 +782,68 @@ def export_csv(filename: str) -> str:

 def log_action(action: str) -> str:
     """
-    Logs a custom action message to the MySQL database.
     """
     try:
         connection = get_db_connection()
-        if
-
-
-
-
-
-
-
-
-
-
-

-
-
-
     except Exception as e:
         logging.error(f"Error logging action: {e}")
         return f"Error logging action: {e}"

 def get_latest_csv() -> str:
     """
     Retrieves the latest CSV file from the storage directory based on modification time.
     """
     try:
-
-        if
             return None

-        latest_csv =
-        latest_time = 0
-        for dir_name in storage_dirs:
-            dir_path = os.path.join(DEFAULT_FILE_PATH, dir_name)
-            csv_files = [f for f in os.listdir(dir_path) if f.endswith("_changes.csv") or f.endswith("_filtered.csv") or f.endswith("_sorted_asc.csv") or f.endswith("_sorted_desc.csv")]
-            for csv_file in csv_files:
-                csv_path = os.path.join(dir_path, csv_file)
-                mod_time = os.path.getmtime(csv_path)
-                if mod_time > latest_time:
-                    latest_time = mod_time
-                    latest_csv = csv_path
         return latest_csv
     except Exception as e:
         logging.error(f"Error retrieving latest CSV: {e}")

@@ -658,30 +894,6 @@ def respond(
         logging.error(f"Error generating response: {e}")
         return "Error generating response."

-# Function to load the Mistral model
-def load_model():
-    """
-    Loads the Mistral model and tokenizer once and returns the pipeline.
-    """
-    model_name = "mistralai/Mixtral-8x7B-Instruct-v0.1"
-    try:
-        tokenizer = AutoTokenizer.from_pretrained(model_name)
-        model = AutoModelForSeq2SeqLM.from_pretrained(model_name)
-        pipe = pipeline(
-            "text-generation",
-            model=model,
-            tokenizer=tokenizer,
-            device=0 if torch.cuda.is_available() else -1,
-        )
-        logging.info("Mistral model loaded successfully.")
-        return pipe
-    except Exception as e:
-        logging.error(f"Error loading Mistral model: {e}")
-        return None
-
-# Load the model once at the start
-chat_pipeline = load_model()
-
 # Define the Gradio interface
 def create_interface() -> gr.Blocks:
     """
After: the same sections as they stand after this commit (added lines are marked with "+").

@@ -53,6 +53,7 @@ STOP_THREADS = False # Flag to stop scraping threads
 def get_db_connection():
     """
     Establishes and returns a MySQL database connection using environment variables.
+    Returns None if connection fails.
     """
     try:
         connection = mysql.connector.connect(

@@ -66,11 +67,11 @@ def get_db_connection():
         return connection
     except mysql.connector.Error as err:
         if err.errno == errorcode.ER_ACCESS_DENIED_ERROR:
+            logging.warning("Invalid database credentials. Falling back to CSV storage.")
         elif err.errno == errorcode.ER_BAD_DB_ERROR:
+            logging.warning("Database does not exist. Falling back to CSV storage.")
         else:
+            logging.warning(f"MySQL connection error: {err}. Falling back to CSV storage.")
         return None

 # Initialize Database

@@ -80,7 +81,7 @@ def initialize_database():
     """
     connection = get_db_connection()
     if connection is None:
+        logging.info("Database initialization skipped. Using CSV storage.")
         return

     cursor = connection.cursor()

@@ -124,7 +125,7 @@ def monitor_urls(
     selector: str = None,
 ):
     """
+    Monitors the specified URLs for changes and logs any detected changes to the database or CSV.
     """
     global HISTORY, STOP_THREADS
     previous_hashes = {url: "" for url in urls}

@@ -171,21 +172,28 @@ def monitor_urls(
                 date_time_str = datetime.datetime.now().strftime("%Y-%m-%d %H:%M:%S")
                 HISTORY.append(f"Change detected at {url} on {date_time_str}")

+                # Attempt to log to database
                 connection = get_db_connection()
                 if connection:
+                    try:
+                        cursor = connection.cursor()
+                        insert_query = """
+                            INSERT INTO scraped_data (url, content_hash, change_detected)
+                            VALUES (%s, %s, %s)
+                        """
+                        cursor.execute(insert_query, (url, current_hash, date_time_str))
+                        connection.commit()
+                        logging.info(f"Change detected at {url} on {date_time_str} and logged to database.")
+                    except mysql.connector.Error as err:
+                        logging.error(f"Error inserting data into database: {err}")
+                        # Fallback to CSV
+                        log_to_csv(storage_location, url, current_hash, date_time_str)
+                    finally:
+                        cursor.close()
+                        connection.close()
                 else:
+                    # Fallback to CSV
+                    log_to_csv(storage_location, url, current_hash, date_time_str)

         except (
             NoSuchElementException,

@@ -199,6 +207,33 @@ def monitor_urls(
         driver.quit()
     logging.info("ChromeDriver session ended.")

+def log_to_csv(storage_location: str, url: str, content_hash: str, change_detected: str):
+    """
+    Logs the change to a CSV file in the storage_location.
+    """
+    try:
+        os.makedirs(storage_location, exist_ok=True)
+        csv_file_path = os.path.join(storage_location, f"{urlparse(url).hostname}_changes.csv")
+        file_exists = os.path.isfile(csv_file_path)
+
+        with open(csv_file_path, "a", newline="", encoding="utf-8") as csvfile:
+            fieldnames = ["date", "time", "url", "content_hash", "change"]
+            writer = csv.DictWriter(csvfile, fieldnames=fieldnames)
+            if not file_exists:
+                writer.writeheader()
+            writer.writerow(
+                {
+                    "date": change_detected.split()[0],
+                    "time": change_detected.split()[1],
+                    "url": url,
+                    "content_hash": content_hash,
+                    "change": "Content changed",
+                }
+            )
+        logging.info(f"Change detected at {url} on {change_detected} and logged to CSV.")
+    except Exception as e:
+        logging.error(f"Error logging data to CSV: {e}")
+
 # Function to create WebDriver
 def create_driver(options: Options) -> webdriver.Chrome:
     """
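For orientation, a minimal sketch of how the _changes.csv file written by log_to_csv above could be read back for inspection. The storage directory and hostname below are invented examples, not values taken from this Space.

import os
import pandas as pd

storage_location = "scraped_data"   # assumed example directory
hostname = "example.com"            # assumed example hostname
csv_path = os.path.join(storage_location, f"{hostname}_changes.csv")

if os.path.exists(csv_path):
    # Columns written by log_to_csv: date, time, url, content_hash, change
    changes = pd.read_csv(csv_path)
    print(changes.tail(5).to_string(index=False))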
@@ -296,21 +331,28 @@ def start_scraping(
                 if initial_hash:
                     HISTORY.append(f"Initial observation at {url}: {initial_hash}")

+                    # Attempt to log to database
                     connection = get_db_connection()
                     if connection:
+                        try:
+                            cursor = connection.cursor()
+                            insert_query = """
+                                INSERT INTO scraped_data (url, content_hash, change_detected)
+                                VALUES (%s, %s, %s)
+                            """
+                            cursor.execute(insert_query, (url, initial_hash, datetime.datetime.now().strftime("%Y-%m-%d %H:%M:%S")))
+                            connection.commit()
+                            logging.info(f"Initial observation logged for {url} in database.")
+                        except mysql.connector.Error as err:
+                            logging.error(f"Error inserting initial observation into database: {err}")
+                            # Fallback to CSV
+                            log_to_csv(storage_location, url, initial_hash, datetime.datetime.now().strftime("%Y-%m-%d %H:%M:%S"))
+                        finally:
+                            cursor.close()
+                            connection.close()
                     else:
+                        # Fallback to CSV
+                        log_to_csv(storage_location, url, initial_hash, datetime.datetime.now().strftime("%Y-%m-%d %H:%M:%S"))

         except Exception as e:
             HISTORY.append(f"Error accessing {url}: {e}")

@@ -339,91 +381,157 @@ def stop_scraping() -> str:
     logging.info("Scraping stop signal sent.")
     return "Scraping has been stopped."

+# Function to display CSV content from MySQL or CSV
 def display_csv(storage_location: str, url: str) -> str:
     """
+    Fetches and returns the scraped data for a given URL from the MySQL database or CSV.
     """
     try:
         connection = get_db_connection()
+        if connection:
+            try:
+                cursor = connection.cursor(dictionary=True)
+                query = "SELECT * FROM scraped_data WHERE url = %s ORDER BY change_detected DESC"
+                cursor.execute(query, (url,))
+                results = cursor.fetchall()
+
+                if not results:
+                    return "No data available for the selected URL."
+
+                df = pd.DataFrame(results)
+                cursor.close()
+                connection.close()
+                return df.to_string(index=False)
+            except mysql.connector.Error as err:
+                logging.error(f"Error fetching data from database: {err}")
+                # Fallback to CSV
+        else:
+            logging.info("No database connection. Fetching data from CSV.")

+        # Fallback to CSV
+        hostname = urlparse(url).hostname
+        csv_path = os.path.join(storage_location, f"{hostname}_changes.csv")
+        if os.path.exists(csv_path):
+            df = pd.read_csv(csv_path)
+            return df.to_string(index=False)
+        else:
+            return "No data available."

     except Exception as e:
         logging.error(f"Error fetching data for {url}: {e}")
         return f"Error fetching data for {url}: {e}"

+# Function to generate RSS feed from MySQL or CSV data
 def generate_rss_feed(storage_location: str, url: str) -> str:
     """
+    Generates an RSS feed for the latest changes detected on a given URL from the MySQL database or CSV.
     """
     try:
         connection = get_db_connection()
+        rss_feed = ""
+
+        if connection:
+            try:
+                cursor = connection.cursor(dictionary=True)
+                query = "SELECT * FROM scraped_data WHERE url = %s ORDER BY change_detected DESC LIMIT 10"
+                cursor.execute(query, (url,))
+                results = cursor.fetchall()
+
+                if not results:
+                    return "No changes detected to include in RSS feed."
+
+                # Create the root RSS element
+                rss = ET.Element("rss", version="2.0")
+                channel = ET.SubElement(rss, "channel")
+
+                # Add channel elements
+                title = ET.SubElement(channel, "title")
+                title.text = f"RSS Feed for {urlparse(url).hostname}"
+
+                link = ET.SubElement(channel, "link")
+                link.text = url
+
+                description = ET.SubElement(channel, "description")
+                description.text = "Recent changes detected on the website."
+
+                # Add items to the feed
+                for row in results:
+                    item = ET.SubElement(channel, "item")
+
+                    item_title = ET.SubElement(item, "title")
+                    item_title.text = f"Change detected at {row['url']}"
+
+                    item_link = ET.SubElement(item, "link")
+                    item_link.text = row["url"]
+
+                    item_description = ET.SubElement(item, "description")
+                    item_description.text = f"Content changed on {row['change_detected']}"
+
+                    pub_date = ET.SubElement(item, "pubDate")
+                    pub_date.text = datetime.datetime.strptime(
+                        str(row['change_detected']), "%Y-%m-%d %H:%M:%S"
+                    ).strftime("%a, %d %b %Y %H:%M:%S +0000")
+
+                # Generate the XML string
+                rss_feed = ET.tostring(rss, encoding="utf-8", method="xml").decode("utf-8")
+                cursor.close()
+                connection.close()
+                return rss_feed
+            except mysql.connector.Error as err:
+                logging.error(f"Error fetching data from database: {err}")
+                # Fallback to CSV
+        else:
+            logging.info("No database connection. Generating RSS feed from CSV.")

+        # Fallback to CSV
+        hostname = urlparse(url).hostname
+        csv_path = os.path.join(storage_location, f"{hostname}_changes.csv")
+        if os.path.exists(csv_path):
+            df = pd.read_csv(csv_path).tail(10)
+            if df.empty:
+                return "No changes detected to include in RSS feed."

+            # Create the root RSS element
+            rss = ET.Element("rss", version="2.0")
+            channel = ET.SubElement(rss, "channel")

+            # Add channel elements
+            title = ET.SubElement(channel, "title")
+            title.text = f"RSS Feed for {hostname}"

+            link = ET.SubElement(channel, "link")
+            link.text = url

+            description = ET.SubElement(channel, "description")
+            description.text = "Recent changes detected on the website."

+            # Add items to the feed
+            for _, row in df.iterrows():
+                item = ET.SubElement(channel, "item")

+                item_title = ET.SubElement(item, "title")
+                item_title.text = f"Change detected at {row['url']}"

+                item_link = ET.SubElement(item, "link")
+                item_link.text = row["url"]

+                item_description = ET.SubElement(item, "description")
+                item_description.text = f"Content changed on {row['date']} at {row['time']}"

+                pub_date = ET.SubElement(item, "pubDate")
+                pub_date.text = datetime.datetime.strptime(
+                    f"{row['date']} {row['time']}", "%Y-%m-%d %H:%M:%S"
+                ).strftime("%a, %d %b %Y %H:%M:%S +0000")

+            # Generate the XML string
+            rss_feed = ET.tostring(rss, encoding="utf-8", method="xml").decode("utf-8")
+            return rss_feed
+        else:
+            return "No data available."

     except Exception as e:
         logging.error(f"Error generating RSS feed for {url}: {e}")
         return f"Error generating RSS feed for {url}: {e}"

 # Function to load the Mistral model
 def load_model():
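As a quick sanity check on the feed produced above, the returned string can be parsed with the standard library. This is an illustrative sketch; the storage path and URL arguments are placeholders, not values from this Space.

import xml.etree.ElementTree as ET

feed_xml = generate_rss_feed("scraped_data", "https://example.com")  # placeholder arguments
try:
    root = ET.fromstring(feed_xml)  # raises ParseError if the feed is not valid XML
    for item in root.iter("item"):
        print(item.findtext("title"), "-", item.findtext("pubDate"))
except ET.ParseError:
    # generate_rss_feed returns a plain status message when no data is available
    print(feed_xml)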
@@ -500,20 +608,62 @@ def filter_data(column: str, words: list) -> str:
     Saves the filtered data to a new CSV file.
     """
     try:
+        storage_location = DEFAULT_FILE_PATH
+        url = ""  # Placeholder since filtering isn't URL-specific here
+
+        connection = get_db_connection()
+        if connection:
+            try:
+                cursor = connection.cursor(dictionary=True)
+                # Fetch all data
+                query = "SELECT * FROM scraped_data"
+                cursor.execute(query)
+                results = cursor.fetchall()
+
+                if not results:
+                    return "No data available to filter."
+
+                df = pd.DataFrame(results)
+                # Create a regex pattern to match any of the words
+                pattern = '|'.join(words)
+                if column not in df.columns:
+                    return f"Column '{column}' does not exist in the data."
+
+                filtered_df = df[df[column].astype(str).str.contains(pattern, case=False, na=False)]
+
+                if filtered_df.empty:
+                    return f"No records found with words {words} in column '{column}'."
+
+                # Save the filtered data to a new CSV
+                filtered_csv = os.path.join(storage_location, f"filtered_data_{int(time.time())}.csv")
+                filtered_df.to_csv(filtered_csv, index=False)
+                logging.info(f"Data filtered on column '{column}' for words {words}.")
+                return f"Data filtered and saved to {filtered_csv}."
+            except mysql.connector.Error as err:
+                logging.error(f"Error fetching data from database: {err}")
+                # Fallback to CSV
+        else:
+            logging.info("No database connection. Filtering data from CSV.")
+
+        # Fallback to CSV
+        csv_files = [f for f in os.listdir(storage_location) if f.endswith("_changes.csv") or f.endswith("_filtered.csv") or f.endswith("_sorted_asc.csv") or f.endswith("_sorted_desc.csv")]
+        if not csv_files:
             return "No CSV files found to filter."

+        # Assume the latest CSV is the target
+        latest_csv = max([os.path.join(storage_location, f) for f in csv_files], key=os.path.getmtime)
         df = pd.read_csv(latest_csv)
+
+        if column not in df.columns:
+            return f"Column '{column}' does not exist in the data."
+
+        filtered_df = df[df[column].astype(str).str.contains('|'.join(words), case=False, na=False)]

         if filtered_df.empty:
             return f"No records found with words {words} in column '{column}'."

         # Save the filtered data to a new CSV
+        filtered_csv = latest_csv.replace(".csv", f"_filtered_{int(time.time())}.csv")
         filtered_df.to_csv(filtered_csv, index=False)
         logging.info(f"Data filtered on column '{column}' for words {words}.")
         return f"Data filtered and saved to {filtered_csv}."
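The filtering above joins the search words into a regex alternation and matches it case-insensitively with pandas. A small toy illustration of that idea; the DataFrame contents here are invented for the example.

import pandas as pd

df = pd.DataFrame({
    "url": ["https://example.com/a", "https://example.com/b"],
    "change": ["Content changed", "No change"],
})
words = ["changed", "updated"]

# "changed|updated"; re.escape(w) would be safer if words may contain regex metacharacters
pattern = "|".join(words)
mask = df["change"].astype(str).str.contains(pattern, case=False, na=False)
print(df[mask])  # rows whose 'change' column matches any of the words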
@@ -527,16 +677,56 @@ def sort_data(column: str, order: str) -> str:
     Saves the sorted data to a new CSV file.
     """
     try:
+        storage_location = DEFAULT_FILE_PATH
+        url = ""  # Placeholder since sorting isn't URL-specific here
+
+        connection = get_db_connection()
+        if connection:
+            try:
+                cursor = connection.cursor(dictionary=True)
+                # Fetch all data
+                query = "SELECT * FROM scraped_data"
+                cursor.execute(query)
+                results = cursor.fetchall()
+
+                if not results:
+                    return "No data available to sort."
+
+                df = pd.DataFrame(results)
+                if column not in df.columns:
+                    return f"Column '{column}' does not exist in the data."
+
+                ascending = True if order.lower() == "ascending" else False
+                sorted_df = df.sort_values(by=column, ascending=ascending)
+
+                # Save the sorted data to a new CSV
+                sorted_csv = os.path.join(storage_location, f"sorted_data_{column}_{order.lower()}_{int(time.time())}.csv")
+                sorted_df.to_csv(sorted_csv, index=False)
+                logging.info(f"Data sorted on column '{column}' in {order} order.")
+                return f"Data sorted and saved to {sorted_csv}."
+            except mysql.connector.Error as err:
+                logging.error(f"Error fetching data from database: {err}")
+                # Fallback to CSV
+        else:
+            logging.info("No database connection. Sorting data from CSV.")
+
+        # Fallback to CSV
+        csv_files = [f for f in os.listdir(storage_location) if f.endswith("_changes.csv") or f.endswith("_filtered.csv") or f.endswith("_sorted_asc.csv") or f.endswith("_sorted_desc.csv")]
+        if not csv_files:
             return "No CSV files found to sort."

+        # Assume the latest CSV is the target
+        latest_csv = max([os.path.join(storage_location, f) for f in csv_files], key=os.path.getmtime)
         df = pd.read_csv(latest_csv)
+
+        if column not in df.columns:
+            return f"Column '{column}' does not exist in the data."
+
         ascending = True if order.lower() == "ascending" else False
         sorted_df = df.sort_values(by=column, ascending=ascending)

         # Save the sorted data to a new CSV
+        sorted_csv = latest_csv.replace(".csv", f"_sorted_{order.lower()}_{int(time.time())}.csv")
         sorted_df.to_csv(sorted_csv, index=False)
         logging.info(f"Data sorted on column '{column}' in {order} order.")
         return f"Data sorted and saved to {sorted_csv}."
@@ -549,12 +739,40 @@ def export_csv(filename: str) -> str:
     Exports the latest scraped data to a specified CSV filename.
     """
     try:
+        storage_location = DEFAULT_FILE_PATH
+
+        connection = get_db_connection()
+        if connection:
+            try:
+                cursor = connection.cursor(dictionary=True)
+                # Fetch all data
+                query = "SELECT * FROM scraped_data"
+                cursor.execute(query)
+                results = cursor.fetchall()
+
+                if not results:
+                    return "No data available to export."
+
+                df = pd.DataFrame(results)
+                export_path = os.path.join(storage_location, filename)
+                df.to_csv(export_path, index=False)
+                logging.info(f"Data exported to {export_path}.")
+                return f"Data exported to {export_path}."
+            except mysql.connector.Error as err:
+                logging.error(f"Error exporting data from database: {err}")
+                # Fallback to CSV
+        else:
+            logging.info("No database connection. Exporting data from CSV.")
+
+        # Fallback to CSV
+        csv_files = [f for f in os.listdir(storage_location) if f.endswith("_changes.csv") or f.endswith("_filtered.csv") or f.endswith("_sorted_asc.csv") or f.endswith("_sorted_desc.csv")]
+        if not csv_files:
             return "No CSV files found to export."

+        # Assume the latest CSV is the target
+        latest_csv = max([os.path.join(storage_location, f) for f in csv_files], key=os.path.getmtime)
         df = pd.read_csv(latest_csv)
+        export_path = os.path.join(storage_location, filename)
         df.to_csv(export_path, index=False)
         logging.info(f"Data exported to {export_path}.")
         return f"Data exported to {export_path}."
@@ -564,50 +782,68 @@ def export_csv(filename: str) -> str:

 def log_action(action: str) -> str:
     """
+    Logs a custom action message to the MySQL database or CSV.
     """
     try:
         connection = get_db_connection()
+        if connection:
+            try:
+                cursor = connection.cursor()
+                insert_query = """
+                    INSERT INTO action_logs (action)
+                    VALUES (%s)
+                """
+                cursor.execute(insert_query, (action,))
+                connection.commit()
+                logging.info(f"Action logged in database: {action}")
+                cursor.close()
+                connection.close()
+                return f"Action logged: {action}"
+            except mysql.connector.Error as err:
+                logging.error(f"Error logging action to database: {err}")
+                # Fallback to CSV
+        else:
+            logging.info("No database connection. Logging action to CSV.")

+        # Fallback to CSV
+        storage_location = DEFAULT_FILE_PATH
+        try:
+            os.makedirs(storage_location, exist_ok=True)
+            csv_file_path = os.path.join(storage_location, "action_logs.csv")
+            file_exists = os.path.isfile(csv_file_path)
+
+            with open(csv_file_path, "a", newline="", encoding="utf-8") as csvfile:
+                fieldnames = ["timestamp", "action"]
+                writer = csv.DictWriter(csvfile, fieldnames=fieldnames)
+                if not file_exists:
+                    writer.writeheader()
+                writer.writerow(
+                    {
+                        "timestamp": datetime.datetime.now().strftime("%Y-%m-%d %H:%M:%S"),
+                        "action": action,
+                    }
+                )
+            logging.info(f"Action logged to CSV: {action}")
+            return f"Action logged: {action}"
+        except Exception as e:
+            logging.error(f"Error logging action to CSV: {e}")
+            return f"Error logging action: {e}"
     except Exception as e:
         logging.error(f"Error logging action: {e}")
         return f"Error logging action: {e}"

+# Function to get the latest CSV file based on modification time
 def get_latest_csv() -> str:
     """
     Retrieves the latest CSV file from the storage directory based on modification time.
     """
     try:
+        storage_location = DEFAULT_FILE_PATH
+        csv_files = [f for f in os.listdir(storage_location) if f.endswith(".csv")]
+        if not csv_files:
             return None

+        latest_csv = max([os.path.join(storage_location, f) for f in csv_files], key=os.path.getmtime)
         return latest_csv
     except Exception as e:
         logging.error(f"Error retrieving latest CSV: {e}")
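Several fallback paths above pick the most recently modified CSV with max(..., key=os.path.getmtime). A compact, self-contained illustration of that selection, using an assumed directory name rather than the app's configured path.

import os

storage_location = "scraped_data"  # assumed example directory
if os.path.isdir(storage_location):
    csv_files = [f for f in os.listdir(storage_location) if f.endswith(".csv")]
    if csv_files:
        # The file with the largest modification time is the most recently written one
        latest_csv = max((os.path.join(storage_location, f) for f in csv_files), key=os.path.getmtime)
        print("Most recent CSV:", latest_csv)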
@@ -658,30 +894,6 @@ def respond(
         logging.error(f"Error generating response: {e}")
         return "Error generating response."

 # Define the Gradio interface
 def create_interface() -> gr.Blocks:
     """