Spaces:

acecalisto3
/

CEEMEESEEK

Runtime error

App Files Files Community

acecalisto3 committed 23 days ago

Commit

2288f21

•

1 Parent(s): ec77d41

Update app.py

Browse files

Files changed (1) hide show

app.py +278 -147

app.py CHANGED Viewed

@@ -1,161 +1,292 @@
-import gradio as gr
-import pandas as pd
-import sqlite3
-from feedgen.feed import FeedGenerator
 import datetime
 import os
-import logging
-import sys
 import csv
-import traceback
-sys.path.append('/home/user')
-from .background_tasks import start_background_monitoring, create_database
-# Set up absolute paths
-BASE_DIR = '/home/user/app/scraped_data/culver'
-LOG_FILE = os.path.join(BASE_DIR, 'main.log')
-CSV_FILE = os.path.join(BASE_DIR, 'culvers_changes.csv')
-DB_FILE = os.path.join(BASE_DIR, 'culvers_changes.db')
-XML_FILE = os.path.join(BASE_DIR, 'culvers_changes.xml')
-# Ensure the directory exists
-try:
-    os.makedirs(BASE_DIR, exist_ok=True)
-    print(f"Directory created or already exists: {BASE_DIR}")
-except Exception as e:
-    print(f"Error creating directory: {e}")
-    traceback.print_exc()
-# Configure logging
-try:
-    logging.basicConfig(filename=LOG_FILE, level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s')
-    print(f"Logging configured. Log file: {LOG_FILE}")
-except Exception as e:
-    print(f"Error configuring logging: {e}")
-    traceback.print_exc()
-# Write directly to log file
-try:
-    with open(LOG_FILE, 'w') as log_file:
-        log_file.write(f"Log file created at {datetime.datetime.now()}\n")
-    print(f"Log file created: {LOG_FILE}")
-except Exception as e:
-    print(f"Error writing to log file: {e}")
-    traceback.print_exc()
-# Write directly to CSV file
-try:
-    with open(CSV_FILE, 'w', newline='') as csv_file:
-        writer = csv.writer(csv_file)
-        writer.writerow(['date', 'time', 'url', 'change'])
-        writer.writerow([datetime.datetime.now().strftime("%Y-%m-%d"), datetime.datetime.now().strftime("%H:%M:%S"), 'Initial', 'CSV file created'])
-    print(f"CSV file created: {CSV_FILE}")
-except Exception as e:
-    print(f"Error writing to CSV file: {e}")
-    traceback.print_exc()
-# Start background monitoring
-urls = ["https://www.culver.k12.in.us/", "https://www.facebook.com/CulverCommunitySchools"]
-try:
-    start_background_monitoring(CSV_FILE, urls, 1, "text")  # Changed interval to 1 minute for testing
-    print("Background monitoring started")
-except Exception as e:
-    print(f"Error starting background monitoring: {e}")
-    traceback.print_exc()
-logging.info("Background monitoring initiated from main.py")
-def view_scraped_data():
     try:
-        create_database()  # Ensure the database and table exist
-        conn = sqlite3.connect(DB_FILE)
-        df = pd.read_sql_query("SELECT * FROM changes ORDER BY date DESC, time DESC LIMIT 50", conn)
-        conn.close()
-        return df
     except Exception as e:
-        print(f"Error viewing scraped data: {e}")
-        traceback.print_exc()
-        return pd.DataFrame()
-def view_rss_feed():
     try:
-        with open(XML_FILE, 'r') as file:
-            return file.read()
-    except FileNotFoundError:
-        return "RSS feed not generated yet."
     except Exception as e:
-        print(f"Error viewing RSS feed: {e}")
-        traceback.print_exc()
-        return "Error viewing RSS feed"
 def generate_rss_feed():
-    try:
-        create_database()  # Ensure the database and table exist
-        fg = FeedGenerator()
-        fg.title('Culvers Site Changes')
-        fg.link(href='http://example.com', rel='alternate')
-        fg.description('Recent changes detected on Culvers websites')
-        conn = sqlite3.connect(DB_FILE)
-        c = conn.cursor()
-        c.execute("SELECT * FROM changes ORDER BY date DESC, time DESC LIMIT 20")
-        changes = c.fetchall()
-        for change in changes:
-            fe = fg.add_entry()
-            fe.id(str(change[0]))
-            fe.title(f'Change detected at {change[3]}')
-            fe.link(href=change[3])
-            fe.description(change[4])
-            fe.pubDate(datetime.datetime.strptime(f"{change[1]} {change[2]}", "%Y-%m-%d %H:%M:%S"))
-        conn.close()
-        fg.rss_file(XML_FILE)
-        return "RSS feed generated successfully."
-    except Exception as e:
-        print(f"Error generating RSS feed: {e}")
-        traceback.print_exc()
-        return "Error generating RSS feed"
-def create_viewer():
-    with gr.Blocks() as demo:
-        gr.Markdown("# Culvers Site Monitor and Viewer")
-        with gr.Tab("Monitor Status"):
-            gr.Markdown("Continuous monitoring is active for the following URLs:")
-            for url in urls:
-                gr.Markdown(f"- {url}")
-            gr.Markdown(f"Monitoring interval: 1 minute")
-            gr.Markdown(f"Data is being stored in: {CSV_FILE}")
-        with gr.Tab("View Scraped Data"):
-            gr.DataFrame(view_scraped_data, label="Recent Changes")
-        with gr.Tab("View RSS Feed"):
-            gr.TextArea(view_rss_feed, label="RSS Feed Content")
-            gr.Button("Generate RSS Feed").click(generate_rss_feed, outputs=gr.TextArea(label="Generation Status"))
-    return demo
-if __name__ == "__main__":
-    try:
-        # Create the database and table before launching the viewer
-        create_database()
-        print("Database created")
-        # Create and launch the viewer
-        viewer = create_viewer()
-        print("Viewer created")
-        viewer.launch()
-        print("Viewer launched")
-        logging.info("Web-based viewer created and launched with continuous monitoring.")
-    except Exception as e:
-        print(f"Error in main execution: {e}")
-        traceback.print_exc()
-print("Main application file updated with error handling, console logging, and all necessary functions.")

 import datetime
 import os
 import csv
+import time
+import hashlib
+import threading
+from pathlib import Path
+import gradio as gr
+from selenium import webdriver
+from selenium.webdriver.chrome.service import Service
+from selenium.webdriver.chrome.options import Options
+from selenium.webdriver.common.by import By
+from webdriver_manager.chrome import ChromeDriverManager
+from huggingface_hub import InferenceClient
+import mysql.connector
+import feedparser  # For parsing RSS feeds
+import sqlite3  # For simple local storage if needed
+# Configuration (replace with your actual values or environment variables)
+# MySQL connection settings: each is read from the environment and falls back
+# to the placeholder string on the right when the variable is unset.
+DB_HOST = os.environ.get("DB_HOST", "your_host")
+DB_USER = os.environ.get("DB_USER", "your_user")
+DB_PASSWORD = os.environ.get("DB_PASSWORD", "your_password")
+DB_NAME = os.environ.get("DB_NAME", "your_database")
+# Global variables
+# Thread created by start_monitoring; None until monitoring is first started.
+monitoring_thread = None
+# Set by stop_monitoring to ask the monitoring loop to exit.
+stop_event = threading.Event()
+# Cached MySQL connection, created and reused by get_db_connection.
+db_connection = None
+# Description of the running monitoring task; reset to None when stopped.
+current_task = None
+# Human-readable log lines appended by the monitoring functions.
+history = []
+# Function to establish a database connection
+def get_db_connection():
+    """Return the shared MySQL connection, opening it on first use.
+
+    The connection is cached in the module-level ``db_connection`` and is
+    re-created when it is missing or reports that it is no longer connected.
+
+    Returns:
+        The live connection object, or ``None`` when connecting fails (the
+        error is printed).
+    """
+    global db_connection
+    # Fast path: the cached connection is still usable.
+    if db_connection is not None and db_connection.is_connected():
+        return db_connection
+    try:
+        db_connection = mysql.connector.connect(
+            host=DB_HOST,
+            user=DB_USER,
+            password=DB_PASSWORD,
+            database=DB_NAME
+        )
+    except Exception as e:
+        print(f"Error connecting to database: {e}")
+        return None
+    return db_connection
+# Function to create the articles table if it doesn't exist
+def create_articles_table():
+    """Create the ``articles`` table when it does not exist yet.
+
+    Does nothing when no database connection is available. The cursor is
+    closed even if the statement fails; the exception itself still
+    propagates to the caller.
+    """
+    conn = get_db_connection()
+    if not conn:
+        return
+    cursor = conn.cursor()
+    try:
+        # content is MEDIUMTEXT because the monitor stores the full page
+        # source, which routinely exceeds the 64 KB limit of a TEXT column.
+        cursor.execute("""
+            CREATE TABLE IF NOT EXISTS articles (
+                id INT AUTO_INCREMENT PRIMARY KEY,
+                url VARCHAR(255) NOT NULL,
+                title VARCHAR(255),
+                content MEDIUMTEXT,
+                hash VARCHAR(32),
+                timestamp TIMESTAMP DEFAULT CURRENT_TIMESTAMP
+            )
+        """)
+        conn.commit()
+    finally:
+        cursor.close()
+# Initialize the articles table
+create_articles_table()
+# Function to monitor URLs for changes
+def monitor_urls(target_urls, storage_location, feed_rss, stop_event):
+    """Poll each URL in a headless Chrome session and record page changes.
+
+    A page counts as changed when the MD5 of its page source differs from the
+    hash seen on the previous pass, so the first pass reports every URL once.
+    Each change is appended to the module-level ``history`` list, written to
+    the CSV at ``storage_location`` when one is given, and inserted into the
+    database when ``feed_rss`` is truthy.
+
+    Args:
+        target_urls: List of URL strings to poll.
+        storage_location: CSV path handed to ``save_to_storage``; a falsy
+            value skips CSV output.
+        feed_rss: When truthy, changes are also passed to ``save_to_database``.
+        stop_event: ``threading.Event``; the loop exits once it is set.
+    """
+    global history
+    previous_hashes = {url: "" for url in target_urls}
+    options = Options()
+    # The ``Options.headless`` setter was removed from Selenium 4; the
+    # command-line switch is honoured by every Selenium 4 release.
+    options.add_argument("--headless")
+    options.add_argument("--disable-gpu")
+    options.add_argument("--no-sandbox")
+    options.add_argument("--disable-dev-shm-usage")
+    driver = webdriver.Chrome(service=Service(ChromeDriverManager().install()), options=options)
     try:
+        while not stop_event.is_set():
+            for url in target_urls:
+                # Do not start another page load once a stop was requested.
+                if stop_event.is_set():
+                    break
+                try:
+                    driver.get(url)
+                    time.sleep(2)
+                    current_content = driver.page_source
+                    current_hash = hashlib.md5(current_content.encode('utf-8')).hexdigest()
+                    if current_hash != previous_hashes[url]:
+                        previous_hashes[url] = current_hash
+                        timestamp = datetime.datetime.now()
+                        # <title> is never rendered, so WebElement.text is
+                        # empty for it; driver.title reads the document title.
+                        title = driver.title or "No Title"
+                        history.append(f"Change detected at {url} on {timestamp.strftime('%Y-%m-%d %H:%M:%S')}")
+                        if storage_location:
+                            save_to_storage(storage_location, url, title, current_content, timestamp)
+                        if feed_rss:
+                            save_to_database(url, title, current_content, current_hash)
+                except Exception as e:
+                    print(f"Error accessing {url}: {e}")
+            # Wait up to 5 minutes, but wake immediately when a stop is
+            # requested so stop_monitoring() does not block on join().
+            stop_event.wait(300)
     except Exception as e:
+        print(f"Unexpected error in monitoring thread: {e}")
+    finally:
+        driver.quit()
+        print("Monitoring thread has been stopped.")
+# Function to save data to local storage (CSV)
+def save_to_storage(storage_location, url, title, content, timestamp):
+    """Append one change record to the CSV file at ``storage_location``.
+
+    The row holds the formatted timestamp, the URL, the page title and the
+    page content, in that order. Any error is printed rather than raised.
+    """
     try:
+        record = [timestamp.strftime("%Y-%m-%d %H:%M:%S"), url, title, content]
+        with open(storage_location, "a", newline='', encoding='utf-8') as out_file:
+            csv.writer(out_file).writerow(record)
     except Exception as e:
+        print(f"Error saving to storage: {e}")
+# Function to save data to the database
+def save_to_database(url, title, content, hash):
+    """Insert one row into the ``articles`` table.
+
+    Does nothing when no database connection is available. Insert errors are
+    printed rather than raised, and the cursor is always closed.
+    """
+    conn = get_db_connection()
+    if not conn:
+        return
+    cursor = conn.cursor()
+    insert_sql = "INSERT INTO articles (url, title, content, hash) VALUES (%s, %s, %s, %s)"
+    try:
+        # Values are bound as parameters, never formatted into the SQL text.
+        cursor.execute(insert_sql, (url, title, content, hash))
+        conn.commit()
+    except Exception as e:
+        print(f"Error saving to database: {e}")
+    finally:
+        cursor.close()
+# Function to generate RSS feed from the database
 def generate_rss_feed():
+    """Build an RSS 2.0 document from the rows of the ``articles`` table.
+
+    Rows are emitted newest first, one ``<item>`` per stored change.
+
+    Returns:
+        The feed as an XML string, or ``None`` when no database connection is
+        available or an error occurs (the error is printed).
+    """
+    # feedparser only parses feeds and provides no generator, so the XML is
+    # assembled with the standard library instead.
+    import xml.etree.ElementTree as ET
+    from email.utils import format_datetime
+    conn = get_db_connection()
+    if not conn:
+        return None
+    cursor = conn.cursor()
+    try:
+        # Name the columns explicitly so the code does not depend on the
+        # physical column order of the table.
+        cursor.execute("SELECT url, title, content, timestamp FROM articles ORDER BY timestamp DESC")
+        articles = cursor.fetchall()
+        rss = ET.Element("rss", version="2.0")
+        channel = ET.SubElement(rss, "channel")
+        ET.SubElement(channel, "title").text = 'Website Changes Feed'
+        ET.SubElement(channel, "link").text = 'http://yourwebsite.com/feed'  # Replace with your actual feed URL
+        ET.SubElement(channel, "description").text = 'Feed of changes detected on monitored websites.'
+        for url, title, content, published in articles:
+            item = ET.SubElement(channel, "item")
+            ET.SubElement(item, "title").text = title or "No Title"
+            ET.SubElement(item, "link").text = url
+            ET.SubElement(item, "description").text = content
+            if published is not None:
+                # RSS expects RFC 822 dates; fall back to str() for drivers
+                # that return the timestamp as something other than datetime.
+                if isinstance(published, datetime.datetime):
+                    published_text = format_datetime(published)
+                else:
+                    published_text = str(published)
+                ET.SubElement(item, "pubDate").text = published_text
+        return ET.tostring(rss, encoding="unicode")
+    except Exception as e:
+        print(f"Error generating RSS feed: {e}")
+        return None
+    finally:
+        cursor.close()
+# Function to start monitoring
+def start_monitoring(target_urls, storage_location, feed_rss):
+    """Launch the background monitoring thread unless one is already alive.
+
+    Clears the stop flag, records the new task in ``current_task`` and
+    ``history``, and starts ``monitor_urls`` in a daemon thread.
+
+    Returns:
+        A ``(status_message, history)`` tuple.
+    """
+    global monitoring_thread, stop_event, current_task, history
+    already_running = monitoring_thread is not None and monitoring_thread.is_alive()
+    if already_running:
+        return "Monitoring is already running.", history
+    stop_event.clear()
+    joined_urls = ', '.join(target_urls)
+    current_task = f"Monitoring URLs: {joined_urls}"
+    history.append(f"Task started: {current_task}")
+    worker_args = (target_urls, storage_location, feed_rss, stop_event)
+    monitoring_thread = threading.Thread(target=monitor_urls, args=worker_args, daemon=True)
+    monitoring_thread.start()
+    return "Monitoring started.", history
+# Function to stop monitoring
+def stop_monitoring():
+    """Signal the monitoring thread to stop and wait for it to finish.
+
+    Returns:
+        A ``(status_message, history)`` tuple; the message says whether a
+        running task was actually stopped.
+    """
+    global monitoring_thread, stop_event, current_task, history
+    # Nothing to do when no thread was started or it has already exited.
+    if not (monitoring_thread and monitoring_thread.is_alive()):
+        return "No monitoring task is currently running.", history
+    stop_event.set()
+    monitoring_thread.join()
+    current_task = None
+    history.append("Monitoring stopped by user.")
+    return "Monitoring stopped.", history
+# Function to handle chatbot responses
+def chatbot_response(message, history):
+    """Send ``message`` to Mixtral-8x7B-Instruct and record the exchange.
+
+    Args:
+        message: The user's prompt text.
+        history: List of ``(user_message, bot_reply)`` pairs shown so far;
+            ``None`` is treated as an empty conversation.
+
+    Returns:
+        The updated history twice, one value per output component wired to
+        this handler.
+    """
+    # Guard against a missing history (e.g. a Chatbot with no messages yet).
+    history = history if history is not None else []
+    client = InferenceClient("mistralai/Mixtral-8x7B-Instruct-v0.1")
+    # InferenceClient exposes task-specific methods; text_generation returns
+    # the generated text as a plain string.
+    response = client.text_generation(message, max_new_tokens=512)
+    history.append((message, response))
+    return history, history
+# --- Gradio Interface ---
+# Three tabs: monitoring configuration, a view of the generated feed, and a
+# chatbot. Event handlers are defined inside the Blocks context below.
+with gr.Blocks() as demo:
+    gr.Markdown("# Website Monitor and Chatbot")
+    # Configuration Tab
+    with gr.Tab("Configuration"):
+        with gr.Row():
+            target_urls = gr.Textbox(
+                label="Target URLs (comma-separated)",
+                placeholder="https://example.com, https://another-site.com"
+            )
+        with gr.Row():
+            # NOTE(review): this field is hidden, so users cannot supply a CSV
+            # path from the UI - confirm whether that is intended.
+            storage_location = gr.Textbox(
+                label="Storage Location (CSV file path)",
+                placeholder="/path/to/your/file.csv",
+                visible=False
+            )
+        with gr.Row():
+            feed_rss_checkbox = gr.Checkbox(label="Enable RSS Feed")
+        with gr.Row():
+            start_button = gr.Button("Start Monitoring")
+            stop_button = gr.Button("Stop Monitoring")
+        with gr.Row():
+            status_text = gr.Textbox(label="Status", interactive=False)
+        with gr.Row():
+            history_text = gr.Textbox(
+                label="History", lines=10, interactive=False
+            )
+    # User-End View Tab
+    with gr.Tab("User-End View"):
+        with gr.Row():
+            feed_content = gr.JSON(label="RSS Feed Content")
+    # Chatbot Tab
+    with gr.Tab("Chatbot"):
+        chatbot_interface = gr.Chatbot()
+        with gr.Row():
+            message_input = gr.Textbox(placeholder="Type your message here...")
+            send_button = gr.Button("Send")
+    # --- Event Handlers ---
+    def format_history(entries):
+        """Render the history list as one entry per line for the textbox."""
+        return "\n".join(entries)
+    # Start monitoring button click
+    def on_start_click(target_urls_str, storage_loc, feed_enabled):
+        """Parse the comma-separated URL field and start monitoring.
+
+        Returns a ``(status, history_text)`` pair for the two output boxes.
+        """
+        try:
+            # The loop target must be a plain name; stripping belongs in the
+            # element expression.
+            urls = [url.strip() for url in target_urls_str.split(",")]
+            # An empty entry (blank field or stray comma) is rejected.
+            if not all(urls):
+                return "Please enter valid URLs.", format_history(history)
+            status, log = start_monitoring(urls, storage_loc if storage_loc else None, feed_enabled)
+            return status, format_history(log)
+        except Exception as e:
+            return f"Error starting monitoring: {e}", format_history(history)
+    start_button.click(
+        on_start_click,
+        inputs=[target_urls, storage_location, feed_rss_checkbox],
+        outputs=[status_text, history_text]
+    )
+    # Stop monitoring button click
+    def on_stop_click():
+        """Stop monitoring and return ``(status, history_text)``."""
+        status, log = stop_monitoring()
+        return status, format_history(log)
+    stop_button.click(
+        on_stop_click,
+        outputs=[status_text, history_text]
+    )
+    # Send message to chatbot button click
+    send_button.click(
+        chatbot_response,
+        inputs=[message_input, chatbot_interface],
+        outputs=[chatbot_interface, chatbot_interface]
+    )
+    # Update RSS feed content periodically
+    def update_feed_content():
+        """Fetch the current feed in a form the JSON component can display."""
+        feed_xml = generate_rss_feed()
+        # gr.JSON treats a str value as JSON text to parse, so wrap the feed
+        # in a dict instead of handing it a raw XML string.
+        return {"rss": feed_xml} if feed_xml else None
+    demo.load(update_feed_content, outputs=feed_content, every=5)  # Update every 5 seconds
+if __name__ == "__main__":
+    demo.launch()