acecalisto3 committed
Commit
2288f21
1 Parent(s): ec77d41

Update app.py

Files changed (1)
  1. app.py +278 -147
app.py CHANGED
@@ -1,161 +1,292 @@
- 
- import gradio as gr
- import pandas as pd
- import sqlite3
- from feedgen.feed import FeedGenerator
  import datetime
  import os
- import logging
- import sys
  import csv
- import traceback
- 
- sys.path.append('/home/user')
- 
- from .background_tasks import start_background_monitoring, create_database
- 
- # Set up absolute paths
- BASE_DIR = '/home/user/app/scraped_data/culver'
- LOG_FILE = os.path.join(BASE_DIR, 'main.log')
- CSV_FILE = os.path.join(BASE_DIR, 'culvers_changes.csv')
- DB_FILE = os.path.join(BASE_DIR, 'culvers_changes.db')
- XML_FILE = os.path.join(BASE_DIR, 'culvers_changes.xml')
- 
- # Ensure the directory exists
- try:
-     os.makedirs(BASE_DIR, exist_ok=True)
-     print(f"Directory created or already exists: {BASE_DIR}")
- except Exception as e:
-     print(f"Error creating directory: {e}")
-     traceback.print_exc()
- 
- # Configure logging
- try:
-     logging.basicConfig(filename=LOG_FILE, level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s')
-     print(f"Logging configured. Log file: {LOG_FILE}")
- except Exception as e:
-     print(f"Error configuring logging: {e}")
-     traceback.print_exc()
- 
- # Write directly to log file
- try:
-     with open(LOG_FILE, 'w') as log_file:
-         log_file.write(f"Log file created at {datetime.datetime.now()}\n")
-     print(f"Log file created: {LOG_FILE}")
- except Exception as e:
-     print(f"Error writing to log file: {e}")
-     traceback.print_exc()
- 
- # Write directly to CSV file
- try:
-     with open(CSV_FILE, 'w', newline='') as csv_file:
-         writer = csv.writer(csv_file)
-         writer.writerow(['date', 'time', 'url', 'change'])
-         writer.writerow([datetime.datetime.now().strftime("%Y-%m-%d"), datetime.datetime.now().strftime("%H:%M:%S"), 'Initial', 'CSV file created'])
-     print(f"CSV file created: {CSV_FILE}")
- except Exception as e:
-     print(f"Error writing to CSV file: {e}")
-     traceback.print_exc()
- 
- # Start background monitoring
- urls = ["https://www.culver.k12.in.us/", "https://www.facebook.com/CulverCommunitySchools"]
- try:
-     start_background_monitoring(CSV_FILE, urls, 1, "text")  # Changed interval to 1 minute for testing
-     print("Background monitoring started")
- except Exception as e:
-     print(f"Error starting background monitoring: {e}")
-     traceback.print_exc()
- 
- logging.info("Background monitoring initiated from main.py")
- 
- def view_scraped_data():
      try:
-         create_database()  # Ensure the database and table exist
-         conn = sqlite3.connect(DB_FILE)
-         df = pd.read_sql_query("SELECT * FROM changes ORDER BY date DESC, time DESC LIMIT 50", conn)
-         conn.close()
-         return df
      except Exception as e:
-         print(f"Error viewing scraped data: {e}")
-         traceback.print_exc()
-         return pd.DataFrame()

- def view_rss_feed():
      try:
-         with open(XML_FILE, 'r') as file:
-             return file.read()
-     except FileNotFoundError:
-         return "RSS feed not generated yet."
      except Exception as e:
-         print(f"Error viewing RSS feed: {e}")
-         traceback.print_exc()
-         return "Error viewing RSS feed"

  def generate_rss_feed():
-     try:
-         create_database()  # Ensure the database and table exist
-         fg = FeedGenerator()
-         fg.title('Culvers Site Changes')
-         fg.link(href='http://example.com', rel='alternate')
-         fg.description('Recent changes detected on Culvers websites')
- 
-         conn = sqlite3.connect(DB_FILE)
-         c = conn.cursor()
-         c.execute("SELECT * FROM changes ORDER BY date DESC, time DESC LIMIT 20")
-         changes = c.fetchall()
- 
-         for change in changes:
-             fe = fg.add_entry()
-             fe.id(str(change[0]))
-             fe.title(f'Change detected at {change[3]}')
-             fe.link(href=change[3])
-             fe.description(change[4])
-             fe.pubDate(datetime.datetime.strptime(f"{change[1]} {change[2]}", "%Y-%m-%d %H:%M:%S"))
- 
-         conn.close()
- 
-         fg.rss_file(XML_FILE)
-         return "RSS feed generated successfully."
-     except Exception as e:
-         print(f"Error generating RSS feed: {e}")
-         traceback.print_exc()
-         return "Error generating RSS feed"
- 
- def create_viewer():
-     with gr.Blocks() as demo:
-         gr.Markdown("# Culvers Site Monitor and Viewer")
- 
-         with gr.Tab("Monitor Status"):
-             gr.Markdown("Continuous monitoring is active for the following URLs:")
-             for url in urls:
-                 gr.Markdown(f"- {url}")
-             gr.Markdown(f"Monitoring interval: 1 minute")
-             gr.Markdown(f"Data is being stored in: {CSV_FILE}")
- 
-         with gr.Tab("View Scraped Data"):
-             gr.DataFrame(view_scraped_data, label="Recent Changes")
- 
-         with gr.Tab("View RSS Feed"):
-             gr.TextArea(view_rss_feed, label="RSS Feed Content")
-             gr.Button("Generate RSS Feed").click(generate_rss_feed, outputs=gr.TextArea(label="Generation Status"))
- 
-     return demo

- if __name__ == "__main__":
-     try:
-         # Create the database and table before launching the viewer
-         create_database()
-         print("Database created")

-         # Create and launch the viewer
-         viewer = create_viewer()
-         print("Viewer created")
-         viewer.launch()
-         print("Viewer launched")

-         logging.info("Web-based viewer created and launched with continuous monitoring.")
-     except Exception as e:
-         print(f"Error in main execution: {e}")
-         traceback.print_exc()

- print("Main application file updated with error handling, console logging, and all necessary functions.")
  import datetime
  import os
  import csv
+ import time
+ import hashlib
+ import threading
+ from pathlib import Path
+ 
+ import gradio as gr
+ from selenium import webdriver
+ from selenium.webdriver.chrome.service import Service
+ from selenium.webdriver.chrome.options import Options
+ from selenium.webdriver.common.by import By
+ from webdriver_manager.chrome import ChromeDriverManager
+ from huggingface_hub import InferenceClient
+ import mysql.connector
+ import feedparser  # For parsing RSS feeds
+ import sqlite3  # For simple local storage if needed
+ 
+ # Configuration (replace with your actual values or environment variables)
+ DB_HOST = os.environ.get("DB_HOST", "your_host")
+ DB_USER = os.environ.get("DB_USER", "your_user")
+ DB_PASSWORD = os.environ.get("DB_PASSWORD", "your_password")
+ DB_NAME = os.environ.get("DB_NAME", "your_database")
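+ # The "your_*" strings are placeholders only; in a real deployment (for
+ # example, a Space's secrets) DB_HOST/DB_USER/DB_PASSWORD/DB_NAME would be
+ # set in the environment before launch.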
+ 
+ # Global variables
+ monitoring_thread = None
+ stop_event = threading.Event()
+ db_connection = None
+ current_task = None
+ history = []
+ 
+ # Function to establish a database connection
+ def get_db_connection():
+     global db_connection
+     if db_connection is None or not db_connection.is_connected():
+         try:
+             db_connection = mysql.connector.connect(
+                 host=DB_HOST,
+                 user=DB_USER,
+                 password=DB_PASSWORD,
+                 database=DB_NAME
+             )
+             return db_connection
+         except Exception as e:
+             print(f"Error connecting to database: {e}")
+             return None
+     else:
+         return db_connection
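+ # The connection is a lazy singleton: the first call opens it, and later
+ # calls reopen it only if is_connected() reports the server dropped it.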
+ 
+ # Function to create the articles table if it doesn't exist
+ def create_articles_table():
+     conn = get_db_connection()
+     if conn:
+         cursor = conn.cursor()
+         cursor.execute("""
+             CREATE TABLE IF NOT EXISTS articles (
+                 id INT AUTO_INCREMENT PRIMARY KEY,
+                 url VARCHAR(255) NOT NULL,
+                 title VARCHAR(255),
+                 content TEXT,
+                 hash VARCHAR(32),
+                 timestamp TIMESTAMP DEFAULT CURRENT_TIMESTAMP
+             )
+         """)
+         conn.commit()
+         cursor.close()
+ 
+ # Initialize the articles table
+ create_articles_table()
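+ # The hash column is VARCHAR(32) because hashlib.md5(...).hexdigest()
+ # always yields 32 hex characters.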
+ 
+ # Function to monitor URLs for changes
+ def monitor_urls(target_urls, storage_location, feed_rss, stop_event):
+     global history
+     previous_hashes = {url: "" for url in target_urls}
+     options = Options()
+     options.add_argument("--headless")  # run Chrome without a visible window
+     options.add_argument("--disable-gpu")
+     options.add_argument("--no-sandbox")
+     options.add_argument("--disable-dev-shm-usage")
+ 
+     driver = webdriver.Chrome(service=Service(ChromeDriverManager().install()), options=options)
+ 
      try:
+         while not stop_event.is_set():
+             for url in target_urls:
+                 try:
+                     driver.get(url)
+                     time.sleep(2)  # give the page a moment to render
+                     current_content = driver.page_source
+                     current_hash = hashlib.md5(current_content.encode('utf-8')).hexdigest()
+ 
+                     if current_hash != previous_hashes[url]:
+                         previous_hashes[url] = current_hash
+                         timestamp = datetime.datetime.now()
+                         title = driver.title or "No Title"
+ 
+                         history.append(f"Change detected at {url} on {timestamp.strftime('%Y-%m-%d %H:%M:%S')}")
+ 
+                         if storage_location:
+                             save_to_storage(storage_location, url, title, current_content, timestamp)
+ 
+                         if feed_rss:
+                             save_to_database(url, title, current_content, current_hash)
+ 
+                 except Exception as e:
+                     print(f"Error accessing {url}: {e}")
+ 
+             if not stop_event.is_set():
+                 stop_event.wait(300)  # wait up to 5 minutes, but wake immediately on stop
+ 
      except Exception as e:
+         print(f"Unexpected error in monitoring thread: {e}")
+     finally:
+         driver.quit()
+         print("Monitoring thread has been stopped.")
+ 
+ # Function to save data to local storage (CSV)
+ def save_to_storage(storage_location, url, title, content, timestamp):
      try:
+         with open(storage_location, "a", newline='', encoding='utf-8') as csvfile:
+             csv_writer = csv.writer(csvfile)
+             csv_writer.writerow([timestamp.strftime("%Y-%m-%d %H:%M:%S"), url, title, content])
      except Exception as e:
+         print(f"Error saving to storage: {e}")
+ 
+ # Function to save data to the database
+ def save_to_database(url, title, content, content_hash):
+     conn = get_db_connection()
+     if conn:
+         cursor = conn.cursor()
+         try:
+             sql = "INSERT INTO articles (url, title, content, hash) VALUES (%s, %s, %s, %s)"
+             val = (url, title, content, content_hash)
+             cursor.execute(sql, val)
+             conn.commit()
+         except Exception as e:
+             print(f"Error saving to database: {e}")
+         finally:
+             cursor.close()
+ 
+ # Function to build the feed from the database. feedparser can only parse
+ # feeds, not generate them, so the feed is assembled as a plain dict here,
+ # which is also what the gr.JSON component below expects.
  def generate_rss_feed():
+     conn = get_db_connection()
+     if conn:
+         cursor = conn.cursor()
+         try:
+             cursor.execute("SELECT * FROM articles ORDER BY timestamp DESC")
+             articles = cursor.fetchall()
+ 
+             feed = {
+                 'title': 'Website Changes Feed',
+                 'link': 'http://yourwebsite.com/feed',  # Replace with your actual feed URL
+                 'description': 'Feed of changes detected on monitored websites.',
+                 'entries': []
+             }
+ 
+             for article in articles:
+                 feed['entries'].append({
+                     'title': article[2],           # Title
+                     'link': article[1],            # URL
+                     'description': article[3],     # Content
+                     'published': str(article[5])   # Timestamp
+                 })
+ 
+             return feed
+         except Exception as e:
+             print(f"Error generating RSS feed: {e}")
+         finally:
+             cursor.close()
+     return None
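+ # To publish a real RSS/XML feed instead of JSON, the feedgen package (used
+ # by the previous version of this file) could serialize the same rows via
+ # FeedGenerator().rss_str().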
+ 
+ # Function to start monitoring
+ def start_monitoring(target_urls, storage_location, feed_rss):
+     global monitoring_thread, stop_event, current_task, history
+     if monitoring_thread and monitoring_thread.is_alive():
+         return "Monitoring is already running.", "\n".join(history)
+ 
+     stop_event.clear()
+     current_task = f"Monitoring URLs: {', '.join(target_urls)}"
+     history.append(f"Task started: {current_task}")
+     monitoring_thread = threading.Thread(
+         target=monitor_urls,
+         args=(target_urls, storage_location, feed_rss, stop_event),
+         daemon=True
+     )
+     monitoring_thread.start()
+     return "Monitoring started.", "\n".join(history)
+ 
+ # Function to stop monitoring
+ def stop_monitoring():
+     global monitoring_thread, stop_event, current_task, history
+     if monitoring_thread and monitoring_thread.is_alive():
+         stop_event.set()
+         monitoring_thread.join()
+         current_task = None
+         history.append("Monitoring stopped by user.")
+         return "Monitoring stopped.", "\n".join(history)
+     else:
+         return "No monitoring task is currently running.", "\n".join(history)
+ 
+ # Function to handle chatbot responses
+ def chatbot_response(message, history):
+     # Replace this with your actual chatbot logic using 'mistralai/Mixtral-8x7B-Instruct-v0.1'
+     # via Hugging Face's InferenceClient
+     client = InferenceClient("mistralai/Mixtral-8x7B-Instruct-v0.1")
+     response = client.text_generation(message)
+     history.append((message, response))
+     return history
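+ # Note: for an instruct-tuned model, client.chat_completion(messages=[...])
+ # may give better results than raw text_generation.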
+ 
+ # --- Gradio Interface ---
+ with gr.Blocks() as demo:
+     gr.Markdown("# Website Monitor and Chatbot")
+ 
+     # Configuration Tab
+     with gr.Tab("Configuration"):
+         with gr.Row():
+             target_urls = gr.Textbox(
+                 label="Target URLs (comma-separated)",
+                 placeholder="https://example.com, https://another-site.com"
+             )
+         with gr.Row():
+             storage_location = gr.Textbox(
+                 label="Storage Location (CSV file path)",
+                 placeholder="/path/to/your/file.csv",
+                 visible=False
+             )
+         with gr.Row():
+             feed_rss_checkbox = gr.Checkbox(label="Enable RSS Feed")
+         with gr.Row():
+             start_button = gr.Button("Start Monitoring")
+             stop_button = gr.Button("Stop Monitoring")
+         with gr.Row():
+             status_text = gr.Textbox(label="Status", interactive=False)
+         with gr.Row():
+             history_text = gr.Textbox(
+                 label="History", lines=10, interactive=False
+             )
+ 
+     # User-End View Tab
+     with gr.Tab("User-End View"):
+         with gr.Row():
+             feed_content = gr.JSON(label="RSS Feed Content")
+ 
+     # Chatbot Tab
+     with gr.Tab("Chatbot"):
+         chatbot_interface = gr.Chatbot()
+         with gr.Row():
+             message_input = gr.Textbox(placeholder="Type your message here...")
+             send_button = gr.Button("Send")
+ 
+     # --- Event Handlers ---
+ 
+     # Start monitoring button click
+     def on_start_click(target_urls_str, storage_loc, feed_enabled):
+         try:
+             target_urls = [url.strip() for url in target_urls_str.split(",")]
+             if not all(target_urls):
+                 return "Please enter valid URLs.", "\n".join(history)
+             status, history_str = start_monitoring(target_urls, storage_loc if storage_loc else None, feed_enabled)
+             return status, history_str
+         except Exception as e:
+             return f"Error starting monitoring: {e}", "\n".join(history)
+ 
+     start_button.click(
+         on_start_click,
+         inputs=[target_urls, storage_location, feed_rss_checkbox],
+         outputs=[status_text, history_text]
+     )
+ 
+     # Stop monitoring button click
+     stop_button.click(
+         stop_monitoring,
+         outputs=[status_text, history_text]
+     )
+ 
+     # Send message to chatbot button click
+     send_button.click(
+         chatbot_response,
+         inputs=[message_input, chatbot_interface],
+         outputs=[chatbot_interface]
+     )
+ 
+     # Update RSS feed content periodically
+     def update_feed_content():
+         return generate_rss_feed()
+ 
+     demo.load(update_feed_content, outputs=feed_content, every=5)  # Update every 5 seconds
+ 
+ if __name__ == "__main__":
+     demo.launch()