Spaces:
Runtime error
Runtime error
acecalisto3
committed on
Commit
•
d7e93db
1
Parent(s):
2288f21
Update app.py
Browse files
app.py
CHANGED
@@ -5,23 +5,29 @@ import time
|
|
5 |
import hashlib
|
6 |
import threading
|
7 |
from pathlib import Path
|
|
|
8 |
|
9 |
import gradio as gr
|
10 |
from selenium import webdriver
|
11 |
from selenium.webdriver.chrome.service import Service
|
12 |
from selenium.webdriver.chrome.options import Options
|
13 |
from selenium.webdriver.common.by import By
|
|
|
14 |
from webdriver_manager.chrome import ChromeDriverManager
|
15 |
from huggingface_hub import InferenceClient
|
16 |
import mysql.connector
|
17 |
import feedparser # For parsing RSS feeds
|
18 |
import sqlite3 # For simple local storage if needed
|
19 |
|
|
|
|
|
|
|
20 |
# Configuration (replace with your actual values or environment variables)
|
21 |
DB_HOST = os.environ.get("DB_HOST", "your_host")
|
22 |
DB_USER = os.environ.get("DB_USER", "your_user")
|
23 |
DB_PASSWORD = os.environ.get("DB_PASSWORD", "your_password")
|
24 |
DB_NAME = os.environ.get("DB_NAME", "your_database")
|
|
|
25 |
|
26 |
# Global variables
|
27 |
monitoring_thread = None
|
@@ -29,6 +35,7 @@ stop_event = threading.Event()
|
|
29 |
db_connection = None
|
30 |
current_task = None
|
31 |
history = []
|
|
|
32 |
|
33 |
# Function to establish a database connection
|
34 |
def get_db_connection():
|
@@ -43,7 +50,7 @@ def get_db_connection():
|
|
43 |
)
|
44 |
return db_connection
|
45 |
except Exception as e:
|
46 |
-
|
47 |
return None
|
48 |
else:
|
49 |
return db_connection
|
@@ -71,7 +78,7 @@ create_articles_table()
|
|
71 |
|
72 |
# Function to monitor URLs for changes
|
73 |
def monitor_urls(target_urls, storage_location, feed_rss, stop_event):
|
74 |
-
global history
|
75 |
previous_hashes = {url: "" for url in target_urls}
|
76 |
options = Options()
|
77 |
options.headless = True
|
@@ -85,16 +92,23 @@ def monitor_urls(target_urls, storage_location, feed_rss, stop_event):
|
|
85 |
while not stop_event.is_set():
|
86 |
for url in target_urls:
|
87 |
try:
|
|
|
|
|
|
|
88 |
driver.get(url)
|
89 |
-
time.sleep(2)
|
90 |
current_content = driver.page_source
|
91 |
current_hash = hashlib.md5(current_content.encode('utf-8')).hexdigest()
|
92 |
|
93 |
if current_hash != previous_hashes[url]:
|
94 |
previous_hashes[url] = current_hash
|
95 |
timestamp = datetime.datetime.now()
|
96 |
-
|
97 |
-
|
|
|
|
|
|
|
|
|
98 |
|
99 |
history.append(f"Change detected at {url} on {timestamp.strftime('%Y-%m-%d %H:%M:%S')}")
|
100 |
|
@@ -104,17 +118,27 @@ def monitor_urls(target_urls, storage_location, feed_rss, stop_event):
|
|
104 |
if feed_rss:
|
105 |
save_to_database(url, title, current_content, current_hash)
|
106 |
|
107 |
-
|
108 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
109 |
|
110 |
if not stop_event.is_set():
|
111 |
-
time.sleep(
|
112 |
|
113 |
except Exception as e:
|
114 |
-
|
115 |
finally:
|
116 |
driver.quit()
|
117 |
-
|
118 |
|
119 |
# Function to save data to local storage (CSV)
|
120 |
def save_to_storage(storage_location, url, title, content, timestamp):
|
@@ -123,7 +147,7 @@ def save_to_storage(storage_location, url, title, content, timestamp):
|
|
123 |
csv_writer = csv.writer(csvfile)
|
124 |
csv_writer.writerow([timestamp.strftime("%Y-%m-%d %H:%M:%S"), url, title, content])
|
125 |
except Exception as e:
|
126 |
-
|
127 |
|
128 |
# Function to save data to the database
|
129 |
def save_to_database(url, title, content, hash):
|
@@ -136,7 +160,7 @@ def save_to_database(url, title, content, hash):
|
|
136 |
cursor.execute(sql, val)
|
137 |
conn.commit()
|
138 |
except Exception as e:
|
139 |
-
|
140 |
finally:
|
141 |
cursor.close()
|
142 |
|
@@ -165,7 +189,7 @@ def generate_rss_feed():
|
|
165 |
|
166 |
return feedparser.FeedGenerator().feed_from_dictionary(feed).writeString('utf-8')
|
167 |
except Exception as e:
|
168 |
-
|
169 |
finally:
|
170 |
cursor.close()
|
171 |
return None
|
@@ -201,12 +225,15 @@ def stop_monitoring():
|
|
201 |
|
202 |
# Function to handle chatbot responses
|
203 |
def chatbot_response(message, history):
|
204 |
-
|
205 |
-
|
206 |
-
|
207 |
-
|
208 |
-
|
209 |
-
|
|
|
|
|
|
|
210 |
|
211 |
# --- Gradio Interface ---
|
212 |
with gr.Blocks() as demo:
|
@@ -223,7 +250,7 @@ with gr.Blocks() as demo:
|
|
223 |
storage_location = gr.Textbox(
|
224 |
label="Storage Location (CSV file path)",
|
225 |
placeholder="/path/to/your/file.csv",
|
226 |
-
visible=False
|
227 |
)
|
228 |
with gr.Row():
|
229 |
feed_rss_checkbox = gr.Checkbox(label="Enable RSS Feed")
|
@@ -253,11 +280,15 @@ with gr.Blocks() as demo:
|
|
253 |
|
254 |
# Start monitoring button click
|
255 |
def on_start_click(target_urls_str, storage_loc, feed_enabled):
|
256 |
-
global history
|
257 |
try:
|
258 |
target_urls = [url.strip() for url.strip() in target_urls_str.split(",")]
|
259 |
if not all(target_urls):
|
260 |
return "Please enter valid URLs.", history
|
|
|
|
|
|
|
|
|
261 |
status, history = start_monitoring(target_urls, storage_loc if storage_loc else None, feed_enabled)
|
262 |
return status, history
|
263 |
except Exception as e:
|
|
|
5 |
import hashlib
|
6 |
import threading
|
7 |
from pathlib import Path
|
8 |
+
import logging
|
9 |
|
10 |
import gradio as gr
|
11 |
from selenium import webdriver
|
12 |
from selenium.webdriver.chrome.service import Service
|
13 |
from selenium.webdriver.chrome.options import Options
|
14 |
from selenium.webdriver.common.by import By
|
15 |
+
from selenium.common.exceptions import WebDriverException, NoSuchElementException
|
16 |
from webdriver_manager.chrome import ChromeDriverManager
|
17 |
from huggingface_hub import InferenceClient
|
18 |
import mysql.connector
|
19 |
import feedparser # For parsing RSS feeds
|
20 |
import sqlite3 # For simple local storage if needed
|
21 |
|
22 |
+
# Configure logging
|
23 |
+
logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s')
|
24 |
+
|
25 |
# Configuration (replace with your actual values or environment variables)
|
26 |
DB_HOST = os.environ.get("DB_HOST", "your_host")
|
27 |
DB_USER = os.environ.get("DB_USER", "your_user")
|
28 |
DB_PASSWORD = os.environ.get("DB_PASSWORD", "your_password")
|
29 |
DB_NAME = os.environ.get("DB_NAME", "your_database")
|
30 |
+
HUGGINGFACE_API_KEY = os.environ.get("HUGGINGFACE_API_KEY") # Add API key
|
31 |
|
32 |
# Global variables
|
33 |
monitoring_thread = None
|
|
|
35 |
db_connection = None
|
36 |
current_task = None
|
37 |
history = []
|
38 |
+
url_monitoring_intervals = {} # Store monitoring intervals for each URL
|
39 |
|
40 |
# Function to establish a database connection
|
41 |
def get_db_connection():
|
|
|
50 |
)
|
51 |
return db_connection
|
52 |
except Exception as e:
|
53 |
+
logging.error(f"Error connecting to database: {e}")
|
54 |
return None
|
55 |
else:
|
56 |
return db_connection
|
|
|
78 |
|
79 |
# Function to monitor URLs for changes
|
80 |
def monitor_urls(target_urls, storage_location, feed_rss, stop_event):
|
81 |
+
global history, url_monitoring_intervals
|
82 |
previous_hashes = {url: "" for url in target_urls}
|
83 |
options = Options()
|
84 |
options.headless = True
|
|
|
92 |
while not stop_event.is_set():
|
93 |
for url in target_urls:
|
94 |
try:
|
95 |
+
# Dynamic monitoring interval
|
96 |
+
interval = url_monitoring_intervals.get(url, 300) # Default 5 minutes
|
97 |
+
|
98 |
driver.get(url)
|
99 |
+
time.sleep(2) # Allow page to load
|
100 |
current_content = driver.page_source
|
101 |
current_hash = hashlib.md5(current_content.encode('utf-8')).hexdigest()
|
102 |
|
103 |
if current_hash != previous_hashes[url]:
|
104 |
previous_hashes[url] = current_hash
|
105 |
timestamp = datetime.datetime.now()
|
106 |
+
|
107 |
+
try:
|
108 |
+
title_element = driver.find_element(By.TAG_NAME, "title")
|
109 |
+
title = title_element.text
|
110 |
+
except NoSuchElementException:
|
111 |
+
title = "No Title"
|
112 |
|
113 |
history.append(f"Change detected at {url} on {timestamp.strftime('%Y-%m-%d %H:%M:%S')}")
|
114 |
|
|
|
118 |
if feed_rss:
|
119 |
save_to_database(url, title, current_content, current_hash)
|
120 |
|
121 |
+
# Adjust monitoring interval based on change frequency (example)
|
122 |
+
url_monitoring_intervals[url] = 60 # Check more frequently after a change
|
123 |
+
|
124 |
+
else:
|
125 |
+
# Increase interval if no changes detected (example)
|
126 |
+
url_monitoring_intervals[url] = min(url_monitoring_intervals[url] + 60, 600) # Max 10 min
|
127 |
+
|
128 |
+
except WebDriverException as e:
|
129 |
+
logging.error(f"Error accessing {url}: {e}")
|
130 |
+
|
131 |
+
if stop_event.is_set():
|
132 |
+
break # Exit inner loop if stop event is set
|
133 |
|
134 |
if not stop_event.is_set():
|
135 |
+
time.sleep(interval)
|
136 |
|
137 |
except Exception as e:
|
138 |
+
logging.error(f"Unexpected error in monitoring thread: {e}")
|
139 |
finally:
|
140 |
driver.quit()
|
141 |
+
logging.info("Monitoring thread has been stopped.")
|
142 |
|
143 |
# Function to save data to local storage (CSV)
|
144 |
def save_to_storage(storage_location, url, title, content, timestamp):
|
|
|
147 |
csv_writer = csv.writer(csvfile)
|
148 |
csv_writer.writerow([timestamp.strftime("%Y-%m-%d %H:%M:%S"), url, title, content])
|
149 |
except Exception as e:
|
150 |
+
logging.error(f"Error saving to storage: {e}")
|
151 |
|
152 |
# Function to save data to the database
|
153 |
def save_to_database(url, title, content, hash):
|
|
|
160 |
cursor.execute(sql, val)
|
161 |
conn.commit()
|
162 |
except Exception as e:
|
163 |
+
logging.error(f"Error saving to database: {e}")
|
164 |
finally:
|
165 |
cursor.close()
|
166 |
|
|
|
189 |
|
190 |
return feedparser.FeedGenerator().feed_from_dictionary(feed).writeString('utf-8')
|
191 |
except Exception as e:
|
192 |
+
logging.error(f"Error generating RSS feed: {e}")
|
193 |
finally:
|
194 |
cursor.close()
|
195 |
return None
|
|
|
225 |
|
226 |
# Function to handle chatbot responses
|
227 |
def chatbot_response(message, history):
    """Send ``message`` to the hosted Mixtral model and record the exchange.

    Args:
        message: User text forwarded to the model as the prompt.
        history: List of ``(user_message, bot_reply)`` tuples. It is appended
            to in place; ``None`` is treated as an empty history.

    Returns:
        A ``(history, history)`` tuple containing the same updated list twice
        (presumably one for the chat display and one for state; confirm
        against the Gradio wiring, which is not visible here).

    Any failure while contacting the model is logged and surfaced to the user
    as an error reply instead of being raised, so the UI callback never
    crashes.
    """
    if history is None:
        history = []
    try:
        client = InferenceClient(
            "mistralai/Mixtral-8x7B-Instruct-v0.1", token=HUGGINGFACE_API_KEY
        )
        # InferenceClient has no ``inference`` method; ``text_generation`` is
        # the prompt-in / text-out call. The token cap keeps replies bounded.
        response = client.text_generation(message, max_new_tokens=512)
        history.append((message, response))
    except Exception as e:  # UI boundary: report the failure, do not crash
        logging.error(f"Error getting chatbot response: {e}")
        history.append((message, "Error: Could not get a response from the chatbot."))
    return history, history
|
237 |
|
238 |
# --- Gradio Interface ---
|
239 |
with gr.Blocks() as demo:
|
|
|
250 |
storage_location = gr.Textbox(
|
251 |
label="Storage Location (CSV file path)",
|
252 |
placeholder="/path/to/your/file.csv",
|
253 |
+
visible=False # You can enable this if you want CSV storage
|
254 |
)
|
255 |
with gr.Row():
|
256 |
feed_rss_checkbox = gr.Checkbox(label="Enable RSS Feed")
|
|
|
280 |
|
281 |
# Start monitoring button click
|
282 |
def on_start_click(target_urls_str, storage_loc, feed_enabled):
|
283 |
+
global history, url_monitoring_intervals
|
284 |
try:
|
285 |
target_urls = [url.strip() for url.strip() in target_urls_str.split(",")]
|
286 |
if not all(target_urls):
|
287 |
return "Please enter valid URLs.", history
|
288 |
+
|
289 |
+
# Reset monitoring intervals when starting
|
290 |
+
url_monitoring_intervals = {url: 300 for url in target_urls}
|
291 |
+
|
292 |
status, history = start_monitoring(target_urls, storage_loc if storage_loc else None, feed_enabled)
|
293 |
return status, history
|
294 |
except Exception as e:
|