acecalisto3 committed on
Commit
d7e93db
1 Parent(s): 2288f21

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +52 -21
app.py CHANGED
@@ -5,23 +5,29 @@ import time
5
  import hashlib
6
  import threading
7
  from pathlib import Path
 
8
 
9
  import gradio as gr
10
  from selenium import webdriver
11
  from selenium.webdriver.chrome.service import Service
12
  from selenium.webdriver.chrome.options import Options
13
  from selenium.webdriver.common.by import By
 
14
  from webdriver_manager.chrome import ChromeDriverManager
15
  from huggingface_hub import InferenceClient
16
  import mysql.connector
17
  import feedparser # For parsing RSS feeds
18
  import sqlite3 # For simple local storage if needed
19
 
 
 
 
20
  # Configuration (replace with your actual values or environment variables)
21
  DB_HOST = os.environ.get("DB_HOST", "your_host")
22
  DB_USER = os.environ.get("DB_USER", "your_user")
23
  DB_PASSWORD = os.environ.get("DB_PASSWORD", "your_password")
24
  DB_NAME = os.environ.get("DB_NAME", "your_database")
 
25
 
26
  # Global variables
27
  monitoring_thread = None
@@ -29,6 +35,7 @@ stop_event = threading.Event()
29
  db_connection = None
30
  current_task = None
31
  history = []
 
32
 
33
  # Function to establish a database connection
34
  def get_db_connection():
@@ -43,7 +50,7 @@ def get_db_connection():
43
  )
44
  return db_connection
45
  except Exception as e:
46
- print(f"Error connecting to database: {e}")
47
  return None
48
  else:
49
  return db_connection
@@ -71,7 +78,7 @@ create_articles_table()
71
 
72
  # Function to monitor URLs for changes
73
  def monitor_urls(target_urls, storage_location, feed_rss, stop_event):
74
- global history
75
  previous_hashes = {url: "" for url in target_urls}
76
  options = Options()
77
  options.headless = True
@@ -85,16 +92,23 @@ def monitor_urls(target_urls, storage_location, feed_rss, stop_event):
85
  while not stop_event.is_set():
86
  for url in target_urls:
87
  try:
 
 
 
88
  driver.get(url)
89
- time.sleep(2)
90
  current_content = driver.page_source
91
  current_hash = hashlib.md5(current_content.encode('utf-8')).hexdigest()
92
 
93
  if current_hash != previous_hashes[url]:
94
  previous_hashes[url] = current_hash
95
  timestamp = datetime.datetime.now()
96
- title_element = driver.find_element(By.TAG_NAME, "title")
97
- title = title_element.text if title_element else "No Title"
 
 
 
 
98
 
99
  history.append(f"Change detected at {url} on {timestamp.strftime('%Y-%m-%d %H:%M:%S')}")
100
 
@@ -104,17 +118,27 @@ def monitor_urls(target_urls, storage_location, feed_rss, stop_event):
104
  if feed_rss:
105
  save_to_database(url, title, current_content, current_hash)
106
 
107
- except Exception as e:
108
- print(f"Error accessing {url}: {e}")
 
 
 
 
 
 
 
 
 
 
109
 
110
  if not stop_event.is_set():
111
- time.sleep(300) # Wait for 5 minutes
112
 
113
  except Exception as e:
114
- print(f"Unexpected error in monitoring thread: {e}")
115
  finally:
116
  driver.quit()
117
- print("Monitoring thread has been stopped.")
118
 
119
  # Function to save data to local storage (CSV)
120
  def save_to_storage(storage_location, url, title, content, timestamp):
@@ -123,7 +147,7 @@ def save_to_storage(storage_location, url, title, content, timestamp):
123
  csv_writer = csv.writer(csvfile)
124
  csv_writer.writerow([timestamp.strftime("%Y-%m-%d %H:%M:%S"), url, title, content])
125
  except Exception as e:
126
- print(f"Error saving to storage: {e}")
127
 
128
  # Function to save data to the database
129
  def save_to_database(url, title, content, hash):
@@ -136,7 +160,7 @@ def save_to_database(url, title, content, hash):
136
  cursor.execute(sql, val)
137
  conn.commit()
138
  except Exception as e:
139
- print(f"Error saving to database: {e}")
140
  finally:
141
  cursor.close()
142
 
@@ -165,7 +189,7 @@ def generate_rss_feed():
165
 
166
  return feedparser.FeedGenerator().feed_from_dictionary(feed).writeString('utf-8')
167
  except Exception as e:
168
- print(f"Error generating RSS feed: {e}")
169
  finally:
170
  cursor.close()
171
  return None
@@ -201,12 +225,15 @@ def stop_monitoring():
201
 
202
  # Function to handle chatbot responses
203
  def chatbot_response(message, history):
204
- # Replace this with your actual chatbot logic using 'mistralai/Mixtral-8x7B-Instruct-v0.1'
205
- # You'll need to load and use the model from Hugging Face's InferenceClient
206
- client = InferenceClient("mistralai/Mixtral-8x7B-Instruct-v0.1")
207
- response = client.inference(message)
208
- history.append((message, response))
209
- return history, history
 
 
 
210
 
211
  # --- Gradio Interface ---
212
  with gr.Blocks() as demo:
@@ -223,7 +250,7 @@ with gr.Blocks() as demo:
223
  storage_location = gr.Textbox(
224
  label="Storage Location (CSV file path)",
225
  placeholder="/path/to/your/file.csv",
226
- visible=False
227
  )
228
  with gr.Row():
229
  feed_rss_checkbox = gr.Checkbox(label="Enable RSS Feed")
@@ -253,11 +280,15 @@ with gr.Blocks() as demo:
253
 
254
  # Start monitoring button click
255
  def on_start_click(target_urls_str, storage_loc, feed_enabled):
256
- global history
257
  try:
258
  target_urls = [url.strip() for url.strip() in target_urls_str.split(",")]
259
  if not all(target_urls):
260
  return "Please enter valid URLs.", history
 
 
 
 
261
  status, history = start_monitoring(target_urls, storage_loc if storage_loc else None, feed_enabled)
262
  return status, history
263
  except Exception as e:
 
5
  import hashlib
6
  import threading
7
  from pathlib import Path
8
+ import logging
9
 
10
  import gradio as gr
11
  from selenium import webdriver
12
  from selenium.webdriver.chrome.service import Service
13
  from selenium.webdriver.chrome.options import Options
14
  from selenium.webdriver.common.by import By
15
+ from selenium.common.exceptions import WebDriverException, NoSuchElementException
16
  from webdriver_manager.chrome import ChromeDriverManager
17
  from huggingface_hub import InferenceClient
18
  import mysql.connector
19
  import feedparser # For parsing RSS feeds
20
  import sqlite3 # For simple local storage if needed
21
 
22
+ # Configure logging
23
+ logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s')
24
+
25
  # Configuration (replace with your actual values or environment variables)
26
  DB_HOST = os.environ.get("DB_HOST", "your_host")
27
  DB_USER = os.environ.get("DB_USER", "your_user")
28
  DB_PASSWORD = os.environ.get("DB_PASSWORD", "your_password")
29
  DB_NAME = os.environ.get("DB_NAME", "your_database")
30
+ HUGGINGFACE_API_KEY = os.environ.get("HUGGINGFACE_API_KEY") # Add API key
31
 
32
  # Global variables
33
  monitoring_thread = None
 
35
  db_connection = None
36
  current_task = None
37
  history = []
38
+ url_monitoring_intervals = {} # Store monitoring intervals for each URL
39
 
40
  # Function to establish a database connection
41
  def get_db_connection():
 
50
  )
51
  return db_connection
52
  except Exception as e:
53
+ logging.error(f"Error connecting to database: {e}")
54
  return None
55
  else:
56
  return db_connection
 
78
 
79
  # Function to monitor URLs for changes
80
  def monitor_urls(target_urls, storage_location, feed_rss, stop_event):
81
+ global history, url_monitoring_intervals
82
  previous_hashes = {url: "" for url in target_urls}
83
  options = Options()
84
  options.headless = True
 
92
  while not stop_event.is_set():
93
  for url in target_urls:
94
  try:
95
+ # Dynamic monitoring interval
96
+ interval = url_monitoring_intervals.get(url, 300) # Default 5 minutes
97
+
98
  driver.get(url)
99
+ time.sleep(2) # Allow page to load
100
  current_content = driver.page_source
101
  current_hash = hashlib.md5(current_content.encode('utf-8')).hexdigest()
102
 
103
  if current_hash != previous_hashes[url]:
104
  previous_hashes[url] = current_hash
105
  timestamp = datetime.datetime.now()
106
+
107
+ try:
108
+ title_element = driver.find_element(By.TAG_NAME, "title")
109
+ title = title_element.text
110
+ except NoSuchElementException:
111
+ title = "No Title"
112
 
113
  history.append(f"Change detected at {url} on {timestamp.strftime('%Y-%m-%d %H:%M:%S')}")
114
 
 
118
  if feed_rss:
119
  save_to_database(url, title, current_content, current_hash)
120
 
121
+ # Adjust monitoring interval based on change frequency (example)
122
+ url_monitoring_intervals[url] = 60 # Check more frequently after a change
123
+
124
+ else:
125
+ # Increase interval if no changes detected (example)
126
+ url_monitoring_intervals[url] = min(url_monitoring_intervals[url] + 60, 600) # Max 10 min
127
+
128
+ except WebDriverException as e:
129
+ logging.error(f"Error accessing {url}: {e}")
130
+
131
+ if stop_event.is_set():
132
+ break # Exit inner loop if stop event is set
133
 
134
  if not stop_event.is_set():
135
+ time.sleep(interval)
136
 
137
  except Exception as e:
138
+ logging.error(f"Unexpected error in monitoring thread: {e}")
139
  finally:
140
  driver.quit()
141
+ logging.info("Monitoring thread has been stopped.")
142
 
143
  # Function to save data to local storage (CSV)
144
  def save_to_storage(storage_location, url, title, content, timestamp):
 
147
  csv_writer = csv.writer(csvfile)
148
  csv_writer.writerow([timestamp.strftime("%Y-%m-%d %H:%M:%S"), url, title, content])
149
  except Exception as e:
150
+ logging.error(f"Error saving to storage: {e}")
151
 
152
  # Function to save data to the database
153
  def save_to_database(url, title, content, hash):
 
160
  cursor.execute(sql, val)
161
  conn.commit()
162
  except Exception as e:
163
+ logging.error(f"Error saving to database: {e}")
164
  finally:
165
  cursor.close()
166
 
 
189
 
190
  return feedparser.FeedGenerator().feed_from_dictionary(feed).writeString('utf-8')
191
  except Exception as e:
192
+ logging.error(f"Error generating RSS feed: {e}")
193
  finally:
194
  cursor.close()
195
  return None
 
225
 
226
# Function to handle chatbot responses
def chatbot_response(message, history):
    """Generate a chatbot reply for *message* and append it to *history*.

    Args:
        message: The user's input text.
        history: List of (message, response) tuples backing the Gradio chat.

    Returns:
        The updated history twice, matching Gradio's (chatbot, state) outputs.
        On any failure the error is logged and a placeholder reply is appended
        instead, so the UI never sees an exception.
    """
    try:
        client = InferenceClient("mistralai/Mixtral-8x7B-Instruct-v0.1", token=HUGGINGFACE_API_KEY)
        # InferenceClient exposes text_generation(), not inference() — the
        # original client.inference(message) raised AttributeError on every call.
        response = client.text_generation(message)
        history.append((message, response))
        return history, history
    except Exception as e:
        logging.error(f"Error getting chatbot response: {e}")
        history.append((message, "Error: Could not get a response from the chatbot."))
        return history, history
237
 
238
  # --- Gradio Interface ---
239
  with gr.Blocks() as demo:
 
250
  storage_location = gr.Textbox(
251
  label="Storage Location (CSV file path)",
252
  placeholder="/path/to/your/file.csv",
253
+ visible=False # You can enable this if you want CSV storage
254
  )
255
  with gr.Row():
256
  feed_rss_checkbox = gr.Checkbox(label="Enable RSS Feed")
 
280
 
281
  # Start monitoring button click
282
  def on_start_click(target_urls_str, storage_loc, feed_enabled):
283
+ global history, url_monitoring_intervals
284
  try:
285
  target_urls = [url.strip() for url.strip() in target_urls_str.split(",")]
286
  if not all(target_urls):
287
  return "Please enter valid URLs.", history
288
+
289
+ # Reset monitoring intervals when starting
290
+ url_monitoring_intervals = {url: 300 for url in target_urls}
291
+
292
  status, history = start_monitoring(target_urls, storage_loc if storage_loc else None, feed_enabled)
293
  return status, history
294
  except Exception as e: