Spaces:
Runtime error
Runtime error
acecalisto3
committed on
Commit
•
b723646
1
Parent(s):
14db2f5
Update app.py
Browse files
app.py
CHANGED
@@ -63,7 +63,6 @@ def monitor_urls(storage_location, urls, scrape_interval, content_type, stop_scr
|
|
63 |
except Exception as e:
|
64 |
logging.error(f"Error starting ChromeDriver: {e}")
|
65 |
|
66 |
-
# Define a function to start scraping
|
67 |
def start_scraping(storage_location, urls, scrape_interval, content_type, stop_scraping_flag):
|
68 |
global CURRENT_TASK, HISTORY
|
69 |
|
@@ -80,7 +79,7 @@ def start_scraping(storage_location, urls, scrape_interval, content_type, stop_s
|
|
80 |
try:
|
81 |
with webdriver.Chrome(service=Service(webdriver.ChromeDriverManager().install()), options=Options()) as driver:
|
82 |
driver.get(url)
|
83 |
-
time.sleep(2
|
84 |
if content_type == "text":
|
85 |
initial_content = driver.page_source
|
86 |
elif content_type == "media":
|
@@ -94,8 +93,8 @@ def start_scraping(storage_location, urls, scrape_interval, content_type, stop_s
|
|
94 |
except (NoSuchElementException, Exception) as e:
|
95 |
HISTORY.append(f"Error accessing {url}: {e}")
|
96 |
|
97 |
-
|
98 |
-
|
99 |
|
100 |
return f"Started scraping {', '.join(urls)} every {scrape_interval} minutes."
|
101 |
|
|
|
63 |
except Exception as e:
|
64 |
logging.error(f"Error starting ChromeDriver: {e}")
|
65 |
|
|
|
66 |
def start_scraping(storage_location, urls, scrape_interval, content_type, stop_scraping_flag):
|
67 |
global CURRENT_TASK, HISTORY
|
68 |
|
|
|
79 |
try:
|
80 |
with webdriver.Chrome(service=Service(webdriver.ChromeDriverManager().install()), options=Options()) as driver:
|
81 |
driver.get(url)
|
82 |
+
time.sleep(2) # Wait for the page to load
|
83 |
if content_type == "text":
|
84 |
initial_content = driver.page_source
|
85 |
elif content_type == "media":
|
|
|
93 |
except (NoSuchElementException, Exception) as e:
|
94 |
HISTORY.append(f"Error accessing {url}: {e}")
|
95 |
|
96 |
+
# Start a new thread for monitoring URLs
|
97 |
+
threading.Thread(target=monitor_urls, args=(storage_location, urls, scrape_interval, content_type, stop_scraping_flag)).start()
|
98 |
|
99 |
return f"Started scraping {', '.join(urls)} every {scrape_interval} minutes."
|
100 |
|