Update scraper.py
Browse files- scraper.py +5 -0
scraper.py
CHANGED
@@ -64,14 +64,19 @@ def scrape_courses_json(base_url, num_pages=5):
|
|
64 |
data = []
|
65 |
for page_num in range(1, num_pages + 1):
|
66 |
page_url = f"{base_url}{page_num}"
|
|
|
|
|
67 |
course_links = get_course_links(page_url)
|
|
|
68 |
|
69 |
for link in course_links:
|
70 |
try:
|
|
|
71 |
course_data = parse_course_page(link)
|
72 |
data.append(course_data)
|
73 |
time.sleep(1) # Be courteous to the server
|
74 |
except Exception as e:
|
75 |
print(f"Failed to scrape {link}: {e}")
|
76 |
|
|
|
77 |
return data
|
|
|
64 |
data = []
|
65 |
for page_num in range(1, num_pages + 1):
|
66 |
page_url = f"{base_url}{page_num}"
|
67 |
+
print(f"Scraping page {page_num}: {page_url}")
|
68 |
+
|
69 |
course_links = get_course_links(page_url)
|
70 |
+
print(f"Found {len(course_links)} courses on page {page_num}")
|
71 |
|
72 |
for link in course_links:
|
73 |
try:
|
74 |
+
print(f"Scraping course: {link}")
|
75 |
course_data = parse_course_page(link)
|
76 |
data.append(course_data)
|
77 |
time.sleep(1) # Be courteous to the server
|
78 |
except Exception as e:
|
79 |
print(f"Failed to scrape {link}: {e}")
|
80 |
|
81 |
+
print("Scraping completed.")
|
82 |
return data
|