raghuv-aditya committed on
Commit
11eb809
·
verified ·
1 Parent(s): d683b77

Update scraper.py

Browse files
Files changed (1) hide show
  1. scraper.py +5 -0
scraper.py CHANGED
@@ -64,14 +64,19 @@ def scrape_courses_json(base_url, num_pages=5):
64
  data = []
65
  for page_num in range(1, num_pages + 1):
66
  page_url = f"{base_url}{page_num}"
 
 
67
  course_links = get_course_links(page_url)
 
68
 
69
  for link in course_links:
70
  try:
 
71
  course_data = parse_course_page(link)
72
  data.append(course_data)
73
  time.sleep(1) # Be courteous to the server
74
  except Exception as e:
75
  print(f"Failed to scrape {link}: {e}")
76
 
 
77
  return data
 
64
  data = []
65
  for page_num in range(1, num_pages + 1):
66
  page_url = f"{base_url}{page_num}"
67
+ print(f"Scraping page {page_num}: {page_url}")
68
+
69
  course_links = get_course_links(page_url)
70
+ print(f"Found {len(course_links)} courses on page {page_num}")
71
 
72
  for link in course_links:
73
  try:
74
+ print(f"Scraping course: {link}")
75
  course_data = parse_course_page(link)
76
  data.append(course_data)
77
  time.sleep(1) # Be courteous to the server
78
  except Exception as e:
79
  print(f"Failed to scrape {link}: {e}")
80
 
81
+ print("Scraping completed.")
82
  return data