import sqlite3

import requests
from bs4 import BeautifulSoup
from dotenv import load_dotenv

# Load environment variables
load_dotenv()


# Database setup
def create_database():
    conn = sqlite3.connect('courses.db')
    c = conn.cursor()
    # Create table with an additional column 'price' to indicate free or paid
    c.execute('''CREATE TABLE IF NOT EXISTS courses
                 (id INTEGER PRIMARY KEY, title TEXT, description TEXT, price TEXT)''')
    conn.commit()
    conn.close()


# Web scraping function to get course data from a specific page
def scrape_courses_from_page(page_number):
    url = f"https://courses.analyticsvidhya.com/collections/courses?page={page_number}"
    response = requests.get(url, timeout=30)
    response.raise_for_status()  # Fail loudly on HTTP errors instead of parsing an error page
    soup = BeautifulSoup(response.text, 'html.parser')

    courses = []
    # Find all course items in the products__list
    course_items = soup.find_all('li', class_='products__list-item')

    for course_item in course_items:
        # Extract the course title
        title_tag = course_item.find('h3')
        title = title_tag.text.strip() if title_tag else 'No title available'

        # Extract the course price
        price_tag = course_item.find('span', class_='course-card__price')
        price = price_tag.text.strip() if price_tag else 'Price not available'

        # A description is not always explicitly provided; fall back to the
        # course card's subheading for more detail
        description_tag = course_item.find('h4')
        description = description_tag.text.strip() if description_tag else 'No description available'

        # Append course details (title, description, price)
        courses.append((title, description, price))

    return courses


# Scrape all pages (8 in total) and collect the course data
def scrape_all_pages():
    all_courses = []
    # Loop through pages 1 to 8
    for page in range(1, 9):
        print(f"Scraping page {page}...")
        courses = scrape_courses_from_page(page)
        all_courses.extend(courses)
    return all_courses


# Insert scraped data into the database
def insert_data_to_db(courses):
    conn = sqlite3.connect('courses.db')
    c = conn.cursor()
    c.executemany('INSERT INTO courses (title, description, price) VALUES (?, ?, ?)',
                  courses)
    conn.commit()
    conn.close()


if __name__ == "__main__":
    create_database()
    all_courses = scrape_all_pages()
    insert_data_to_db(all_courses)
    print("Data from all pages has been successfully scraped and inserted into the database.")
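

# Example (sketch): reading the scraped rows back out of courses.db.
# `fetch_all_courses` is a hypothetical helper, not part of the scraper above;
# it only assumes the `courses` table created by create_database().
def fetch_all_courses(db_path='courses.db'):
    conn = sqlite3.connect(db_path)
    c = conn.cursor()
    c.execute('SELECT title, description, price FROM courses')
    rows = c.fetchall()
    conn.close()
    return rows

# Usage: print the first few stored courses after running the scraper
# for title, description, price in fetch_all_courses()[:5]:
#     print(f"{title} | {price}")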