File size: 2,578 Bytes
cdc9adf
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
import os
import requests
from bs4 import BeautifulSoup
import sqlite3
from dotenv import load_dotenv

# Load environment variables from a local .env file, if one exists.
# NOTE(review): no environment variable is read anywhere in this file —
# presumably consumed elsewhere in the project; confirm before removing.
load_dotenv()


# Database setup
def create_database(db_path='courses.db'):
    """Create the SQLite database and the `courses` table if missing.

    Args:
        db_path: Path of the SQLite database file (default: 'courses.db',
            matching the path used by insert_data_to_db).

    The table stores one row per course: an autoincrementing id, the
    title, a short description, and a 'price' text column that records
    whether the course is free or paid.
    """
    conn = sqlite3.connect(db_path)
    try:
        c = conn.cursor()
        # IF NOT EXISTS makes repeated runs of this script idempotent.
        c.execute('''CREATE TABLE IF NOT EXISTS courses
                     (id INTEGER PRIMARY KEY, title TEXT, description TEXT, price TEXT)''')
        conn.commit()
    finally:
        # Close even if table creation raises, so the handle never leaks.
        conn.close()


# Web scraping function to get course data from a specific page
def scrape_courses_from_page(page_number, timeout=30):
    """Scrape course data from one page of the Analytics Vidhya catalog.

    Args:
        page_number: 1-based page index of the course collection listing.
        timeout: Seconds to wait for the HTTP response (default 30).
            Without a timeout, requests.get can block indefinitely.

    Returns:
        A list of (title, description, price) string tuples, one per
        course card found on the page; empty if the page has none.

    Raises:
        requests.HTTPError: if the server returns an error status.
        requests.Timeout: if the request exceeds `timeout` seconds.
    """
    url = f"https://courses.analyticsvidhya.com/collections/courses?page={page_number}"
    response = requests.get(url, timeout=timeout)
    # Fail loudly on 4xx/5xx instead of silently parsing an error page.
    response.raise_for_status()
    soup = BeautifulSoup(response.text, 'html.parser')

    courses = []

    # Each course card is an <li> in the products__list.
    course_items = soup.find_all('li', class_='products__list-item')

    for course_item in course_items:
        # Course title lives in the card's <h3>.
        title_tag = course_item.find('h3')
        title = title_tag.text.strip() if title_tag else 'No title available'

        # Price badge; absent for some cards.
        price_tag = course_item.find('span', class_='course-card__price')
        price = price_tag.text.strip() if price_tag else "Price not available"

        # No explicit description element; the <h4> subtitle is the
        # closest available summary text.
        description_tag = course_item.find('h4')
        description = description_tag.text.strip() if description_tag else 'No description available'

        courses.append((title, description, price))

    return courses


# Scrape all pages (total 8 pages) and insert data into the database
def scrape_all_pages():
    all_courses = []

    # Loop through pages 1 to 8
    for page in range(1, 9):
        print(f"Scraping page {page}...")
        courses = scrape_courses_from_page(page)
        all_courses.extend(courses)

    return all_courses


# Insert scraped data into the database
def insert_data_to_db(courses):
    conn = sqlite3.connect('courses.db')
    c = conn.cursor()
    c.executemany('INSERT INTO courses (title, description, price) VALUES (?, ?, ?)', courses)
    conn.commit()
    conn.close()


if __name__ == "__main__":
    create_database()
    all_courses = scrape_all_pages()
    insert_data_to_db(all_courses)
    print(f"Data from all pages has been successfully scraped and inserted into the database.")