# Source: laxminarasimha6 — "Upload 3 files" (commit cdc9adf, verified)
import os
import requests
from bs4 import BeautifulSoup
import sqlite3
from dotenv import load_dotenv
# Load environment variables from a local .env file, if one exists.
# NOTE(review): no env var is read anywhere in this file — presumably
# other modules in the upload rely on it; confirm before removing.
load_dotenv()
# Database setup
def create_database(db_path='courses.db'):
    """Create the SQLite database file and the ``courses`` table if absent.

    Parameters
    ----------
    db_path : str, optional
        Path to the SQLite file. Defaults to ``'courses.db'``, the
        location the rest of this script uses.
    """
    conn = sqlite3.connect(db_path)
    try:
        # 'price' is TEXT, not numeric: the site shows values like "Free".
        conn.execute('''CREATE TABLE IF NOT EXISTS courses
            (id INTEGER PRIMARY KEY, title TEXT, description TEXT, price TEXT)''')
        conn.commit()
    finally:
        # Close even if table creation fails (original leaked the connection).
        conn.close()
# Web scraping function to get course data from a specific page
def scrape_courses_from_page(page_number, timeout=10):
    """Scrape course listings from one page of the Analytics Vidhya catalog.

    Parameters
    ----------
    page_number : int
        1-based page index of the course collection listing.
    timeout : float, optional
        Seconds to wait for the HTTP response (default 10). The original
        call had no timeout and could hang indefinitely.

    Returns
    -------
    list[tuple[str, str, str]]
        One ``(title, description, price)`` tuple per course card found.

    Raises
    ------
    requests.HTTPError
        If the server responds with a 4xx/5xx status.
    """
    url = f"https://courses.analyticsvidhya.com/collections/courses?page={page_number}"
    response = requests.get(url, timeout=timeout)
    # Fail loudly on HTTP errors instead of silently parsing an error page.
    response.raise_for_status()
    soup = BeautifulSoup(response.text, 'html.parser')
    courses = []
    # Each course card is rendered as an <li> in the products__list.
    for course_item in soup.find_all('li', class_='products__list-item'):
        title_tag = course_item.find('h3')
        title = title_tag.text.strip() if title_tag else 'No title available'
        price_tag = course_item.find('span', class_='course-card__price')
        price = price_tag.text.strip() if price_tag else "Price not available"
        # No dedicated description element on the card; the <h4> subtitle
        # is the closest available text.
        description_tag = course_item.find('h4')
        description = description_tag.text.strip() if description_tag else 'No description available'
        courses.append((title, description, price))
    return courses
# Scrape every listing page and collect the results for database insertion
def scrape_all_pages(num_pages=8):
    """Scrape pages ``1..num_pages`` of the course catalog.

    Parameters
    ----------
    num_pages : int, optional
        Number of listing pages to fetch. Defaults to 8, the page count
        the original script hard-coded; pass a different value if the
        catalog grows.

    Returns
    -------
    list[tuple[str, str, str]]
        All ``(title, description, price)`` tuples across the pages,
        in page order.
    """
    all_courses = []
    for page in range(1, num_pages + 1):
        print(f"Scraping page {page}...")
        all_courses.extend(scrape_courses_from_page(page))
    return all_courses
# Insert scraped data into the database
def insert_data_to_db(courses, db_path='courses.db'):
    """Bulk-insert course rows into the ``courses`` table.

    Parameters
    ----------
    courses : iterable of tuple
        ``(title, description, price)`` tuples, as produced by the
        scraping functions. An empty iterable is a no-op.
    db_path : str, optional
        Path to the SQLite file (default ``'courses.db'``).
    """
    conn = sqlite3.connect(db_path)
    try:
        conn.executemany(
            'INSERT INTO courses (title, description, price) VALUES (?, ?, ?)',
            courses,
        )
        conn.commit()
    finally:
        # Close even when the insert fails (original leaked on error).
        conn.close()
if __name__ == "__main__":
    # Pipeline: ensure schema exists, scrape all listing pages, persist rows.
    create_database()
    all_courses = scrape_all_pages()
    insert_data_to_db(all_courses)
    # Plain string — the original used an f-string with no placeholders.
    print("Data from all pages has been successfully scraped and inserted into the database.")