laxminarasimha6 committed on
Commit
cdc9adf
·
verified ·
1 Parent(s): 28d6d8a

Upload 3 files

Browse files
Files changed (3) hide show
  1. courses.db +0 -0
  2. main.py +78 -0
  3. requirements.txt +7 -0
courses.db ADDED
Binary file (20.5 kB). View file
 
main.py ADDED
@@ -0,0 +1,78 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ import requests
3
+ from bs4 import BeautifulSoup
4
+ import sqlite3
5
+ from dotenv import load_dotenv
6
+
7
+ # Load environment variables
8
+ load_dotenv()
9
+
10
+
11
+ # Database setup
12
def create_database():
    """Create the ``courses`` table in ``courses.db`` if it does not exist yet.

    The table has an integer primary key ``id`` plus the text columns
    ``title``, ``description`` and ``price``. Because the statement uses
    ``IF NOT EXISTS``, calling this function repeatedly is safe.
    """
    conn = sqlite3.connect('courses.db')
    try:
        # 'price' holds the price label of a course (free or paid).
        conn.execute(
            '''CREATE TABLE IF NOT EXISTS courses
               (id INTEGER PRIMARY KEY, title TEXT, description TEXT, price TEXT)'''
        )
        conn.commit()
    finally:
        # Close the connection even when the statement fails so the
        # database handle is never leaked.
        conn.close()
20
+
21
+
22
+ # Web scraping function to get course data from a specific page
23
def scrape_courses_from_page(page_number, timeout=10):
    """Scrape one listing page of the Analytics Vidhya course collection.

    Args:
        page_number: Value substituted into the ``page`` query parameter.
        timeout: Seconds to wait for the server before giving up.

    Returns:
        A list of ``(title, description, price)`` tuples, one for each
        ``<li class="products__list-item">`` element on the page. A field
        whose tag is missing is replaced by a placeholder string.

    Raises:
        requests.RequestException: If the request fails, times out, or the
            server responds with an HTTP error status.
    """
    url = f"https://courses.analyticsvidhya.com/collections/courses?page={page_number}"
    # Without a timeout, requests may block forever on an unresponsive server.
    response = requests.get(url, timeout=timeout)
    # Fail loudly instead of parsing an error page and returning no courses.
    response.raise_for_status()
    soup = BeautifulSoup(response.text, 'html.parser')

    courses = []

    # Every course card is a list item inside the products list.
    for course_item in soup.find_all('li', class_='products__list-item'):
        # Course title lives in the card's <h3>.
        title_tag = course_item.find('h3')
        title = title_tag.text.strip() if title_tag else 'No title available'

        # Price label, when the card shows one.
        price_tag = course_item.find('span', class_='course-card__price')
        price = price_tag.text.strip() if price_tag else "Price not available"

        # The card has no dedicated description element; its <h4> text is
        # used as the description instead.
        description_tag = course_item.find('h4')
        description = description_tag.text.strip() if description_tag else 'No description available'

        courses.append((title, description, price))

    return courses
50
+
51
+
52
+ # Scrape all pages (total 8 pages) and insert data into the database
53
def scrape_all_pages(total_pages=8):
    """Scrape listing pages ``1`` through ``total_pages`` and combine the results.

    Args:
        total_pages: Number of listing pages to fetch. Defaults to 8, the
            page count this script was written for. A value of zero or
            less fetches nothing.

    Returns:
        A single list with the course tuples of every page, in page order.
    """
    all_courses = []

    # Pages are numbered starting at 1, hence the +1 on the upper bound.
    for page in range(1, total_pages + 1):
        print(f"Scraping page {page}...")
        all_courses.extend(scrape_courses_from_page(page))

    return all_courses
63
+
64
+
65
+ # Insert scraped data into the database
66
def insert_data_to_db(courses):
    """Insert course rows into the ``courses`` table of ``courses.db``.

    Args:
        courses: Iterable of ``(title, description, price)`` tuples. An
            empty iterable inserts nothing.

    Raises:
        sqlite3.Error: If the insert fails, e.g. because the table does
            not exist.
    """
    conn = sqlite3.connect('courses.db')
    try:
        # Placeholders keep scraped text from being interpreted as SQL.
        conn.executemany(
            'INSERT INTO courses (title, description, price) VALUES (?, ?, ?)',
            courses,
        )
        conn.commit()
    finally:
        # Close the connection even when the insert fails so the database
        # handle is never leaked.
        conn.close()
72
+
73
+
74
if __name__ == "__main__":
    # Pipeline: make sure the table exists, scrape every listing page,
    # then store all scraped rows in one batch.
    create_database()
    all_courses = scrape_all_pages()
    insert_data_to_db(all_courses)
    # Plain string: the message has no placeholders, so no f-string is needed.
    print("Data from all pages has been successfully scraped and inserted into the database.")
requirements.txt ADDED
@@ -0,0 +1,7 @@
 
 
 
 
 
 
 
 
1
+ requests
2
+ beautifulsoup4
3
+ gradio
4
+ streamlit
5
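+ # sqlite3 is part of the Python standard library; it is not a pip-installable package and must not be listed as a requirement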
6
+ python-dotenv
7
+ openai