claytonsamples committed on
Commit
7a70225
1 Parent(s): ebd602f

Create scraper.py

Browse files
Files changed (1) hide show
  1. scraper.py +19 -0
scraper.py ADDED
@@ -0,0 +1,19 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Import necessary libraries
2
+ import requests
3
+ from bs4 import BeautifulSoup
4
+ import pandas as pd
5
+
6
class WebScraper:
    """Download pages and collect the text of their ``<p>`` elements.

    Results accumulate in ``self.data``, a DataFrame with one row per
    scraped URL and the columns ``url`` and ``content``.
    """

    def __init__(self, urls, timeout: float = 30):
        """Store the URLs to scrape and start with an empty result table.

        Args:
            urls: Iterable of URL strings to fetch when ``scrape`` is called.
            timeout: Seconds to wait on each HTTP request before giving up.
        """
        self.urls = urls
        self.timeout = timeout
        self.data = pd.DataFrame()

    def scrape(self) -> None:
        """Fetch every URL and append its paragraph text to ``self.data``.

        Rows from earlier calls are kept, so calling ``scrape`` again adds
        to the existing table instead of replacing it.

        Raises:
            requests.RequestException: If a request fails or times out.
        """
        rows = []
        for url in self.urls:
            # Without a timeout a single stalled host blocks the whole run.
            response = requests.get(url, timeout=self.timeout)
            soup = BeautifulSoup(response.text, 'html.parser')
            text = ' '.join(p.text for p in soup.find_all('p'))
            rows.append({'url': url, 'content': text})

        if not rows:
            # Nothing fetched: leave the existing table untouched.
            return

        # DataFrame.append was removed in pandas 2.0; build the new rows once
        # and merge them in a single step instead of copying the frame per URL.
        new_rows = pd.DataFrame(rows, columns=['url', 'content'])
        if self.data.empty:
            self.data = new_rows
        else:
            self.data = pd.concat([self.data, new_rows], ignore_index=True)

    def get_data(self) -> pd.DataFrame:
        """Return the DataFrame of results scraped so far."""
        return self.data