# newsletter2.0/scraper.py
# Import necessary libraries
import requests
from bs4 import BeautifulSoup
import pandas as pd
class WebScraper:
    """Scrape paragraph text from a list of URLs into a pandas DataFrame."""

    def __init__(self, urls):
        self.urls = urls
        self.data = pd.DataFrame(columns=['url', 'content'])

    def scrape(self):
        # Fetch each URL, extract the text of all <p> tags, and collect one row per page.
        rows = []
        for url in self.urls:
            response = requests.get(url)
            response.raise_for_status()
            soup = BeautifulSoup(response.text, 'html.parser')
            text = ' '.join(p.get_text() for p in soup.find_all('p'))
            rows.append({'url': url, 'content': text})
        # DataFrame.append was removed in pandas 2.0; build the frame from the collected rows instead.
        self.data = pd.DataFrame(rows, columns=['url', 'content'])

    def get_data(self):
        return self.data
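

# A minimal usage sketch, assuming the module is run directly as a script;
# the URL below is a placeholder, not one used by the original project.
if __name__ == '__main__':
    scraper = WebScraper(['https://example.com'])
    scraper.scrape()
    print(scraper.get_data())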