Spaces:
Running
Running
# Import necessary libraries
import requests | |
from bs4 import BeautifulSoup | |
import pandas as pd | |
class WebScraper:
    """Collect the paragraph text of a list of web pages into a DataFrame.

    Each scraped page becomes one row with two columns: ``url`` (the address
    that was requested) and ``content`` (the text of every ``<p>`` element on
    the page, joined with single spaces).
    """

    def __init__(self, urls, timeout=10):
        """Store the URLs to scrape and start with an empty result frame.

        Args:
            urls: Iterable of URL strings to fetch when ``scrape`` is called.
            timeout: Seconds to wait for each HTTP request before
                ``requests`` raises a timeout error. Pass ``None`` to wait
                indefinitely.
        """
        self.urls = urls
        self.timeout = timeout
        self.data = pd.DataFrame()

    def _fetch_text(self, url):
        """Download *url* and return the text of its ``<p>`` elements."""
        # Without a timeout a single unresponsive host would block forever.
        response = requests.get(url, timeout=self.timeout)
        soup = BeautifulSoup(response.text, 'html.parser')
        return ' '.join(p.text for p in soup.find_all('p'))

    def scrape(self):
        """Fetch every URL and add one ``url``/``content`` row per page.

        Rows are added after any rows already held in ``self.data``, and the
        index is renumbered from zero. If a request raises, the exception
        propagates, but the pages fetched before the failure are still
        stored.
        """
        rows = []
        try:
            for url in self.urls:
                rows.append({'url': url, 'content': self._fetch_text(url)})
        finally:
            # Store whatever was collected, even when a later URL failed.
            self._store_rows(rows)

    def _store_rows(self, rows):
        """Merge freshly scraped *rows* (a list of dicts) into ``self.data``."""
        if not rows:
            return
        # DataFrame.append was removed in pandas 2.0; build the new frame in
        # one step instead of growing the frame row by row.
        scraped = pd.DataFrame(rows, columns=['url', 'content'])
        if self.data.empty:
            self.data = scraped
        else:
            self.data = pd.concat([self.data, scraped], ignore_index=True)

    def get_data(self):
        """Return the DataFrame holding everything scraped so far."""
        return self.data