SR05 committed on
Commit
7083c8e
1 Parent(s): 74c5d60

Create Test.py

Browse files
Files changed (1) hide show
  1. Test.py +47 -0
Test.py ADDED
@@ -0,0 +1,47 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
####
"""A simple web scraping script using the requests and BeautifulSoup libraries.

It performs the following steps:
1. Send an HTTP GET request to a website.
2. Parse the HTML content of the page.
3. Extract relevant data (paragraphs and tables).
4. Display the extracted data in the terminal.
"""

import requests
from bs4 import BeautifulSoup

# URL of the website to scrape
url = "https://www.ireland.ie/en/india/newdelhi/services/visas/processing-times-and-decisions/"

# Headers to mimic a browser request (some sites reject the default
# python-requests User-Agent)
headers = {
    "User-Agent": (
        "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 "
        "(KHTML, like Gecko) Chrome/114.0.0.0 Safari/537.36"
    )
}

try:
    # A timeout is required: without it requests.get can block forever
    # on an unresponsive host.
    response = requests.get(url, headers=headers, timeout=30)
except requests.RequestException as exc:
    # Network-level failures (DNS error, refused connection, timeout)
    # raise before any response exists — report them instead of crashing.
    print(f"Failed to retrieve the webpage: {exc}")
else:
    # Check if the request was successful (status code 200)
    if response.status_code == 200:
        # Parse the HTML content of the page
        soup = BeautifulSoup(response.content, 'html.parser')

        # Extract and print every paragraph, numbered from 1
        paragraphs = soup.find_all('p')
        for i, paragraph in enumerate(paragraphs, start=1):
            print(f"Paragraph {i}: {paragraph.get_text(strip=True)}")
            print("-" * 80)

        # Example: Scraping tables (if there are any) — one tab-separated
        # line per table row, header and data cells alike.
        tables = soup.find_all('table')
        for table in tables:
            print("\nTable found:")
            rows = table.find_all('tr')
            for row in rows:
                cells = row.find_all(['th', 'td'])
                cell_data = [cell.get_text(strip=True) for cell in cells]
                print("\t".join(cell_data))
    else:
        print(f"Failed to retrieve the webpage. Status code: {response.status_code}")