SR05 committed on
Commit
660a1d2
1 Parent(s): c05f4f8

Create app.py

Browse files
Files changed (1) hide show
  1. app.py +39 -0
app.py ADDED
@@ -0,0 +1,39 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
"""Fetch the Irish visa processing-times page and print its text content.

The script downloads a single page, then prints every ``<p>`` element and
every ``<table>`` (one tab-separated line per row) to standard output.
"""

import requests
from bs4 import BeautifulSoup

# URL of the website to scrape
url = "https://www.ireland.ie/en/india/newdelhi/services/visas/processing-times-and-decisions/"

# Headers to mimic a browser request
headers = {
    "User-Agent": (
        "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 "
        "(KHTML, like Gecko) Chrome/114.0.0.0 Safari/537.36"
    )
}

# Seconds to wait for the server before giving up. Without an explicit
# timeout, requests waits indefinitely on an unresponsive host.
REQUEST_TIMEOUT = 30


def print_paragraphs(soup):
    """Print the text of every ``<p>`` element, numbered from 1.

    Each paragraph is followed by an 80-character dashed separator line.
    """
    for index, paragraph in enumerate(soup.find_all("p"), start=1):
        print(f"Paragraph {index}: {paragraph.get_text(strip=True)}")
        print("-" * 80)


def print_tables(soup):
    """Print every ``<table>``, one tab-separated line per ``<tr>`` row.

    Both header (``<th>``) and data (``<td>``) cells are included, in
    document order.
    """
    for table in soup.find_all("table"):
        print("\nTable found:")
        for row in table.find_all("tr"):
            cells = row.find_all(["th", "td"])
            print("\t".join(cell.get_text(strip=True) for cell in cells))


def main():
    """Download the page and print its paragraphs and tables.

    Raises:
        SystemExit: If the request fails at the network level (DNS error,
            refused connection, timeout, ...). The message is written to
            standard error and the process exits with a non-zero status.
    """
    try:
        # Send an HTTP GET request to the website with headers
        response = requests.get(url, headers=headers, timeout=REQUEST_TIMEOUT)
    except requests.RequestException as exc:
        raise SystemExit(f"Failed to retrieve the webpage: {exc}") from exc

    # Anything other than a plain 200 is reported and nothing is parsed.
    if response.status_code != 200:
        print(f"Failed to retrieve the webpage. Status code: {response.status_code}")
        return

    # Parse the HTML content of the page
    soup = BeautifulSoup(response.content, "html.parser")

    print_paragraphs(soup)
    print_tables(soup)


if __name__ == "__main__":
    main()