SR05 committed on
Commit
7083c8e
1 Parent(s): 74c5d60

Create Test.py

Browse files
Files changed (1) hide show
  1. Test.py +47 -0
Test.py ADDED
@@ -0,0 +1,47 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
####
"""A simple web scraping script using the requests and BeautifulSoup libraries.

It performs the following steps:
1. Send an HTTP GET request to a website.
2. Parse the HTML content of the page.
3. Extract relevant data (paragraphs and tables).
4. Display the extracted data in the terminal.
"""

import requests
from bs4 import BeautifulSoup

# URL of the website to scrape
url = "https://www.ireland.ie/en/india/newdelhi/services/visas/processing-times-and-decisions/"

# Headers to mimic a browser request (some sites reject the default
# python-requests User-Agent)
headers = {
    "User-Agent": (
        "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 "
        "(KHTML, like Gecko) Chrome/114.0.0.0 Safari/537.36"
    )
}

try:
    # A timeout is required: without it requests.get can block forever
    # on an unresponsive host.
    response = requests.get(url, headers=headers, timeout=30)
except requests.RequestException as exc:
    # Network-level failures (DNS error, refused connection, timeout)
    # raise before any response exists — report them instead of crashing.
    print(f"Failed to retrieve the webpage: {exc}")
else:
    # Check if the request was successful (status code 200)
    if response.status_code == 200:
        # Parse the HTML content of the page
        soup = BeautifulSoup(response.content, 'html.parser')

        # Extract and print every paragraph, numbered from 1
        paragraphs = soup.find_all('p')
        for i, paragraph in enumerate(paragraphs, start=1):
            print(f"Paragraph {i}: {paragraph.get_text(strip=True)}")
            print("-" * 80)

        # Example: Scraping tables (if there are any) — one tab-separated
        # line per table row, header and data cells alike.
        tables = soup.find_all('table')
        for table in tables:
            print("\nTable found:")
            rows = table.find_all('tr')
            for row in rows:
                cells = row.find_all(['th', 'td'])
                cell_data = [cell.get_text(strip=True) for cell in cells]
                print("\t".join(cell_data))
    else:
        print(f"Failed to retrieve the webpage. Status code: {response.status_code}")