SR05 committed on
Commit
f8f8d94
1 Parent(s): e53f612

Upload visa_D_search.py

Browse files
Files changed (1) hide show
  1. visa_D_search.py +96 -0
visa_D_search.py ADDED
@@ -0,0 +1,96 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
#!/usr/bin/env python
# coding: utf-8

# Scrape the Irish embassy (New Delhi) visa-decisions page, find the link to
# the "Visa decisions made from 1 January 2024 to ..." spreadsheet, download
# the .ods file into memory, and load it as a cleaned two-column DataFrame.

import requests
import pandas as pd
from io import BytesIO
from bs4 import BeautifulSoup

# URL of the website to scrape
URL = "https://www.ireland.ie/en/india/newdelhi/services/visas/processing-times-and-decisions/"

# Anchor text that identifies the spreadsheet download link on the page.
LINK_TEXT_MARKER = "Visa decisions made from 1 January 2024 to"

# Headers to mimic a browser request
HEADERS = {
    "User-Agent": (
        "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 "
        "(KHTML, like Gecko) Chrome/114.0.0.0 Safari/537.36"
    )
}

# Seconds before a hung connection is abandoned; without a timeout,
# requests.get() can block forever.
REQUEST_TIMEOUT = 30


def _find_file_url(soup, marker=LINK_TEXT_MARKER):
    """Return the href of the first <a> tag whose text contains *marker*.

    Returns None when no matching anchor is found.
    """
    for link in soup.find_all('a'):
        if marker in link.get_text(strip=True):
            return link.get('href')
    return None


def _clean_decisions(df):
    """Normalize the raw spreadsheet DataFrame to its two data columns.

    The .ods sheet has two leading junk columns ("Unnamed: 0"/"Unnamed: 1")
    and preamble rows above the real header row ("Application Number").
    Returns a DataFrame with columns ['Application Number', 'Decision'] and
    the preamble rows removed.

    Raises IndexError (caught by the caller) if no "Application Number"
    header row is present, and KeyError if the junk columns are absent.
    """
    # Step 1: drop unnecessary columns ("Unnamed: 0" and "Unnamed: 1")
    df = df.drop(columns=["Unnamed: 0", "Unnamed: 1"])

    # Step 2: find the index where the data starts ("Application Number" row)
    header_row_index = df[df['Unnamed: 2'] == 'Application Number'].index[0]

    # Step 3: promote that row to header and skip the rows before actual data
    df.columns = df.iloc[header_row_index]
    df = df[header_row_index + 1:].reset_index(drop=True)

    # Step 4: rename the columns for clarity
    df.columns = ['Application Number', 'Decision']

    # Step 5: drop any rows with all NaN values (optional cleanup)
    return df.dropna(how='all')


def main():
    """Fetch the page, locate and download the .ods file, and parse it."""
    # Send an HTTP GET request to the website.
    response = requests.get(URL, headers=HEADERS, timeout=REQUEST_TIMEOUT)
    if response.status_code != 200:
        print(f"Failed to retrieve the webpage. Status code: {response.status_code}")
        return

    soup = BeautifulSoup(response.content, 'html.parser')

    # Search the page's anchors for the spreadsheet link.
    file_url = _find_file_url(soup)
    if file_url is None:
        print("The specified link was not found.")
        return

    # Make the link absolute if it's relative.
    if not file_url.startswith('http'):
        file_url = requests.compat.urljoin(URL, file_url)

    # Download the file into memory.
    file_response = requests.get(file_url, headers=HEADERS, timeout=REQUEST_TIMEOUT)
    if file_response.status_code != 200:
        print("Failed to download the file. Status code:", file_response.status_code)
        return

    try:
        # Read the .ods file into a DataFrame and normalize it.
        # NOTE(review): `df` is built but not used further here — the original
        # notebook removed its download/display step after this point.
        df = _clean_decisions(
            pd.read_excel(BytesIO(file_response.content), engine='odf')
        )
    except Exception as e:
        # Broad on purpose: any parse/cleanup failure is reported, not raised.
        print("Error reading the .ods file:", e)


if __name__ == "__main__":
    main()