File size: 2,268 Bytes
065e3e9
3500eef
2f45c91
3500eef
065e3e9
2f0a65b
2f45c91
3500eef
 
2f0a65b
3500eef
 
 
 
2f0a65b
3500eef
80937eb
2f45c91
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
af07102
 
2f45c91
 
 
 
 
 
a990f92
2f45c91
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
import requests
import pandas as pd
import streamlit as st
from io import BytesIO
from bs4 import BeautifulSoup

@st.cache_data(ttl=3600, max_entries=1)
def fetch_data():
    """Download the visa-decisions spreadsheet and return it as a DataFrame.

    The function loads the processing-times page, looks for the first anchor
    whose text contains "Visa decisions made from 1 January 2024 to",
    downloads the file it points at, and parses it with the ``odf`` engine
    of ``pandas.read_excel``.

    Returns:
        A DataFrame with the columns ``'Application Number'`` and
        ``'Decision'``, or ``None`` when any step fails. Failures are
        reported to the user through ``st.error``.

    NOTE(review): the result is cached by ``st.cache_data`` for ``ttl=3600``
    seconds; a ``None`` returned on failure is presumably cached as well, so
    a transient outage may persist until the entry expires -- confirm whether
    that is acceptable for callers.
    """
    url = "https://www.ireland.ie/en/india/newdelhi/services/visas/processing-times-and-decisions/"
    headers = {
        "User-Agent": (
            "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 "
            "(KHTML, like Gecko) Chrome/114.0.0.0 Safari/537.36"
        )
    }
    link_marker = "Visa decisions made from 1 January 2024 to"
    expected_columns = ['Application Number', 'Decision']
    # Without a timeout, requests waits forever on a stalled connection and
    # the Streamlit script would hang with it.
    timeout_seconds = 30

    # --- Step 1: fetch the landing page -------------------------------------
    try:
        response = requests.get(url, headers=headers, timeout=timeout_seconds)
    except requests.RequestException:
        # Connection errors and timeouts take the same path as a bad status.
        st.error("Failed to retrieve the webpage.")
        return None
    if response.status_code != 200:
        st.error("Failed to retrieve the webpage.")
        return None

    # --- Step 2: locate the download link -----------------------------------
    soup = BeautifulSoup(response.content, "html.parser")
    # href of the first anchor whose visible text contains the marker; the
    # href itself may be missing (None), which is treated as "not found".
    file_url = next(
        (
            link.get('href')
            for link in soup.find_all('a')
            if link_marker in link.get_text(strip=True)
        ),
        None,
    )
    if not file_url:
        st.error("The file link was not found on the webpage.")
        return None

    # urljoin resolves relative links against the page URL and returns
    # absolute links unchanged, so no separate check is needed.
    file_url = requests.compat.urljoin(url, file_url)

    # --- Step 3: download the spreadsheet -----------------------------------
    try:
        file_response = requests.get(
            file_url, headers=headers, timeout=timeout_seconds
        )
    except requests.RequestException:
        st.error("Failed to download the file.")
        return None
    if file_response.status_code != 200:
        st.error("Failed to download the file.")
        return None

    # --- Step 4: parse and clean --------------------------------------------
    df = pd.read_excel(BytesIO(file_response.content), engine='odf')

    # Drop the unnamed leading columns (if present) and fully empty rows.
    df.drop(columns=["Unnamed: 0", "Unnamed: 1"], inplace=True, errors='ignore')
    df.dropna(how='all', inplace=True)
    df.reset_index(drop=True, inplace=True)

    # Renaming assumes exactly two columns remain; report a layout change
    # explicitly instead of failing with an opaque length-mismatch error.
    if df.shape[1] != len(expected_columns):
        st.error(
            "Unexpected file layout: expected "
            f"{len(expected_columns)} columns but found {df.shape[1]}."
        )
        return None
    df.columns = expected_columns

    return df