# (The three lines that originally appeared here — "File size: 2,437 Bytes",
# a column of git-blame commit hashes, and a line-number gutter — were
# viewer/extraction artifacts, not source code; commented out so the file parses.)
import requests # For making HTTP requests
import pandas as pd
import streamlit as st
from io import BytesIO
from bs4 import BeautifulSoup # Add this import for BeautifulSoup
# Assuming fetch_data() is already defined, call this function to fetch the data
@st.cache_data(ttl=3600)
def fetch_data():
    """Scrape the Irish embassy (New Delhi) visa-decisions page and return the
    published decisions spreadsheet as a DataFrame.

    Steps: fetch the listing page, find the link whose anchor text contains
    "Visa decisions made from 1 January 2025", download the linked .ods file,
    and normalise it to two columns.

    Returns:
        pandas.DataFrame with columns ["Application Number", "Decision"]
        (Application Number as str), or None on any failure — each failure
        path reports via st.error() instead of raising.
    """
    url = "https://www.ireland.ie/en/india/newdelhi/services/visas/processing-times-and-decisions/"
    # Browser-like User-Agent: the site may reject default requests UA strings.
    headers = {
        "User-Agent": (
            "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 "
            "(KHTML, like Gecko) Chrome/114.0.0.0 Safari/537.36"
        )
    }

    # Fetch the webpage. A timeout keeps the Streamlit app from hanging
    # forever on a stalled connection, and network errors are reported
    # through the same st.error path instead of crashing the app.
    try:
        response = requests.get(url, headers=headers, timeout=30)
    except requests.RequestException:
        st.error("Failed to fetch the webpage. Please try again later.")
        return None
    if response.status_code != 200:
        st.error("Failed to fetch the webpage. Please try again later.")
        return None

    # Parse the HTML to find the .ods link by its anchor text.
    soup = BeautifulSoup(response.content, "html.parser")
    file_url = None
    for link in soup.find_all("a"):
        if "Visa decisions made from 1 January 2025" in link.get_text():
            href = link.get("href")
            if not href:
                # Matching anchor without an href — keep scanning rather
                # than crash on None.startswith below.
                continue
            # Resolve relative links against the page URL.
            file_url = href if href.startswith("http") else requests.compat.urljoin(url, href)
            break
    if not file_url:
        st.error("Could not find the visa decisions file link on the website.")
        return None

    # Download the .ods file (larger payload, so a longer timeout).
    try:
        ods_response = requests.get(file_url, headers=headers, timeout=60)
    except requests.RequestException:
        st.error("Failed to download the visa decisions file.")
        return None
    if ods_response.status_code != 200:
        st.error("Failed to download the visa decisions file.")
        return None

    # Parse the .ods spreadsheet in memory (engine="odf" requires odfpy).
    ods_file = BytesIO(ods_response.content)
    df = pd.read_excel(ods_file, engine="odf")

    # Clean up: drop fully-empty rows, then keep only the first two columns.
    df.dropna(how="all", inplace=True)
    df.reset_index(drop=True, inplace=True)
    if len(df.columns) > 2:
        df = df.iloc[:, :2]
    if len(df.columns) == 2:
        df.columns = ["Application Number", "Decision"]
    else:
        st.error("Insufficient data columns detected.")
        return None

    # Application numbers may parse as ints/floats; normalise to str so
    # downstream lookups compare consistently.
    df["Application Number"] = df["Application Number"].astype(str)
    return df
# Now, define precomputed_df to be used in other scripts.
# NOTE: this runs at import time; thanks to @st.cache_data(ttl=3600) the
# scrape is re-executed at most once per hour per session, and the value
# may be None if the fetch/download failed (callers must handle that).
precomputed_df = fetch_data() # Precompute the dataframe
|