# (The three lines that originally appeared here — "File size: 2,437 Bytes",
# a column of git-blame commit hashes, and a line-number gutter — were
# viewer/extraction artifacts, not source code; commented out so the file parses.)
import requests # For making HTTP requests
import pandas as pd
import streamlit as st
from io import BytesIO
from bs4 import BeautifulSoup # Add this import for BeautifulSoup
# Assuming fetch_data() is already defined, call this function to fetch the data
@st.cache_data(ttl=3600)
def fetch_data():
    """Scrape the Irish embassy (New Delhi) visa-decisions page and return the
    published decisions spreadsheet as a DataFrame.

    Steps: fetch the listing page, find the link whose anchor text contains
    "Visa decisions made from 1 January 2025", download the linked .ods file,
    and normalise it to two columns.

    Returns:
        pandas.DataFrame with columns ["Application Number", "Decision"]
        (Application Number as str), or None on any failure — each failure
        path reports via st.error() instead of raising.
    """
    url = "https://www.ireland.ie/en/india/newdelhi/services/visas/processing-times-and-decisions/"
    # Browser-like User-Agent: the site may reject default requests UA strings.
    headers = {
        "User-Agent": (
            "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 "
            "(KHTML, like Gecko) Chrome/114.0.0.0 Safari/537.36"
        )
    }

    # Fetch the webpage. A timeout keeps the Streamlit app from hanging
    # forever on a stalled connection, and network errors are reported
    # through the same st.error path instead of crashing the app.
    try:
        response = requests.get(url, headers=headers, timeout=30)
    except requests.RequestException:
        st.error("Failed to fetch the webpage. Please try again later.")
        return None
    if response.status_code != 200:
        st.error("Failed to fetch the webpage. Please try again later.")
        return None

    # Parse the HTML to find the .ods link by its anchor text.
    soup = BeautifulSoup(response.content, "html.parser")
    file_url = None
    for link in soup.find_all("a"):
        if "Visa decisions made from 1 January 2025" in link.get_text():
            href = link.get("href")
            if not href:
                # Matching anchor without an href — keep scanning rather
                # than crash on None.startswith below.
                continue
            # Resolve relative links against the page URL.
            file_url = href if href.startswith("http") else requests.compat.urljoin(url, href)
            break
    if not file_url:
        st.error("Could not find the visa decisions file link on the website.")
        return None

    # Download the .ods file (larger payload, so a longer timeout).
    try:
        ods_response = requests.get(file_url, headers=headers, timeout=60)
    except requests.RequestException:
        st.error("Failed to download the visa decisions file.")
        return None
    if ods_response.status_code != 200:
        st.error("Failed to download the visa decisions file.")
        return None

    # Parse the .ods spreadsheet in memory (engine="odf" requires odfpy).
    ods_file = BytesIO(ods_response.content)
    df = pd.read_excel(ods_file, engine="odf")

    # Clean up: drop fully-empty rows, then keep only the first two columns.
    df.dropna(how="all", inplace=True)
    df.reset_index(drop=True, inplace=True)
    if len(df.columns) > 2:
        df = df.iloc[:, :2]
    if len(df.columns) == 2:
        df.columns = ["Application Number", "Decision"]
    else:
        st.error("Insufficient data columns detected.")
        return None

    # Application numbers may parse as ints/floats; normalise to str so
    # downstream lookups compare consistently.
    df["Application Number"] = df["Application Number"].astype(str)
    return df
# Now, define precomputed_df to be used in other scripts.
# NOTE: this runs at import time; thanks to @st.cache_data(ttl=3600) the
# scrape is re-executed at most once per hour per session, and the value
# may be None if the fetch/download failed (callers must handle that).
precomputed_df = fetch_data() # Precompute the dataframe
|