# Delhi_Irish_visa_decisions / visa_checker_with_download.py
import streamlit as st
import pandas as pd
import bisect
import requests
from io import BytesIO
from bs4 import BeautifulSoup
from fpdf import FPDF
# ------------------------------------------------------------------------------------
# Step 1: Load Data (Fetch and Prepare the DataFrame)
# ------------------------------------------------------------------------------------
@st.cache_data(ttl=3600)
def fetch_ods_file():
"""
Fetches the .ods file from the visa decisions website and returns its binary content.
Returns:
- A BytesIO object containing the file content if successful.
- The file name for naming convention.
- None, None if the file could not be fetched.
"""
url = "https://www.ireland.ie/en/india/newdelhi/services/visas/processing-times-and-decisions/"
headers = {
"User-Agent": (
"Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 "
"(KHTML, like Gecko) Chrome/114.0.0.0 Safari/537.36"
)
}
response = requests.get(url, headers=headers)
if response.status_code == 200:
soup = BeautifulSoup(response.content, 'html.parser')
links = soup.find_all('a')
# Find the link containing the specific text
file_url = None
file_name = None
for link in links:
link_text = link.get_text(strip=True)
if "Visa decisions made from 1 January 2024 to" in link_text:
file_url = link.get('href')
file_name = link_text.replace(" ", "_").replace("/", "-") + ".ods"
break
if file_url:
# Resolve relative URLs to absolute
if not file_url.startswith("http"):
file_url = requests.compat.urljoin(url, file_url)
file_response = requests.get(file_url, headers=headers)
if file_response.status_code == 200:
return BytesIO(file_response.content), file_name
return None, None
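
# Note: the spreadsheet link on the ireland.ie page is typically a root-relative href,
# which the urljoin() call above resolves against the page URL. A hypothetical example
# (the .ods path shown is illustrative, not the real file name):
#
#   requests.compat.urljoin(
#       "https://www.ireland.ie/en/india/newdelhi/services/visas/processing-times-and-decisions/",
#       "/media/newdelhi/visa-decisions.ods",
#   )
#   # -> "https://www.ireland.ie/media/newdelhi/visa-decisions.ods"
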
@st.cache_data
def prepare_dataframe(file):
"""
Prepares and cleans the DataFrame from the fetched .ods file.
Args:
file: The .ods file content as BytesIO.
Returns:
A cleaned and sorted DataFrame ready for searching.
"""
df = pd.read_excel(file, engine='odf')
df.drop(columns=["Unnamed: 0", "Unnamed: 1"], inplace=True, errors="ignore")
df.dropna(how="all", inplace=True)
df.reset_index(drop=True, inplace=True)
# Identify the header row
for idx, row in df.iterrows():
if row["Unnamed: 2"] == "Application Number" and row["Unnamed: 3"] == "Decision":
df.columns = ["Application Number", "Decision"]
df = df.iloc[idx + 1:] # Skip the header row
break
# Process application numbers and sort the DataFrame
df["Application Number"] = df["Application Number"].astype(str).str.strip().astype(int)
df.sort_values(by="Application Number", inplace=True)
df.reset_index(drop=True, inplace=True)
return df
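
# Sketch of the transformation above, assuming the published .ods keeps its current
# layout (two leading blank columns, a title block, then the real header row). The
# application numbers shown are placeholders, not real records:
#
#   raw (as read by pandas)                        cleaned
#   Unnamed: 2             Unnamed: 3              Application Number   Decision
#   "Application Number"   "Decision"       ->     64000001             Refused
#   64000001               Refused                 64000123             Approved
#   64000123               Approved
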
# ------------------------------------------------------------------------------------
# Step 2: Binary Search Utility for Finding Nearest Application Numbers
# ------------------------------------------------------------------------------------
def binary_search_nearest(df, target):
"""
Uses binary search to find the nearest application numbers in the DataFrame.
Args:
df: The DataFrame containing the application numbers.
target: The target application number to search for.
Returns:
Two nearest application numbers (before and after the target).
"""
application_numbers = df["Application Number"].tolist()
pos = bisect.bisect_left(application_numbers, target)
before = application_numbers[pos - 1] if pos > 0 else None
after = application_numbers[pos] if pos < len(application_numbers) else None
return before, after
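
# Worked example (values are illustrative only): with the sorted list
# [64000100, 64000250, 64000400] and target 64000300, bisect_left returns pos=2,
# so before=64000250 and after=64000400. If the target is smaller than every
# number in the list, before is None; if it is larger than every number, after is None.
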
# ------------------------------------------------------------------------------------
# Step 3: Export DataFrame to PDF
# ------------------------------------------------------------------------------------
def export_to_pdf(df, file_name):
"""
Converts the cleaned DataFrame to a PDF file.
Args:
df: The DataFrame to be converted to PDF.
file_name: The name of the PDF file to create.
Returns:
A BytesIO object containing the PDF file.
"""
pdf = FPDF()
pdf.set_auto_page_break(auto=True, margin=15)
pdf.add_page()
pdf.set_font("Arial", size=12)
# Title
pdf.set_font("Arial", style="B", size=16)
pdf.cell(0, 10, "Visa Decisions Data", ln=True, align="C")
pdf.ln(10)
# Table Header
pdf.set_font("Arial", style="B", size=12)
for col in df.columns:
pdf.cell(50, 10, col, border=1)
pdf.ln()
# Table Data
pdf.set_font("Arial", size=12)
for _, row in df.iterrows():
for cell in row:
pdf.cell(50, 10, str(cell), border=1)
pdf.ln()
pdf_output = BytesIO()
pdf.output(pdf_output)
pdf_output.seek(0)
return pdf_output
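
# Minimal usage sketch (names are illustrative): the returned buffer can be handed
# straight to Streamlit, which accepts str, bytes, or a file-like object for `data`:
#
#   pdf_buffer = export_to_pdf(df, "decisions.pdf")
#   st.download_button("Download PDF", data=pdf_buffer,
#                      file_name="decisions.pdf", mime="application/pdf")
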
# ------------------------------------------------------------------------------------
# Step 4: Allow User to Download Cleaned DataFrame
# ------------------------------------------------------------------------------------
def download_dataframe(df, file_name):
"""
Provides a download link for the cleaned DataFrame as CSV or PDF.
Args:
df: The cleaned DataFrame to download.
file_name: The name of the original file for naming convention.
"""
cleaned_file_name_csv = f"{file_name.replace('.ods', '.csv')}"
csv_data = df.to_csv(index=False)
st.sidebar.download_button(
label="Download Cleaned Data as CSV",
data=csv_data,
file_name=cleaned_file_name_csv,
mime="text/csv"
)
cleaned_file_name_pdf = f"{file_name.replace('.ods', '.pdf')}"
pdf_data = export_to_pdf(df, cleaned_file_name_pdf)
st.sidebar.download_button(
label="Download Cleaned Data as PDF",
data=pdf_data,
file_name=cleaned_file_name_pdf,
mime="application/pdf"
)
# ------------------------------------------------------------------------------------
# Step 5: Search Application Status
# ------------------------------------------------------------------------------------
def search_application(df):
"""
Handles the user input and searches for the application number in the DataFrame.
Args:
df: The DataFrame containing application numbers and decisions.
"""
user_input = st.text_input("Enter your Application Number (including IRL if applicable):")
if user_input:
# Validate user input
if not user_input.isdigit() or len(user_input) < 8:
st.warning("Please enter at least 8 digits for your VISA application number.")
return
application_number = int(user_input)
# Search for the application number in the DataFrame
result = df[df["Application Number"] == application_number]
if not result.empty:
decision = result.iloc[0]["Decision"]
if decision.lower() == "refused":
st.error(f"Application Number: {application_number}\n\nDecision: **Refused**")
elif decision.lower() == "approved":
st.success(f"Application Number: {application_number}\n\nDecision: **Approved**")
else:
st.info(f"Application Number: {application_number}\n\nDecision: **{decision}**")
else:
st.warning(f"No record found for Application Number: {application_number}.")
before, after = binary_search_nearest(df, application_number)
nearest_records = pd.DataFrame({
"Nearest Application": ["Before", "After"],
"Application Number": [before, after],
"Decision": [
df[df["Application Number"] == before]["Decision"].values[0] if before else None,
df[df["Application Number"] == after]["Decision"].values[0] if after else None
],
"Difference": [
application_number - before if before else None,
after - application_number if after else None
]
}).dropna()
if not nearest_records.empty:
st.subheader("Nearest Application Numbers")
st.table(nearest_records.reset_index(drop=True))
else:
st.info("No nearest application numbers found.")
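
# Illustrative output (placeholder numbers and decisions): searching for 64000300
# when only 64000250 and 64000400 exist would produce a table like
#
#   Nearest Application   Application Number   Decision   Difference
#   Before                64000250             Approved   50
#   After                 64000400             Refused    100
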
# ------------------------------------------------------------------------------------
# Main Streamlit Application Logic
# ------------------------------------------------------------------------------------
def main():
    st.title("Visa Application Status Checker")

    # Fetch and prepare the data
    ods_file, original_file_name = fetch_ods_file()
    if ods_file and original_file_name:
        df = prepare_dataframe(ods_file)
        if df is not None:
            # Provide download options for the cleaned DataFrame
            st.sidebar.header("Download Options")
            download_dataframe(df, original_file_name)

            # Search application
            search_application(df)
        else:
            st.error("Failed to prepare the data.")
    else:
        st.error("Failed to fetch the .ods file.")
if __name__ == "__main__":
    main()
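
# To run locally, assuming Streamlit and the other dependencies are installed
# (odfpy for pd.read_excel(engine='odf'), beautifulsoup4, and fpdf2):
#   streamlit run visa_checker_with_download.py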