import pandas as pd
import streamlit as st

# Header labels that mark the start of the real table inside the sheet.
_APP_COL = "Application Number"
_DECISION_COL = "Decision"


def _find_header_row(df):
    """Locate the row that holds the embedded column headers.

    Scans rows top to bottom and stops at the first row that contains both
    header labels (surrounding whitespace ignored), in any columns.

    Returns:
        A tuple ``(row_position, application_column, decision_column)`` where
        ``row_position`` is the 0-based position of the header row in *df* and
        the other two items are the labels of the columns holding each header
        cell; ``None`` when no such row exists.
    """
    columns = list(df.columns)
    for position, row in enumerate(df.itertuples(index=False, name=None)):
        labels = {}
        for column, cell in zip(columns, row):
            if isinstance(cell, str):
                # setdefault keeps the left-most column if a label repeats.
                labels.setdefault(cell.strip(), column)
        if _APP_COL in labels and _DECISION_COL in labels:
            return position, labels[_APP_COL], labels[_DECISION_COL]
    return None


@st.cache_data
def prepare_dataframe(file) -> "pd.DataFrame | None":
    """Load an .ods sheet and return its application/decision table.

    The sheet is read with ``pd.read_excel(file, engine='odf')``. Columns named
    ``Unnamed: 0`` / ``Unnamed: 1`` and fully empty rows are discarded. The
    table is then located either by column names that already match the two
    header labels, or by the first row whose cells contain both
    ``Application Number`` and ``Decision``; only rows below that header row
    are kept.

    Rows whose application number is not an integer (blank cells, notes,
    footers) are dropped rather than aborting the whole load.

    Args:
        file: Anything ``pd.read_excel`` accepts (path or file-like object).

    Returns:
        A DataFrame with columns ``Application Number`` (int64) and
        ``Decision``, sorted ascending by application number with a fresh
        0-based index; ``None`` if loading or parsing fails, in which case the
        error is reported through ``st.error``.
    """
    try:
        raw = pd.read_excel(file, engine="odf")
        raw = raw.drop(columns=["Unnamed: 0", "Unnamed: 1"], errors="ignore")
        raw = raw.dropna(how="all")

        # The header may already have been picked up as the column names.
        named = {str(column).strip(): column for column in raw.columns}
        if _APP_COL in named and _DECISION_COL in named:
            first_data_row = 0
            app_source = named[_APP_COL]
            decision_source = named[_DECISION_COL]
        else:
            header = _find_header_row(raw)
            if header is None:
                raise ValueError(
                    f"header row with '{_APP_COL}' and '{_DECISION_COL}' not found"
                )
            header_position, app_source, decision_source = header
            first_data_row = header_position + 1  # skip the header row itself

        # Select by column instead of renaming every column, so extra columns
        # in the sheet cannot cause a length-mismatch error. The copy avoids
        # writing into a slice of ``raw``.
        df = raw.iloc[first_data_row:][[app_source, decision_source]].copy()
        df.columns = [_APP_COL, _DECISION_COL]

        # Coerce instead of a hard ``astype(int)``: a single stray cell must
        # not make the entire file unusable.
        numbers = pd.to_numeric(
            df[_APP_COL].astype(str).str.strip(), errors="coerce"
        )
        is_integer = numbers.notna() & (numbers % 1 == 0)
        df = df.loc[is_integer].copy()
        # Explicit int64: plain ``int`` can be 32-bit on some platforms.
        df[_APP_COL] = numbers.loc[is_integer].astype("int64")

        # Sorted by application number so callers can search it quickly.
        return df.sort_values(by=_APP_COL).reset_index(drop=True)
    except Exception as e:
        # UI boundary: report the problem to the user and signal failure
        # with None, as callers of this function expect.
        st.error(f"Error preparing the DataFrame: {e}")
        return None