import pandas as pd
import streamlit as st

def _extract_decision_table(raw):
    """Return the 'Application Number' / 'Decision' rows of *raw* as a new frame.

    Two layouts are accepted:

    * the columns are already labelled 'Application Number' and 'Decision'
      (the sheet's first row was the header), or
    * the header text sits in some row of the 'Unnamed: 2' / 'Unnamed: 3'
      columns, in which case everything up to and including that row is
      discarded.

    Raises:
        ValueError: if neither layout is present.
    """
    wanted = ['Application Number', 'Decision']

    # The header was on the first row, so pandas already used it as labels.
    if all(label in raw.columns for label in wanted):
        return raw[wanted].copy()

    placeholders = ['Unnamed: 2', 'Unnamed: 3']
    if not all(label in raw.columns for label in placeholders):
        raise ValueError(
            "expected columns 'Unnamed: 2' and 'Unnamed: 3' were not found"
        )

    is_header = (
        raw[placeholders[0]].eq(wanted[0]) & raw[placeholders[1]].eq(wanted[1])
    )
    if not is_header.any():
        raise ValueError(
            "header row 'Application Number' / 'Decision' was not found"
        )

    # argmax on a boolean array gives the position of the first True.
    first_data_row = int(is_header.to_numpy().argmax()) + 1

    # Select only the two relevant columns so extra columns cannot break the
    # rename, and copy so later assignments do not write into a slice.
    table = raw.iloc[first_data_row:][placeholders].copy()
    table.columns = wanted
    return table


def _coerce_application_numbers(table):
    """Return *table* with 'Application Number' converted to integers.

    Rows whose application number is blank, non-numeric or not a whole number
    are dropped rather than aborting the whole conversion.
    """
    as_text = table['Application Number'].astype(str).str.strip()
    numbers = pd.to_numeric(as_text, errors='coerce')

    # `% 1 == 0` is False for NaN and inf, so those rows are filtered as well.
    is_integer = numbers.notna() & (numbers % 1 == 0)

    cleaned = table.loc[is_integer].copy()
    cleaned['Application Number'] = numbers[is_integer].astype(int)
    return cleaned


@st.cache_data
def prepare_dataframe(file):
    """Load an .ods decisions sheet into a DataFrame sorted by application number.

    The sheet is read with the 'odf' engine, the 'Unnamed: 0' / 'Unnamed: 1'
    columns and fully empty rows are removed, the header row is located, and
    the application numbers are converted to integers.

    Args:
        file: Anything ``pandas.read_excel`` accepts as its first argument.

    Returns:
        A DataFrame with the columns 'Application Number' (int) and
        'Decision', sorted ascending by 'Application Number' with a fresh
        0..n-1 index; or ``None`` if anything goes wrong, in which case the
        error is reported through ``st.error``.
    """
    try:
        # Read the .ods file into a DataFrame
        raw = pd.read_excel(file, engine='odf')

        # Drop the leading filler columns and the completely empty rows
        raw = raw.drop(columns=["Unnamed: 0", "Unnamed: 1"], errors='ignore')
        raw = raw.dropna(how='all').reset_index(drop=True)

        table = _extract_decision_table(raw)
        table = _coerce_application_numbers(table)

        # Sort by application number so the result is ordered for lookups
        return table.sort_values(by='Application Number').reset_index(drop=True)
    except Exception as e:
        # UI boundary: surface the problem to the user instead of crashing.
        st.error(f"Error preparing the DataFrame: {e}")
        return None