File size: 3,010 Bytes
1ba3f22 9a997e4 1ba3f22 74c0c8e c119738 1ba3f22 74c0c8e b47829b 74c0c8e 9a997e4 8e0d56d 18ba8c1 8e0d56d 9a997e4 1ba3f22 9a997e4 993f2a6 c119738 a241bb3 b47829b c119738 8e0d56d c119738 8e0d56d c119738 8e0d56d c119738 b47829b 8e0d56d c119738 9a997e4 74c0c8e 8e0d56d 18ba8c1 a241bb3 31284a7 8e0d56d a241bb3 9a997e4 31284a7 9a997e4 993f2a6 9a997e4 b47829b 9a997e4 b47829b 9a997e4 b47829b |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 |
"All constants used in the project."
from pathlib import Path
import pandas
# The directory of this project
REPO_DIR = Path(__file__).parent

# Main necessary directories
# NOTE: the deployment files all live under the "model" sub-directory, so the
# path is built in one step instead of being assigned and then reassigned
DEPLOYMENT_PATH = REPO_DIR / "deployment_files" / "model"
FHE_KEYS = REPO_DIR / ".fhe_keys"
CLIENT_FILES = REPO_DIR / "client_files"
SERVER_FILES = REPO_DIR / "server_files"

# Paths targeting the pre-processors' saved files
PRE_PROCESSOR_APPLICANT_PATH = DEPLOYMENT_PATH / "pre_processor_applicant.pkl"
PRE_PROCESSOR_BANK_PATH = DEPLOYMENT_PATH / "pre_processor_bank.pkl"
PRE_PROCESSOR_CREDIT_BUREAU_PATH = DEPLOYMENT_PATH / "pre_processor_credit_bureau.pkl"

# Create the necessary directories (no error if they already exist)
FHE_KEYS.mkdir(exist_ok=True)
CLIENT_FILES.mkdir(exist_ok=True)
SERVER_FILES.mkdir(exist_ok=True)
# Store the server's URL
SERVER_URL = "http://localhost:8000/"

# Path to data file
DATA_PATH = "data/data.csv"

# Development settings
PROCESSED_INPUT_SHAPE = (1, 39)

# The three parties taking part in a prediction
CLIENT_TYPES = ["applicant", "bank", "credit_bureau"]

# Position of each client's input, derived from the order of CLIENT_TYPES
INPUT_INDEXES = {client_type: position for position, client_type in enumerate(CLIENT_TYPES)}

# Range of processed-feature columns owned by each client
INPUT_SLICES = {
    "applicant": slice(0, 36),  # First position: start from 0
    "bank": slice(36, 37),  # Second position: start from n_feature_applicant
    "credit_bureau": slice(37, 39),  # Third position: start from n_feature_applicant + n_feature_bank
}

# Fix column order for pre-processing steps
APPLICANT_COLUMNS = [
    "Own_car",
    "Own_property",
    "Mobile_phone",
    "Num_children",
    "Household_size",
    "Total_income",
    "Age",
    "Income_type",
    "Education_type",
    "Family_status",
    "Housing_type",
    "Occupation_type",
]
BANK_COLUMNS = ["Account_age"]
CREDIT_BUREAU_COLUMNS = ["Years_employed", "Employed"]
# Load the dataset once at import time; the constants below derive their UI
# ranges and choice lists from it.
# NOTE(review): DATA_PATH is relative, so this assumes the process is started
# from the repository root — confirm against the launch scripts
_data = pandas.read_csv(DATA_PATH, encoding="utf-8")
def get_min_max(data, column):
    """Return a column's min and max as keyword arguments for Gradio's API.

    The values are cast to plain ints so they can be fed directly to the
    widgets' `minimum`/`maximum` parameters.
    """
    column_values = data[column]
    min_max = {
        "minimum": int(column_values.min()),
        "maximum": int(column_values.max()),
    }
    return min_max
# App data min and max values, used to bound the Gradio input widgets
ACCOUNT_MIN_MAX = get_min_max(_data, "Account_age")
CHILDREN_MIN_MAX = get_min_max(_data, "Num_children")
INCOME_MIN_MAX = get_min_max(_data, "Total_income")
AGE_MIN_MAX = get_min_max(_data, "Age")
FAMILY_MIN_MAX = get_min_max(_data, "Household_size")

# Default values pre-filled in the app
INCOME_VALUE = 12000
AGE_VALUE = 30

# App data choices, taken from the dataset's categorical columns
INCOME_TYPES = list(_data["Income_type"].unique())
OCCUPATION_TYPES = list(_data["Occupation_type"].unique())
HOUSING_TYPES = list(_data["Housing_type"].unique())
EDUCATION_TYPES = list(_data["Education_type"].unique())
FAMILY_STATUS = list(_data["Family_status"].unique())

# Years_employed bin labels, in display order
YEARS_EMPLOYED_BINS = ['0-2', '2-5', '5-8', '8-11', '11-18', '18+']

# Map each bin label to its ordinal position (bin order)
YEARS_EMPLOYED_BIN_NAME_TO_INDEX = {bin_name: i for i, bin_name in enumerate(YEARS_EMPLOYED_BINS)}

# Sanity check at import time. An explicit raise is used instead of `assert`
# so the check is not stripped when running under `python -O`.
if len(YEARS_EMPLOYED_BINS) != len(list(_data["Years_employed"].unique())):
    raise ValueError("Years_employed bins are not matching the expected list")