|
"All constants used in the project." |
|
|
|
from pathlib import Path |
|
import pandas |
|
|
|
|
|
# Directory that contains this settings file; every project path below is anchored on it.
REPO_DIR = Path(__file__).parent

# Working directories that live directly under the repository directory.
FHE_KEYS = REPO_DIR.joinpath(".fhe_keys")
CLIENT_FILES = REPO_DIR.joinpath("client_files")
SERVER_FILES = REPO_DIR.joinpath("server_files")

# Deployment directory. NOTE: this name designates the "model" sub-directory of
# "deployment_files", not "deployment_files" itself.
DEPLOYMENT_PATH = REPO_DIR.joinpath("deployment_files", "model")

# Pre-processor files (.pkl), one per data provider, located in the deployment directory.
PRE_PROCESSOR_APPLICANT_PATH = DEPLOYMENT_PATH.joinpath("pre_processor_applicant.pkl")
PRE_PROCESSOR_BANK_PATH = DEPLOYMENT_PATH.joinpath("pre_processor_bank.pkl")
PRE_PROCESSOR_CREDIT_BUREAU_PATH = DEPLOYMENT_PATH.joinpath("pre_processor_credit_bureau.pkl")

# Make sure the working directories exist as soon as this module is imported.
# DEPLOYMENT_PATH is intentionally not created here.
for _directory in (FHE_KEYS, CLIENT_FILES, SERVER_FILES):
    _directory.mkdir(exist_ok=True)
del _directory  # keep the module namespace free of the loop variable
|
|
|
|
|
# Base URL of the server (local host, port 8000).
SERVER_URL = "http://localhost:8000/"

# Relative path of the CSV dataset that this module loads with pandas.
DATA_PATH = "data/data.csv"

# Shape of the processed input; presumably one sample of 39 features once the three
# parties' pre-processed values are put side by side -- TODO confirm.
PROCESSED_INPUT_SHAPE = (1, 39)

# The parties that provide inputs, in their canonical order.
CLIENT_TYPES = ["applicant", "bank", "credit_bureau"]

# Position of each client type, i.e. its index within CLIENT_TYPES.
INPUT_INDEXES = {client_type: position for position, client_type in enumerate(CLIENT_TYPES)}

# Column range assigned to each client type. The three ranges are contiguous and
# together span columns 0 to 39, matching the second dimension of PROCESSED_INPUT_SHAPE.
INPUT_SLICES = dict(
    applicant=slice(0, 36),
    bank=slice(36, 37),
    credit_bureau=slice(37, 39),
)
|
|
|
|
|
# Dataset columns grouped by the party they are attributed to.
# The order of the names within each list is significant and must be kept.
APPLICANT_COLUMNS = [
    "Own_car",
    "Own_property",
    "Mobile_phone",
    "Num_children",
    "Household_size",
    "Total_income",
    "Age",
    "Income_type",
    "Education_type",
    "Family_status",
    "Housing_type",
    "Occupation_type",
]

BANK_COLUMNS = [
    "Account_age",
]

CREDIT_BUREAU_COLUMNS = [
    "Years_employed",
    "Employed",
]
|
|
|
# Load the dataset once, at import time; the dataset-derived constants of this module are
# computed from this frame.
# DATA_PATH is a relative path, so it is resolved against REPO_DIR (like every other path
# in this module) rather than against the current working directory of the process.
# Otherwise importing this module from any other directory fails with FileNotFoundError.
# If DATA_PATH is ever made absolute, the `/` join leaves it unchanged.
_data = pandas.read_csv(REPO_DIR / DATA_PATH, encoding="utf-8")
|
|
|
def get_min_max(data, column):
    """Return the integer bounds of a column as keyword arguments for Gradio's API.

    Args:
        data: Table supporting ``data[column]`` whose result exposes ``min()`` and
            ``max()`` (a pandas DataFrame in this module).
        column: Name of the column to inspect.

    Returns:
        dict: ``{"minimum": <int>, "maximum": <int>}``. Both bounds go through ``int()``,
        so fractional values are truncated toward zero.
    """
    values = data[column]
    lowest, highest = values.min(), values.max()
    return {"minimum": int(lowest), "maximum": int(highest)}
|
|
|
|
|
# "minimum"/"maximum" keyword arguments for the numeric inputs, taken from the dataset.
ACCOUNT_MIN_MAX = get_min_max(data=_data, column="Account_age")
CHILDREN_MIN_MAX = get_min_max(data=_data, column="Num_children")
INCOME_MIN_MAX = get_min_max(data=_data, column="Total_income")
AGE_MIN_MAX = get_min_max(data=_data, column="Age")
FAMILY_MIN_MAX = get_min_max(data=_data, column="Household_size")

# Fixed income and age values; presumably the defaults pre-filled in the interface --
# TODO confirm against the application code.
INCOME_VALUE = 12000
AGE_VALUE = 30


def _unique_values(column):
    """Return the distinct values found in the given dataset column, as a list."""
    return list(_data[column].unique())


# Choices offered for the categorical inputs: the distinct values present in the dataset.
INCOME_TYPES = _unique_values("Income_type")
OCCUPATION_TYPES = _unique_values("Occupation_type")
HOUSING_TYPES = _unique_values("Housing_type")
EDUCATION_TYPES = _unique_values("Education_type")
FAMILY_STATUS = _unique_values("Family_status")
|
# Labels of the "Years_employed" bins, listed in ascending order.
YEARS_EMPLOYED_BINS = ["0-2", "2-5", "5-8", "8-11", "11-18", "18+"]

# Position of each bin label within YEARS_EMPLOYED_BINS.
YEARS_EMPLOYED_BIN_NAME_TO_INDEX = {
    bin_name: index for index, bin_name in enumerate(YEARS_EMPLOYED_BINS)
}

# Sanity check run at import time: the dataset must contain as many distinct
# "Years_employed" values as there are bin labels.
# This is an explicit `raise` instead of an `assert` statement because `assert` is removed
# when Python runs with -O, which would silently disable the check. AssertionError is kept
# as the exception type so that the failure looks the same as before to any caller.
if len(YEARS_EMPLOYED_BINS) != len(_data["Years_employed"].unique()):
    raise AssertionError("Years_employed bins are not matching the expected list")