"""Streamlit app: heart-disease classifier with an FHE inference demo.

The script loads the heart dataset, shows basic exploration output, trains an
XGBoost classifier on standardized features, converts it to a Concrete ML
model, and runs one encrypted prediction through the Concrete ML
client/server deployment API.
"""

import io
import os
import shutil

import joblib
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import seaborn as sns
import streamlit as st
from concrete.ml.deployment import FHEModelDev, FHEModelClient, FHEModelServer
from concrete.ml.sklearn.tree import XGBClassifier as ConcreteXGBClassifier
from sklearn.metrics import confusion_matrix
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.tree import DecisionTreeClassifier
from xgboost import XGBClassifier

# Define the directory for FHE client/server files
fhe_directory = '/tmp/fhe_client_server_files/'

# Start every run from an empty directory: remove leftovers from a previous
# run (if any), then recreate it.
if os.path.exists(fhe_directory):
    shutil.rmtree(fhe_directory)
os.makedirs(fhe_directory)

# Streamlit title
st.title("Heart Disease Prediction Model")

# Load the data.
# NOTE(review): the file has an .xls extension but is parsed as CSV --
# presumably it really is comma-separated text; confirm.
data = pd.read_csv('data/heart.xls')

st.write("### Dataset Information")
# DataFrame.info() writes its report to a stream and returns None, so the
# report is captured in a buffer and the captured text is displayed.
info_buffer = io.StringIO()
data.info(buf=info_buffer)
st.text(info_buffer.getvalue())

# Correlation matrix
data_corr = data.corr()
fig, ax = plt.subplots(figsize=(20, 20))
sns.heatmap(data=data_corr, annot=True, ax=ax)
st.write("### Correlation Heatmap")
st.pyplot(fig)
plt.close(fig)  # release the figure; Streamlit re-executes the script on rerun

# Feature selection based on correlation.
# Rank columns by absolute correlation with the target. The Series is named
# 'output', so it is converted with to_frame(name=...): passing it to
# pd.DataFrame(..., columns=['correlation']) would align on the Series name
# and produce an all-NaN column, leaving the ranking unsorted.
feature_value = np.abs(data_corr['output'])  # Use absolute values for correlation
features_corr = feature_value.to_frame(name='correlation')
feature_sorted = features_corr.sort_values(by=['correlation'], ascending=False)
feature_selected = feature_sorted.index.tolist()

st.write("### Selected Features Based on Correlation")
st.write(feature_selected)

clean_data = data[feature_selected]

# Prepare data for model training.
# Drop the target by name rather than by position so it can never end up
# among the features, whatever the column ordering is.
X = clean_data.drop(columns=['output'])
Y = clean_data['output']

x_train, x_test, y_train, y_test = train_test_split(
    X, Y, test_size=0.25, random_state=0
)

st.write("### Training and Test Set Shapes")
st.write(f"Train shape: {x_train.shape}, Test shape: {x_test.shape}")

# Feature scaling: fit on the training split only, reuse for everything else.
sc = StandardScaler()
x_train = sc.fit_transform(x_train)
x_test = sc.transform(x_test)

# Train the model (the name `dt` is historical; this is an XGBoost classifier).
dt = XGBClassifier(max_depth=6)
dt.fit(x_train, y_train)

# Make predictions
y_pred = dt.predict(x_test)

# Confusion matrix
conf_mat = confusion_matrix(y_test, y_pred)
st.write("### Confusion Matrix")
st.write(conf_mat)

# Model accuracy
accuracy = dt.score(x_test, y_test)
st.write(f"### Model Accuracy: {round(accuracy * 100, 2)}%")

# Save the model
joblib.dump(dt, 'heart_disease_dt_model.pkl')

# Prepare FHE compatible model, using the scaled training set as the
# representative input for conversion and compilation.
fhe_compatible = ConcreteXGBClassifier.from_sklearn_model(dt, x_train, n_bits=10)
fhe_compatible.compile(x_train)

# Setup the development environment
dev = FHEModelDev(path_dir=fhe_directory, model=fhe_compatible)
dev.save()

# Setup the server
server = FHEModelServer(path_dir=fhe_directory)
server.load()

# Setup the client
client = FHEModelClient(path_dir=fhe_directory, key_dir="/tmp/keys_client")
serialized_evaluation_keys = client.get_serialized_evaluation_keys()

# Take the first row of feature data for prediction. The model was trained on
# standardized features, so the sample goes through the same fitted scaler
# before encryption. `X.iloc[[0]]` keeps a 2D (1, n_features) frame.
sample_data = sc.transform(X.iloc[[0]])

# Encrypt the sample data
encrypted_data = client.quantize_encrypt_serialize(sample_data)

# Run the server and get results
encrypted_result = server.run(encrypted_data, serialized_evaluation_keys)
result = client.deserialize_decrypt_dequantize(encrypted_result)

st.write("### Prediction Result")
st.write(result)