# Hugging Face Space: ppaihack / ZamaKlinikV2 (status: Sleeping)
# File size: 4,010 Bytes

import io
import os
import shutil

import joblib
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import seaborn as sns
import streamlit as st
from concrete.ml.deployment import FHEModelClient, FHEModelDev, FHEModelServer
from concrete.ml.sklearn.tree import XGBClassifier as ConcreteXGBClassifier
from sklearn.metrics import confusion_matrix
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.tree import DecisionTreeClassifier
from xgboost import XGBClassifier

# Location where the FHE client/server files are written
fhe_directory = '/tmp/fhe_client_server_files/'

# Always start from an empty directory: remove whatever a previous run left
# behind, then (re)create the folder
if os.path.exists(fhe_directory):
    shutil.rmtree(fhe_directory)
os.makedirs(fhe_directory)

# Streamlit title
st.title("Heart Disease Prediction Model")

# Load the data
# NOTE(review): the file carries an .xls extension but is parsed as CSV --
# confirm the on-disk format matches.
data = pd.read_csv('data/heart.xls')
st.write("### Dataset Information")
# DataFrame.info() prints its report and returns None, so passing its return
# value to Streamlit only renders "None". Capture the report in a text
# buffer and display that instead.
info_buffer = io.StringIO()
data.info(buf=info_buffer)
st.text(info_buffer.getvalue())

# Correlation matrix
data_corr = data.corr()
# Draw on an explicit Figure/Axes instead of pyplot's implicit global
# figure, so Streamlit receives a real Figure object
fig, ax = plt.subplots(figsize=(20, 20))
sns.heatmap(data=data_corr, annot=True, ax=ax)
st.write("### Correlation Heatmap")
st.pyplot(fig)
# The script is re-executed on every Streamlit rerun; close the figure so
# open figures do not accumulate in memory
plt.close(fig)

# Feature ranking based on correlation with the target
# Absolute values, so strong negative correlations rank as high as strong
# positive ones
feature_value = data_corr['output'].abs()
# Build the one-column frame with to_frame(). Passing this Series (whose name
# is 'output') to pd.DataFrame(..., columns=['correlation']) makes pandas
# *select* a column called 'correlation' from it; none exists, so every
# value comes back NaN and the sort below would leave the order unchanged.
features_corr = feature_value.to_frame(name='correlation')
feature_sorted = features_corr.sort_values(by=['correlation'], ascending=False)
feature_selected = feature_sorted.index.tolist()

st.write("### Selected Features Based on Correlation")
st.write(feature_selected)

# No column is dropped here: the dataset is only reordered by the ranking
# ('output' itself comes first, with a correlation of 1)
clean_data = data[feature_selected]

# Prepare data for model training
# Separate the target by name rather than by position, so 'output' can never
# end up among the features regardless of the column order
X = clean_data.drop(columns=['output'])
Y = clean_data['output']
x_train, x_test, y_train, y_test = train_test_split(X, Y, test_size=0.25, random_state=0)

st.write("### Training and Test Set Shapes")
st.write(f"Train shape: {x_train.shape}, Test shape: {x_test.shape}")

# Feature scaling
# Fit the scaler on the training split only, then apply that same fitted
# transformation to both splits
sc = StandardScaler()
sc.fit(x_train)
x_train = sc.transform(x_train)
x_test = sc.transform(x_test)

# Train the model
# XGBoost classifier with max_depth=6, fitted on the scaled training split
dt = XGBClassifier(max_depth=6).fit(x_train, y_train)

# Predict labels for the held-out split
y_pred = dt.predict(x_test)

# Confusion matrix of true vs. predicted labels
conf_mat = confusion_matrix(y_true=y_test, y_pred=y_pred)
st.write("### Confusion Matrix")
st.write(conf_mat)

# Accuracy on the held-out split, shown as a percentage rounded to two decimals
accuracy = dt.score(x_test, y_test)
accuracy_percent = round(accuracy * 100, 2)
st.write(f"### Model Accuracy: {accuracy_percent}%")

# Save the model
# Writes the trained classifier `dt` to a relative path, i.e. into the
# process's current working directory.
# NOTE(review): the fitted scaler `sc` is not saved alongside it here.
joblib.dump(dt, 'heart_disease_dt_model.pkl')

# Prepare FHE compatible model
# Convert the trained XGBoost model into its Concrete ML counterpart
# (n_bits=10), passing the scaled training data, then compile it on that
# same data.
fhe_compatible = ConcreteXGBClassifier.from_sklearn_model(dt, x_train, n_bits=10)
fhe_compatible.compile(x_train)

# Setup the development environment
# Save the compiled model's deployment files into fhe_directory.
dev = FHEModelDev(path_dir=fhe_directory, model=fhe_compatible)
dev.save()

# Setup the server
# The server loads from the same directory that dev.save() was pointed at.
server = FHEModelServer(path_dir=fhe_directory)
server.load()

# Setup the client
# The client reads from fhe_directory and uses /tmp/keys_client as its key
# directory. The serialized evaluation keys obtained here are what gets
# passed to server.run() further down in this script.
client = FHEModelClient(path_dir=fhe_directory, key_dir="/tmp/keys_client")
serialized_evaluation_keys = client.get_serialized_evaluation_keys()

# Run one encrypted prediction end-to-end as a demonstration.
# The feature frame `X` and the fitted scaler `sc` from the training steps
# are reused: re-reading the CSV and recomputing the correlation ranking
# would only reproduce the same values.

# First row of the feature frame, kept as a one-row DataFrame so its columns
# are exactly the ones the scaler and the model were fitted on
sample_row = X.iloc[[0]]

# The model was trained on standardized features, so the sample must go
# through the same fitted scaler before it is quantized and encrypted;
# feeding raw values would put the input on a different scale than training
sample_data = sc.transform(sample_row)

# Encrypt the sample data
encrypted_data = client.quantize_encrypt_serialize(sample_data)

# Run the server and get results
encrypted_result = server.run(encrypted_data, serialized_evaluation_keys)
result = client.deserialize_decrypt_dequantize(encrypted_result)

st.write("### Prediction Result")
st.write(result)