Spaces:

ppaihack
/

ZamaKlinikV2

Running

App Files Files Community

ZamaKlinikV2 / app.py

AeternumS

fix

5306193 1 day ago

raw

history blame

4.01 kB

	import io
	import os
	import shutil

	import joblib
	import matplotlib.pyplot as plt
	import numpy as np
	import pandas as pd
	import seaborn as sns
	import streamlit as st
	from concrete.ml.deployment import FHEModelDev, FHEModelClient, FHEModelServer
	from concrete.ml.sklearn.tree import XGBClassifier as ConcreteXGBClassifier
	from sklearn.metrics import confusion_matrix
	from sklearn.model_selection import train_test_split
	from sklearn.preprocessing import StandardScaler
	from sklearn.tree import DecisionTreeClassifier
	from xgboost import XGBClassifier

	# Directory that receives the FHE client/server files saved further down.
	fhe_directory = '/tmp/fhe_client_server_files/'

	# Start every run from an empty directory: remove whatever a previous run
	# left behind, then create the directory afresh. Both original branches
	# ended with the same makedirs call, so it is issued once here.
	if os.path.exists(fhe_directory):
	    shutil.rmtree(fhe_directory)
	os.makedirs(fhe_directory)

	# Streamlit title
	st.title("Heart Disease Prediction Model")

	# Load the data
	# NOTE: the file carries an .xls extension but is parsed as CSV here.
	data = pd.read_csv('data/heart.xls')
	st.write("### Dataset Information")
	# DataFrame.info() writes its summary to a buffer (stdout by default) and
	# returns None, so capture the text and render that instead of "None".
	info_buffer = io.StringIO()
	data.info(buf=info_buffer)
	st.text(info_buffer.getvalue())

	# Correlation matrix
	data_corr = data.corr()

	# Draw on an explicit Figure/Axes and hand that Figure to Streamlit rather
	# than the global pyplot module, then close it so that repeated script runs
	# do not keep accumulating open figures.
	heatmap_fig, heatmap_ax = plt.subplots(figsize=(20, 20))
	sns.heatmap(data=data_corr, annot=True, ax=heatmap_ax)
	st.write("### Correlation Heatmap")
	st.pyplot(heatmap_fig)
	plt.close(heatmap_fig)

	# Feature selection based on correlation
	# Rank every column by the absolute value of its correlation with 'output'.
	feature_value = np.abs(data_corr['output'])
	# Rename the Series before turning it into a frame. Passing a Series named
	# 'output' to pd.DataFrame(..., columns=['correlation']) makes pandas align
	# on the Series name, which can leave 'correlation' entirely NaN and turn
	# the sort below into a no-op.
	features_corr = feature_value.rename('correlation').to_frame()
	feature_sorted = features_corr.sort_values(by=['correlation'], ascending=False)
	feature_selected = feature_sorted.index.tolist()

	st.write("### Selected Features Based on Correlation")
	st.write(feature_selected)

	# Same columns as the raw data, reordered by correlation strength.
	clean_data = data[feature_selected]

	# Prepare data for model training
	# Pick the predictors by name instead of by column position, so the target
	# can never end up among the features whatever order clean_data is in.
	X = clean_data.drop(columns=['output'])
	Y = clean_data['output']
	x_train, x_test, y_train, y_test = train_test_split(X, Y, test_size=0.25, random_state=0)

	st.write("### Training and Test Set Shapes")
	st.write(f"Train shape: {x_train.shape}, Test shape: {x_test.shape}")

	# Feature scaling
	# Fit the scaler on the training split only and reuse its statistics for
	# the test split.
	sc = StandardScaler()
	x_train = sc.fit_transform(x_train)
	x_test = sc.transform(x_test)

	# Fit an XGBoost classifier (max tree depth 6) on the scaled training split
	dt = XGBClassifier(max_depth=6).fit(x_train, y_train)

	# Predict the held-out split and show how predictions line up with the labels
	y_pred = dt.predict(x_test)
	conf_mat = confusion_matrix(y_true=y_test, y_pred=y_pred)
	st.write("### Confusion Matrix")
	st.write(conf_mat)

	# Report the classifier's score on the held-out split as a percentage
	accuracy = dt.score(x_test, y_test)
	st.write(f"### Model Accuracy: {round(accuracy * 100, 2)}%")

	# Persist the fitted classifier next to the app
	joblib.dump(dt, 'heart_disease_dt_model.pkl')

	# Prepare FHE compatible model
	# Build the Concrete ML counterpart of the trained classifier from the
	# scaled training data (n_bits=10), then compile it on that same data.
	fhe_compatible = ConcreteXGBClassifier.from_sklearn_model(dt, x_train, n_bits=10)
	fhe_compatible.compile(x_train)

	# Setup the development environment
	# Save the compiled model through FHEModelDev into fhe_directory; the
	# server and client below are both pointed at that same directory.
	dev = FHEModelDev(path_dir=fhe_directory, model=fhe_compatible)
	dev.save()

	# Setup the server
	# Must come after dev.save(): the server loads from fhe_directory.
	server = FHEModelServer(path_dir=fhe_directory)
	server.load()

	# Setup the client
	# The client is given its own key directory; the serialized evaluation
	# keys obtained here are what server.run() receives alongside the
	# encrypted input further down.
	client = FHEModelClient(path_dir=fhe_directory, key_dir="/tmp/keys_client")
	serialized_evaluation_keys = client.get_serialized_evaluation_keys()

	# Run one encrypted prediction end to end on the first row of the dataset.
	# The feature matrix X prepared for training is reused instead of
	# re-reading and re-ranking the CSV, and the row goes through the fitted
	# scaler `sc`: the model was trained on standardized features, so feeding
	# it raw values would give a prediction for a different input.
	sample_data = sc.transform(X.iloc[[0]])  # double brackets keep a 2D, single-row input

	# Encrypt the sample data
	encrypted_data = client.quantize_encrypt_serialize(sample_data)

	# Run the server and get results
	encrypted_result = server.run(encrypted_data, serialized_evaluation_keys)
	result = client.deserialize_decrypt_dequantize(encrypted_result)

	st.write("### Prediction Result")
	st.write(result)