AeternumS committed on
Commit
ecec68f
1 Parent(s): eb57486
Files changed (1) hide show
  1. app.py +30 -47
app.py CHANGED
@@ -6,12 +6,11 @@ import matplotlib.pyplot as plt
6
  import joblib
7
  import os
8
  import shutil
9
- from xgboost import XGBClassifier
10
  from sklearn.tree import DecisionTreeClassifier
11
  from sklearn.model_selection import train_test_split
12
  from sklearn.preprocessing import StandardScaler
13
  from sklearn.metrics import confusion_matrix
14
- from concrete.ml.sklearn.tree import XGBClassifier as ConcreteXGBClassifier
15
  from concrete.ml.deployment import FHEModelDev, FHEModelClient, FHEModelServer
16
 
17
  # Define the directory for FHE client/server files
@@ -25,39 +24,43 @@ else:
25
  shutil.rmtree(fhe_directory)
26
  os.makedirs(fhe_directory)
27
 
28
- # Streamlit title
29
- st.title("Heart Disease Prediction Model")
30
-
31
  # Load the data
32
  data = pd.read_csv('data/heart.xls')
33
- st.write("### Dataset Information")
34
- st.write(data.info())
35
 
36
- # Correlation matrix
 
 
 
 
37
  data_corr = data.corr()
38
  plt.figure(figsize=(20, 20))
39
  sns.heatmap(data=data_corr, annot=True)
40
  st.write("### Correlation Heatmap")
41
  st.pyplot(plt)
42
 
43
- # Feature selection based on correlation
44
- feature_value = np.abs(data_corr['output']) # Use absolute values for correlation
 
 
 
45
  features_corr = pd.DataFrame(feature_value, index=data_corr['output'].index, columns=['correlation'])
46
  feature_sorted = features_corr.sort_values(by=['correlation'], ascending=False)
47
- feature_selected = feature_sorted.index.tolist()
48
 
49
- st.write("### Selected Features Based on Correlation")
50
  st.write(feature_selected)
51
 
 
52
  clean_data = data[feature_selected]
53
 
54
- # Prepare data for model training
55
  X = clean_data.iloc[:, 1:]
56
  Y = clean_data['output']
57
  x_train, x_test, y_train, y_test = train_test_split(X, Y, test_size=0.25, random_state=0)
58
 
59
- st.write("### Training and Test Set Shapes")
60
- st.write(f"Train shape: {x_train.shape}, Test shape: {x_test.shape}")
 
61
 
62
  # Feature scaling
63
  sc = StandardScaler()
@@ -65,33 +68,28 @@ x_train = sc.fit_transform(x_train)
65
  x_test = sc.transform(x_test)
66
 
67
  # Train the model
68
- dt = XGBClassifier(max_depth=6)
69
  dt.fit(x_train, y_train)
70
 
71
- # Make predictions
72
  y_pred = dt.predict(x_test)
73
-
74
- # Confusion matrix
75
  conf_mat = confusion_matrix(y_test, y_pred)
 
 
76
  st.write("### Confusion Matrix")
77
  st.write(conf_mat)
78
-
79
- # Model accuracy
80
- accuracy = dt.score(x_test, y_test)
81
- st.write(f"### Model Accuracy: {round(accuracy * 100, 2)}%")
82
 
83
  # Save the model
84
  joblib.dump(dt, 'heart_disease_dt_model.pkl')
85
 
86
- # Prepare FHE compatible model
87
- fhe_compatible = ConcreteXGBClassifier.from_sklearn_model(dt, x_train, n_bits=10)
88
  fhe_compatible.compile(x_train)
89
 
90
- # Setup the development environment
91
  dev = FHEModelDev(path_dir=fhe_directory, model=fhe_compatible)
92
  dev.save()
93
-
94
- # Setup the server
95
  server = FHEModelServer(path_dir=fhe_directory)
96
  server.load()
97
 
@@ -99,28 +97,13 @@ server.load()
99
  client = FHEModelClient(path_dir=fhe_directory, key_dir="/tmp/keys_client")
100
  serialized_evaluation_keys = client.get_serialized_evaluation_keys()
101
 
102
- # Load the dataset and perform correlation analysis
103
- data = pd.read_csv('data/heart.xls')
104
- data_corr = data.corr()
105
-
106
- # Select features based on correlation with 'output'
107
- feature_value = np.abs(data_corr['output'])
108
- features_corr = pd.DataFrame(feature_value, index=data_corr['output'].index, columns=['correlation'])
109
- feature_sorted = features_corr.sort_values(by=['correlation'], ascending=False)
110
- feature_selected = feature_sorted.index.tolist()
111
-
112
- # Clean the data by selecting the most correlated features
113
- clean_data = data[feature_selected]
114
-
115
- # Extract the first row of feature data for prediction
116
- sample_data = clean_data.iloc[0, 1:].values.reshape(1, -1) # Reshape to 2D array for model input
117
-
118
- # Encrypt the sample data
119
  encrypted_data = client.quantize_encrypt_serialize(sample_data)
120
 
121
- # Run the server and get results
122
  encrypted_result = server.run(encrypted_data, serialized_evaluation_keys)
123
  result = client.deserialize_decrypt_dequantize(encrypted_result)
124
 
125
- st.write("### Prediction Result")
126
  st.write(result)
 
6
  import joblib
7
  import os
8
  import shutil
 
9
  from sklearn.tree import DecisionTreeClassifier
10
  from sklearn.model_selection import train_test_split
11
  from sklearn.preprocessing import StandardScaler
12
  from sklearn.metrics import confusion_matrix
13
+ from concrete.ml.sklearn.tree import DecisionTreeClassifier as FHEDecisionTreeClassifier
14
  from concrete.ml.deployment import FHEModelDev, FHEModelClient, FHEModelServer
15
 
16
  # Define the directory for FHE client/server files
 
24
  shutil.rmtree(fhe_directory)
25
  os.makedirs(fhe_directory)
26
 
 
 
 
27
  # Load the data
28
  data = pd.read_csv('data/heart.xls')
 
 
29
 
30
+ st.write("### Data Overview")
31
+ st.write(data.head())
32
+ data.info() # Prints the DataFrame summary to stdout (server console); it returns None and is not rendered in the Streamlit app
33
+
34
+ # Correlation analysis
35
  data_corr = data.corr()
36
  plt.figure(figsize=(20, 20))
37
  sns.heatmap(data=data_corr, annot=True)
38
  st.write("### Correlation Heatmap")
39
  st.pyplot(plt)
40
 
41
+ feature_value = np.array(data_corr['output'])
42
+ for i in range(len(feature_value)):
43
+ if feature_value[i] < 0:
44
+ feature_value[i] = -feature_value[i]
45
+
46
  features_corr = pd.DataFrame(feature_value, index=data_corr['output'].index, columns=['correlation'])
47
  feature_sorted = features_corr.sort_values(by=['correlation'], ascending=False)
48
+ feature_selected = feature_sorted.index
49
 
50
+ st.write("### Selected Features")
51
  st.write(feature_selected)
52
 
53
+ # Clean the data by selecting the most correlated features
54
  clean_data = data[feature_selected]
55
 
56
+ # Prepare the dataset for training
57
  X = clean_data.iloc[:, 1:]
58
  Y = clean_data['output']
59
  x_train, x_test, y_train, y_test = train_test_split(X, Y, test_size=0.25, random_state=0)
60
 
61
+ st.write("### Training Data Shape")
62
+ st.write(f"X Train Shape: {x_train.shape}, Y Train Shape: {y_train.shape}")
63
+ st.write(f"X Test Shape: {x_test.shape}, Y Test Shape: {y_test.shape}")
64
 
65
  # Feature scaling
66
  sc = StandardScaler()
 
68
  x_test = sc.transform(x_test)
69
 
70
  # Train the model
71
+ dt = DecisionTreeClassifier(criterion='entropy', max_depth=6)
72
  dt.fit(x_train, y_train)
73
 
74
+ # Predict and evaluate
75
  y_pred = dt.predict(x_test)
 
 
76
  conf_mat = confusion_matrix(y_test, y_pred)
77
+ accuracy = dt.score(x_test, y_test)
78
+
79
  st.write("### Confusion Matrix")
80
  st.write(conf_mat)
81
+ st.write(f"### Accuracy: {round(accuracy * 100, 2)}%")
 
 
 
82
 
83
  # Save the model
84
  joblib.dump(dt, 'heart_disease_dt_model.pkl')
85
 
86
+ # Convert the model for FHE
87
+ fhe_compatible = FHEDecisionTreeClassifier.from_sklearn_model(dt, x_train, n_bits=10)
88
  fhe_compatible.compile(x_train)
89
 
90
+ # Save the compiled model's deployment files, then load them on the server side
91
  dev = FHEModelDev(path_dir=fhe_directory, model=fhe_compatible)
92
  dev.save()
 
 
93
  server = FHEModelServer(path_dir=fhe_directory)
94
  server.load()
95
 
 
97
  client = FHEModelClient(path_dir=fhe_directory, key_dir="/tmp/keys_client")
98
  serialized_evaluation_keys = client.get_serialized_evaluation_keys()
99
 
100
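+ # Take the first row of clean_data (first column dropped) as a single-sample 2D input; NOTE(review): unlike x_train/x_test, this row is not passed through the StandardScaler `sc` - confirm whether it should be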
101
+ sample_data = clean_data.iloc[0, 1:].values.reshape(1, -1) # First sample for prediction
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
102
  encrypted_data = client.quantize_encrypt_serialize(sample_data)
103
 
104
+ # Run the server with encrypted data
105
  encrypted_result = server.run(encrypted_data, serialized_evaluation_keys)
106
  result = client.deserialize_decrypt_dequantize(encrypted_result)
107
 
108
+ st.write("### Encrypted Prediction Result")
109
  st.write(result)