AeternumS committed on
Commit
5306193
1 Parent(s): cd05ce2
Files changed (2) hide show
  1. app.py +83 -93
  2. server2.py +0 -150
app.py CHANGED
@@ -1,13 +1,18 @@
 
1
  import numpy as np
2
  import pandas as pd
3
  import seaborn as sns
4
  import matplotlib.pyplot as plt
5
  import joblib
6
- from sklearn.tree import DecisionTreeClassifier, XGBClassifier #using sklearn decisiontreeclassifier
7
- from sklearn.model_selection import train_test_split
8
-
9
  import os
10
  import shutil
 
 
 
 
 
 
 
11
 
12
  # Define the directory for FHE client/server files
13
  fhe_directory = '/tmp/fhe_client_server_files/'
@@ -20,113 +25,68 @@ else:
20
  shutil.rmtree(fhe_directory)
21
  os.makedirs(fhe_directory)
22
 
23
- data=pd.read_csv('data/heart.xls')
24
-
25
-
26
- data.info() #checking the info
27
-
28
- data_corr=data.corr()
29
-
30
- plt.figure(figsize=(20,20))
31
- sns.heatmap(data=data_corr,annot=True)
32
- #Heatmap for data
33
- """
34
- # Get the Data
35
- X_train, y_train, X_val, y_val = train_test_split()
36
- classifier = XGBClassifier()
37
- # Training the Model
38
- classifier = classifier.fit(X_train, y_train)
39
- # Trained Model Evaluation on Validation Dataset
40
- confidence = classifier.score(X_val, y_val)
41
- # Validation Data Prediction
42
- y_pred = classifier.predict(X_val)
43
- # Model Validation Accuracy
44
- accuracy = accuracy_score(y_val, y_pred)
45
- # Model Confusion Matrix
46
- conf_mat = confusion_matrix(y_val, y_pred)
47
- # Model Classification Report
48
- clf_report = classification_report(y_val, y_pred)
49
- # Model Cross Validation Score
50
- score = cross_val_score(classifier, X_val, y_val, cv=3)
51
-
52
- try:
53
- # Load Trained Model
54
- clf = load(str(self.model_save_path + saved_model_name + ".joblib"))
55
- except Exception as e:
56
- print("Model not found...")
57
-
58
- if test_data is not None:
59
- result = clf.predict(test_data)
60
- print(result)
61
- else:
62
- result = clf.predict(self.test_features)
63
- accuracy = accuracy_score(self.test_labels, result)
64
- clf_report = classification_report(self.test_labels, result)
65
- print(accuracy, clf_report)
66
- """
67
- ####################
68
- feature_value=np.array(data_corr['output'])
69
- for i in range(len(feature_value)):
70
- if feature_value[i]<0:
71
- feature_value[i]=-feature_value[i]
72
 
73
- print(feature_value)
 
 
 
74
 
75
- features_corr=pd.DataFrame(feature_value,index=data_corr['output'].index,columns=['correalation'])
 
 
 
 
 
76
 
77
- feature_sorted=features_corr.sort_values(by=['correalation'],ascending=False)
 
 
 
 
78
 
79
- feature_selected=feature_sorted.index
 
80
 
81
- feature_selected #selected features which are very much correalated
82
 
83
- clean_data=data[feature_selected]
 
 
 
84
 
85
- #making input and output dataset
86
- X=clean_data.iloc[:,1:]
87
- Y=clean_data['output']
88
 
89
- x_train,x_test,y_train,y_test=train_test_split(X,Y,test_size=0.25,random_state=0)
 
 
 
90
 
91
- print(x_train.shape,y_train.shape,x_test.shape,y_test.shape) #data is splited in traing and testing dataset
 
 
92
 
93
- # feature scaling
94
- from sklearn.preprocessing import StandardScaler
95
- sc=StandardScaler()
96
- x_train=sc.fit_transform(x_train)
97
- x_test=sc.transform(x_test)
98
-
99
- #training our model
100
- dt=XGBClassifier(criterion='entropy',max_depth=6)
101
- dt.fit(x_train,y_train)
102
- #dt.compile(x_trqin)
103
 
104
- #predicting the value on testing data
105
- y_pred=dt.predict(x_test)
 
 
106
 
107
- #ploting the data
108
- from sklearn.metrics import confusion_matrix
109
- conf_mat=confusion_matrix(y_test,y_pred)
110
- print(conf_mat)
111
- accuracy=dt.score(x_test,y_test)
112
- print("\nThe accuracy of decisiontreelassifier on Heart disease prediction dataset is "+str(round(accuracy*100,2))+"%")
113
 
 
114
  joblib.dump(dt, 'heart_disease_dt_model.pkl')
115
 
116
- from concrete.ml.sklearn import DecisionTreeClassifier as ConcreteDecisionTreeClassifier
117
- from concrete.ml.sklearn import XGBClassifier as ConcreteXGBClassifier
118
-
119
- fhe_compatible = ConcreteXGBClassifier.from_sklearn_model(dt, x_train, n_bits = 10) #de FHE
120
  fhe_compatible.compile(x_train)
121
 
122
-
123
-
124
-
125
-
126
-
127
- #### server
128
- from concrete.ml.deployment import FHEModelDev, FHEModelClient, FHEModelServer
129
-
130
  # Setup the development environment
131
  dev = FHEModelDev(path_dir=fhe_directory, model=fhe_compatible)
132
  dev.save()
@@ -134,3 +94,33 @@ dev.save()
134
  # Setup the server
135
  server = FHEModelServer(path_dir=fhe_directory)
136
  server.load()
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import streamlit as st
2
  import numpy as np
3
  import pandas as pd
4
  import seaborn as sns
5
  import matplotlib.pyplot as plt
6
  import joblib
 
 
 
7
  import os
8
  import shutil
9
+ from xgboost import XGBClassifier
10
+ from sklearn.tree import DecisionTreeClassifier
11
+ from sklearn.model_selection import train_test_split
12
+ from sklearn.preprocessing import StandardScaler
13
+ from sklearn.metrics import confusion_matrix
14
+ from concrete.ml.sklearn.tree import XGBClassifier as ConcreteXGBClassifier
15
+ from concrete.ml.deployment import FHEModelDev, FHEModelClient, FHEModelServer
16
 
17
  # Define the directory for FHE client/server files
18
  fhe_directory = '/tmp/fhe_client_server_files/'
 
25
  shutil.rmtree(fhe_directory)
26
  os.makedirs(fhe_directory)
27
 
28
# Streamlit title
st.title("Heart Disease Prediction Model")

# Load the data.
# NOTE(review): the file has an .xls extension but is parsed with read_csv —
# presumably it contains CSV text; confirm against the data file.
data = pd.read_csv('data/heart.xls')
st.write("### Dataset Information")

# DataFrame.info() writes its report to a text buffer (stdout by default) and
# returns None, so st.write(data.info()) would only render "None" on the page.
# Capture the report in memory and show it as preformatted text instead.
import io

info_buffer = io.StringIO()
data.info(buf=info_buffer)
st.text(info_buffer.getvalue())
35
 
36
# Correlation matrix: pairwise correlation between the columns of `data`.
data_corr = data.corr()

# Draw on an explicit Figure/Axes pair and hand that Figure to Streamlit,
# instead of passing the pyplot module and relying on global figure state.
fig, ax = plt.subplots(figsize=(20, 20))
sns.heatmap(data=data_corr, annot=True, ax=ax)
st.write("### Correlation Heatmap")
st.pyplot(fig)

# Release the figure once it has been rendered so repeated runs of the script
# do not accumulate open matplotlib figures.
plt.close(fig)
42
 
43
# --- Feature selection based on correlation ---------------------------------
# Rank every column by the absolute value of its correlation with 'output'.
feature_value = data_corr['output'].abs()

# Series.to_frame renames the column directly. Passing the Series (which is
# named 'output') to pd.DataFrame(..., columns=['correlation']) instead looks
# up a 'correlation' key that does not exist and yields an all-NaN column, so
# the sort below would silently leave the columns in their original order.
features_corr = feature_value.to_frame(name='correlation')
feature_sorted = features_corr.sort_values(by=['correlation'], ascending=False)
feature_selected = feature_sorted.index.tolist()

st.write("### Selected Features Based on Correlation")
st.write(feature_selected)

clean_data = data[feature_selected]

# --- Prepare data for model training ----------------------------------------
# Remove the target by name rather than by position, so the label can never
# end up inside the feature matrix regardless of the column ordering.
X = clean_data.drop(columns=['output'])
Y = clean_data['output']
x_train, x_test, y_train, y_test = train_test_split(X, Y, test_size=0.25, random_state=0)

st.write("### Training and Test Set Shapes")
st.write(f"Train shape: {x_train.shape}, Test shape: {x_test.shape}")

# Feature scaling: the scaler is fitted on the training split only and then
# reused, unchanged, for the test split and for the prediction sample below.
sc = StandardScaler()
x_train = sc.fit_transform(x_train)
x_test = sc.transform(x_test)

# --- Train and evaluate the model -------------------------------------------
dt = XGBClassifier(max_depth=6)
dt.fit(x_train, y_train)

# Make predictions
y_pred = dt.predict(x_test)

# Confusion matrix
conf_mat = confusion_matrix(y_test, y_pred)
st.write("### Confusion Matrix")
st.write(conf_mat)

# Model accuracy
accuracy = dt.score(x_test, y_test)
st.write(f"### Model Accuracy: {round(accuracy * 100, 2)}%")

# Save the model
joblib.dump(dt, 'heart_disease_dt_model.pkl')

# --- FHE deployment ----------------------------------------------------------
# Prepare FHE compatible model from the trained classifier, then compile it
# using the scaled training data as the input set.
fhe_compatible = ConcreteXGBClassifier.from_sklearn_model(dt, x_train, n_bits=10)
fhe_compatible.compile(x_train)

# Setup the development environment (writes the client/server files to
# fhe_directory; this must happen before the server and client load them).
dev = FHEModelDev(path_dir=fhe_directory, model=fhe_compatible)
dev.save()

# Setup the server
server = FHEModelServer(path_dir=fhe_directory)
server.load()

# Setup the client
client = FHEModelClient(path_dir=fhe_directory, key_dir="/tmp/keys_client")
serialized_evaluation_keys = client.get_serialized_evaluation_keys()

# --- Encrypted prediction on one sample -------------------------------------
# Use the first row of the feature matrix as the sample. Reusing X guarantees
# the same columns, in the same order, that the model was trained on (no need
# to reload the CSV and recompute the correlation ranking). The row goes
# through the fitted scaler because both training and FHE compilation used
# standardized features only.
sample_data = sc.transform(X.iloc[[0]])  # 2D array with a single row

# Encrypt the sample data
encrypted_data = client.quantize_encrypt_serialize(sample_data)

# Run the server and get results
encrypted_result = server.run(encrypted_data, serialized_evaluation_keys)
result = client.deserialize_decrypt_dequantize(encrypted_result)

st.write("### Prediction Result")
st.write(result)
server2.py DELETED
@@ -1,150 +0,0 @@
1
- import numpy as np
2
- import pandas as pd
3
- import seaborn as sns
4
- import matplotlib.pyplot as plt
5
- import joblib
6
-
7
- import os
8
- import shutil
9
-
10
- # Define the directory for FHE client/server files
11
- fhe_directory = '/tmp/fhe_client_server_files/'
12
-
13
- # Create the directory if it does not exist
14
- if not os.path.exists(fhe_directory):
15
- os.makedirs(fhe_directory)
16
- else:
17
- # If it exists, delete its contents
18
- shutil.rmtree(fhe_directory)
19
- os.makedirs(fhe_directory)
20
-
21
- data=pd.read_csv('data/heart.xls')
22
-
23
- data.info() #checking the info
24
-
25
- data_corr=data.corr()
26
-
27
- plt.figure(figsize=(20,20))
28
- sns.heatmap(data=data_corr,annot=True)
29
- #Heatmap for data
30
-
31
- feature_value=np.array(data_corr['output'])
32
- for i in range(len(feature_value)):
33
- if feature_value[i]<0:
34
- feature_value[i]=-feature_value[i]
35
-
36
- print(feature_value)
37
-
38
- features_corr=pd.DataFrame(feature_value,index=data_corr['output'].index,columns=['correalation'])
39
-
40
- feature_sorted=features_corr.sort_values(by=['correalation'],ascending=False)
41
-
42
- feature_selected=feature_sorted.index
43
-
44
- feature_selected #selected features which are very much correalated
45
-
46
- clean_data=data[feature_selected]
47
-
48
- from xgboost import XGBClassifier
49
- from sklearn.tree import DecisionTreeClassifier #using sklearn decisiontreeclassifier
50
- from sklearn.model_selection import train_test_split
51
-
52
- #making input and output dataset
53
- X=clean_data.iloc[:,1:]
54
- Y=clean_data['output']
55
-
56
- x_train,x_test,y_train,y_test=train_test_split(X,Y,test_size=0.25,random_state=0)
57
-
58
- print(x_train.shape,y_train.shape,x_test.shape,y_test.shape) #data is splited in traing and testing dataset
59
-
60
- # feature scaling
61
- from sklearn.preprocessing import StandardScaler
62
- sc=StandardScaler()
63
- x_train=sc.fit_transform(x_train)
64
- x_test=sc.transform(x_test)
65
-
66
- #training our model
67
- dt=XGBClassifier(max_depth=6)
68
- dt.fit(x_train,y_train)
69
- #dt.compile(x_trqin)
70
-
71
- #predicting the value on testing data
72
- y_pred=dt.predict(x_test)
73
-
74
- #ploting the data
75
- from sklearn.metrics import confusion_matrix
76
- conf_mat=confusion_matrix(y_test,y_pred)
77
- print(conf_mat)
78
- accuracy=dt.score(x_test,y_test)
79
- print("\nThe accuracy of decisiontreelassifier on Heart disease prediction dataset is "+str(round(accuracy*100,2))+"%")
80
-
81
- joblib.dump(dt, 'heart_disease_dt_model.pkl')
82
-
83
- from concrete.ml.sklearn.tree import XGBClassifier as ConcreteXGBClassifier
84
-
85
- fhe_compatible = ConcreteXGBClassifier.from_sklearn_model(dt, x_train, n_bits = 10)
86
- fhe_compatible.compile(x_train)
87
-
88
-
89
-
90
-
91
-
92
-
93
- #### server
94
- from concrete.ml.deployment import FHEModelDev, FHEModelClient, FHEModelServer
95
-
96
- # Setup the development environment
97
- dev = FHEModelDev(path_dir=fhe_directory, model=fhe_compatible)
98
- dev.save()
99
-
100
- # Setup the server
101
- server = FHEModelServer(path_dir=fhe_directory)
102
- server.load()
103
-
104
-
105
-
106
-
107
-
108
-
109
-
110
- ####### client
111
-
112
- from concrete.ml.deployment import FHEModelDev, FHEModelClient, FHEModelServer
113
-
114
- # Setup the client
115
- client = FHEModelClient(path_dir=fhe_directory, key_dir="/tmp/keys_client")
116
- serialized_evaluation_keys = client.get_serialized_evaluation_keys()
117
-
118
-
119
- # Load the dataset and select the relevant features
120
- data = pd.read_csv('data/heart.xls')
121
-
122
- # Perform the correlation analysis
123
- data_corr = data.corr()
124
-
125
- # Select features based on correlation with 'output'
126
- feature_value = np.array(data_corr['output'])
127
- for i in range(len(feature_value)):
128
- if feature_value[i] < 0:
129
- feature_value[i] = -feature_value[i]
130
-
131
- features_corr = pd.DataFrame(feature_value, index=data_corr['output'].index, columns=['correlation'])
132
- feature_sorted = features_corr.sort_values(by=['correlation'], ascending=False)
133
- feature_selected = feature_sorted.index
134
-
135
- # Clean the data by selecting the most correlated features
136
- clean_data = data[feature_selected]
137
-
138
- # Extract the first row of feature data for prediction (excluding 'output' column)
139
- sample_data = clean_data.iloc[0, 1:].values.reshape(1, -1) # Reshape to 2D array for model input
140
-
141
- encrypted_data = client.quantize_encrypt_serialize(sample_data)
142
-
143
-
144
-
145
- ##### end client
146
-
147
- encrypted_result = server.run(encrypted_data, serialized_evaluation_keys)
148
-
149
- result = client.deserialize_decrypt_dequantize(encrypted_result)
150
- print(result)