import numpy as np
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
import joblib
from sklearn.tree import DecisionTreeClassifier   # scikit-learn decision tree (kept for reference)
from xgboost import XGBClassifier                 # XGBClassifier comes from the xgboost package, not sklearn.tree
from sklearn.model_selection import train_test_split
import os
import shutil

# Define the directory for FHE client/server files
fhe_directory = '/tmp/fhe_client_server_files/'

# Create the directory if it does not exist
if not os.path.exists(fhe_directory):
    os.makedirs(fhe_directory)
else:
    # If it exists, delete its contents
    shutil.rmtree(fhe_directory)
    os.makedirs(fhe_directory)

data = pd.read_csv('data/heart.xls')
data.info()                                # check column names, dtypes and missing values
data_corr = data.corr()
plt.figure(figsize=(20, 20))
sns.heatmap(data=data_corr, annot=True)    # heatmap of the correlation matrix

# Unused reference snippet (not executed):
"""
# Get the Data
X_train, y_train, X_val, y_val = train_test_split()
classifier = XGBClassifier()

# Training the Model
classifier = classifier.fit(X_train, y_train)

# Trained Model Evaluation on Validation Dataset
confidence = classifier.score(X_val, y_val)

# Validation Data Prediction
y_pred = classifier.predict(X_val)

# Model Validation Accuracy
accuracy = accuracy_score(y_val, y_pred)

# Model Confusion Matrix
conf_mat = confusion_matrix(y_val, y_pred)

# Model Classification Report
clf_report = classification_report(y_val, y_pred)

# Model Cross Validation Score
score = cross_val_score(classifier, X_val, y_val, cv=3)

try:
    # Load Trained Model
    clf = load(str(self.model_save_path + saved_model_name + ".joblib"))
except Exception as e:
    print("Model not found...")

if test_data is not None:
    result = clf.predict(test_data)
    print(result)
else:
    result = clf.predict(self.test_features)
    accuracy = accuracy_score(self.test_labels, result)
    clf_report = classification_report(self.test_labels, result)
    print(accuracy, clf_report)
"""

####################
# Rank features by the absolute value of their correlation with the target column
feature_value = np.array(data_corr['output'])
for i in range(len(feature_value)):
    if feature_value[i] < 0:
        feature_value[i] = -feature_value[i]
print(feature_value)

features_corr = pd.DataFrame(feature_value, index=data_corr['output'].index, columns=['correlation'])
feature_sorted = features_corr.sort_values(by=['correlation'], ascending=False)
feature_selected = feature_sorted.index
print(feature_selected)            # features ordered by how strongly they correlate with the output
clean_data = data[feature_selected]

# Build the input and output datasets
X = clean_data.iloc[:, 1:]
Y = clean_data['output']
x_train, x_test, y_train, y_test = train_test_split(X, Y, test_size=0.25, random_state=0)
print(x_train.shape, y_train.shape, x_test.shape, y_test.shape)   # data is split into training and testing sets

# Feature scaling
from sklearn.preprocessing import StandardScaler
sc = StandardScaler()
x_train = sc.fit_transform(x_train)
x_test = sc.transform(x_test)

# Train the model (criterion='entropy' is a DecisionTreeClassifier argument, not an XGBoost one, so it is dropped)
dt = XGBClassifier(max_depth=6)
dt.fit(x_train, y_train)

# Predict on the testing data
y_pred = dt.predict(x_test)

# Evaluate the model
from sklearn.metrics import confusion_matrix
conf_mat = confusion_matrix(y_test, y_pred)
print(conf_mat)
accuracy = dt.score(x_test, y_test)
print("\nThe accuracy of the XGBClassifier on the heart disease prediction dataset is " + str(round(accuracy * 100, 2)) + "%")

joblib.dump(dt, 'heart_disease_dt_model.pkl')

from concrete.ml.sklearn import DecisionTreeClassifier as ConcreteDecisionTreeClassifier
from concrete.ml.sklearn import XGBClassifier as ConcreteXGBClassifier

# Convert the trained XGBoost model into a quantized, FHE-compatible Concrete ML model and compile it
fhe_compatible = ConcreteXGBClassifier.from_sklearn_model(dt, x_train, n_bits=10)
fhe_compatible.compile(x_train)
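# Optional sanity check before deployment: compare the clear XGBoost predictions with
# the compiled Concrete ML model running in FHE simulation. This is a minimal sketch,
# assuming a recent Concrete ML release where predict() accepts an `fhe` keyword
# (older releases used execute_in_fhe=True instead).
y_pred_sim = fhe_compatible.predict(x_test, fhe="simulate")
agreement = (y_pred == y_pred_sim).mean()
print("Agreement between clear and FHE-simulated predictions: " + str(round(agreement * 100, 2)) + "%")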
#### Server-side deployment
from concrete.ml.deployment import FHEModelDev, FHEModelClient, FHEModelServer

# Set up the development environment and save the client/server artifacts
dev = FHEModelDev(path_dir=fhe_directory, model=fhe_compatible)
dev.save()

# Set up the server
server = FHEModelServer(path_dir=fhe_directory)
server.load()
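# Client-side usage (a hedged sketch of the Concrete ML client/server flow; the
# FHEModelClient helpers and server.run() are assumed to follow the library's
# deployment API, and the key directory path below is illustrative only):
client = FHEModelClient(path_dir=fhe_directory, key_dir="/tmp/keys_client")
serialized_evaluation_keys = client.get_serialized_evaluation_keys()

# Encrypt one scaled test sample, run it through the server, and decrypt the result
sample = x_test[[0], :]
encrypted_input = client.quantize_encrypt_serialize(sample)
encrypted_output = server.run(encrypted_input, serialized_evaluation_keys)
decrypted_prediction = client.deserialize_decrypt_dequantize(encrypted_output)
print("Decrypted prediction for the first test sample:", decrypted_prediction)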