Seanyoon committed on
Commit
87c4f73
1 Parent(s): 78a2900

Create preprocess.py

Browse files
Files changed (1) hide show
  1. preprocess.py +16 -0
preprocess.py ADDED
@@ -0,0 +1,16 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import pandas as pd
2
+
3
def preprocess_data(data: pd.DataFrame, *, threshold: float = 0.5) -> pd.DataFrame:
    """Drop columns whose share of distinct values exceeds ``threshold``.

    For every column the ratio ``distinct values / number of rows`` is
    computed; columns whose ratio is strictly greater than ``threshold`` are
    removed. Missing values count as one distinct value.

    Args:
        data: Input frame. It is not modified.
        threshold: Exclusive upper bound on the distinct-value ratio a column
            may have and still be kept. Defaults to 0.5.

    Returns:
        A new DataFrame without the columns that exceeded the threshold.
    """
    row_count = len(data.index)
    if row_count == 0:
        # With no rows the ratio is undefined (the division would raise
        # ZeroDivisionError), so there is nothing to drop.
        return data.copy()

    # Positional access keeps the ratio per physical column even when two
    # columns happen to share a label.
    sensitive_cols = [
        label
        for position, label in enumerate(data.columns)
        if data.iloc[:, position].nunique(dropna=False) / row_count > threshold
    ]
    return data.drop(columns=sensitive_cols)