File size: 445 Bytes
87c4f73
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
import pandas as pd

def preprocess_data(data: pd.DataFrame, threshold: float = 0.5) -> pd.DataFrame:
    """Return a copy of *data* without its high-cardinality columns.

    For every column the ratio ``distinct values / number of rows`` is
    computed (NaN counts as one distinct value).  Columns whose ratio is
    strictly greater than *threshold* are removed; the original code refers
    to these as "sensitive" columns.

    Args:
        data: Input frame.  It is never modified in place.
        threshold: Maximum allowed distinct-value ratio.  A column is kept
            when its ratio is ``<= threshold``.  Defaults to ``0.5``.

    Returns:
        A new DataFrame containing only the retained columns, in their
        original order.  A frame with zero rows is returned as an
        unchanged copy, since no ratio can be computed for it.
    """
    n_rows = len(data.index)
    if n_rows == 0:
        # No rows means the ratio is undefined (would divide by zero);
        # there is no evidence against any column, so keep them all.
        return data.copy()

    # Work by position rather than by label so that columns sharing a
    # label are evaluated and kept/dropped independently of each other.
    keep_mask = [
        data.iloc[:, pos].nunique(dropna=False) / n_rows <= threshold
        for pos in range(data.shape[1])
    ]
    return data.iloc[:, keep_mask]