Sefika committed on
Commit
dda3d40
1 Parent(s): 9ba9ac1
train/addind_data.py ADDED
@@ -0,0 +1,115 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import pandas as pd
2
+ import pickle
3
+ import tensorflow.keras.backend as K
4
+ import tensorflow as tf
5
+
6
+ import tensorflow as tf
7
+ from cleverhans.tf2.attacks.fast_gradient_method import fast_gradient_method
8
+ from tensorflow.keras.optimizers import SGD
9
+ from tensorflow.keras.callbacks import Callback, LearningRateScheduler, EarlyStopping
10
+ from sklearn.model_selection import train_test_split
11
+ import pandas as pd
12
+ import numpy as np
13
+ from sklearn.preprocessing import LabelEncoder
14
+ from tensorflow.keras.utils import to_categorical
15
+ from sklearn.model_selection import KFold
16
+ import gzip
17
+ from train_utily import noise
18
+ import warnings
19
+
20
+ warnings.filterwarnings("ignore")
21
+ import tensorflow
22
+
23
+ print("\nTensorflow Version: " + tf.__version__)
24
+ from _utility import lrate, get_adversarial_examples, print_test
25
+ from wresnet import WideResidualNetwork
26
+ import os
27
+
28
## globals
# Perturbation magnitudes and the fractions of the training set that receive
# perturbed copies.
epsilons = [0.001, 0.003, 0.005, 0.01, 0.03]
percents = [0.25, 0.5, 0.75, 1.0]
# One output folder per perturbation type (random noise / adversarial examples).
folder_list = ["RandomnoiseModels", "AEModels"]
# os.mkdir raised FileExistsError on every re-run of the script; makedirs with
# exist_ok=True creates the folders only when they are missing.
for _folder in folder_list:
    os.makedirs(_folder, exist_ok=True)
34
+
35
+
36
def data_augmentation(epsilon, percent, X, Y, perturbation_type, model=None):
    """Append perturbed copies of the leading ``percent`` of ``X`` to the data.

    Args:
        epsilon: Perturbation magnitude forwarded to the perturbation helper.
        percent: Fraction of ``X`` (counted from index 0) that gets perturbed.
        X: Training inputs; sliced and concatenated along axis 0.
        Y: Labels aligned with ``X``.
        perturbation_type: ``"FGSM"`` selects ``get_adversarial_examples``;
            anything else selects ``noise``. Either the bare string or a
            sequence whose first element is the string is accepted.
        model: Model passed to ``get_adversarial_examples`` for FGSM. When
            omitted, a module-level ``model`` is used if one exists.

    Returns:
        Tuple ``(aug_X, aug_Y)``: the original data followed by the perturbed
        samples and their (unchanged) labels.

    Raises:
        ValueError: If FGSM is requested and no model is available.
    """
    if isinstance(perturbation_type, str):
        kind = perturbation_type
    else:
        kind = perturbation_type[0]

    split = int(len(X) * percent)

    if kind == "FGSM":
        if model is None:
            # The original code read an undefined global `model`; keep
            # honouring such a global when the caller defines one.
            model = globals().get("model")
        if model is None:
            raise ValueError(
                "FGSM augmentation needs a model: pass `model=` or define a "
                "module-level `model`."
            )
        X_adv_percent = get_adversarial_examples(model, X[:split], Y[:split], epsilon)
    else:
        X_adv_percent = noise(X[:split], eps=epsilon)

    aug_X = np.concatenate((X, X_adv_percent), axis=0)
    # Perturbed samples keep the labels of the clean samples they came from.
    aug_Y = np.concatenate((Y, Y[:split]), axis=0)

    return aug_X, aug_Y
50
+
51
+
52
def experiments(X, Y, folder):
    """Augment and train once for every (epsilon, percent) combination.

    ``folder`` selects the perturbation: ``"AEModels"`` means FGSM, any other
    value means random noise. It is also forwarded to ``train`` as the output
    folder.
    """
    if folder == "AEModels":
        attack = "FGSM"
    else:
        attack = "Random"
    # data_augmentation inspects element 0, so the name stays wrapped in a list.
    perturbation_type = [attack]

    grid = [(eps, frac) for eps in epsilons for frac in percents]
    for eps, frac in grid:
        augmented_x, augmented_y = data_augmentation(
            eps, frac, X, Y, perturbation_type
        )
        train(augmented_x, augmented_y, frac, eps, folder)
60
+
61
+
62
def train(X, Y, percent, epsilon, folder):
    """Run ten-fold cross-validation of the wide ResNet on ``(X, Y)``.

    For every fold a new network is built, trained for 50 epochs and its
    weights plus validation accuracy/loss curves are written to ``folder``
    under names derived from ``epsilon``, ``percent`` and the fold index.

    Args:
        X: Training inputs, indexable by the integer index arrays of ``KFold``.
        Y: Labels aligned with ``X``.
        percent: Fraction of perturbed data; only used in the file names.
        epsilon: Perturbation magnitude; only used in the file names.
        folder: Existing directory that receives the output files.
    """
    batch_size = 64
    input_shape = (32, 32, 1)
    # random_state only has meaning with shuffle=True, and recent scikit-learn
    # raises ValueError for the combination; unshuffled splits are already
    # deterministic.
    kfold = KFold(n_splits=10)
    model_name = folder + "/ResNet_" + str(epsilon) + "_" + str(percent)

    # `generator` was never defined in this module. These settings mirror the
    # ImageDataGenerator used by AdversarialTraining in adversarial_training.py.
    # NOTE(review): confirm this is the augmentation intended here.
    generator = tf.keras.preprocessing.image.ImageDataGenerator(
        rotation_range=10,
        width_shift_range=5.0 / 32,
        height_shift_range=5.0 / 32,
    )

    for fold, (train_idx, val_idx) in enumerate(kfold.split(X)):
        resnet = WideResidualNetwork(
            input_shape, 0.0001, 0.9, nb_classes=4, N=2, k=1, dropout=0.0
        )
        model = resnet.create_wide_residual_network()

        x_train, y_train = X[train_idx], Y[train_idx]
        x_val, y_val = X[val_idx], Y[val_idx]

        # A fresh optimizer per fold keeps momentum/iteration state from
        # leaking between independently trained models. `learning_rate`
        # replaces the deprecated `lr` keyword.
        optimizer = SGD(learning_rate=0.1, momentum=0.9)
        model.compile(
            loss="categorical_crossentropy", optimizer=optimizer, metrics=["acc"]
        )

        # validation_steps is dropped on purpose: with array validation data it
        # caps the number of validation batches, so part of the fold would be
        # left out of val_acc / val_loss.
        hist = model.fit(
            generator.flow(x_train, y_train, batch_size=batch_size),
            steps_per_epoch=len(x_train) // batch_size,
            epochs=50,
            validation_data=(x_val, y_val),
            callbacks=[lrate],
        )

        name = model_name + "_" + str(fold) + ".h5"
        hist_name = model_name + "_acc" + "_" + str(fold) + ".pickle"
        hist_name_loss = model_name + "_loss" + "_" + str(fold) + ".pickle"

        with open(hist_name, "wb") as f:
            pickle.dump(hist.history["val_acc"], f)

        with open(hist_name_loss, "wb") as f:
            pickle.dump(hist.history["val_loss"], f)

        model.save_weights(name)
103
+
104
+
105
# `hkl` was used without ever being imported in this script, so the load below
# failed with NameError.
import hickle as hkl

# data.hkl is read as a mapping with the four keys used below.
data = hkl.load("data.hkl")

X_train, X_test, Y_train, y_test = (
    data["xtrain"],
    data["xtest"],
    data["ytrain"],
    data["ytest"],
)

# One full experiment grid per output folder / perturbation type.
for folder in folder_list:
    experiments(X_train, Y_train, folder)
train/adversarial_training.py ADDED
@@ -0,0 +1,166 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import numpy as np
2
+ from cleverhans.future.tf2.attacks import fast_gradient_method
3
+ import pandas as pd
4
+ from sklearn.model_selection import KFold
5
+ import sys
6
+ import tensorflow
7
+ import tensorflow as tf
8
+ from multiprocessing import Pool
9
+
10
+ from _utility import lrate, get_adversarial_examples, print_test, step_decay
11
+ import hickle as hkl
12
+ import pickle
13
+
14
model_name = "ResNet_da"


class AdversarialTraining(object):
    """Adversarial training for a given model and list of epsilon values.

    For every batch the second half of the samples is replaced by adversarial
    examples, which are produced with the fast gradient sign method of the
    CleverHans framework. The mixed batch is then used for one training step.
    """

    def __init__(self, parameter):
        """Store the settings found under ``epochs``, ``batch_size`` and
        ``optimizer`` in ``parameter`` and build the image augmenter."""
        self.epochs = parameter["epochs"]
        self.batch_size = parameter["batch_size"]
        self.optimizer = parameter["optimizer"]

        self.generator = tf.keras.preprocessing.image.ImageDataGenerator(
            rotation_range=10,
            width_shift_range=5.0 / 32,
            height_shift_range=5.0 / 32,
        )

    def train(self, model, train_dataset, val_dataset, epsilon_list):
        """Train ``model`` on half-clean / half-adversarial batches.

        Args:
            model: Compiled Keras model; it is also used to craft the attacks.
            train_dataset: Iterable yielding ``(x_batch, y_batch)`` pairs.
            val_dataset: Accepted for interface compatibility; not used.
            epsilon_list: Per-sample epsilon values, see ``adversarial_example``.
        """
        for epoch in range(self.epochs):
            # Learning-rate schedule is applied manually once per epoch.
            lr_rate = step_decay(epoch)
            tf.keras.backend.set_value(model.optimizer.learning_rate, lr_rate)

            for step, (x_batch, y_batch) in enumerate(train_dataset):
                print(step)
                # data_augmentation returns (inputs, labels); the original code
                # stored the whole tuple as the inputs.
                x_mix, y_mix = self.data_augmentation(
                    x_batch, y_batch, model, epsilon_list
                )
                # batch_size must not be passed to fit() together with a
                # generator; the generator already batches.
                model.fit(
                    self.generator.flow(
                        x_mix, np.asarray(y_mix), batch_size=self.batch_size
                    ),
                    verbose=0,
                )

    def data_augmentation(self, X_train, Y_train, pretrained_model, epsilon_list):
        """Replace the second half of a batch with adversarial examples.

        Args:
            X_train: Batch of training inputs.
            Y_train: Labels aligned with ``X_train``.
            pretrained_model: Model used to craft the adversarial examples.
            epsilon_list: Per-sample epsilon values.

        Returns:
            Tuple ``(x_mix, y_mix)``: clean first half followed by the
            adversarial second half, and the labels in unchanged order.
        """
        total = int(len(X_train))
        half = int(total / 2)

        x_clean = X_train[:half]
        x_adv = self.get_adversarial(
            pretrained_model,
            X_train[half:total],
            Y_train[half:total],
            epsilon_list,
        )
        x_mix = self.merge_data(x_clean, x_adv)
        y_mix = Y_train[:total]

        return x_mix, y_mix

    def merge_data(self, x_clean, x_adv):
        """Stack clean and adversarial inputs along the sample axis.

        Args:
            x_clean: Clean samples.
            x_adv: Adversarial samples.

        Returns:
            numpy array holding all clean samples followed by all adversarial
            samples.
        """
        clean = np.asarray(x_clean)
        adv = np.asarray(x_adv)
        # An empty half may not carry the sample dimensions, so it is skipped
        # instead of being concatenated.
        if len(clean) == 0:
            return adv
        if len(adv) == 0:
            return clean
        return np.concatenate((clean, adv), axis=0)

    def get_adversarial(self, logits_model, X_true, y_true, epsilon_list):
        """Thin wrapper around ``adversarial_example``."""
        return self.adversarial_example(logits_model, X_true, y_true, epsilon_list)

    def adversarial_example(self, logits_model, X_true, y_true, epsilon_list):
        """Craft one untargeted FGSM example per input sample.

        Sample ``i`` is attacked with ``epsilon_list[i]``; when there are more
        samples than epsilons the list is cycled instead of raising IndexError.

        Args:
            logits_model: Model handed to ``fast_gradient_method``.
            X_true: Clean inputs.
            y_true: Labels aligned with ``X_true``; ``argmax`` of each entry is
                used as the class index.
            epsilon_list: Non-empty sequence of epsilon values.

        Returns:
            numpy array of adversarial samples, each with the shape of its
            clean input.

        Raises:
            ValueError: If there are samples to attack but no epsilon values.
        """
        n_eps = len(epsilon_list)
        if n_eps == 0 and len(X_true) > 0:
            raise ValueError("epsilon_list must not be empty")

        X_adv = []
        for index, x_true in enumerate(X_true):
            epsilon = epsilon_list[index % n_eps]

            # Add the batch axis without hard-coding the image size.
            original_image = tf.expand_dims(x_true, axis=0)
            original_label = np.reshape(np.argmax(y_true[index]), (1,)).astype("int64")
            adv_example = fast_gradient_method(
                logits_model,
                original_image,
                epsilon,
                np.inf,
                y=original_label,
                targeted=False,
            )
            # Drop the batch axis again.
            X_adv.append(np.array(adv_example)[0])

        return np.array(X_adv)
120
+
121
+
122
def simulate_train(s):
    """Adversarially train and save the model of cross-validation fold ``s``.

    Reads the module-level names created in the ``__main__`` section
    (``kfold``, ``X_train``, ``Y_train``, ``wideresnet``, ``sgd``, ``BS``,
    ``parameter``, ``epsilons``) and writes the weights to
    ``<model_name>_<fold>.h5``. Nothing happens when ``s`` is not a valid fold
    index.

    Args:
        s: Index of the fold to train.
    """
    # The original referenced `adversarial_training`, which was never created;
    # the trainer is built here from the global `parameter` settings.
    trainer = AdversarialTraining(parameter)

    for fold, (train_idx, val_idx) in enumerate(kfold.split(X_train)):
        if fold != s:
            continue

        print(s)
        model = wideresnet.create_wide_residual_network()
        model.compile(
            loss="categorical_crossentropy", optimizer=sgd, metrics=["acc"]
        )
        print("Finished compiling")

        x_train, y_train = X_train[train_idx], Y_train[train_idx]
        x_val, y_val = X_train[val_idx], Y_train[val_idx]

        train_dataset = tf.data.Dataset.from_tensor_slices((x_train, y_train))
        train_dataset = train_dataset.batch(BS)
        val_dataset = tf.data.Dataset.from_tensor_slices((x_val, y_val))
        val_dataset = val_dataset.batch(BS)

        trainer.train(model, train_dataset, val_dataset, epsilons)

        name = model_name + "_" + str(fold) + ".h5"
        model.save_weights(name)
        # Only one fold can match, so the remaining splits need not be visited.
        break
141
+
142
+
143
if __name__ == "__main__":
    # SGD and WideResidualNetwork were used below without being imported.
    from tensorflow.keras.optimizers import SGD
    from wresnet import WideResidualNetwork

    data = hkl.load("data.hkl")
    X_train, X_test, Y_train, y_test = (
        data["xtrain"],
        data["xtest"],
        data["ytrain"],
        data["ytest"],
    )
    epsilons = [i / 1000 for i in range(1, 33)]  # factor for fast gradient sign method

    # random_state is only valid together with shuffle=True (recent
    # scikit-learn raises ValueError); unshuffled folds are deterministic.
    kfold = KFold(n_splits=10)
    EPOCHS = 50
    BS = 64
    init = (32, 32, 1)
    # `learning_rate` replaces the deprecated `lr` keyword.
    sgd = SGD(learning_rate=0.1, momentum=0.9)
    parameter = {"epochs": EPOCHS, "batch_size": BS, "optimizer": sgd}
    # change here depending on your model
    wideresnet = WideResidualNetwork(
        init, 0.0001, 0.9, nb_classes=4, N=2, k=1, dropout=0.0
    )
    # Trainer object that simulate_train looks up as a module-level name.
    adversarial_training = AdversarialTraining(parameter)

    # One worker per fold. The original call used an undefined `f` and the
    # non-existent `np.range`.
    with Pool(10) as p:
        print(p.map(simulate_train, range(10)))
train/train_utiliy.py ADDED
@@ -0,0 +1,27 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import numpy as np
2
+
3
+
4
def noise(x, eps=0.3, order=np.inf, clip_min=None, clip_max=None):
    """
    A weak attack that just picks a random point in the attacker's action
    space. When combined with an attack bundling function, this can be used to
    implement random search.
    References:
    https://arxiv.org/abs/1802.00420 recommends random search to help identify
    gradient masking
    https://openreview.net/forum?id=H1g0piA9tQ recommends using noise as part
    of an attack building recipe combining many different optimizers to
    yield a strong optimizer.
    Arguments
    ---------
    x : array-like
        The input image(s).
    eps : float
        Half-width of the uniform noise interval ``[-eps, eps]``.
    order : np.inf
        Norm of the perturbation; only ``np.inf`` is implemented.
    clip_min, clip_max : float or None
        Optional lower / upper bound applied to the perturbed output.
    Returns
    -------
    numpy.ndarray
        ``x`` plus element-wise uniform noise, clipped when bounds are given.
    Raises
    ------
    NotImplementedError
        If ``order`` is not ``np.inf``; the offending value is the argument.
    """

    if order != np.inf:
        # The original passed the builtin `ord` here instead of `order`.
        raise NotImplementedError(order)

    eta = np.random.uniform(low=-eps, high=eps, size=np.shape(x))
    adv_x = x + eta

    # clip_min / clip_max were accepted but silently ignored before.
    if clip_min is not None or clip_max is not None:
        adv_x = np.clip(adv_x, clip_min, clip_max)

    return adv_x
train/training.py ADDED
@@ -0,0 +1,56 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import numpy as np
2
+ from cleverhans.future.tf2.attacks import fast_gradient_method
3
+ import pandas as pd
4
+ from sklearn.model_selection import KFold
5
+ import sys
6
+ import tensorflow
7
+ import tensorflow as tf
8
+
9
+ from _utility import print_test, get_adversarial_examples
10
+
11
+ import pickle
12
+
13
folder_name = "./adversarial_examples_parseval_net/src/logs/saved_models/"


def _fresh_optimizer(optimizer):
    """Return an unused copy of ``optimizer`` when it can be rebuilt from its config."""
    if hasattr(optimizer, "get_config") and hasattr(type(optimizer), "from_config"):
        return type(optimizer).from_config(optimizer.get_config())
    return optimizer


def train(
    instance,
    X_train,
    Y_train,
    X_test,
    y_test,
    epochs,
    BS,
    sgd,
    generator,
    callbacks_list,
    model_name="ResNet",
):
    """Ten-fold cross-validated training of the network built by ``instance``.

    For every fold a new model is created, trained, its history pickled to
    ``history_<model_name><fold>`` and the model saved to
    ``<folder_name><model_name>_<fold>.h5``.

    Args:
        instance: Object providing ``create_wide_residual_network()``.
        X_train: Training inputs, indexable by the index arrays of ``KFold``.
        Y_train: Labels aligned with ``X_train``.
        X_test: Not used by this function.
        y_test: Not used by this function.
        epochs: Number of epochs per fold.
        BS: Batch size.
        sgd: Optimizer passed to ``model.compile`` (cloned per fold when possible).
        generator: Object whose ``flow(x, y, batch_size=...)`` feeds ``fit``.
        callbacks_list: Callbacks passed to ``fit``.
        model_name: Base name used for the history and model files.
    """
    # random_state is only valid together with shuffle=True (recent
    # scikit-learn raises ValueError); unshuffled folds are deterministic.
    kfold = KFold(n_splits=10)

    for fold, (train_idx, val_idx) in enumerate(kfold.split(X_train)):

        model = instance.create_wide_residual_network()
        # Each fold trains an independent model, so it gets its own optimizer
        # state instead of inheriting the previous fold's.
        model.compile(
            loss="categorical_crossentropy",
            optimizer=_fresh_optimizer(sgd),
            metrics=["acc"],
        )

        print("Finished compiling")

        x_train, y_train = X_train[train_idx], Y_train[train_idx]
        x_val, y_val = X_train[val_idx], Y_train[val_idx]

        # validation_steps is dropped on purpose: with array validation data it
        # caps the number of validation batches, so part of the fold would be
        # left out of the validation metrics.
        hist = model.fit(
            generator.flow(x_train, y_train, batch_size=BS),
            steps_per_epoch=len(x_train) // BS,
            epochs=epochs,
            callbacks=callbacks_list,
            validation_data=(x_val, y_val),
        )
        ## write the history

        with open("history_" + model_name + str(fold), "wb") as file_pi:
            pickle.dump(hist.history, file_pi)

        # The original overwrote `model_name` here, so from the second fold on
        # every path was built from the previous fold's full file name.
        save_path = folder_name + model_name + "_" + str(fold) + ".h5"
        model.save(save_path)