|
import tensorflow as tf |
|
|
|
|
|
def load_and_prep_image(filename, img_shape=224, scale=True):
    """
    Reads in an image from filename, turns it into a 3-channel tensor and
    resizes it to (img_shape, img_shape, 3).

    Parameters
    ----------
    filename (str): string filename of target image
    img_shape (int): size to resize target image to, default 224
    scale (bool): whether to scale pixel values to range(0, 1), default True

    Returns
    -------
    The resized image tensor of shape (img_shape, img_shape, 3). Values are
    divided by 255 when scale is True, otherwise returned as produced by
    tf.image.resize.
    """
    # Read the raw encoded bytes from disk
    img = tf.io.read_file(filename)

    # Force three colour channels: without `channels=3` a greyscale file
    # decodes to a single channel and breaks the documented (H, W, 3) shape
    img = tf.image.decode_jpeg(img, channels=3)

    # Resize to a square of side img_shape
    img = tf.image.resize(img, [img_shape, img_shape])

    if scale:
        # Rescale pixel values (assumes 8-bit input, i.e. 0-255)
        return img / 255.
    return img
|
|
|
|
|
|
|
import itertools |
|
import matplotlib.pyplot as plt |
|
import numpy as np |
|
from sklearn.metrics import confusion_matrix |
|
|
|
|
|
def make_confusion_matrix(y_true, y_pred, classes=None, figsize=(10, 10), text_size=15, norm=False, savefig=False):
    """Makes a labelled confusion matrix comparing predictions and ground truth labels.

    If classes is passed, confusion matrix will be labelled, if not, integer class values
    will be used.

    Args:
      y_true: Array of truth labels (must be same shape as y_pred).
      y_pred: Array of predicted labels (must be same shape as y_true).
      classes: Array of class labels (e.g. string form). If `None` (or empty),
        integer labels are used. Lists and NumPy arrays are both accepted.
      figsize: Size of output figure (default=(10, 10)).
      text_size: Size of output figure text (default=15).
      norm: if True, each cell also shows its share of the true-label row as a
        percentage (default=False).
      savefig: save confusion matrix to "confusion_matrix.png" in the current
        working directory (default=False).

    Returns:
      None. The labelled confusion matrix is drawn on a new matplotlib figure.

    Example usage:
      make_confusion_matrix(y_true=test_labels, # ground truth test labels
                            y_pred=y_preds, # predicted labels
                            classes=class_names, # array of class label names
                            figsize=(15, 15),
                            text_size=10)
    """
    # Raw counts: rows are true labels, columns are predicted labels
    cm = confusion_matrix(y_true, y_pred)
    n_classes = cm.shape[0]

    # Draw the matrix as a heatmap with a colour bar
    fig, ax = plt.subplots(figsize=figsize)
    cax = ax.matshow(cm, cmap=plt.cm.Blues)
    fig.colorbar(cax)

    # Explicit None/length check: the truth value of a NumPy array with more
    # than one element is ambiguous and would raise ValueError under `if classes:`
    if classes is not None and len(classes) > 0:
        labels = classes
    else:
        labels = np.arange(n_classes)

    # Label the axes
    ax.set(title="Confusion Matrix",
           xlabel="Predicted label",
           ylabel="True label",
           xticks=np.arange(n_classes),
           yticks=np.arange(n_classes),
           xticklabels=labels,
           yticklabels=labels)

    # matshow puts the x-axis on top by default; move it to the bottom
    ax.xaxis.set_label_position("bottom")
    ax.xaxis.tick_bottom()

    # Cells darker than the midpoint get white text so they stay readable
    threshold = (cm.max() + cm.min()) / 2.

    cm_norm = None
    if norm:
        # Row-normalise only when requested; rows with no true samples are
        # left at 0 instead of dividing by zero
        row_totals = cm.sum(axis=1, keepdims=True)
        cm_norm = np.divide(cm, row_totals,
                            out=np.zeros(cm.shape, dtype=float),
                            where=row_totals != 0)

    # Annotate every cell on the axes we created (not the implicit current axes)
    for i, j in itertools.product(range(cm.shape[0]), range(cm.shape[1])):
        if norm:
            cell_text = f"{cm[i, j]} ({cm_norm[i, j]*100:.1f}%)"
        else:
            cell_text = f"{cm[i, j]}"
        ax.text(j, i, cell_text,
                horizontalalignment="center",
                color="white" if cm[i, j] > threshold else "black",
                size=text_size)

    if savefig:
        fig.savefig("confusion_matrix.png")
|
|
|
|
|
def pred_and_plot(model, filename, class_names):
    """
    Loads the image at filename, runs model.predict on it and shows the image
    with the predicted class name as the plot title.

    Args:
      model: object exposing predict(); called with a batch of one image.
      filename: path of the image, passed to load_and_prep_image.
      class_names: sequence indexed by the predicted class index.
    """
    image = load_and_prep_image(filename)

    # The model is fed a batch, so add a leading batch dimension
    batch = tf.expand_dims(image, axis=0)
    pred_probs = model.predict(batch)

    if len(pred_probs[0]) > 1:
        # Several outputs per sample: take the index of the largest one
        class_index = pred_probs.argmax()
    else:
        # Single output per sample: round it to 0 or 1
        class_index = int(tf.round(pred_probs)[0][0])
    pred_class = class_names[class_index]

    plt.imshow(image)
    plt.title(f"Prediction: {pred_class}")
    plt.axis(False)
|
|
|
import datetime |
|
|
|
def create_tensorboard_callback(dir_name, experiment_name):
    """
    Builds a TensorBoard callback that writes its logs to a timestamped folder.

    The log directory is "dir_name/experiment_name/YYYYmmdd-HHMMSS", using the
    local time at the moment of the call. The chosen path is also printed.

    Args:
      dir_name: target directory to store TensorBoard log files
      experiment_name: name of experiment directory (e.g. efficientnet_model_1)

    Returns:
      A tf.keras.callbacks.TensorBoard instance configured with that log_dir.
    """
    timestamp = datetime.datetime.now().strftime("%Y%m%d-%H%M%S")
    log_dir = "/".join([dir_name, experiment_name, timestamp])

    tensorboard_callback = tf.keras.callbacks.TensorBoard(log_dir=log_dir)
    print(f"Saving TensorBoard log files to: {log_dir}")
    return tensorboard_callback
|
|
|
|
|
import matplotlib.pyplot as plt |
|
|
|
def plot_loss_curves(history):
    """
    Plots training/validation loss and training/validation accuracy curves.

    Loss is drawn on the current figure; accuracy is drawn on a new figure.

    Args:
      history: TensorFlow model History object (see: https://www.tensorflow.org/api_docs/python/tf/keras/callbacks/History).
        Its `history` dict must contain 'loss', 'val_loss', 'accuracy' and
        'val_accuracy'.
    """
    metrics = history.history
    train_loss, valid_loss = metrics['loss'], metrics['val_loss']
    train_acc, valid_acc = metrics['accuracy'], metrics['val_accuracy']

    # One x position per recorded epoch
    epoch_axis = range(len(metrics['loss']))

    # Loss curves
    plt.plot(epoch_axis, train_loss, label='training_loss')
    plt.plot(epoch_axis, valid_loss, label='val_loss')
    plt.title('Loss')
    plt.xlabel('Epochs')
    plt.legend()

    # Accuracy curves on a separate figure
    plt.figure()
    plt.plot(epoch_axis, train_acc, label='training_accuracy')
    plt.plot(epoch_axis, valid_acc, label='val_accuracy')
    plt.title('Accuracy')
    plt.xlabel('Epochs')
    plt.legend()
|
|
|
def compare_historys(original_history, new_history, initial_epochs=5):
    """
    Plots the metrics of two consecutive training runs as one continuous curve.

    The accuracy and loss lists of new_history are appended to those of
    original_history, and a vertical line marks where the second run starts.

    Args:
      original_history: History object from original model (before new_history)
      new_history: History object from continued model training (after original_history)
      initial_epochs: Number of epochs in original_history (new_history plot starts from here)
    """
    # Metrics recorded during the first run
    before = original_history.history
    base_acc, base_loss = before["accuracy"], before["loss"]
    base_val_acc, base_val_loss = before["val_accuracy"], before["val_loss"]

    # Append the second run's metrics to the first run's
    after = new_history.history
    combined_acc = base_acc + after["accuracy"]
    combined_loss = base_loss + after["loss"]
    combined_val_acc = base_val_acc + after["val_accuracy"]
    combined_val_loss = base_val_loss + after["val_loss"]

    def _draw_panel(position, train_values, val_values, train_label, val_label, legend_loc, title):
        # Draw one subplot: both curves plus the fine-tuning start marker
        plt.subplot(2, 1, position)
        plt.plot(train_values, label=train_label)
        plt.plot(val_values, label=val_label)
        split_x = initial_epochs - 1
        # The marker spans the y-limits as they stand after the curves are drawn
        plt.plot([split_x, split_x], plt.ylim(), label='Start Fine Tuning')
        plt.legend(loc=legend_loc)
        plt.title(title)

    plt.figure(figsize=(8, 8))
    _draw_panel(1, combined_acc, combined_val_acc,
                'Training Accuracy', 'Validation Accuracy',
                'lower right', 'Training and Validation Accuracy')
    _draw_panel(2, combined_loss, combined_val_loss,
                'Training Loss', 'Validation Loss',
                'upper right', 'Training and Validation Loss')
    plt.xlabel('epoch')
    plt.show()
|
|
|
|
|
|
|
import zipfile |
|
|
|
def unzip_data(filename):
    """
    Unzips filename into the current working directory.

    Args:
      filename (str): a filepath to a target zip folder to be unzipped.

    Raises:
      FileNotFoundError: if filename does not exist.
      zipfile.BadZipFile: if filename is not a valid zip archive.
    """
    # The context manager closes the archive even if extraction fails
    with zipfile.ZipFile(filename, "r") as zip_ref:
        zip_ref.extractall()
|
|
|
|
|
|
|
import zipfile |
|
import requests |
|
import os |
|
|
|
def download_and_unzip(url, target_folder):
    """
    Downloads the zip archive at url into target_folder and extracts it there.

    The archive is saved as target_folder/<last path component of url> and is
    left on disk after extraction.

    Args:
      url (str): address of the zip archive to download.
      target_folder (str): directory to download into and extract into;
        created if it does not exist.

    Raises:
      requests.HTTPError: if the server answers with an error status code.
      zipfile.BadZipFile: if the downloaded file is not a valid zip archive.
    """
    # Fetch before touching the filesystem so a failed request does not
    # leave an empty file behind
    r = requests.get(url)
    # Fail here on 4xx/5xx instead of saving an error page and hitting
    # BadZipFile during extraction
    r.raise_for_status()

    # An empty target_folder means the current directory; makedirs("") would fail
    if target_folder:
        os.makedirs(target_folder, exist_ok=True)

    filename = os.path.join(target_folder, os.path.basename(url))
    with open(filename, 'wb') as f:
        f.write(r.content)

    with zipfile.ZipFile(filename, 'r') as zip_ref:
        zip_ref.extractall(target_folder)
|
|
|
|
|
|
|
import os |
|
|
|
def walk_through_dir(dir_path):
    """
    Walks through dir_path and prints a summary line for every directory found.

    Args:
      dir_path (str): target directory

    Returns:
      None. For dir_path and each directory below it, one line is printed with:
        number of subdirectories directly inside it
        number of files directly inside it (reported as "images")
        its path
    """
    walker = os.walk(dir_path)
    for entry in walker:
        dirpath, dirnames, filenames = entry
        summary = f"There are {len(dirnames)} directories and {len(filenames)} images in '{dirpath}'."
        print(summary)
|
|
|
|
|
from sklearn.metrics import accuracy_score, precision_recall_fscore_support |
|
|
|
def calculate_results(y_true, y_pred):
    """
    Calculates accuracy, precision, recall and f1 score for a set of predictions.

    Accuracy is reported as a percentage (accuracy_score * 100). Precision,
    recall and f1 come from precision_recall_fscore_support with
    average="weighted" and are left unscaled.

    Args:
      y_true: true labels in the form of a 1D array
      y_pred: predicted labels in the form of a 1D array

    Returns a dictionary with the keys "accuracy", "precision", "recall" and "f1".
    """
    accuracy_pct = accuracy_score(y_true, y_pred) * 100

    # The fourth returned value (support) is not needed
    precision, recall, f1, _ = precision_recall_fscore_support(
        y_true, y_pred, average="weighted"
    )

    metric_names = ("accuracy", "precision", "recall", "f1")
    metric_values = (accuracy_pct, precision, recall, f1)
    return dict(zip(metric_names, metric_values))
|
|