init
helper_functions.py
ADDED
@@ -0,0 +1,288 @@
### We create a bunch of helpful functions throughout the course.
### Storing them here so they're easily accessible.

import tensorflow as tf

# Create a function to import an image and resize it to be able to be used with our model
def load_and_prep_image(filename, img_shape=224, scale=True):
    """
    Reads in an image from filename, turns it into a tensor and reshapes it into
    (img_shape, img_shape, 3).

    Parameters
    ----------
    filename (str): string filename of target image
    img_shape (int): size to resize target image to, default 224
    scale (bool): whether to scale pixel values to range(0, 1), default True
    """
    # Read in the image
    img = tf.io.read_file(filename)
    # Decode it into a tensor
    img = tf.image.decode_jpeg(img)
    # Resize the image
    img = tf.image.resize(img, [img_shape, img_shape])
    if scale:
        # Rescale the image (get all values between 0 and 1)
        return img/255.
    else:
        return img

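# Minimal usage sketch (the image path below is hypothetical; any local JPEG works):
# img = load_and_prep_image("sample_images/pizza.jpeg", img_shape=224, scale=True)
# img.shape  # -> TensorShape([224, 224, 3]), pixel values scaled to [0, 1]
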
# Note: The following confusion matrix code is a remix of Scikit-Learn's
# plot_confusion_matrix function - https://scikit-learn.org/stable/modules/generated/sklearn.metrics.plot_confusion_matrix.html
import itertools
import matplotlib.pyplot as plt
import numpy as np
from sklearn.metrics import confusion_matrix

# Our function needs a different name to sklearn's plot_confusion_matrix
def make_confusion_matrix(y_true, y_pred, classes=None, figsize=(10, 10), text_size=15, norm=False, savefig=False):
    """Makes a labelled confusion matrix comparing predictions and ground truth labels.

    If classes is passed, the confusion matrix will be labelled with them; if not, integer class values
    will be used.

    Args:
        y_true: Array of truth labels (must be same shape as y_pred).
        y_pred: Array of predicted labels (must be same shape as y_true).
        classes: Array of class labels (e.g. string form). If `None`, integer labels are used.
        figsize: Size of output figure (default=(10, 10)).
        text_size: Size of output figure text (default=15).
        norm: normalize values or not (default=False).
        savefig: save confusion matrix to file (default=False).

    Returns:
        A labelled confusion matrix plot comparing y_true and y_pred.

    Example usage:
        make_confusion_matrix(y_true=test_labels, # ground truth test labels
                              y_pred=y_preds, # predicted labels
                              classes=class_names, # array of class label names
                              figsize=(15, 15),
                              text_size=10)
    """
    # Create the confusion matrix
    cm = confusion_matrix(y_true, y_pred)
    cm_norm = cm.astype("float") / cm.sum(axis=1)[:, np.newaxis]  # normalize it
    n_classes = cm.shape[0]  # find the number of classes we're dealing with

    # Plot the figure and make it pretty
    fig, ax = plt.subplots(figsize=figsize)
    cax = ax.matshow(cm, cmap=plt.cm.Blues)  # colors will represent how 'correct' a class is, darker == better
    fig.colorbar(cax)

    # Is there a list of classes?
    if classes is not None:
        labels = classes
    else:
        labels = np.arange(cm.shape[0])

    # Label the axes
    ax.set(title="Confusion Matrix",
           xlabel="Predicted label",
           ylabel="True label",
           xticks=np.arange(n_classes),  # create enough axis slots for each class
           yticks=np.arange(n_classes),
           xticklabels=labels,  # axes will be labelled with class names (if they exist) or ints
           yticklabels=labels)

    # Make x-axis labels appear on the bottom
    ax.xaxis.set_label_position("bottom")
    ax.xaxis.tick_bottom()

    # Set the threshold for different colors
    threshold = (cm.max() + cm.min()) / 2.

    # Plot the text on each cell
    for i, j in itertools.product(range(cm.shape[0]), range(cm.shape[1])):
        if norm:
            plt.text(j, i, f"{cm[i, j]} ({cm_norm[i, j]*100:.1f}%)",
                     horizontalalignment="center",
                     color="white" if cm[i, j] > threshold else "black",
                     size=text_size)
        else:
            plt.text(j, i, f"{cm[i, j]}",
                     horizontalalignment="center",
                     color="white" if cm[i, j] > threshold else "black",
                     size=text_size)

    # Save the figure to the current working directory
    if savefig:
        fig.savefig("confusion_matrix.png")

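# Illustrative call with toy labels (values invented for demonstration only):
# make_confusion_matrix(y_true=[0, 1, 1, 2], y_pred=[0, 1, 2, 2],
#                       classes=["cat", "dog", "fish"], norm=True)
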
# Make a function to predict on images and plot them (works with multi-class)
def pred_and_plot(model, filename, class_names):
    """
    Imports an image located at filename, makes a prediction on it with
    a trained model and plots the image with the predicted class as the title.
    """
    # Import the target image and preprocess it
    img = load_and_prep_image(filename)

    # Make a prediction
    pred = model.predict(tf.expand_dims(img, axis=0))

    # Get the predicted class
    if len(pred[0]) > 1:  # check for multi-class
        pred_class = class_names[pred.argmax()]  # if more than one output, take the max
    else:
        pred_class = class_names[int(tf.round(pred)[0][0])]  # if only one output, round

    # Plot the image and predicted class
    plt.imshow(img)
    plt.title(f"Prediction: {pred_class}")
    plt.axis(False)

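# Example usage (hypothetical names; assumes a trained Keras `model`, a local image file
# and a `class_names` list ordered to match the model's output units):
# pred_and_plot(model=model, filename="sample_images/steak.jpeg", class_names=class_names)
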
import datetime

def create_tensorboard_callback(dir_name, experiment_name):
    """
    Creates a TensorBoard callback instance to store log files.

    Stores log files with the filepath:
        "dir_name/experiment_name/current_datetime/"

    Args:
        dir_name: target directory to store TensorBoard log files
        experiment_name: name of experiment directory (e.g. efficientnet_model_1)
    """
    log_dir = dir_name + "/" + experiment_name + "/" + datetime.datetime.now().strftime("%Y%m%d-%H%M%S")
    tensorboard_callback = tf.keras.callbacks.TensorBoard(
        log_dir=log_dir
    )
    print(f"Saving TensorBoard log files to: {log_dir}")
    return tensorboard_callback

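# Example usage (directory, experiment and dataset names below are placeholders):
# tensorboard_cb = create_tensorboard_callback(dir_name="tensorflow_hub",
#                                              experiment_name="efficientnet_model_1")
# model.fit(train_data, epochs=5, validation_data=test_data, callbacks=[tensorboard_cb])
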
# Plot the validation and training data separately
def plot_loss_curves(history):
    """
    Plots separate loss curves for training and validation metrics.

    Args:
        history: TensorFlow model History object (see: https://www.tensorflow.org/api_docs/python/tf/keras/callbacks/History)
    """
    loss = history.history['loss']
    val_loss = history.history['val_loss']

    accuracy = history.history['accuracy']
    val_accuracy = history.history['val_accuracy']

    epochs = range(len(history.history['loss']))

    # Plot loss
    plt.plot(epochs, loss, label='training_loss')
    plt.plot(epochs, val_loss, label='val_loss')
    plt.title('Loss')
    plt.xlabel('Epochs')
    plt.legend()

    # Plot accuracy
    plt.figure()
    plt.plot(epochs, accuracy, label='training_accuracy')
    plt.plot(epochs, val_accuracy, label='val_accuracy')
    plt.title('Accuracy')
    plt.xlabel('Epochs')
    plt.legend()

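# Example usage (assumes `history` came from a model.fit() call that included validation
# data, so 'val_loss' and 'val_accuracy' exist in history.history; names are placeholders):
# history = model.fit(train_data, epochs=5, validation_data=test_data)
# plot_loss_curves(history)
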
def compare_historys(original_history, new_history, initial_epochs=5):
    """
    Compares two TensorFlow model History objects.

    Args:
        original_history: History object from original model (before new_history)
        new_history: History object from continued model training (after original_history)
        initial_epochs: Number of epochs in original_history (new_history plot starts from here)
    """

    # Get original history measurements
    acc = original_history.history["accuracy"]
    loss = original_history.history["loss"]

    val_acc = original_history.history["val_accuracy"]
    val_loss = original_history.history["val_loss"]

    # Combine original history with new history
    total_acc = acc + new_history.history["accuracy"]
    total_loss = loss + new_history.history["loss"]

    total_val_acc = val_acc + new_history.history["val_accuracy"]
    total_val_loss = val_loss + new_history.history["val_loss"]

    # Make plots
    plt.figure(figsize=(8, 8))
    plt.subplot(2, 1, 1)
    plt.plot(total_acc, label='Training Accuracy')
    plt.plot(total_val_acc, label='Validation Accuracy')
    plt.plot([initial_epochs-1, initial_epochs-1],
             plt.ylim(), label='Start Fine Tuning')  # mark the epoch where fine-tuning started
    plt.legend(loc='lower right')
    plt.title('Training and Validation Accuracy')

    plt.subplot(2, 1, 2)
    plt.plot(total_loss, label='Training Loss')
    plt.plot(total_val_loss, label='Validation Loss')
    plt.plot([initial_epochs-1, initial_epochs-1],
             plt.ylim(), label='Start Fine Tuning')  # mark the epoch where fine-tuning started
    plt.legend(loc='upper right')
    plt.title('Training and Validation Loss')
    plt.xlabel('epoch')
    plt.show()

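# Example usage (hypothetical History objects from a feature-extraction run followed by fine-tuning):
# compare_historys(original_history=history_feature_extract,
#                  new_history=history_fine_tune,
#                  initial_epochs=5)
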
# Create function to unzip a zipfile into current working directory
# (since we're going to be downloading and unzipping a few files)
import zipfile

def unzip_data(filename):
    """
    Unzips filename into the current working directory.

    Args:
        filename (str): a filepath to a target zip folder to be unzipped.
    """
    zip_ref = zipfile.ZipFile(filename, "r")
    zip_ref.extractall()
    zip_ref.close()

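# Example usage (the zip filename is a placeholder for any archive in the working directory):
# unzip_data("10_food_classes_10_percent.zip")
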
# Walk through an image classification directory and find out how many files (images)
# are in each subdirectory.
import os

def walk_through_dir(dir_path):
    """
    Walks through dir_path returning its contents.

    Args:
        dir_path (str): target directory

    Returns:
        A print out of:
            number of subdirectories in dir_path
            number of images (files) in each subdirectory
            name of each subdirectory
    """
    for dirpath, dirnames, filenames in os.walk(dir_path):
        print(f"There are {len(dirnames)} directories and {len(filenames)} images in '{dirpath}'.")

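# Example usage (directory name is a placeholder for any unzipped image dataset):
# walk_through_dir("10_food_classes_10_percent")
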
# Function to evaluate: accuracy, precision, recall, f1-score
from sklearn.metrics import accuracy_score, precision_recall_fscore_support

def calculate_results(y_true, y_pred):
    """
    Calculates model accuracy, precision, recall and f1 score of a binary classification model.

    Args:
        y_true: true labels in the form of a 1D array
        y_pred: predicted labels in the form of a 1D array

    Returns a dictionary of accuracy, precision, recall, f1-score.
    """
    # Calculate model accuracy
    model_accuracy = accuracy_score(y_true, y_pred) * 100
    # Calculate model precision, recall and f1 score using the "weighted" average
    model_precision, model_recall, model_f1, _ = precision_recall_fscore_support(y_true, y_pred, average="weighted")
    model_results = {"accuracy": model_accuracy,
                     "precision": model_precision,
                     "recall": model_recall,
                     "f1": model_f1}
    return model_results
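
# Example usage with toy labels (values invented for illustration; accuracy is reported
# as a percentage while precision, recall and f1 stay in the range 0-1):
# calculate_results(y_true=[0, 1, 1, 0], y_pred=[0, 1, 0, 0])
# -> {'accuracy': 75.0, 'precision': ..., 'recall': ..., 'f1': ...}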