|
""" |
|
This program applies transfer learning to an existing, pre-trained model and
re-trains it to classify a new set of images.

This example shows how to take an Inception v3 architecture model trained on
ImageNet images, and train a new top layer that can recognize other classes of
images.

You can replace the image_dir argument with any folder containing subfolders of
images. The label for each image is taken from the name of the subfolder it's
in.
|
|
|
""" |
|
from __future__ import absolute_import |
|
from __future__ import division |
|
from __future__ import print_function |
|
|
|
import argparse |
|
|
|
import hashlib |
|
import os.path |
|
import random |
|
import re |
|
import struct |
|
import sys |
|
import tarfile |
|
|
|
import numpy as np |
|
from six.moves import urllib |
|
import tensorflow.compat.v1 as tf |
|
|
|
from tensorflow.python.framework import graph_util |
|
from tensorflow.python.framework import tensor_shape |
|
from tensorflow.python.platform import gfile |
|
from tensorflow.python.util import compat |
|
|
|
# Settings namespace read as FLAGS.<name> (for example FLAGS.model_dir and
# FLAGS.learning_rate) by the functions below. It is None at import time;
# presumably it is filled in from an argparse parser defined outside the
# visible part of this file - TODO confirm.
FLAGS = None

# Archive that maybe_download_and_extract() downloads and unpacks.
DATA_URL = 'http://download.tensorflow.org/models/image/imagenet/inception-2015-12-05.tgz'

# Names of the tensors that create_inception_graph() asks import_graph_def()
# to return: the bottleneck output, the raw JPEG input and the resized image
# input of the imported graph.
BOTTLENECK_TENSOR_NAME = 'pool_3/_reshape:0'
# Number of values in one bottleneck vector; used for the bottleneck
# placeholder shape, the final layer weights and the binary (de)serializers.
BOTTLENECK_TENSOR_SIZE = 2048
# Height, width and channel count that add_input_distortions() crops/decodes
# images to before they are fed to the resized-input tensor.
MODEL_INPUT_WIDTH = 299
MODEL_INPUT_HEIGHT = 299
MODEL_INPUT_DEPTH = 3
JPEG_DATA_TENSOR_NAME = 'DecodeJpeg/contents:0'
RESIZED_INPUT_TENSOR_NAME = 'ResizeBilinear:0'
# Upper bound used both as the hash modulus when splitting images into sets
# and as the range of the random image index (about 134 million).
MAX_NUM_IMAGES_PER_CLASS = 2 ** 27 - 1
|
|
|
|
|
def create_image_lists(image_dir, testing_percentage, validation_percentage):
    """Build per-label lists of training, testing and validation images.

    Walks the sub folders of the image directory, assigns every JPEG file to
    one of the three sets based on a hash of its path (so the assignment is
    stable between runs), and returns a data structure describing the images
    found for each label.

    Args:
      image_dir: String path to a folder containing subfolders of images.
      testing_percentage: Integer percentage of the images to reserve for tests.
      validation_percentage: Integer percentage of images reserved for
        validation.

    Returns:
      A dictionary keyed by label name (the lower-cased folder name with runs
      of characters other than a-z and 0-9 replaced by a space). Each value is
      a dict with the keys 'dir' (the folder name), 'training', 'testing' and
      'validation' (lists of image base names). Returns None when image_dir
      does not exist.
    """
    if not gfile.Exists(image_dir):
        print("Image directory '" + image_dir + "' not found.")
        return None
    result = {}
    sub_dirs = [x[0] for x in gfile.Walk(image_dir)]
    extensions = ['jpg', 'jpeg', 'JPG', 'JPEG']

    # The first entry produced by the walk is the root directory itself.
    for sub_dir in sub_dirs[1:]:
        dir_name = os.path.basename(sub_dir)
        if dir_name == image_dir:
            continue
        print("Looking for images in '" + dir_name + "'")
        file_list = []
        seen_paths = set()
        for extension in extensions:
            file_glob = os.path.join(image_dir, dir_name, '*.' + extension)
            for path in gfile.Glob(file_glob):
                # On case-insensitive filesystems '*.jpg' and '*.JPG' match the
                # same files; keep each path only once so an image cannot be
                # listed twice in a set.
                if path not in seen_paths:
                    seen_paths.add(path)
                    file_list.append(path)
        if not file_list:
            print('No files found')
            continue
        if len(file_list) < 20:
            print('WARNING: Folder has less than 20 images, which may cause issues.')
        elif len(file_list) > MAX_NUM_IMAGES_PER_CLASS:
            print('WARNING: Folder {} has more than {} images. Some images will '
                  'never be selected.'.format(dir_name, MAX_NUM_IMAGES_PER_CLASS))
        label_name = re.sub(r'[^a-z0-9]+', ' ', dir_name.lower())
        training_images = []
        testing_images = []
        validation_images = []
        for file_name in file_list:
            base_name = os.path.basename(file_name)
            # Everything from '_nohash_' onwards is ignored when hashing, so
            # paths that differ only after that marker land in the same set.
            hash_name = re.sub(r'_nohash_.*$', '', file_name)
            # Turn the SHA-1 of the path into a number between 0 and ~100 and
            # compare it against the requested percentages. Because it depends
            # only on the path, a file keeps its set when other files are
            # added or removed.
            hash_name_hashed = hashlib.sha1(compat.as_bytes(hash_name)).hexdigest()
            percentage_hash = ((int(hash_name_hashed, 16) %
                                (MAX_NUM_IMAGES_PER_CLASS + 1)) *
                               (100.0 / MAX_NUM_IMAGES_PER_CLASS))
            if percentage_hash < validation_percentage:
                validation_images.append(base_name)
            elif percentage_hash < (testing_percentage + validation_percentage):
                testing_images.append(base_name)
            else:
                training_images.append(base_name)
        result[label_name] = {
            'dir': dir_name,
            'training': training_images,
            'testing': testing_images,
            'validation': validation_images,
        }
    return result
|
|
|
|
|
def get_image_path(image_lists, label_name, index, image_dir, category):
    """Return the path of the image selected by label, category and index.

    Args:
      image_lists: Dictionary of training images for each label.
      label_name: Label string we want to get an image for.
      index: Int offset of the image we want. It is taken modulo the number of
        images available for the label, so it can be arbitrarily large.
      image_dir: Root folder string of the subfolders containing the training
        images.
      category: Name string of set to pull images from - training, testing, or
        validation.

    Returns:
      File system path string to an image that meets the requested parameters.
    """
    # Each problem is reported through tf.logging.fatal before the lookup
    # that depends on it is attempted.
    if label_name not in image_lists:
        tf.logging.fatal('Label does not exist %s.', label_name)
    per_label = image_lists[label_name]
    if category not in per_label:
        tf.logging.fatal('Category does not exist %s.', category)
    candidates = per_label[category]
    if not candidates:
        tf.logging.fatal('Label %s has no images in the category %s.', label_name, category)
    chosen = candidates[index % len(candidates)]
    return os.path.join(image_dir, per_label['dir'], chosen)
|
|
|
|
|
def get_bottleneck_path(image_lists, label_name, index, bottleneck_dir, category):
    """Return the path of the cached bottleneck file for an image.

    The path is the one get_image_path() produces when bottleneck_dir is used
    as the root folder, with '.txt' appended.

    Args:
      image_lists: Dictionary of training images for each label.
      label_name: Label string we want to get an image for.
      index: Integer offset of the image we want. It is taken modulo the number
        of images available for the label, so it can be arbitrarily large.
      bottleneck_dir: Folder string holding cached files of bottleneck values.
      category: Name string of set to pull images from - training, testing, or
        validation.

    Returns:
      File system path string to the bottleneck file for the requested image.
    """
    image_path_in_cache = get_image_path(
        image_lists, label_name, index, bottleneck_dir, category)
    return image_path_in_cache + '.txt'
|
|
|
|
|
def create_inception_graph():
    """Load the saved GraphDef file into a new Graph object.

    Reads 'classify_image_graph_def.pb' from FLAGS.model_dir and imports it
    into a fresh graph.

    Returns:
      Tuple of (graph, bottleneck tensor, JPEG data input tensor, resized
      image input tensor) for the imported network.
    """
    model_filename = os.path.join(FLAGS.model_dir, 'classify_image_graph_def.pb')
    wanted_tensors = [
        BOTTLENECK_TENSOR_NAME,
        JPEG_DATA_TENSOR_NAME,
        RESIZED_INPUT_TENSOR_NAME,
    ]
    with tf.Graph().as_default() as graph:
        with gfile.FastGFile(model_filename, 'rb') as model_file:
            serialized_graph = model_file.read()
        graph_def = tf.GraphDef()
        graph_def.ParseFromString(serialized_graph)
        # name='' imports the nodes without a prefix so the tensor names
        # listed above resolve unchanged.
        imported = tf.import_graph_def(
            graph_def, name='', return_elements=wanted_tensors)
    bottleneck_tensor, jpeg_data_tensor, resized_input_tensor = imported
    return graph, bottleneck_tensor, jpeg_data_tensor, resized_input_tensor
|
|
|
|
|
def run_bottleneck_on_image(sess, image_data, image_data_tensor, bottleneck_tensor):
    """Run inference on an image to extract the 'bottleneck' summary layer.

    Args:
      sess: Current active TensorFlow Session.
      image_data: String of raw JPEG data.
      image_data_tensor: Input data layer in the graph.
      bottleneck_tensor: Layer before the final softmax.

    Returns:
      Numpy array of bottleneck values with size-one dimensions removed.
    """
    feed = {image_data_tensor: image_data}
    raw_values = sess.run(bottleneck_tensor, feed)
    return np.squeeze(raw_values)
|
|
|
|
|
def maybe_download_and_extract():
    """Download the model archive if needed and extract it.

    If the archive named by DATA_URL is not already present in
    FLAGS.model_dir, it is downloaded there (printing progress to stdout).
    The archive is then unpacked into FLAGS.model_dir on every call.

    NOTE(review): the archive is fetched over plain HTTP and unpacked with
    extractall() without validating member paths - confirm the source is
    trusted before pointing DATA_URL elsewhere.
    """
    dest_directory = FLAGS.model_dir
    if not os.path.exists(dest_directory):
        os.makedirs(dest_directory)
    filename = DATA_URL.split('/')[-1]
    filepath = os.path.join(dest_directory, filename)
    if not os.path.exists(filepath):

        def _progress(count, block_size, total_size):
            sys.stdout.write('\r>> Downloading %s %.1f%%' %
                             (filename,
                              float(count * block_size) / float(total_size) * 100.0))
            sys.stdout.flush()

        try:
            filepath, _ = urllib.request.urlretrieve(DATA_URL, filepath, _progress)
        except BaseException:
            # A failed or interrupted download would otherwise leave a
            # truncated archive that later runs mistake for a complete one.
            if os.path.exists(filepath):
                os.remove(filepath)
            raise
        print()
        statinfo = os.stat(filepath)
        print('Successfully downloaded', filename, statinfo.st_size, 'bytes.')
    # Close the archive deterministically instead of leaking the handle.
    with tarfile.open(filepath, 'r:gz') as archive:
        archive.extractall(dest_directory)
|
|
|
|
|
def ensure_dir_exists(dir_name):
    """Create the folder (and missing parents) unless the path already exists.

    Args:
      dir_name: Path string to the folder we want to create.
    """
    if os.path.exists(dir_name):
        return
    os.makedirs(dir_name)
|
|
|
|
|
def write_list_of_floats_to_file(list_of_floats, file_path):
    """Write a list of floats to a binary file.

    The values are packed as BOTTLENECK_TENSOR_SIZE native doubles, so the
    list must contain exactly that many items.

    Args:
      list_of_floats: List of floats we want to write to a file.
      file_path: Path to a file where list of floats will be stored.
    """
    layout = 'd' * BOTTLENECK_TENSOR_SIZE
    # Pack before opening so a bad list does not create or truncate the file.
    packed = struct.pack(layout, *list_of_floats)
    with open(file_path, 'wb') as out_file:
        out_file.write(packed)
|
|
|
|
|
def read_list_of_floats_from_file(file_path):
    """Read a list of floats from a binary file.

    The file must hold exactly BOTTLENECK_TENSOR_SIZE native doubles.

    Args:
      file_path: Path to a file where list of floats was stored.

    Returns:
      List of the float values read from the file.
    """
    layout = 'd' * BOTTLENECK_TENSOR_SIZE
    with open(file_path, 'rb') as in_file:
        raw = in_file.read()
    return list(struct.unpack(layout, raw))
|
|
|
|
|
# Module-level dictionary, empty at import time. No function in the visible
# part of this file reads or writes it; presumably intended as an in-memory
# map from bottleneck file path to bottleneck values - TODO confirm.
bottleneck_path_2_bottleneck_values = {}
|
|
|
|
|
def create_bottleneck_file(bottleneck_path, image_lists, label_name, index,
                           image_dir, category, sess, jpeg_data_tensor,
                           bottleneck_tensor):
    """Create a single bottleneck file.

    Reads the image selected by (label_name, index, category), runs it
    through the network up to the bottleneck layer and writes the resulting
    values to bottleneck_path as comma-separated text.

    Args:
      bottleneck_path: Path string of the bottleneck file to write.
      image_lists: Dictionary of training images for each label.
      label_name: Label string we want to get an image for.
      index: Integer offset of the image we want.
      image_dir: Root folder string of the subfolders containing the images.
      category: Name string of which set to pull from - training, testing, or
        validation.
      sess: The current active TensorFlow Session.
      jpeg_data_tensor: The tensor to feed loaded jpeg data into.
      bottleneck_tensor: The output tensor for the bottleneck values.

    Raises:
      RuntimeError: If running the network on the image fails.
    """
    print('Creating bottleneck at ' + bottleneck_path)
    image_path = get_image_path(image_lists, label_name, index,
                                image_dir, category)
    if not gfile.Exists(image_path):
        tf.logging.fatal('File does not exist %s', image_path)
    # Use a context manager so the image file handle is always closed.
    with gfile.FastGFile(image_path, 'rb') as image_file:
        image_data = image_file.read()
    try:
        bottleneck_values = run_bottleneck_on_image(
            sess, image_data, jpeg_data_tensor, bottleneck_tensor)
    except Exception as error:  # Let KeyboardInterrupt/SystemExit through.
        # Keep the original wording and append the cause so it is not lost.
        raise RuntimeError('Error during processing file %s (%s)' %
                           (image_path, str(error)))

    bottleneck_string = ','.join(str(x) for x in bottleneck_values)
    with open(bottleneck_path, 'w') as bottleneck_file:
        bottleneck_file.write(bottleneck_string)
|
|
|
|
|
def get_or_create_bottleneck(sess, image_lists, label_name, index, image_dir,
                             category, bottleneck_dir, jpeg_data_tensor,
                             bottleneck_tensor):
    """Load the cached bottleneck values for an image, creating them if needed.

    If a cached bottleneck file exists on disk its contents are returned;
    otherwise the values are calculated and saved first. A cache file that
    cannot be parsed as floats is regenerated once and read again.

    Args:
      sess: The current active TensorFlow Session.
      image_lists: Dictionary of training images for each label.
      label_name: Label string we want to get an image for.
      index: Integer offset of the image we want. It is taken modulo the number
        of images available for the label, so it can be arbitrarily large.
      image_dir: Root folder string of the subfolders containing the training
        images.
      category: Name string of which set to pull images from - training,
        testing, or validation.
      bottleneck_dir: Folder string holding cached files of bottleneck values.
      jpeg_data_tensor: The tensor to feed loaded jpeg data into.
      bottleneck_tensor: The output tensor for the bottleneck values.

    Returns:
      List of float values produced by the bottleneck layer for the image.
    """
    cache_sub_dir = os.path.join(bottleneck_dir, image_lists[label_name]['dir'])
    ensure_dir_exists(cache_sub_dir)
    bottleneck_path = get_bottleneck_path(image_lists, label_name, index,
                                          bottleneck_dir, category)

    def _regenerate():
        create_bottleneck_file(bottleneck_path, image_lists, label_name, index,
                               image_dir, category, sess, jpeg_data_tensor,
                               bottleneck_tensor)

    def _read_cache_text():
        with open(bottleneck_path, 'r') as cached_file:
            return cached_file.read()

    if not os.path.exists(bottleneck_path):
        _regenerate()
    cached_text = _read_cache_text()
    try:
        return [float(field) for field in cached_text.split(',')]
    except ValueError:
        print('Invalid float found, recreating bottleneck')

    # Only reached when the cached file was corrupt. A parse failure on the
    # freshly written file is allowed to propagate.
    _regenerate()
    return [float(field) for field in _read_cache_text().split(',')]
|
|
|
|
|
def cache_bottlenecks(sess, image_lists, image_dir, bottleneck_dir,
                      jpeg_data_tensor, bottleneck_tensor):
    """Make sure every training, testing and validation bottleneck is cached.

    When no distortions are applied the same image is likely to be read many
    times, so it pays to calculate each bottleneck once up front and read the
    cached values during training. This walks every image in image_lists and
    calls get_or_create_bottleneck() for it, printing a progress line after
    every 100 images.

    Args:
      sess: The current active TensorFlow Session.
      image_lists: Dictionary of training images for each label.
      image_dir: Root folder string of the subfolders containing the training
        images.
      bottleneck_dir: Folder string holding cached files of bottleneck values.
      jpeg_data_tensor: Input tensor for jpeg data from file.
      bottleneck_tensor: The penultimate output layer of the graph.

    Returns:
      Nothing.
    """
    ensure_dir_exists(bottleneck_dir)
    processed = 0
    for label_name, label_lists in image_lists.items():
        for category in ['training', 'testing', 'validation']:
            for index, _ in enumerate(label_lists[category]):
                get_or_create_bottleneck(sess, image_lists, label_name, index,
                                         image_dir, category, bottleneck_dir,
                                         jpeg_data_tensor, bottleneck_tensor)
                processed += 1
                if not processed % 100:
                    print(str(processed) + ' bottleneck files created.')
|
|
|
|
|
def get_random_cached_bottlenecks(sess, image_lists, how_many, category,
                                  bottleneck_dir, image_dir, jpeg_data_tensor,
                                  bottleneck_tensor):
    """Retrieve bottleneck values for cached images.

    If no distortions are being applied, this function can retrieve the cached
    bottleneck values directly from disk for images. It picks a random set of
    images from the specified category, or every image in it.

    Args:
      sess: Current TensorFlow Session.
      image_lists: Dictionary of training images for each label.
      how_many: If zero or positive, a random sample of this size is chosen
        (labels and images are drawn with replacement; zero yields empty
        lists). If negative, all bottlenecks are retrieved.
      category: Name string of which set to pull from - training, testing, or
        validation.
      bottleneck_dir: Folder string holding cached files of bottleneck values.
      image_dir: Root folder string of the subfolders containing the training
        images.
      jpeg_data_tensor: The layer to feed jpeg image data into.
      bottleneck_tensor: The bottleneck output layer of the CNN graph.

    Returns:
      List of bottleneck arrays, their corresponding ground truths, and the
      relevant filenames.
    """
    # Build the label list once instead of once per sample.
    label_names = list(image_lists.keys())
    class_count = len(label_names)

    def _selections():
        """Yield the (label_index, image_index) pairs to retrieve."""
        if how_many >= 0:
            for _ in range(how_many):
                label_index = random.randrange(class_count)
                # Any index is valid: get_image_path() reduces it modulo the
                # number of images available for the label.
                image_index = random.randrange(MAX_NUM_IMAGES_PER_CLASS + 1)
                yield label_index, image_index
        else:
            for label_index, label_name in enumerate(label_names):
                for image_index, _ in enumerate(image_lists[label_name][category]):
                    yield label_index, image_index

    bottlenecks = []
    ground_truths = []
    filenames = []
    for label_index, image_index in _selections():
        label_name = label_names[label_index]
        image_name = get_image_path(image_lists, label_name, image_index,
                                    image_dir, category)
        bottleneck = get_or_create_bottleneck(sess, image_lists, label_name,
                                              image_index, image_dir, category,
                                              bottleneck_dir, jpeg_data_tensor,
                                              bottleneck_tensor)
        # One-hot ground truth vector for the chosen label.
        ground_truth = np.zeros(class_count, dtype=np.float32)
        ground_truth[label_index] = 1.0
        bottlenecks.append(bottleneck)
        ground_truths.append(ground_truth)
        filenames.append(image_name)
    return bottlenecks, ground_truths, filenames
|
|
|
|
|
def get_random_distorted_bottlenecks(
        sess, image_lists, how_many, category, image_dir, input_jpeg_tensor,
        distorted_image, resized_input_tensor, bottleneck_tensor):
    """Retrieve bottleneck values for training images, after distortions.

    If we're training with distortions like crops, scales, or flips, we have to
    recalculate the full model for every image, and so we can't use cached
    bottleneck values. Instead we find random images for the requested category,
    run them through the distortion graph, and then the full graph to get the
    bottleneck results for each.

    Args:
      sess: Current TensorFlow Session.
      image_lists: Dictionary of training images for each label.
      how_many: The integer number of bottleneck values to return.
      category: Name string of which set of images to fetch - training, testing,
        or validation.
      image_dir: Root folder string of the subfolders containing the training
        images.
      input_jpeg_tensor: The input layer we feed the image data to.
      distorted_image: The output node of the distortion graph.
      resized_input_tensor: The input node of the recognition graph.
      bottleneck_tensor: The bottleneck output layer of the CNN graph.

    Returns:
      List of bottleneck arrays and their corresponding ground truths.
    """
    # Build the label list once instead of once per sample.
    label_names = list(image_lists.keys())
    class_count = len(label_names)
    bottlenecks = []
    ground_truths = []
    for unused_i in range(how_many):
        label_index = random.randrange(class_count)
        label_name = label_names[label_index]
        image_index = random.randrange(MAX_NUM_IMAGES_PER_CLASS + 1)
        image_path = get_image_path(image_lists, label_name, image_index, image_dir,
                                    category)
        if not gfile.Exists(image_path):
            tf.logging.fatal('File does not exist %s', image_path)
        # Use a context manager so the image file handle is always closed.
        with gfile.FastGFile(image_path, 'rb') as image_file:
            jpeg_data = image_file.read()
        # Run the distortion graph first, then feed its output into the
        # recognition graph's resized-image input.
        distorted_image_data = sess.run(distorted_image,
                                        {input_jpeg_tensor: jpeg_data})
        bottleneck = run_bottleneck_on_image(sess, distorted_image_data,
                                             resized_input_tensor,
                                             bottleneck_tensor)
        # One-hot ground truth vector for the chosen label.
        ground_truth = np.zeros(class_count, dtype=np.float32)
        ground_truth[label_index] = 1.0
        bottlenecks.append(bottleneck)
        ground_truths.append(ground_truth)
    return bottlenecks, ground_truths
|
|
|
|
|
def should_distort_images(flip_left_right, random_crop, random_scale,
                          random_brightness):
    """Tell whether any distortion is enabled by the input flags.

    Args:
      flip_left_right: Boolean whether to randomly mirror images horizontally.
      random_crop: Integer percentage setting the total margin used around the
        crop box.
      random_scale: Integer percentage of how much to vary the scale by.
      random_brightness: Integer range to randomly multiply the pixel values by.

    Returns:
      A truthy value when flipping is requested or any of the three amounts is
      non-zero, otherwise False.
    """
    if flip_left_right:
        return flip_left_right
    amounts = (random_crop, random_scale, random_brightness)
    return any(amount != 0 for amount in amounts)
|
|
|
|
|
def add_input_distortions(flip_left_right, random_crop, random_scale,
                          random_brightness):
    """Create the operations to apply the specified distortions.

    During training it can help to improve the results if we run the images
    through simple distortions like crops, scales, and flips. These reflect the
    kind of variations we expect in the real world, and so can help train the
    model to cope with natural data more effectively. Here we take the supplied
    parameters and construct a network of operations to apply them to an image.

    Cropping

    Cropping is done by placing a bounding box at a random position in the full
    image. The cropping parameter controls the size of that box relative to the
    input image. If it's zero, then the box is the same size as the input and no
    cropping is performed. If the value is 50%, then the crop box will be half the
    width and height of the input. In a diagram it looks like this:

    <       width         >
    +---------------------+
    |                     |
    |   width - crop%     |
    |    <      >         |
    |    +------+         |
    |    |      |         |
    |    |      |         |
    |    |      |         |
    |    +------+         |
    |                     |
    |                     |
    +---------------------+

    Scaling

    Scaling is a lot like cropping, except that the bounding box is always
    centered and its size varies randomly within the given range. For example if
    the scale percentage is zero, then the bounding box is the same size as the
    input and no scaling is applied. If it's 50%, then the bounding box will be in
    a random range between half the width and height and full size.

    Args:
      flip_left_right: Boolean whether to randomly mirror images horizontally.
      random_crop: Integer percentage setting the total margin used around the
        crop box.
      random_scale: Integer percentage of how much to vary the scale by.
      random_brightness: Integer range to randomly multiply the pixel values by.

    Returns:
      The jpeg input layer and the distorted result tensor.
    """
    jpeg_data = tf.placeholder(tf.string, name='DistortJPGInput')
    decoded_image = tf.image.decode_jpeg(jpeg_data, channels=MODEL_INPUT_DEPTH)
    decoded_image_as_float = tf.cast(decoded_image, dtype=tf.float32)
    decoded_image_4d = tf.expand_dims(decoded_image_as_float, 0)
    # The image is first resized to the model input size enlarged by the crop
    # margin and a random scale factor, then randomly cropped back down to
    # exactly the model input size.
    margin_scale = 1.0 + (random_crop / 100.0)
    resize_scale = 1.0 + (random_scale / 100.0)
    margin_scale_value = tf.constant(margin_scale)
    # shape=[] requests a scalar; the private tensor_shape.scalar() helper
    # used previously is not available in TensorFlow 2.x.
    resize_scale_value = tf.random_uniform([],
                                           minval=1.0,
                                           maxval=resize_scale)
    scale_value = tf.multiply(margin_scale_value, resize_scale_value)
    precrop_width = tf.multiply(scale_value, MODEL_INPUT_WIDTH)
    precrop_height = tf.multiply(scale_value, MODEL_INPUT_HEIGHT)
    precrop_shape = tf.stack([precrop_height, precrop_width])
    precrop_shape_as_int = tf.cast(precrop_shape, dtype=tf.int32)
    precropped_image = tf.image.resize_bilinear(decoded_image_4d,
                                                precrop_shape_as_int)
    precropped_image_3d = tf.squeeze(precropped_image, squeeze_dims=[0])
    cropped_image = tf.random_crop(precropped_image_3d,
                                   [MODEL_INPUT_HEIGHT, MODEL_INPUT_WIDTH,
                                    MODEL_INPUT_DEPTH])
    if flip_left_right:
        flipped_image = tf.image.random_flip_left_right(cropped_image)
    else:
        flipped_image = cropped_image
    # Brightness is a random multiplier within +/- random_brightness percent.
    brightness_min = 1.0 - (random_brightness / 100.0)
    brightness_max = 1.0 + (random_brightness / 100.0)
    brightness_value = tf.random_uniform([],
                                         minval=brightness_min,
                                         maxval=brightness_max)
    brightened_image = tf.multiply(flipped_image, brightness_value)
    # Restore the batch dimension expected by the recognition graph input.
    distort_result = tf.expand_dims(brightened_image, 0, name='DistortResult')
    return jpeg_data, distort_result
|
|
|
|
|
def variable_summaries(var):
    """Attach a lot of summaries to a Tensor (for TensorBoard visualization).

    Records scalar summaries for the mean, standard deviation, maximum and
    minimum of the tensor, plus a histogram of its values, all created under
    a 'summaries' name scope.

    Args:
      var: The tensor (or variable) to summarize.
    """
    with tf.name_scope('summaries'):
        mean = tf.reduce_mean(var)
        tf.summary.scalar('mean', mean)
        with tf.name_scope('stddev'):
            # Standard deviation as sqrt(mean((var - mean) ** 2)).
            stddev = tf.sqrt(tf.reduce_mean(tf.square(var - mean)))
        tf.summary.scalar('stddev', stddev)
        tf.summary.scalar('max', tf.reduce_max(var))
        tf.summary.scalar('min', tf.reduce_min(var))
        tf.summary.histogram('histogram', var)
|
|
|
|
|
def add_final_training_ops(class_count, final_tensor_name, bottleneck_tensor):
    """Add a new softmax and fully-connected layer for training.

    We need to retrain the top layer to identify our new classes, so this function
    adds the right operations to the graph, along with some variables to hold the
    weights, and then sets up all the gradients for the backward pass.

    The set up for the softmax and fully-connected layers is based on:
    https://tensorflow.org/versions/master/tutorials/mnist/beginners/index.html

    Args:
      class_count: Integer of how many categories of things we're trying to
        recognize.
      final_tensor_name: Name string for the new final node that produces results.
      bottleneck_tensor: The output of the main CNN graph.

    Returns:
      Tuple of (train step, mean cross entropy, bottleneck input, ground truth
      input, final softmax tensor).
    """
    with tf.name_scope('input'):
        # Defaults to the bottleneck output of the CNN graph, but cached or
        # precomputed bottleneck values can be fed in its place.
        bottleneck_input = tf.placeholder_with_default(
            bottleneck_tensor, shape=[None, BOTTLENECK_TENSOR_SIZE],
            name='BottleneckInputPlaceholder')

        ground_truth_input = tf.placeholder(tf.float32,
                                            [None, class_count],
                                            name='GroundTruthInput')

    # Group the new layer's ops under one name scope.
    layer_name = 'final_training_ops'
    with tf.name_scope(layer_name):
        with tf.name_scope('weights'):
            # Small random initial weights (truncated normal, stddev 0.001).
            initial_value = tf.truncated_normal([BOTTLENECK_TENSOR_SIZE, class_count],
                                                stddev=0.001)

            layer_weights = tf.Variable(initial_value, name='final_weights')

            variable_summaries(layer_weights)
        with tf.name_scope('biases'):
            layer_biases = tf.Variable(tf.zeros([class_count]), name='final_biases')
            variable_summaries(layer_biases)
        with tf.name_scope('Wx_plus_b'):
            logits = tf.matmul(bottleneck_input, layer_weights) + layer_biases
            tf.summary.histogram('pre_activations', logits)

    # Created outside the name scopes above so the op is named exactly
    # final_tensor_name.
    final_tensor = tf.nn.softmax(logits, name=final_tensor_name)
    tf.summary.histogram('activations', final_tensor)

    with tf.name_scope('cross_entropy'):
        cross_entropy = tf.nn.softmax_cross_entropy_with_logits(
            labels=ground_truth_input, logits=logits)
        with tf.name_scope('total'):
            cross_entropy_mean = tf.reduce_mean(cross_entropy)
    tf.summary.scalar('cross_entropy', cross_entropy_mean)

    with tf.name_scope('train'):
        # Plain gradient descent using the learning rate from FLAGS.
        optimizer = tf.train.GradientDescentOptimizer(FLAGS.learning_rate)
        train_step = optimizer.minimize(cross_entropy_mean)

    return (train_step, cross_entropy_mean, bottleneck_input, ground_truth_input,
            final_tensor)
|
|
|
|
|
def add_evaluation_step(result_tensor, ground_truth_tensor):
    """Insert the operations we need to evaluate the accuracy of our results.

    Args:
      result_tensor: The new final node that produces results.
      ground_truth_tensor: The node we feed ground truth data into.

    Returns:
      Tuple of (evaluation step, prediction).
    """
    with tf.name_scope('accuracy'):
        with tf.name_scope('correct_prediction'):
            # The predicted class is the index of the largest output; it is
            # correct when it matches the index of the largest ground truth
            # entry.
            prediction = tf.argmax(result_tensor, 1)
            correct_prediction = tf.equal(
                prediction, tf.argmax(ground_truth_tensor, 1))
        with tf.name_scope('accuracy'):
            # Accuracy is the fraction of correct predictions.
            evaluation_step = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))
    tf.summary.scalar('accuracy', evaluation_step)
    return evaluation_step, prediction
|
|
|
|
|
def main(_):
    """
    Brief:
        Entry point of the retraining run. Resets the summaries directory,
        loads the pre-trained graph, builds the image lists, adds and trains
        a new final layer, reports train/validation/test accuracy, and
        finally writes the trained graph (variables converted to constants)
        and the label list to FLAGS.output_graph / FLAGS.output_labels.
        All configuration is read from the module-level FLAGS object.
    Args:
        _: Unused. tf.app.run calls main with the leftover command-line
           arguments, which this function ignores.
    Returns:
        -1 when zero or only one image class is found under FLAGS.image_dir;
        otherwise None once the graph and labels have been written.
    """
    # Always start from an empty TensorBoard summaries directory.
    if tf.gfile.Exists(FLAGS.summaries_dir):
        tf.gfile.DeleteRecursively(FLAGS.summaries_dir)
    tf.gfile.MakeDirs(FLAGS.summaries_dir)

    # Set up the pre-trained graph.
    maybe_download_and_extract()
    graph, bottleneck_tensor, jpeg_data_tensor, resized_image_tensor = (
        create_inception_graph())

    # Build the per-label image lists from the folder structure.
    image_lists = create_image_lists(FLAGS.image_dir, FLAGS.testing_percentage,
                                     FLAGS.validation_percentage)
    class_count = len(image_lists.keys())
    if class_count == 0:
        print('No valid folders of images found at ' + FLAGS.image_dir)
        return -1
    if class_count == 1:
        print('Only one valid folder of images found at ' + FLAGS.image_dir +
              ' - multiple classes are needed for classification.')
        return -1

    # Decide from the command-line flags whether distortions are applied.
    do_distort_images = should_distort_images(
        FLAGS.flip_left_right, FLAGS.random_crop, FLAGS.random_scale,
        FLAGS.random_brightness)

    with tf.Session(graph=graph) as sess:

        if do_distort_images:
            # Distortions requested: add the operations that produce them.
            (distorted_jpeg_data_tensor,
             distorted_image_tensor) = add_input_distortions(
                 FLAGS.flip_left_right, FLAGS.random_crop,
                 FLAGS.random_scale, FLAGS.random_brightness)
        else:
            # No distortions: populate the bottleneck cache up front.
            cache_bottlenecks(sess, image_lists, FLAGS.image_dir,
                              FLAGS.bottleneck_dir, jpeg_data_tensor,
                              bottleneck_tensor)

        # Add the new layer that will be trained. class_count was computed
        # above from the same image_lists, so it is reused here.
        (train_step, cross_entropy, bottleneck_input, ground_truth_input,
         final_tensor) = add_final_training_ops(class_count,
                                                FLAGS.final_tensor_name,
                                                bottleneck_tensor)

        # Operations used to evaluate the accuracy of the new layer.
        evaluation_step, prediction = add_evaluation_step(
            final_tensor, ground_truth_input)

        # Merge all summaries and create the writers under summaries_dir.
        merged = tf.summary.merge_all()
        train_writer = tf.summary.FileWriter(FLAGS.summaries_dir + '/train',
                                             sess.graph)
        validation_writer = tf.summary.FileWriter(
            FLAGS.summaries_dir + '/validation')

        # The writers are closed in `finally` so that buffered summary events
        # are flushed to disk even when training stops with an exception.
        try:
            # Set all variables to their initial values.
            init = tf.global_variables_initializer()
            sess.run(init)

            # Run the requested number of training steps.
            for i in range(FLAGS.how_many_training_steps):
                # Fetch a training batch: freshly distorted images, or
                # bottleneck values taken from the on-disk cache.
                if do_distort_images:
                    (train_bottlenecks,
                     train_ground_truth) = get_random_distorted_bottlenecks(
                         sess, image_lists, FLAGS.train_batch_size, 'training',
                         FLAGS.image_dir, distorted_jpeg_data_tensor,
                         distorted_image_tensor, resized_image_tensor,
                         bottleneck_tensor)
                else:
                    (train_bottlenecks,
                     train_ground_truth, _) = get_random_cached_bottlenecks(
                         sess, image_lists, FLAGS.train_batch_size, 'training',
                         FLAGS.bottleneck_dir, FLAGS.image_dir,
                         jpeg_data_tensor, bottleneck_tensor)

                # Run one training step and record its summaries.
                train_summary, _ = sess.run(
                    [merged, train_step],
                    feed_dict={bottleneck_input: train_bottlenecks,
                               ground_truth_input: train_ground_truth})
                train_writer.add_summary(train_summary, i)

                # Periodically (and on the last step) report progress.
                is_last_step = (i + 1 == FLAGS.how_many_training_steps)
                if (i % FLAGS.eval_step_interval) == 0 or is_last_step:
                    train_accuracy, cross_entropy_value = sess.run(
                        [evaluation_step, cross_entropy],
                        feed_dict={bottleneck_input: train_bottlenecks,
                                   ground_truth_input: train_ground_truth})
                    validation_bottlenecks, validation_ground_truth, _ = (
                        get_random_cached_bottlenecks(
                            sess, image_lists, FLAGS.validation_batch_size,
                            'validation', FLAGS.bottleneck_dir,
                            FLAGS.image_dir, jpeg_data_tensor,
                            bottleneck_tensor))

                    # Run a validation step and record its summaries.
                    validation_summary, validation_accuracy = sess.run(
                        [merged, evaluation_step],
                        feed_dict={bottleneck_input: validation_bottlenecks,
                                   ground_truth_input: validation_ground_truth})
                    validation_writer.add_summary(validation_summary, i)
                    print('Step: %d, Train accuracy: %.4f%%, Cross entropy: %f, Validation accuracy: %.1f%% (N=%d)' % (i,
                          train_accuracy * 100, cross_entropy_value, validation_accuracy * 100, len(validation_bottlenecks)))
        finally:
            train_writer.close()
            validation_writer.close()

        # Training is complete: run a final evaluation on the 'testing' split.
        test_bottlenecks, test_ground_truth, test_filenames = (
            get_random_cached_bottlenecks(sess, image_lists,
                                          FLAGS.test_batch_size,
                                          'testing', FLAGS.bottleneck_dir,
                                          FLAGS.image_dir, jpeg_data_tensor,
                                          bottleneck_tensor))
        test_accuracy, predictions = sess.run(
            [evaluation_step, prediction],
            feed_dict={bottleneck_input: test_bottlenecks,
                       ground_truth_input: test_ground_truth})
        print('Final test accuracy = %.1f%% (N=%d)' % (
            test_accuracy * 100, len(test_bottlenecks)))

        if FLAGS.print_misclassified_test_images:
            print('=== MISCLASSIFIED TEST IMAGES ===')
            # Build the index -> label lookup once instead of per image.
            label_names = list(image_lists.keys())
            for test_filename, predicted_index, truth_row in zip(
                    test_filenames, predictions, test_ground_truth):
                if predicted_index != truth_row.argmax():
                    print('%70s %s' % (test_filename,
                                       label_names[predicted_index]))

        # Write out the trained graph with the weights stored as constants,
        # followed by the labels (one per line).
        output_graph_def = graph_util.convert_variables_to_constants(
            sess, graph.as_graph_def(), [FLAGS.final_tensor_name])
        with gfile.FastGFile(FLAGS.output_graph, 'wb') as f:
            f.write(output_graph_def.SerializeToString())
        with gfile.FastGFile(FLAGS.output_labels, 'w') as f:
            f.write('\n'.join(image_lists.keys()) + '\n')
|
|
|
|
|
def create_argument_parser():
    """
    Brief:
        Builds the command-line parser that defines every flag of this
        script. Keeping the definitions in a function lets the flags be
        parsed both when the file runs as a script and when it is imported
        (for example to populate the module-level FLAGS programmatically).
    Args:
        None.
    Returns:
        argparse.ArgumentParser with all retraining flags registered.
    """
    parser = argparse.ArgumentParser()

    # Input / output locations.
    parser.add_argument(
        '--image_dir',
        type=str,
        default='',
        help='Path to folders of labeled images.'
    )
    parser.add_argument(
        '--output_graph',
        type=str,
        default='logs/output_graph.pb',
        help='Where to save the trained graph.'
    )
    parser.add_argument(
        '--output_labels',
        type=str,
        default='logs/output_labels.txt',
        help='Where to save the trained graph\'s labels.'
    )
    parser.add_argument(
        '--summaries_dir',
        type=str,
        default='logs/retrain_logs',
        help='Where to save summary logs for TensorBoard.'
    )

    # Training schedule and data split.
    parser.add_argument(
        '--how_many_training_steps',
        type=int,
        default=5000,
        help='How many training steps to run before ending.'
    )
    parser.add_argument(
        '--learning_rate',
        type=float,
        default=0.01,
        help='How large a learning rate to use when training.'
    )
    parser.add_argument(
        '--testing_percentage',
        type=int,
        default=10,
        help='What percentage of images to use as a test set.'
    )
    parser.add_argument(
        '--validation_percentage',
        type=int,
        default=10,
        help='What percentage of images to use as a validation set.'
    )
    parser.add_argument(
        '--eval_step_interval',
        type=int,
        default=100,
        help='How often to evaluate the training results.'
    )

    # Batch sizes.
    parser.add_argument(
        '--train_batch_size',
        type=int,
        default=100,
        help='How many images to train on at a time.'
    )
    parser.add_argument(
        '--test_batch_size',
        type=int,
        default=-1,
        help="""\
        How many images to test on. This test set is only used once, to evaluate
        the final accuracy of the model after training completes.
        A value of -1 causes the entire test set to be used, which leads to more
        stable results across runs.\
        """
    )
    parser.add_argument(
        '--validation_batch_size',
        type=int,
        default=100,
        help="""\
        How many images to use in an evaluation batch. This validation set is
        used much more often than the test set, and is an early indicator of how
        accurate the model is during training.
        A value of -1 causes the entire validation set to be used, which leads to
        more stable results across training iterations, but may be slower on large
        training sets.\
        """
    )
    parser.add_argument(
        '--print_misclassified_test_images',
        default=False,
        help="""\
        Whether to print out a list of all misclassified test images.\
        """,
        action='store_true'
    )

    # Model files, bottleneck cache and output node name.
    parser.add_argument(
        '--model_dir',
        type=str,
        default='logs/imagenet',
        help="""\
        Path to classify_image_graph_def.pb,
        imagenet_synset_to_human_label_map.txt, and
        imagenet_2012_challenge_label_map_proto.pbtxt.\
        """
    )
    parser.add_argument(
        '--bottleneck_dir',
        type=str,
        default='/tmp/bottleneck',
        help='Path to cache bottleneck layer values as files.'
    )
    parser.add_argument(
        '--final_tensor_name',
        type=str,
        default='final_result',
        help="""\
        The name of the output classification layer in the retrained graph.\
        """
    )

    # Optional training-image distortions.
    parser.add_argument(
        '--flip_left_right',
        default=False,
        help="""\
        Whether to randomly flip half of the training images horizontally.\
        """,
        action='store_true'
    )
    parser.add_argument(
        '--random_crop',
        type=int,
        default=0,
        help="""\
        A percentage determining how much of a margin to randomly crop off the
        training images.\
        """
    )
    parser.add_argument(
        '--random_scale',
        type=int,
        default=0,
        help="""\
        A percentage determining how much to randomly scale up the size of the
        training images by.\
        """
    )
    parser.add_argument(
        '--random_brightness',
        type=int,
        default=0,
        help="""\
        A percentage determining how much to randomly multiply the training image
        input pixels up or down by.\
        """
    )
    return parser


if __name__ == '__main__':
    parser = create_argument_parser()
    # Flags this script does not define are handed on to tf.app.run untouched.
    FLAGS, unparsed = parser.parse_known_args()
    tf.app.run(main=main, argv=[sys.argv[0]] + unparsed)
|
|