"""
Applies transfer learning to an existing image model and re-trains it to
classify a new set of images.

This example shows how to take an Inception v3 architecture model trained on
ImageNet images, and train a new top layer that can recognize other classes of
images.

You can replace the image_dir argument with any folder containing subfolders of
images. The label for each image is taken from the name of the subfolder it's in.
"""
from __future__ import absolute_import |
from __future__ import division |
from __future__ import print_function |
import argparse |
import hashlib |
import os.path |
import random |
import re |
import struct |
import sys |
import tarfile |
import numpy as np |
from six.moves import urllib |
import tensorflow.compat.v1 as tf |
from tensorflow.python.framework import graph_util |
from tensorflow.python.framework import tensor_shape |
from tensorflow.python.platform import gfile |
from tensorflow.python.util import compat |
# Parsed command-line flags; assigned in the __main__ block before main() runs.
FLAGS = None

# Archive holding the pre-trained Inception v3 GraphDef.
DATA_URL = 'http://download.tensorflow.org/models/image/imagenet/inception-2015-12-05.tgz'

# Names of the tensors looked up in the imported graph.
BOTTLENECK_TENSOR_NAME = 'pool_3/_reshape:0'
JPEG_DATA_TENSOR_NAME = 'DecodeJpeg/contents:0'
RESIZED_INPUT_TENSOR_NAME = 'ResizeBilinear:0'

# These four names are used throughout the file but were never defined, which
# raised NameError at first use. The values are those of the Inception v3
# model: a 2048-element bottleneck vector and a 299x299 RGB input.
BOTTLENECK_TENSOR_SIZE = 2048
MODEL_INPUT_WIDTH = 299
MODEL_INPUT_HEIGHT = 299
MODEL_INPUT_DEPTH = 3

# Upper bound used both for hashing files into splits and for drawing random
# image indices (~134M).
MAX_NUM_IMAGES_PER_CLASS = 2 ** 27 - 1
def create_image_lists(image_dir, testing_percentage, validation_percentage):
    """
    Brief:
        Builds a list of training images from the file system.
        Analyzes the sub folders in the image directory, splits them into stable
        training, testing, and validation sets, and returns a data structure
        describing the lists of images for each label and their paths.
    Args:
        image_dir: String path to a folder containing subfolders of images.
        testing_percentage: Integer percentage of the images to reserve for tests.
        validation_percentage: Integer percentage of images reserved for validation.
    Returns:
        A dictionary containing an entry for each label subfolder, with images split
        into training, testing, and validation sets within each label, or None
        when image_dir does not exist. Each entry has the keys 'dir', 'training',
        'testing' and 'validation'; the three lists hold file base names.
    """
    if not gfile.Exists(image_dir):
        print("Image directory '" + image_dir + "' not found.")
        return None
    result = {}
    extensions = ['jpg', 'jpeg', 'JPG', 'JPEG']
    sub_dirs = [x[0] for x in gfile.Walk(image_dir)]
    # The first entry of the walk is the root directory itself, so skip it.
    for sub_dir in sub_dirs[1:]:
        dir_name = os.path.basename(sub_dir)
        if dir_name == image_dir:
            continue
        print("Looking for images in '" + dir_name + "'")
        file_list = []
        # When globbing is case-insensitive, '*.jpg' and '*.JPG' return the
        # same files; keep only the first occurrence of each path.
        seen_paths = set()
        for extension in extensions:
            file_glob = os.path.join(image_dir, dir_name, '*.' + extension)
            for path in gfile.Glob(file_glob):
                if path not in seen_paths:
                    seen_paths.add(path)
                    file_list.append(path)
        if not file_list:
            print('No files found')
            continue
        if len(file_list) < 20:
            print('WARNING: Folder has less than 20 images, which may cause issues.')
        elif len(file_list) > MAX_NUM_IMAGES_PER_CLASS:
            print('WARNING: Folder {} has more than {} images. Some images will '
                  'never be selected.'.format(dir_name, MAX_NUM_IMAGES_PER_CLASS))
        label_name = re.sub(r'[^a-z0-9]+', ' ', dir_name.lower())
        training_images = []
        testing_images = []
        validation_images = []
        for file_name in file_list:
            base_name = os.path.basename(file_name)
            # Anything after '_nohash_' is ignored when choosing the split, so
            # files that differ only in that suffix land in the same set.
            hash_name = re.sub(r'_nohash_.*$', '', file_name)
            # Hashing the name keeps a file in the same split on every run.
            hash_name_hashed = hashlib.sha1(compat.as_bytes(hash_name)).hexdigest()
            percentage_hash = ((int(hash_name_hashed, 16) %
                                (MAX_NUM_IMAGES_PER_CLASS + 1)) *
                               (100.0 / MAX_NUM_IMAGES_PER_CLASS))
            if percentage_hash < validation_percentage:
                validation_images.append(base_name)
            elif percentage_hash < (testing_percentage + validation_percentage):
                testing_images.append(base_name)
            else:
                training_images.append(base_name)
        result[label_name] = {
            'dir': dir_name,
            'training': training_images,
            'testing': testing_images,
            'validation': validation_images,
        }
    return result
def get_image_path(image_lists, label_name, index, image_dir, category):
    """
    Brief:
        Returns a path to an image for a label at the given index.
    Args:
        image_lists: Dictionary of training images for each label.
        label_name: Label string we want to get an image for.
        index: Int offset of the image we want. This will be moduloed by the
            available number of images for the label, so it can be arbitrarily large.
        image_dir: Root folder string of the subfolders containing the training images.
        category: Name string of set to pull images from - training, testing, or validation.
    Returns:
        File system path string to an image that meets the requested parameters.
    """
    if label_name not in image_lists:
        tf.logging.fatal('Label does not exist %s.', label_name)
    label_entry = image_lists[label_name]
    if category not in label_entry:
        tf.logging.fatal('Category does not exist %s.', category)
    candidates = label_entry[category]
    if not candidates:
        tf.logging.fatal('Label %s has no images in the category %s.', label_name, category)
    # Wrap the index so any non-negative offset maps onto an existing image.
    chosen_name = candidates[index % len(candidates)]
    return os.path.join(image_dir, label_entry['dir'], chosen_name)
def get_bottleneck_path(image_lists, label_name, index, bottleneck_dir, category):
    """
    Brief:
        Returns a path to a bottleneck file for a label at the given index.
    Args:
        image_lists: Dictionary of training images for each label.
        label_name: Label string we want to get an image for.
        index: Integer offset of the image we want. This will be moduloed by the
            available number of images for the label, so it can be arbitrarily large.
        bottleneck_dir: Folder string holding cached files of bottleneck values.
        category: Name string of set to pull images from - training, testing, or validation.
    Returns:
        File system path string: the image's path rebuilt under bottleneck_dir
        with '.txt' appended.
    """
    # Same layout as the image tree, rooted at the bottleneck cache folder.
    mirrored_image_path = get_image_path(image_lists, label_name, index,
                                         bottleneck_dir, category)
    return mirrored_image_path + '.txt'
def create_inception_graph():
    """
    Brief:
        Creates a graph from saved GraphDef file and returns a Graph object.
        The GraphDef is read from 'classify_image_graph_def.pb' inside
        FLAGS.model_dir.
    Returns:
        Graph holding the trained Inception network, and various tensors we'll be
        manipulating: the bottleneck tensor, the JPEG data input tensor and the
        resized image input tensor, in that order.
    """
    with tf.Graph().as_default() as graph:
        model_filename = os.path.join(FLAGS.model_dir, 'classify_image_graph_def.pb')
        with gfile.FastGFile(model_filename, 'rb') as f:
            graph_def = tf.GraphDef()
            graph_def.ParseFromString(f.read())
        # name='' imports the nodes without a prefix so the tensor-name
        # constants resolve as written.
        bottleneck_tensor, jpeg_data_tensor, resized_input_tensor = (
            tf.import_graph_def(graph_def, name='', return_elements=[
                BOTTLENECK_TENSOR_NAME, JPEG_DATA_TENSOR_NAME,
                RESIZED_INPUT_TENSOR_NAME]))
    return graph, bottleneck_tensor, jpeg_data_tensor, resized_input_tensor
def run_bottleneck_on_image(sess, image_data, image_data_tensor, bottleneck_tensor):
    """
    Brief:
        Runs inference on an image to extract the 'bottleneck' summary layer.
    Args:
        sess: Current active TensorFlow Session.
        image_data: String of raw JPEG data.
        image_data_tensor: Input data layer in the graph.
        bottleneck_tensor: Layer before the final softmax.
    Returns:
        Numpy array of bottleneck values with all size-1 dimensions removed.
    """
    feed = {image_data_tensor: image_data}
    raw_output = sess.run(bottleneck_tensor, feed)
    return np.squeeze(raw_output)
def maybe_download_and_extract():
    """
    Brief:
        Download and extract model tar file.
        If the archive named by DATA_URL is not already present in
        FLAGS.model_dir, it is downloaded there first. The archive is then
        unpacked into that directory on every call.
    """
    dest_directory = FLAGS.model_dir
    if not os.path.exists(dest_directory):
        os.makedirs(dest_directory)
    filename = DATA_URL.split('/')[-1]
    filepath = os.path.join(dest_directory, filename)
    if not os.path.exists(filepath):

        def _progress(count, block_size, total_size):
            # Rewrites a single console line with the download percentage.
            sys.stdout.write('\r>> Downloading %s %.1f%%' %
                             (filename,
                              float(count * block_size) / float(total_size) * 100.0))
            sys.stdout.flush()

        filepath, _ = urllib.request.urlretrieve(DATA_URL, filepath, _progress)
        print()
        statinfo = os.stat(filepath)
        print('Successfully downloaded', filename, statinfo.st_size, 'bytes.')
    # NOTE(review): the archive is fetched over plain HTTP and extracted without
    # member-path validation; consider verifying it before extraction.
    # The context manager closes the archive handle, which was previously leaked.
    with tarfile.open(filepath, 'r:gz') as archive:
        archive.extractall(dest_directory)
def ensure_dir_exists(dir_name):
    """
    Brief:
        Makes sure the folder exists on disk, creating it (and any missing
        parents) when the path is absent.
    Args:
        dir_name: Path string to the folder we want to create.
    """
    if os.path.exists(dir_name):
        return
    os.makedirs(dir_name)
def write_list_of_floats_to_file(list_of_floats, file_path):
    """
    Brief:
        Writes a given list of floats to a binary file as consecutive
        native-format doubles.
    Args:
        list_of_floats: List of floats we want to write to a file.
        file_path: Path to a file where list of floats will be stored.
    """
    values = list(list_of_floats)
    # Size the format from the data itself rather than a fixed constant, so
    # lists of any length (including empty) can be written.
    s = struct.pack('%dd' % len(values), *values)
    with open(file_path, 'wb') as f:
        f.write(s)
def read_list_of_floats_from_file(file_path):
    """
    Brief:
        Reads list of floats from a given file of consecutive native-format
        doubles.
    Args:
        file_path: Path to a file where list of floats was stored.
    Returns:
        Array of bottleneck values (list of floats).
    Raises:
        struct.error: If the file size is not a whole number of doubles.
    """
    with open(file_path, 'rb') as f:
        data = f.read()
    # Derive the element count from the file size instead of a fixed constant.
    count = len(data) // struct.calcsize('d')
    return list(struct.unpack('%dd' % count, data))
# Module-level dictionary, initially empty. Nothing else in this file reads or
# writes it; presumably intended as an in-memory cache keyed by bottleneck
# path - confirm before relying on or removing it.
bottleneck_path_2_bottleneck_values = dict()
def create_bottleneck_file(bottleneck_path, image_lists, label_name, index,
                           image_dir, category, sess, jpeg_data_tensor,
                           bottleneck_tensor):
    """
    Brief:
        Create a single bottleneck file.
        Runs the image selected by (label_name, index, category) through the
        bottleneck layer and writes the values to bottleneck_path as
        comma-separated text.
    Args:
        bottleneck_path: Path string of the cache file to write.
        image_lists: Dictionary of training images for each label.
        label_name: Label string we want to get an image for.
        index: Integer offset of the image we want.
        image_dir: Root folder string of the subfolders containing the images.
        category: Name string of which set to pull images from - training,
            testing, or validation.
        sess: The current active TensorFlow Session.
        jpeg_data_tensor: The tensor to feed loaded jpeg data into.
        bottleneck_tensor: The output tensor for the bottleneck values.
    Raises:
        RuntimeError: If running the bottleneck layer on the image fails.
    """
    print('Creating bottleneck at ' + bottleneck_path)
    image_path = get_image_path(image_lists, label_name, index,
                                image_dir, category)
    if not gfile.Exists(image_path):
        tf.logging.fatal('File does not exist %s', image_path)
    # Read through a context manager so the file handle is closed promptly.
    with gfile.FastGFile(image_path, 'rb') as image_file:
        image_data = image_file.read()
    try:
        bottleneck_values = run_bottleneck_on_image(
            sess, image_data, jpeg_data_tensor, bottleneck_tensor)
    except Exception as e:
        # Catch Exception rather than everything so KeyboardInterrupt and
        # SystemExit still propagate, and keep the cause in the message.
        raise RuntimeError('Error during processing file %s (%s)' %
                           (image_path, str(e)))
    bottleneck_string = ','.join(str(x) for x in bottleneck_values)
    with open(bottleneck_path, 'w') as bottleneck_file:
        bottleneck_file.write(bottleneck_string)
def get_or_create_bottleneck(sess, image_lists, label_name, index, image_dir,
                             category, bottleneck_dir, jpeg_data_tensor,
                             bottleneck_tensor):
    """
    Brief:
        Retrieves or calculates bottleneck values for an image.
        If a cached version of the bottleneck data exists on-disk, return that,
        otherwise calculate the data and save it to disk for future use. A cache
        file that cannot be parsed as floats is regenerated once.
    Args:
        sess: The current active TensorFlow Session.
        image_lists: Dictionary of training images for each label.
        label_name: Label string we want to get an image for.
        index: Integer offset of the image we want. This will be modulo-ed by the
            available number of images for the label, so it can be arbitrarily large.
        image_dir: Root folder string of the subfolders containing the training
            images.
        category: Name string of which set to pull images from - training, testing,
            or validation.
        bottleneck_dir: Folder string holding cached files of bottleneck values.
        jpeg_data_tensor: The tensor to feed loaded jpeg data into.
        bottleneck_tensor: The output tensor for the bottleneck values.
    Returns:
        List of floats produced by the bottleneck layer for the image.
    """
    cache_sub_dir = os.path.join(bottleneck_dir, image_lists[label_name]['dir'])
    ensure_dir_exists(cache_sub_dir)
    bottleneck_path = get_bottleneck_path(image_lists, label_name, index,
                                          bottleneck_dir, category)

    def _regenerate():
        create_bottleneck_file(bottleneck_path, image_lists, label_name, index,
                               image_dir, category, sess, jpeg_data_tensor,
                               bottleneck_tensor)

    def _read_cached_text():
        with open(bottleneck_path, 'r') as cached:
            return cached.read()

    if not os.path.exists(bottleneck_path):
        _regenerate()

    cached_text = _read_cached_text()
    needs_rebuild = False
    try:
        bottleneck_values = [float(item) for item in cached_text.split(',')]
    except ValueError:
        print('Invalid float found, recreating bottleneck')
        needs_rebuild = True

    if not needs_rebuild:
        return bottleneck_values

    _regenerate()
    # A parse failure on a freshly written file is allowed to propagate.
    return [float(item) for item in _read_cached_text().split(',')]
def cache_bottlenecks(sess, image_lists, image_dir, bottleneck_dir,
                      jpeg_data_tensor, bottleneck_tensor):
    """
    Brief:
        Ensures all the training, testing, and validation bottlenecks are cached.
        Because we're likely to read the same image multiple times (if there are no
        distortions applied during training) it can speed things up a lot if we
        calculate the bottleneck layer values once for each image during
        preprocessing, and then just read those cached values repeatedly during
        training. Here we go through all the images we've found, calculate those
        values, and save them off.
    Args:
        sess: The current active TensorFlow Session.
        image_lists: Dictionary of training images for each label.
        image_dir: Root folder string of the subfolders containing the training
            images.
        bottleneck_dir: Folder string holding cached files of bottleneck values.
        jpeg_data_tensor: Input tensor for jpeg data from file.
        bottleneck_tensor: The penultimate output layer of the graph.
    Returns:
        Nothing.
    """
    processed = 0
    ensure_dir_exists(bottleneck_dir)
    for label_name, label_lists in image_lists.items():
        for category in ['training', 'testing', 'validation']:
            for index in range(len(label_lists[category])):
                get_or_create_bottleneck(sess, image_lists, label_name, index,
                                         image_dir, category, bottleneck_dir,
                                         jpeg_data_tensor, bottleneck_tensor)
                processed += 1
                # Report progress once per hundred images visited.
                if processed % 100 != 0:
                    continue
                print(str(processed) + ' bottleneck files created.')
def get_random_cached_bottlenecks(sess, image_lists, how_many, category,
                                  bottleneck_dir, image_dir, jpeg_data_tensor,
                                  bottleneck_tensor):
    """
    Brief:
        Retrieves bottleneck values for cached images.
        If no distortions are being applied, this function can retrieve the cached
        bottleneck values directly from disk for images. It picks a random set of
        images from the specified category.
    Args:
        sess: Current TensorFlow Session.
        image_lists: Dictionary of training images for each label.
        how_many: If non-negative, a random sample of this size will be chosen
            (with replacement). If negative, all bottlenecks will be retrieved.
        category: Name string of which set to pull from - training, testing, or
            validation.
        bottleneck_dir: Folder string holding cached files of bottleneck values.
        image_dir: Root folder string of the subfolders containing the training
            images.
        jpeg_data_tensor: The layer to feed jpeg image data into.
        bottleneck_tensor: The bottleneck output layer of the CNN graph.
    Returns:
        List of bottleneck arrays, their corresponding ground truths, and the
        relevant filenames.
    """
    # Build the label list once instead of on every sample.
    label_names = list(image_lists.keys())
    class_count = len(label_names)
    bottlenecks = []
    ground_truths = []
    filenames = []

    def _append_example(label_index, image_index):
        # Shared by both branches: look up one image and record its bottleneck,
        # one-hot ground truth and file name.
        label_name = label_names[label_index]
        image_name = get_image_path(image_lists, label_name, image_index,
                                    image_dir, category)
        bottleneck = get_or_create_bottleneck(sess, image_lists, label_name,
                                              image_index, image_dir, category,
                                              bottleneck_dir, jpeg_data_tensor,
                                              bottleneck_tensor)
        ground_truth = np.zeros(class_count, dtype=np.float32)
        ground_truth[label_index] = 1.0
        bottlenecks.append(bottleneck)
        ground_truths.append(ground_truth)
        filenames.append(image_name)

    if how_many >= 0:
        # Random sample: the oversized index is wrapped by get_image_path.
        for _ in range(how_many):
            label_index = random.randrange(class_count)
            image_index = random.randrange(MAX_NUM_IMAGES_PER_CLASS + 1)
            _append_example(label_index, image_index)
    else:
        # Exhaustive pass over every image of the category, label by label.
        for label_index, label_name in enumerate(label_names):
            for image_index in range(len(image_lists[label_name][category])):
                _append_example(label_index, image_index)
    return bottlenecks, ground_truths, filenames
def get_random_distorted_bottlenecks(
        sess, image_lists, how_many, category, image_dir, input_jpeg_tensor,
        distorted_image, resized_input_tensor, bottleneck_tensor):
    """
    Brief:
        Retrieves bottleneck values for training images, after distortions.
        If we're training with distortions like crops, scales, or flips, we have to
        recalculate the full model for every image, and so we can't use cached
        bottleneck values. Instead we find random images for the requested category,
        run them through the distortion graph, and then the full graph to get the
        bottleneck results for each.
    Args:
        sess: Current TensorFlow Session.
        image_lists: Dictionary of training images for each label.
        how_many: The integer number of bottleneck values to return.
        category: Name string of which set of images to fetch - training, testing,
            or validation.
        image_dir: Root folder string of the subfolders containing the training
            images.
        input_jpeg_tensor: The input layer we feed the image data to.
        distorted_image: The output node of the distortion graph.
        resized_input_tensor: The input node of the recognition graph.
        bottleneck_tensor: The bottleneck output layer of the CNN graph.
    Returns:
        List of bottleneck arrays and their corresponding ground truths.
    """
    # Build the label list once instead of on every sample.
    label_names = list(image_lists.keys())
    class_count = len(label_names)
    bottlenecks = []
    ground_truths = []
    for _ in range(how_many):
        label_index = random.randrange(class_count)
        label_name = label_names[label_index]
        # The oversized index is wrapped by get_image_path.
        image_index = random.randrange(MAX_NUM_IMAGES_PER_CLASS + 1)
        image_path = get_image_path(image_lists, label_name, image_index, image_dir,
                                    category)
        if not gfile.Exists(image_path):
            tf.logging.fatal('File does not exist %s', image_path)
        # Read through a context manager so the file handle is closed promptly.
        with gfile.FastGFile(image_path, 'rb') as image_file:
            jpeg_data = image_file.read()
        # Two graph passes: first distort the image, then feed the distorted
        # pixels into the recognition graph's resized-input node.
        distorted_image_data = sess.run(distorted_image,
                                        {input_jpeg_tensor: jpeg_data})
        bottleneck = run_bottleneck_on_image(sess, distorted_image_data,
                                             resized_input_tensor,
                                             bottleneck_tensor)
        ground_truth = np.zeros(class_count, dtype=np.float32)
        ground_truth[label_index] = 1.0
        bottlenecks.append(bottleneck)
        ground_truths.append(ground_truth)
    return bottlenecks, ground_truths
def should_distort_images(flip_left_right, random_crop, random_scale,
                          random_brightness):
    """
    Brief:
        Whether any distortions are enabled, from the input flags.
    Args:
        flip_left_right: Boolean whether to randomly mirror images horizontally.
        random_crop: Integer percentage setting the total margin used around the
            crop box.
        random_scale: Integer percentage of how much to vary the scale by.
        random_brightness: Integer range to randomly multiply the pixel values by.
    Returns:
        Boolean value indicating whether any distortions should be applied.
    """
    if flip_left_right:
        return flip_left_right
    crop_enabled = random_crop != 0
    scale_enabled = random_scale != 0
    brightness_enabled = random_brightness != 0
    return crop_enabled or scale_enabled or brightness_enabled
def add_input_distortions(flip_left_right, random_crop, random_scale,
                          random_brightness):
    """
    Brief:
        Creates the operations to apply the specified distortions.
        During training it can help to improve the results if we run the images
        through simple distortions like crops, scales, and flips. These reflect the
        kind of variations we expect in the real world, and so can help train the
        model to cope with natural data more effectively. Here we take the supplied
        parameters and construct a network of operations to apply them to an image.

        Cropping
        Cropping is done by placing a bounding box at a random position in the full
        image. The cropping parameter controls the size of that box relative to the
        input image. If it's zero, then the box is the same size as the input and no
        cropping is performed. If the value is 50%, then the crop box will be half the
        width and height of the input. In a diagram it looks like this:

        <       width         >
        +---------------------+
        |                     |
        |   width - crop%     |
        |    <      >         |
        |    +------+         |
        |    |      |         |
        |    |      |         |
        |    |      |         |
        |    +------+         |
        |                     |
        |                     |
        +---------------------+

        Scaling
        Scaling is a lot like cropping, except that the bounding box is always
        centered and its size varies randomly within the given range. For example if
        the scale percentage is zero, then the bounding box is the same size as the
        input and no scaling is applied. If it's 50%, then the bounding box will be in
        a random range between half the width and height and full size.
    Args:
        flip_left_right: Boolean whether to randomly mirror images horizontally.
        random_crop: Integer percentage setting the total margin used around the
            crop box.
        random_scale: Integer percentage of how much to vary the scale by.
        random_brightness: Integer range to randomly multiply the pixel values by.
    Returns:
        The jpeg input layer and the distorted result tensor.
    """
    jpeg_data = tf.placeholder(tf.string, name='DistortJPGInput')
    decoded_image = tf.image.decode_jpeg(jpeg_data, channels=MODEL_INPUT_DEPTH)
    decoded_image_as_float = tf.cast(decoded_image, dtype=tf.float32)
    # resize_bilinear works on batches, so add a leading batch dimension.
    decoded_image_4d = tf.expand_dims(decoded_image_as_float, 0)
    margin_scale = 1.0 + (random_crop / 100.0)
    resize_scale = 1.0 + (random_scale / 100.0)
    margin_scale_value = tf.constant(margin_scale)
    # An empty shape list requests a scalar; this avoids the private
    # tensor_shape.scalar() helper, which may not exist in every release.
    resize_scale_value = tf.random_uniform([],
                                           minval=1.0,
                                           maxval=resize_scale)
    scale_value = tf.multiply(margin_scale_value, resize_scale_value)
    # Enlarge beyond the model input size so the random crop has a margin.
    precrop_width = tf.multiply(scale_value, MODEL_INPUT_WIDTH)
    precrop_height = tf.multiply(scale_value, MODEL_INPUT_HEIGHT)
    precrop_shape = tf.stack([precrop_height, precrop_width])
    precrop_shape_as_int = tf.cast(precrop_shape, dtype=tf.int32)
    precropped_image = tf.image.resize_bilinear(decoded_image_4d,
                                                precrop_shape_as_int)
    precropped_image_3d = tf.squeeze(precropped_image, squeeze_dims=[0])
    # Crop back down to exactly the size the recognition graph expects.
    cropped_image = tf.random_crop(precropped_image_3d,
                                   [MODEL_INPUT_HEIGHT, MODEL_INPUT_WIDTH,
                                    MODEL_INPUT_DEPTH])
    if flip_left_right:
        flipped_image = tf.image.random_flip_left_right(cropped_image)
    else:
        flipped_image = cropped_image
    brightness_min = 1.0 - (random_brightness / 100.0)
    brightness_max = 1.0 + (random_brightness / 100.0)
    brightness_value = tf.random_uniform([],
                                         minval=brightness_min,
                                         maxval=brightness_max)
    brightened_image = tf.multiply(flipped_image, brightness_value)
    distort_result = tf.expand_dims(brightened_image, 0, name='DistortResult')
    return jpeg_data, distort_result
def variable_summaries(var):
    """
    Brief:
        Attach a lot of summaries to a Tensor (for TensorBoard visualization).
        Records scalar summaries for the mean, standard deviation, max and min
        of the tensor, plus a histogram of its values.
    Args:
        var: The tensor to summarize.
    """
    with tf.name_scope('summaries'):
        mean = tf.reduce_mean(var)
        tf.summary.scalar('mean', mean)
        with tf.name_scope('stddev'):
            squared_deviation = tf.square(var - mean)
            variance = tf.reduce_mean(squared_deviation)
            stddev = tf.sqrt(variance)
        tf.summary.scalar('stddev', stddev)
        largest = tf.reduce_max(var)
        tf.summary.scalar('max', largest)
        smallest = tf.reduce_min(var)
        tf.summary.scalar('min', smallest)
        tf.summary.histogram('histogram', var)
def add_final_training_ops(class_count, final_tensor_name, bottleneck_tensor):
    """
    Brief:
        Adds a new softmax and fully-connected layer for training.
        We need to retrain the top layer to identify our new classes, so this function
        adds the right operations to the graph, along with some variables to hold the
        weights, and then sets up all the gradients for the backward pass.
        The set up for the softmax and fully-connected layers is based on:
        https://tensorflow.org/versions/master/tutorials/mnist/beginners/index.html
    Args:
        class_count: Integer of how many categories of things we're trying to
            recognize.
        final_tensor_name: Name string for the new final node that produces results.
        bottleneck_tensor: The output of the main CNN graph.
    Returns:
        The tensors for the training and cross entropy results, the tensors for
        the bottleneck input and ground truth input, and the final softmax
        tensor, in that order.
    """
    with tf.name_scope('input'):
        # Defaults to the graph's own bottleneck output when nothing is fed.
        bottleneck_input = tf.placeholder_with_default(
            bottleneck_tensor,
            shape=[None, BOTTLENECK_TENSOR_SIZE],
            name='BottleneckInputPlaceholder')
        ground_truth_input = tf.placeholder(
            tf.float32, [None, class_count], name='GroundTruthInput')

    with tf.name_scope('final_training_ops'):
        with tf.name_scope('weights'):
            weights_init = tf.truncated_normal(
                [BOTTLENECK_TENSOR_SIZE, class_count], stddev=0.001)
            layer_weights = tf.Variable(weights_init, name='final_weights')
            variable_summaries(layer_weights)
        with tf.name_scope('biases'):
            layer_biases = tf.Variable(tf.zeros([class_count]), name='final_biases')
            variable_summaries(layer_biases)
        with tf.name_scope('Wx_plus_b'):
            logits = tf.matmul(bottleneck_input, layer_weights) + layer_biases
            tf.summary.histogram('pre_activations', logits)

    # Created outside any name scope so the node is named exactly
    # final_tensor_name.
    final_tensor = tf.nn.softmax(logits, name=final_tensor_name)
    tf.summary.histogram('activations', final_tensor)

    with tf.name_scope('cross_entropy'):
        per_example_loss = tf.nn.softmax_cross_entropy_with_logits(
            labels=ground_truth_input, logits=logits)
        with tf.name_scope('total'):
            cross_entropy_mean = tf.reduce_mean(per_example_loss)
    tf.summary.scalar('cross_entropy', cross_entropy_mean)

    with tf.name_scope('train'):
        optimizer = tf.train.GradientDescentOptimizer(FLAGS.learning_rate)
        train_step = optimizer.minimize(cross_entropy_mean)

    return (train_step, cross_entropy_mean, bottleneck_input, ground_truth_input,
            final_tensor)
def add_evaluation_step(result_tensor, ground_truth_tensor):
    """
    Brief:
        Inserts the operations we need to evaluate the accuracy of our results.
    Args:
        result_tensor: The new final node that produces results.
        ground_truth_tensor: The node we feed ground truth data
            into.
    Returns:
        Tuple of (evaluation step, prediction).
    """
    with tf.name_scope('accuracy'):
        with tf.name_scope('correct_prediction'):
            prediction = tf.argmax(result_tensor, 1)
            expected = tf.argmax(ground_truth_tensor, 1)
            correct_prediction = tf.equal(prediction, expected)
        with tf.name_scope('accuracy'):
            # Mean of the 0/1 hits is the fraction classified correctly.
            hits = tf.cast(correct_prediction, tf.float32)
            evaluation_step = tf.reduce_mean(hits)
    tf.summary.scalar('accuracy', evaluation_step)
    return evaluation_step, prediction
def main(_):
    """
    Brief:
        Retrains the final layer of the Inception graph on the images found in
        FLAGS.image_dir, reports training/validation/test accuracy, and writes
        the retrained graph and its labels to disk.
    Args:
        _: Unused positional argument supplied by tf.app.run.
    Returns:
        -1 if the image directory is missing or yields fewer than two classes,
        otherwise None.
    """
    # Start from an empty summaries directory for TensorBoard.
    if tf.gfile.Exists(FLAGS.summaries_dir):
        tf.gfile.DeleteRecursively(FLAGS.summaries_dir)
    tf.gfile.MakeDirs(FLAGS.summaries_dir)

    # Set up the pre-trained graph.
    maybe_download_and_extract()
    graph, bottleneck_tensor, jpeg_data_tensor, resized_image_tensor = (
        create_inception_graph())

    # Look at the folder structure, and create lists of all the images.
    image_lists = create_image_lists(FLAGS.image_dir, FLAGS.testing_percentage,
                                     FLAGS.validation_percentage)
    if image_lists is None:
        # create_image_lists already reported the missing directory; bail out
        # instead of failing with AttributeError on None below.
        return -1
    label_names = list(image_lists.keys())
    class_count = len(label_names)
    if class_count == 0:
        print('No valid folders of images found at ' + FLAGS.image_dir)
        return -1
    if class_count == 1:
        print('Only one valid folder of images found at ' + FLAGS.image_dir +
              ' - multiple classes are needed for classification.')
        return -1

    # See if the command-line flags mean we're applying any distortions.
    do_distort_images = should_distort_images(
        FLAGS.flip_left_right, FLAGS.random_crop, FLAGS.random_scale,
        FLAGS.random_brightness)

    with tf.Session(graph=graph) as sess:
        if do_distort_images:
            # Distorted images cannot use the cache, so build the distortion ops.
            (distorted_jpeg_data_tensor,
             distorted_image_tensor) = add_input_distortions(
                 FLAGS.flip_left_right, FLAGS.random_crop,
                 FLAGS.random_scale, FLAGS.random_brightness)
        else:
            # Pre-compute and cache the bottleneck values for every image.
            cache_bottlenecks(sess, image_lists, FLAGS.image_dir,
                              FLAGS.bottleneck_dir, jpeg_data_tensor,
                              bottleneck_tensor)

        # Add the new layer that we'll be training.
        (train_step, cross_entropy, bottleneck_input, ground_truth_input,
         final_tensor) = add_final_training_ops(class_count,
                                                FLAGS.final_tensor_name,
                                                bottleneck_tensor)

        # Create the operations we need to evaluate the accuracy of the new layer.
        evaluation_step, prediction = add_evaluation_step(
            final_tensor, ground_truth_input)

        # Merge all the summaries and write them out to the summaries_dir.
        merged = tf.summary.merge_all()
        train_writer = tf.summary.FileWriter(FLAGS.summaries_dir + '/train',
                                             sess.graph)
        validation_writer = tf.summary.FileWriter(
            FLAGS.summaries_dir + '/validation')

        # Set up all our weights to their initial default values.
        init = tf.global_variables_initializer()
        sess.run(init)

        # Run the training for as many cycles as requested on the command line.
        for i in range(FLAGS.how_many_training_steps):
            if do_distort_images:
                (train_bottlenecks,
                 train_ground_truth) = get_random_distorted_bottlenecks(
                     sess, image_lists, FLAGS.train_batch_size, 'training',
                     FLAGS.image_dir, distorted_jpeg_data_tensor,
                     distorted_image_tensor, resized_image_tensor,
                     bottleneck_tensor)
            else:
                (train_bottlenecks,
                 train_ground_truth, _) = get_random_cached_bottlenecks(
                     sess, image_lists, FLAGS.train_batch_size, 'training',
                     FLAGS.bottleneck_dir, FLAGS.image_dir, jpeg_data_tensor,
                     bottleneck_tensor)
            # Feed the bottlenecks and ground truth into the graph, run a
            # training step, and capture summaries for TensorBoard.
            train_summary, _ = sess.run(
                [merged, train_step],
                feed_dict={bottleneck_input: train_bottlenecks,
                           ground_truth_input: train_ground_truth})
            train_writer.add_summary(train_summary, i)

            # Every so often, print out how well the graph is training.
            is_last_step = (i + 1 == FLAGS.how_many_training_steps)
            if (i % FLAGS.eval_step_interval) == 0 or is_last_step:
                train_accuracy, cross_entropy_value = sess.run(
                    [evaluation_step, cross_entropy],
                    feed_dict={bottleneck_input: train_bottlenecks,
                               ground_truth_input: train_ground_truth})
                validation_bottlenecks, validation_ground_truth, _ = (
                    get_random_cached_bottlenecks(
                        sess, image_lists, FLAGS.validation_batch_size,
                        'validation', FLAGS.bottleneck_dir, FLAGS.image_dir,
                        jpeg_data_tensor, bottleneck_tensor))
                # Only merged and evaluation_step are fetched here, so no
                # training step runs on the validation batch.
                validation_summary, validation_accuracy = sess.run(
                    [merged, evaluation_step],
                    feed_dict={bottleneck_input: validation_bottlenecks,
                               ground_truth_input: validation_ground_truth})
                validation_writer.add_summary(validation_summary, i)
                print('Step: %d, Train accuracy: %.4f%%, Cross entropy: %f, Validation accuracy: %.1f%% (N=%d)' % (
                    i, train_accuracy * 100, cross_entropy_value,
                    validation_accuracy * 100, len(validation_bottlenecks)))

        # Flush and release the event files now that training is finished.
        train_writer.close()
        validation_writer.close()

        # Final evaluation on images that were not used during training.
        test_bottlenecks, test_ground_truth, test_filenames = (
            get_random_cached_bottlenecks(sess, image_lists, FLAGS.test_batch_size,
                                          'testing', FLAGS.bottleneck_dir,
                                          FLAGS.image_dir, jpeg_data_tensor,
                                          bottleneck_tensor))
        test_accuracy, predictions = sess.run(
            [evaluation_step, prediction],
            feed_dict={bottleneck_input: test_bottlenecks,
                       ground_truth_input: test_ground_truth})
        print('Final test accuracy = %.1f%% (N=%d)' % (
            test_accuracy * 100, len(test_bottlenecks)))

        if FLAGS.print_misclassified_test_images:
            for i, test_filename in enumerate(test_filenames):
                if predictions[i] != test_ground_truth[i].argmax():
                    print('%70s %s' % (test_filename,
                                       label_names[predictions[i]]))

        # Write out the trained graph and labels with the weights stored as
        # constants.
        output_graph_def = graph_util.convert_variables_to_constants(
            sess, graph.as_graph_def(), [FLAGS.final_tensor_name])
        with gfile.FastGFile(FLAGS.output_graph, 'wb') as f:
            f.write(output_graph_def.SerializeToString())
        with gfile.FastGFile(FLAGS.output_labels, 'w') as f:
            f.write('\n'.join(label_names) + '\n')
if __name__ == '__main__':
    # Declare the command-line flags, park the parsed values in the module-level
    # FLAGS, and hand any unrecognised arguments on to tf.app.run.
    parser = argparse.ArgumentParser()

    # Input and output locations.
    parser.add_argument(
        '--image_dir', type=str, default='',
        help='Path to folders of labeled images.')
    parser.add_argument(
        '--output_graph', type=str, default='logs/output_graph.pb',
        help='Where to save the trained graph.')
    parser.add_argument(
        '--output_labels', type=str, default='logs/output_labels.txt',
        help='Where to save the trained graph\'s labels.')
    parser.add_argument(
        '--summaries_dir', type=str, default='logs/retrain_logs',
        help='Where to save summary logs for TensorBoard.')

    # Training schedule and data split.
    parser.add_argument(
        '--how_many_training_steps', type=int, default=5000,
        help='How many training steps to run before ending.')
    parser.add_argument(
        '--learning_rate', type=float, default=0.01,
        help='How large a learning rate to use when training.')
    parser.add_argument(
        '--testing_percentage', type=int, default=10,
        help='What percentage of images to use as a test set.')
    parser.add_argument(
        '--validation_percentage', type=int, default=10,
        help='What percentage of images to use as a validation set.')
    parser.add_argument(
        '--eval_step_interval', type=int, default=100,
        help='How often to evaluate the training results.')

    # Batch sizes.
    parser.add_argument(
        '--train_batch_size', type=int, default=100,
        help='How many images to train on at a time.')
    parser.add_argument(
        '--test_batch_size', type=int, default=-1,
        help="""\
        How many images to test on. This test set is only used once, to evaluate
        the final accuracy of the model after training completes.
        A value of -1 causes the entire test set to be used, which leads to more
        stable results across runs.\
        """)
    parser.add_argument(
        '--validation_batch_size', type=int, default=100,
        help="""\
        How many images to use in an evaluation batch. This validation set is
        used much more often than the test set, and is an early indicator of how
        accurate the model is during training.
        A value of -1 causes the entire validation set to be used, which leads to
        more stable results across training iterations, but may be slower on large
        training sets.\
        """)
    parser.add_argument(
        '--print_misclassified_test_images', action='store_true', default=False,
        help="""\
        Whether to print out a list of all misclassified test images.\
        """)

    # Model files and bottleneck cache.
    parser.add_argument(
        '--model_dir', type=str, default='logs/imagenet',
        help="""\
        Path to classify_image_graph_def.pb,
        imagenet_synset_to_human_label_map.txt, and
        imagenet_2012_challenge_label_map_proto.pbtxt.\
        """)
    parser.add_argument(
        '--bottleneck_dir', type=str, default='/tmp/bottleneck',
        help='Path to cache bottleneck layer values as files.')
    parser.add_argument(
        '--final_tensor_name', type=str, default='final_result',
        help="""\
        The name of the output classification layer in the retrained graph.\
        """)

    # Optional training-time distortions.
    parser.add_argument(
        '--flip_left_right', action='store_true', default=False,
        help="""\
        Whether to randomly flip half of the training images horizontally.\
        """)
    parser.add_argument(
        '--random_crop', type=int, default=0,
        help="""\
        A percentage determining how much of a margin to randomly crop off the
        training images.\
        """)
    parser.add_argument(
        '--random_scale', type=int, default=0,
        help="""\
        A percentage determining how much to randomly scale up the size of the
        training images by.\
        """)
    parser.add_argument(
        '--random_brightness', type=int, default=0,
        help="""\
        A percentage determining how much to randomly multiply the training image
        input pixels up or down by.\
        """)

    FLAGS, unparsed = parser.parse_known_args()
    tf.app.run(main=main, argv=[sys.argv[0]] + unparsed)