Spaces:

karolmajek
/

Axial-DeepLab-SWideRNet

Runtime error

App Files Files Community

Axial-DeepLab-SWideRNet / data /preprocessing /input_preprocessing.py

karolmajek

from https://huggingface.co/spaces/akhaliq/deeplab2

d1843be over 3 years ago

raw

history blame contribute delete

12.8 kB

	# coding=utf-8
	# Copyright 2021 The Deeplab2 Authors.
	#
	# Licensed under the Apache License, Version 2.0 (the "License");
	# you may not use this file except in compliance with the License.
	# You may obtain a copy of the License at
	#
	# http://www.apache.org/licenses/LICENSE-2.0
	#
	# Unless required by applicable law or agreed to in writing, software
	# distributed under the License is distributed on an "AS IS" BASIS,
	# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
	# See the License for the specific language governing permissions and
	# limitations under the License.

	"""This file contains functions to preprocess images and labels."""

	import tensorflow as tf

	from deeplab2.data.preprocessing import autoaugment_utils
	from deeplab2.data.preprocessing import preprocess_utils

	# The probability of flipping the images and labels
	# left-right during training
	_PROB_OF_FLIP = 0.5

	# Per-channel fill value for image padding: _pad_image_and_label subtracts
	# it before zero-padding and adds it back afterwards, so the padded border
	# ends up holding this value rather than 0.
	_MEAN_PIXEL = [127.5, 127.5, 127.5]


	def _pad_image_and_label(image, label, offset_height, offset_width,
	                         target_height, target_width, ignore_label=None):
	  """Pads an image, and optionally its label, up to a target size.

	  The image border is filled with `_MEAN_PIXEL`: the mean is subtracted, the
	  result is zero-padded, and the mean is added back. The label border is
	  filled with `ignore_label`.

	  Args:
	    image: A tf.Tensor of shape [height, width, channels].
	    label: A tf.Tensor of shape [height, width, 1], or None.
	    offset_height: Number of padding rows placed above the image and label.
	    offset_width: Number of padding columns placed to the left of the image
	      and label.
	    target_height: Total height after padding.
	    target_width: Total width after padding.
	    ignore_label: Fill value for the padded label region. Must be set
	      whenever a label is given.

	  Returns:
	    A tuple (padded_image, padded_label). The padded image has the dtype of
	    the input image; padded_label is None when label is None.

	  Raises:
	    tf.errors.InvalidArgumentError: If the padding configuration is invalid,
	      i.e. the bottom or right padding would be negative.
	    ValueError: If label is given without an ignore_label.
	  """
	  rows = tf.shape(image)[0]
	  cols = tf.shape(image)[1]
	  input_dtype = image.dtype
	  # The mean shift below is done in floating point; other dtypes are cast
	  # to float32 here and restored after padding.
	  if input_dtype not in (tf.float32, tf.float64):
	    image = tf.cast(image, tf.float32)

	  pad_bottom = target_height - offset_height - rows
	  pad_right = target_width - offset_width - cols

	  invalid_padding_message = (
	      'The padding configuration is not valid. Please either increase the '
	      'target size or reduce the padding offset.')
	  padding_checks = [
	      tf.assert_greater(pad_bottom, -1, invalid_padding_message),
	      tf.assert_greater(pad_right, -1, invalid_padding_message),
	  ]
	  # Build the padding ops under the assertions so that the checks run first.
	  with tf.control_dependencies(padding_checks):
	    pad_spec = [[offset_height, pad_bottom], [offset_width, pad_right],
	                [0, 0]]

	    # Zero-padding the mean-shifted image fills the border with _MEAN_PIXEL.
	    padded_image = tf.pad(image - _MEAN_PIXEL, pad_spec) + _MEAN_PIXEL
	    padded_image = tf.cast(padded_image, input_dtype)

	    if label is None:
	      return padded_image, None

	    if ignore_label is None:
	      raise ValueError(
	          'If a label is given, the ignore label must be set too.')
	    padded_label = tf.pad(label, pad_spec, constant_values=ignore_label)
	    return padded_image, padded_label


	def _update_max_resize_value(max_resize_value, crop_size, is_inference=False):
	  """Normalizes max_resize_value and checks it against the crop size.

	  Args:
	    max_resize_value: A 2-tuple of (height, width), maximum allowed value
	      after resize. If a single element is given, then height and width
	      share the same value. None, empty or having 0 indicates no maximum
	      value will be used.
	    crop_size: A 2-tuple of (height, width), crop size used.
	    is_inference: Boolean, whether the model is performing inference or not.

	  Returns:
	    The processed max_resize_value, or None when no maximum applies.

	  Raises:
	    ValueError: If the maximum resize height or width is larger than the
	      corresponding crop size entry.
	  """
	  resolved = preprocess_utils.process_resize_value(max_resize_value)

	  # At inference time fall back to the crop size, which lets the model take
	  # input images larger than the crop.
	  if is_inference and resolved is None:
	    resolved = crop_size

	  if resolved is None:
	    return None

	  within_crop = not (resolved[0] > crop_size[0] or
	                     resolved[1] > crop_size[1])
	  if within_crop:
	    return resolved

	  raise ValueError(
	      'Maximum resize value provided (%s) exceeds model crop size (%s)' %
	      (resolved, crop_size))


	def preprocess_image_and_label(image,
	                               label,
	                               crop_height,
	                               crop_width,
	                               prev_image=None,
	                               prev_label=None,
	                               min_resize_value=None,
	                               max_resize_value=None,
	                               resize_factor=None,
	                               min_scale_factor=1.,
	                               max_scale_factor=1.,
	                               scale_factor_step_size=0,
	                               ignore_label=None,
	                               is_training=True,
	                               autoaugment_policy_name=None):
	  """Preprocesses the image and label, plus an optional previous frame.

	  When not training, the inputs are resized and then padded at the bottom
	  and right up to [crop_height, crop_width]. When training, the inputs are
	  optionally resized, randomly scaled, optionally autoaugmented, padded to
	  at least the crop size, randomly cropped to [crop_height, crop_width] and
	  randomly flipped left-right. The previous frame, when given, receives the
	  same scale, padding offsets, crop and flip as the current frame.

	  Args:
	    image: A tf.Tensor containing the image with shape [height, width, 3].
	    label: A tf.Tensor containing the label with shape [height, width, 1] or
	      None.
	    crop_height: The height value used to crop the image and label.
	    crop_width: The width value used to crop the image and label.
	    prev_image: An optional tensor of shape [image_height, image_width, 3].
	    prev_label: An optional tensor of shape [label_height, label_width, 1].
	    min_resize_value: A 2-tuple of (height, width), desired minimum value
	      after resize. If a single element is given, then height and width
	      share the same value. None, empty or having 0 indicates no minimum
	      value will be used.
	    max_resize_value: A 2-tuple of (height, width), maximum allowed value
	      after resize. If a single element is given, then height and width
	      share the same value. None, empty or having 0 indicates no maximum
	      value will be used.
	    resize_factor: Resized dimensions are multiple of factor plus one.
	    min_scale_factor: Minimum scale factor for random scale augmentation.
	    max_scale_factor: Maximum scale factor for random scale augmentation.
	    scale_factor_step_size: The step size from min scale factor to max scale
	      factor. The input is randomly scaled based on the value of
	      (min_scale_factor, max_scale_factor, scale_factor_step_size).
	    ignore_label: The label value which will be ignored for training and
	      evaluation.
	    is_training: If the preprocessing is used for training or not.
	    autoaugment_policy_name: String, autoaugment policy name. See
	      autoaugment_policy.py for available policies.

	  Returns:
	    A 5-tuple (resized_image, processed_image, label, processed_prev_image,
	    prev_label):
	      resized_image: The current frame after resizing only, without other
	        augmentations. If no resizing takes place, this is the input image
	        itself.
	      processed_image: The preprocessed image as a tf.Tensor.
	      label: The preprocessed groundtruth segmentation label as a tf.Tensor,
	        or None if no label was given.
	      processed_prev_image: The preprocessed previous image, or None if no
	        prev_image was given.
	      prev_label: The preprocessed previous label, or None if no prev_label
	        was given.

	  Raises:
	    ValueError: If the ground truth label is not provided during training,
	      if the static shape of image or label is incompatible with
	      [None, None, 3] or [None, None, 1] respectively, or if
	      max_resize_value exceeds the crop size.
	  """
	  if is_training and label is None:
	    raise ValueError('During training, label must be provided.')

	  image.get_shape().assert_is_compatible_with(tf.TensorShape([None, None, 3]))

	  # Keep reference to original image.
	  resized_image = image
	  # The current and previous frames are stacked along the channel axis so
	  # that they are resized together; they are split again further below.
	  if prev_image is not None:
	    image = tf.concat([image, prev_image], axis=2)
	  processed_image = tf.cast(image, tf.float32)
	  processed_prev_image = None

	  if label is not None:
	    label.get_shape().assert_is_compatible_with(tf.TensorShape([None, None, 1]))
	    if prev_label is not None:
	      label = tf.concat([label, prev_label], axis=2)
	    label = tf.cast(label, tf.int32)

	  # Resize image and label to the desired range. Outside of training this
	  # always happens, with max_resize_value defaulting to the crop size.
	  if any([min_resize_value, max_resize_value, not is_training]):
	    max_resize_value = _update_max_resize_value(
	        max_resize_value,
	        crop_size=(crop_height, crop_width),
	        is_inference=not is_training)

	    processed_image, label = (
	        preprocess_utils.resize_to_range(
	            image=processed_image,
	            label=label,
	            min_size=min_resize_value,
	            max_size=max_resize_value,
	            factor=resize_factor,
	            align_corners=True))
	    if prev_image is None:
	      resized_image = tf.identity(processed_image)
	    else:
	      # Only the current frame (first half of the channels) is reported.
	      resized_image, _ = tf.split(processed_image, 2, axis=2)

	  # Undo the channel-wise stacking of current and previous frame.
	  if prev_image is not None:
	    processed_image, processed_prev_image = tf.split(processed_image, 2, axis=2)

	  if prev_label is not None:
	    label, prev_label = tf.split(label, 2, axis=2)

	  if not is_training:
	    # Evaluation: pad only at the bottom and right (zero offsets) up to the
	    # crop size, then fix the static shapes.
	    processed_image, label = _pad_image_and_label(
	        processed_image, label, 0, 0, crop_height, crop_width, ignore_label)
	    processed_image.set_shape([crop_height, crop_width, 3])
	    if label is not None:
	      label.set_shape([crop_height, crop_width, 1])
	    if prev_image is not None:
	      processed_prev_image, prev_label = _pad_image_and_label(
	          processed_prev_image, prev_label, 0, 0, crop_height, crop_width,
	          ignore_label)
	      processed_prev_image.set_shape([crop_height, crop_width, 3])
	      if prev_label is not None:
	        prev_label.set_shape([crop_height, crop_width, 1])
	    return (resized_image, processed_image, label, processed_prev_image,
	            prev_label)

	  # Data augmentation by randomly scaling the inputs. The same scale is
	  # applied to the current and the previous frame.
	  scale = preprocess_utils.get_random_scale(
	      min_scale_factor, max_scale_factor, scale_factor_step_size)
	  processed_image, label = preprocess_utils.randomly_scale_image_and_label(
	      processed_image, label, scale)
	  if processed_prev_image is not None:
	    (processed_prev_image,
	     prev_label) = preprocess_utils.randomly_scale_image_and_label(
	         processed_prev_image, prev_label, scale)

	  # Apply autoaugment if any.
	  if autoaugment_policy_name:
	    processed_image, label = _autoaugment_helper(
	        processed_image, label, ignore_label, autoaugment_policy_name)
	    if processed_prev_image is not None:
	      processed_prev_image, prev_label = _autoaugment_helper(
	          processed_prev_image, prev_label, ignore_label,
	          autoaugment_policy_name)

	  # Pad image and label to have dimensions >= [crop_height, crop_width].
	  image_height = tf.shape(processed_image)[0]
	  image_width = tf.shape(processed_image)[1]
	  target_height = image_height + tf.maximum(crop_height - image_height, 0)
	  target_width = image_width + tf.maximum(crop_width - image_width, 0)

	  def _uniform_offset(margin):
	    """Draws a random int32 offset in [0, max(margin, 1))."""
	    return tf.random.uniform(
	        [], minval=0, maxval=tf.maximum(margin, 1), dtype=tf.int32)

	  # Random placement of the image inside the padded canvas. The offsets are
	  # drawn once and reused for the previous frame.
	  offset_height = _uniform_offset(crop_height - image_height)
	  offset_width = _uniform_offset(crop_width - image_width)
	  processed_image, label = _pad_image_and_label(processed_image, label,
	                                                offset_height, offset_width,
	                                                target_height, target_width,
	                                                ignore_label)

	  if processed_prev_image is not None:
	    processed_prev_image, prev_label = _pad_image_and_label(
	        processed_prev_image, prev_label, offset_height, offset_width,
	        target_height, target_width, ignore_label)
	    # Randomly crop the images and labels, all four in a single call.
	    (processed_image, label, processed_prev_image,
	     prev_label) = preprocess_utils.random_crop(
	         [processed_image, label, processed_prev_image, prev_label],
	         crop_height, crop_width)
	    # Randomly left-right flip the images and labels.
	    (processed_image, label, processed_prev_image, prev_label,
	     _) = preprocess_utils.flip_dim(
	         [processed_image, label, processed_prev_image, prev_label],
	         _PROB_OF_FLIP,
	         dim=1)
	  else:
	    # Randomly crop the image and label.
	    processed_image, label = preprocess_utils.random_crop(
	        [processed_image, label], crop_height, crop_width)
	    # Randomly left-right flip the image and label.
	    processed_image, label, _ = preprocess_utils.flip_dim(
	        [processed_image, label], _PROB_OF_FLIP, dim=1)

	  return resized_image, processed_image, label, processed_prev_image, prev_label


	def _autoaugment_helper(image, label, ignore_label, policy_name):
	  """Applies an autoaugment policy to an image and label pair.

	  Args:
	    image: A tf.Tensor image. It is cast to tf.uint8 before augmentation.
	    label: A tf.Tensor label. It is cast to tf.int32 before augmentation.
	    ignore_label: The ignore label, forwarded to
	      autoaugment_utils.distort_image_with_autoaugment.
	    policy_name: String, autoaugment policy name, forwarded likewise.

	  Returns:
	    A tuple (image, label) holding the augmented image cast to tf.float32
	    and the augmented label as returned by autoaugment_utils.
	  """
	  augmented_image, augmented_label = (
	      autoaugment_utils.distort_image_with_autoaugment(
	          tf.cast(image, tf.uint8), tf.cast(label, tf.int32), ignore_label,
	          policy_name))
	  return tf.cast(augmented_image, tf.float32), augmented_label