# coding=utf-8
# Copyright 2021 The Deeplab2 Authors.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
r"""Converts STEP (KITTI-STEP or MOTChallenge-STEP) data to sharded TFRecord file format with tf.train.Example protos. | |
The expected directory structure of the STEP dataset should be as follows: | |
+ {KITTI | MOTChallenge}-STEP | |
+ images | |
+ train | |
+ sequence_id | |
- *.{png|jpg} | |
... | |
+ val | |
+ test | |
+ panoptic_maps | |
+ train | |
+ sequence_id | |
- *.png | |
... | |
+ val | |

The ground-truth panoptic map is encoded as the following in PNG format:

  R: semantic_id
  G: instance_id // 256
  B: instance_id % 256

See ./utils/create_step_panoptic_maps.py for more details of how we create the
panoptic map by merging semantic and instance maps.

The output Example proto contains the following fields:

  image/encoded: encoded image content.
  image/filename: image filename.
  image/format: image file format.
  image/height: image height.
  image/width: image width.
  image/channels: image channels.
  image/segmentation/class/encoded: encoded panoptic segmentation content.
  image/segmentation/class/format: segmentation encoding format.
  video/sequence_id: sequence ID of the frame.
  video/frame_id: ID of the frame of the video sequence.

The output panoptic segmentation map stored in the Example will be the raw
bytes of an int32 panoptic map, where each pixel is assigned a panoptic ID:

  panoptic ID = semantic ID * label divisor (1000) + instance ID

where the semantic ID is the same as `category_id` (using TrainId) for each
segment, and the ignore label for pixels not belonging to any segment. The
instance ID is 0 for pixels belonging to
  1) a `stuff` class,
  2) a `thing` class with the `iscrowd` label, or
  3) pixels with the ignore label,
and lies in [1, label divisor) otherwise.
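
For example, a segment with semantic ID 13 and instance ID 7 is stored as
13 * 1000 + 7 = 13007 (the IDs here are chosen purely for illustration).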

Example to run the script:

  python deeplab2/data/build_step_data.py \
    --step_root=${STEP_ROOT} \
    --output_dir=${OUTPUT_DIR}
"""
import math
import os
from typing import Iterator, Optional, Sequence, Tuple

from absl import app
from absl import flags
from absl import logging
import numpy as np
from PIL import Image
import tensorflow as tf

from deeplab2.data import data_utils

FLAGS = flags.FLAGS

flags.DEFINE_string('step_root', None, 'STEP dataset root folder.')
flags.DEFINE_string('output_dir', None,
                    'Path to save converted TFRecord of TensorFlow examples.')
flags.DEFINE_bool(
    'use_two_frames', False, 'Flag to separate between 1 frame '
    'per TFExample or 2 consecutive frames per TFExample.')

_PANOPTIC_LABEL_FORMAT = 'raw'
_NUM_SHARDS = 10
_IMAGE_FOLDER_NAME = 'images'
_PANOPTIC_MAP_FOLDER_NAME = 'panoptic_maps'
_LABEL_MAP_FORMAT = 'png'
_INSTANCE_LABEL_DIVISOR = 1000
_ENCODED_INSTANCE_LABEL_DIVISOR = 256
_TF_RECORD_PATTERN = '%s-%05d-of-%05d.tfrecord'
_FRAME_ID_PATTERN = '%06d'
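
# As an illustration of the shard naming scheme above,
# _TF_RECORD_PATTERN % ('train', 0, 10) yields 'train-00000-of-00010.tfrecord'.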


def _get_image_info_from_path(image_path: str) -> Tuple[str, str]:
  """Gets image info including sequence id and image id.

  The image path is in the format of '.../split/sequence_id/image_id.png',
  where `sequence_id` refers to the id of the video sequence, and `image_id`
  is the id of the image within the video sequence.

  Args:
    image_path: Absolute path of the image.

  Returns:
    sequence_id and image_id as strings.
  """
  sequence_id = image_path.split('/')[-2]
  image_id = os.path.splitext(os.path.basename(image_path))[0]
  return sequence_id, image_id
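
# For instance, given the (hypothetical) path
# '/data/KITTI-STEP/images/train/0002/000123.png', the function above returns
# ('0002', '000123').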


def _get_images_per_shard(step_root: str, dataset_split: str,
                          sharded_by_sequence: bool) -> Iterator[Sequence[str]]:
  """Gets files for the specified data type and dataset split.

  Args:
    step_root: String, path to STEP dataset root folder.
    dataset_split: String, dataset split ('train', 'val', 'test').
    sharded_by_sequence: Whether the images should be sharded by sequence or
      split evenly.

  Yields:
    Sorted lists of file names, one list per shard.
  """
  search_files = os.path.join(step_root, _IMAGE_FOLDER_NAME, dataset_split, '*',
                              '*')
  filenames = sorted(tf.io.gfile.glob(search_files))
  num_per_even_shard = int(math.ceil(len(filenames) / _NUM_SHARDS))
  sequence_ids = [
      os.path.basename(os.path.dirname(name)) for name in filenames
  ]
  images_per_shard = []
  for i, name in enumerate(filenames):
    images_per_shard.append(name)
    # The last file always closes the current shard; checking it first also
    # guards the sequence_ids[i + 1] lookup below.
    shard_data = (i == len(filenames) - 1)
    # Sharded by sequence id: close the shard when the next file belongs to a
    # different sequence.
    shard_data = shard_data or (sharded_by_sequence and
                                sequence_ids[i + 1] != sequence_ids[i])
    # Sharded evenly: close the shard once it reaches the target size.
    shard_data = shard_data or (not sharded_by_sequence and
                                len(images_per_shard) == num_per_even_shard)
    if shard_data:
      yield images_per_shard
      images_per_shard = []
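
# A worked example of the even-sharding branch above, with made-up counts:
# given 25 images and _NUM_SHARDS = 10, num_per_even_shard = ceil(25 / 10) = 3,
# so the generator yields eight shards of 3 images followed by a final shard
# holding the remaining image.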


def _decode_panoptic_map(panoptic_map_path: str) -> Optional[bytes]:
  """Decodes the panoptic map from an encoded image file.

  Args:
    panoptic_map_path: Path to the panoptic map image file.

  Returns:
    The panoptic map as encoded int32 numpy array bytes, or None if the file
    does not exist.
  """
  if not tf.io.gfile.exists(panoptic_map_path):
    return None
  with tf.io.gfile.GFile(panoptic_map_path, 'rb') as f:
    panoptic_map = np.array(Image.open(f)).astype(np.int32)
  semantic_map = panoptic_map[:, :, 0]
  instance_map = (
      panoptic_map[:, :, 1] * _ENCODED_INSTANCE_LABEL_DIVISOR +
      panoptic_map[:, :, 2])
  panoptic_map = semantic_map * _INSTANCE_LABEL_DIVISOR + instance_map
  return panoptic_map.tobytes()
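
# Continuing the illustrative numbers from the module docstring: a PNG pixel
# with (R, G, B) = (13, 0, 7) gives semantic_map = 13 and
# instance_map = 0 * 256 + 7 = 7, so the stored panoptic ID is
# 13 * 1000 + 7 = 13007.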


def _get_previous_frame_path(image_path: str) -> str:
  """Gets the previous frame path, falling back to image_path if missing."""
  frame_id, frame_ext = os.path.splitext(os.path.basename(image_path))
  folder_dir = os.path.dirname(image_path)
  prev_frame_id = _FRAME_ID_PATTERN % (int(frame_id) - 1)
  prev_image_path = os.path.join(folder_dir, prev_frame_id + frame_ext)
  # For the first frame, duplicate the current frame as the previous frame.
  if not tf.io.gfile.exists(prev_image_path):
    tf.compat.v1.logging.warn(
        'Could not find previous frame %s of frame %d; duplicating the '
        'current frame as the previous frame.', prev_image_path, int(frame_id))
    prev_image_path = image_path
  return prev_image_path
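
# For example, given the (hypothetical) path '.../train/0002/000123.png', the
# function above returns '.../train/0002/000122.png'. For frame '000000' no
# previous file exists, so it falls back to the current frame path.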


def _create_panoptic_tfexample(image_path: str,
                               panoptic_map_path: str,
                               use_two_frames: bool,
                               is_testing: bool = False) -> tf.train.Example:
  """Creates a TF example for each image.

  Args:
    image_path: Path to the image.
    panoptic_map_path: Path to the panoptic map (as an image file).
    use_two_frames: Whether to encode two consecutive frames in the Example.
    is_testing: Whether it is testing data. If so, skip adding label data.

  Returns:
    TF example proto.
  """
  with tf.io.gfile.GFile(image_path, 'rb') as f:
    image_data = f.read()
  label_data = None
  if not is_testing:
    label_data = _decode_panoptic_map(panoptic_map_path)
  image_name = os.path.basename(image_path)
  image_format = image_name.split('.')[1].lower()
  sequence_id, frame_id = _get_image_info_from_path(image_path)
  prev_image_data = None
  prev_label_data = None
  if use_two_frames:
    # Previous image.
    prev_image_path = _get_previous_frame_path(image_path)
    with tf.io.gfile.GFile(prev_image_path, 'rb') as f:
      prev_image_data = f.read()
    # Previous panoptic map.
    if not is_testing:
      prev_panoptic_map_path = _get_previous_frame_path(panoptic_map_path)
      prev_label_data = _decode_panoptic_map(prev_panoptic_map_path)
  return data_utils.create_video_tfexample(
      image_data,
      image_format,
      image_name,
      label_format=_PANOPTIC_LABEL_FORMAT,
      sequence_id=sequence_id,
      image_id=frame_id,
      label_data=label_data,
      prev_image_data=prev_image_data,
      prev_label_data=prev_label_data)
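
# A minimal sketch (not part of this script) of reading the output back,
# assuming the feature keys listed in the module docstring:
#
#   features = {
#       'image/encoded': tf.io.FixedLenFeature([], tf.string),
#       'image/segmentation/class/encoded':
#           tf.io.FixedLenFeature([], tf.string),
#       'video/sequence_id': tf.io.FixedLenFeature([], tf.string),
#   }
#   for raw in tf.data.TFRecordDataset('train-00000-of-00010.tfrecord').take(1):
#     parsed = tf.io.parse_single_example(raw, features)
#     panoptic = tf.io.decode_raw(
#         parsed['image/segmentation/class/encoded'], tf.int32)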


def _convert_dataset(step_root: str,
                     dataset_split: str,
                     output_dir: str,
                     use_two_frames: bool = False):
  """Converts the specified dataset split to TFRecord format.

  Args:
    step_root: String, path to STEP dataset root folder.
    dataset_split: String, the dataset split (e.g., train, val).
    output_dir: String, directory to write output TFRecords to.
    use_two_frames: Whether to encode two consecutive frames in the Example.
  """
  # For the val and test sets, when running with use_two_frames, we create one
  # sorted TFRecord per sequence.
  create_tfrecord_per_sequence = ('train'
                                  not in dataset_split) and use_two_frames
  is_testing = 'test' in dataset_split
  image_files_per_shard = list(
      _get_images_per_shard(step_root, dataset_split,
                            sharded_by_sequence=create_tfrecord_per_sequence))
  num_shards = len(image_files_per_shard)
  for shard_id, image_list in enumerate(image_files_per_shard):
    shard_filename = _TF_RECORD_PATTERN % (dataset_split, shard_id, num_shards)
    output_filename = os.path.join(output_dir, shard_filename)
    with tf.io.TFRecordWriter(output_filename) as tfrecord_writer:
      for image_path in image_list:
        sequence_id, image_id = _get_image_info_from_path(image_path)
        panoptic_map_path = os.path.join(
            step_root, _PANOPTIC_MAP_FOLDER_NAME, dataset_split, sequence_id,
            '%s.%s' % (image_id, _LABEL_MAP_FORMAT))
        example = _create_panoptic_tfexample(image_path, panoptic_map_path,
                                             use_two_frames, is_testing)
        tfrecord_writer.write(example.SerializeToString())


def main(argv: Sequence[str]) -> None:
  if len(argv) > 1:
    raise app.UsageError('Too many command-line arguments.')
  tf.io.gfile.makedirs(FLAGS.output_dir)
  for dataset_split in ('train', 'val', 'test'):
    logging.info('Starting to process STEP dataset split %s.', dataset_split)
    _convert_dataset(FLAGS.step_root, dataset_split, FLAGS.output_dir,
                     FLAGS.use_two_frames)


if __name__ == '__main__':
  app.run(main)