sapiens-pose / external /det /tools /misc /get_image_metas.py
rawalkhirodkar's picture
Add initial commit
28c256d
# Copyright (c) Meta Platforms, Inc. and affiliates.
# All rights reserved.
#
# This source code is licensed under the license found in the
# LICENSE file in the root directory of this source tree.
"""Get image metas on a specific dataset.
Here is an example to run this script.
Example:
python tools/misc/get_image_metas.py ${CONFIG} \
--out ${OUTPUT FILE NAME}
"""
import argparse
import csv
import os.path as osp
from multiprocessing import Pool
import mmcv
from mmengine.config import Config
from mmengine.fileio import dump, get
def parse_args():
parser = argparse.ArgumentParser(description='Collect image metas')
parser.add_argument('config', help='Config file path')
parser.add_argument(
'--dataset',
default='val',
choices=['train', 'val', 'test'],
help='Collect image metas from which dataset')
parser.add_argument(
'--out',
default='validation-image-metas.pkl',
help='The output image metas file name. The save dir is in the '
'same directory as `dataset.ann_file` path')
parser.add_argument(
'--nproc',
default=4,
type=int,
help='Processes used for get image metas')
args = parser.parse_args()
return args
def get_metas_from_csv_style_ann_file(ann_file):
data_infos = []
cp_filename = None
with open(ann_file, 'r') as f:
reader = csv.reader(f)
for i, line in enumerate(reader):
if i == 0:
continue
img_id = line[0]
filename = f'{img_id}.jpg'
if filename != cp_filename:
data_infos.append(dict(filename=filename))
cp_filename = filename
return data_infos
def get_metas_from_txt_style_ann_file(ann_file):
with open(ann_file) as f:
lines = f.readlines()
i = 0
data_infos = []
while i < len(lines):
filename = lines[i].rstrip()
data_infos.append(dict(filename=filename))
skip_lines = int(lines[i + 2]) + 3
i += skip_lines
return data_infos
def get_image_metas(data_info, img_prefix):
filename = data_info.get('filename', None)
if filename is not None:
if img_prefix is not None:
filename = osp.join(img_prefix, filename)
img_bytes = get(filename)
img = mmcv.imfrombytes(img_bytes, flag='color')
shape = img.shape
meta = dict(filename=filename, ori_shape=shape)
else:
raise NotImplementedError('Missing `filename` in data_info')
return meta
def main():
args = parse_args()
assert args.out.endswith('pkl'), 'The output file name must be pkl suffix'
# load config files
cfg = Config.fromfile(args.config)
dataloader_cfg = cfg.get(f'{args.dataset}_dataloader')
ann_file = osp.join(dataloader_cfg.dataset.data_root,
dataloader_cfg.dataset.ann_file)
img_prefix = osp.join(dataloader_cfg.dataset.data_root,
dataloader_cfg.dataset.data_prefix['img'])
print(f'{"-" * 5} Start Processing {"-" * 5}')
if ann_file.endswith('csv'):
data_infos = get_metas_from_csv_style_ann_file(ann_file)
elif ann_file.endswith('txt'):
data_infos = get_metas_from_txt_style_ann_file(ann_file)
else:
shuffix = ann_file.split('.')[-1]
raise NotImplementedError('File name must be csv or txt suffix but '
f'get {shuffix}')
print(f'Successfully load annotation file from {ann_file}')
print(f'Processing {len(data_infos)} images...')
pool = Pool(args.nproc)
# get image metas with multiple processes
image_metas = pool.starmap(
get_image_metas,
zip(data_infos, [img_prefix for _ in range(len(data_infos))]),
)
pool.close()
# save image metas
root_path = dataloader_cfg.dataset.ann_file.rsplit('/', 1)[0]
save_path = osp.join(root_path, args.out)
dump(image_metas, save_path, protocol=4)
print(f'Image meta file save to: {save_path}')
if __name__ == '__main__':
main()