|
|
|
|
|
""" |
|
Utility script that packs the dataset's metadata files so that the dataset can be re-generated elsewhere.
|
""" |
|
import os |
|
import glob |
|
from tqdm import tqdm |
|
import shutil |
|
import json |
|
from datasets.habitat_sim.paths import * |
|
import argparse |
|
import collections |
|
|
|
def match_dataset_path(value, dataset_roots):
    """Rewrite ``value`` relative to the first dataset root that contains it.

    Args:
        value: Path string read from a metadata file.
        dataset_roots: Mapping of dataset name to dataset root directory.
            Roots are tried in iteration order, so callers should list longer
            (more specific) roots first.

    Returns:
        ``<dataset name>/<path relative to its root>``, or ``None`` when no
        root contains ``value``.
    """
    for dataset, dataset_path in dataset_roots.items():
        # Match on a directory boundary: a bare startswith() would also accept
        # sibling directories such as "<root>_v2/..." and produce a "../" path.
        # os.path.join(root, "") appends a separator only when one is missing.
        if value == dataset_path or value.startswith(os.path.join(dataset_path, "")):
            return os.path.join(dataset, os.path.relpath(value, dataset_path))
    return None


def scene_group_name(scene):
    """Return the key under which a scene's images are counted.

    Scenes whose first "/"-separated component is ``hm3d`` are grouped by
    their first two components; every other scene by its first component.
    """
    scene_split = scene.split("/")
    return "/".join(scene_split[:2]) if scene_split[0] == "hm3d" else scene_split[0]


if __name__ == "__main__":
    # Usage: <script> input_dir output_dir
    # Copies every non-empty metadata.json found under input_dir to the same
    # relative location under output_dir, with dataset paths made relative.
    parser = argparse.ArgumentParser()
    parser.add_argument("input_dir")
    parser.add_argument("output_dir")
    args = parser.parse_args()

    input_dirname = args.input_dir
    output_dirname = args.output_dir

    input_metadata_filenames = glob.iglob(f"{input_dirname}/**/metadata.json", recursive=True)

    # Number of views per scene group, reported at the end.
    images_count = collections.defaultdict(int)

    # No exist_ok here: an already existing output directory raises
    # FileExistsError instead of being silently written into.
    os.makedirs(output_dirname)

    # Longest dataset roots first, so that a root nested inside another one is
    # matched before its parent. Loop-invariant, hence computed only once.
    scenes_dataset_paths = dict(sorted(SCENES_DATASET.items(), key=lambda x: len(x[1]), reverse=True))

    for input_filename in tqdm(input_metadata_filenames):
        with open(input_filename, "r", encoding="utf-8") as f:
            original_metadata = json.load(f)

        # Ignore metadata files that do not describe any view.
        if not original_metadata.get("multiviews"):
            print("No views in", input_filename)
            continue

        relpath = os.path.relpath(input_filename, input_dirname)
        print(relpath)

        # Replace machine-specific dataset locations by "<dataset name>/<relative path>"
        # so that the packed metadata can be resolved on another machine.
        metadata = dict()
        for key, value in original_metadata.items():
            if key in ("scene_dataset_config_file", "scene", "navmesh") and value != "":
                packed_value = match_dataset_path(value, scenes_dataset_paths)
                if packed_value is None:
                    raise KeyError("Unknown path:" + value)
                value = packed_value
            metadata[key] = value

        # Accumulate the number of views per scene group.
        images_count[scene_group_name(metadata["scene"])] += len(metadata["multiviews"])

        # Mirror the input directory layout below the output directory.
        output_filename = os.path.join(output_dirname, relpath)
        os.makedirs(os.path.dirname(output_filename), exist_ok=True)
        with open(output_filename, "w", encoding="utf-8") as f:
            json.dump(metadata, f)

    # Print statistics
    print("Images count:")
    for upper_level, count in images_count.items():
        print(f"- {upper_level}: {count}")