File size: 2,370 Bytes
a638e43
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
import h5py
import lmdb
import numpy as np
from tqdm import tqdm
import io
import msgpack_numpy
import msgpack


def convert_lmdb_to_h5(lmdb_path, h5_path):
    """Copy every key/value pair from an LMDB store into an HDF5 file.

    Each LMDB value is msgpack-decoded and written as one HDF5 dataset
    whose name is the UTF-8-decoded LMDB key.

    Args:
        lmdb_path: Path to the source LMDB environment directory.
        h5_path: Path of the HDF5 file to create (overwritten if present).
    """
    # Open the LMDB environment read-only; readahead off since access is
    # a single sequential pass over potentially large values.
    env = lmdb.open(lmdb_path, readonly=True, max_dbs=0,
                    max_readers=4096 * 8, readahead=False)
    try:
        # Context manager guarantees the HDF5 file is flushed and closed
        # even if decoding or writing raises part-way through (the original
        # code leaked the handle on any exception).
        with h5py.File(h5_path, 'w') as h5_data, \
                env.begin(write=False, buffers=True) as txn:
            cursor = txn.cursor()
            # Materialize the keys up front so tqdm can show a total.
            keys = list(cursor.iternext(values=False))
            for key in tqdm(keys, desc="Processing LMDB to HDF5"):
                key_str = bytes(key).decode()
                value = cursor.get(key)
                _external_inference_vr_res = msgpack.loads(value)
                h5_data.create_dataset(key_str, data=_external_inference_vr_res)
    finally:
        env.close()  # release LMDB readers/file handles in all cases
    print("Conversion completed.")


convert_lmdb_to_h5("data/TVR_Ranking_val_top100_hero",
                   "data/h5/TVR_Ranking_val_top100_hero.h5")

# lmdb_path = "data/features/resnet_slowfast_1.5"
# h5_path = "data/h5/features/resnet_slowfast_1.5.h5"
# env = lmdb.open(lmdb_path, readonly=True, max_dbs=0, max_readers=4096 * 8, readahead=False)
# h5_data = h5py.File(h5_path, 'w')
# with env.begin(write=False, buffers=True) as txn:
#     cursor = txn.cursor()
#     keys = list(cursor.iternext(values=False))  # List of keys for progress tracking
#     for key in tqdm(keys, desc="Processing LMDB to HDF5"):
#         key_str = bytes(key).decode()
#         value = cursor.get(key)
#         img_dump = {k: np.copy(v) for k, v in msgpack_numpy.loads(value, raw=False).items()}
#         visual_feat = img_dump['features']  # Adjust if needed, like [:self.max_ctx_len]
#         h5_data.create_dataset(key_str, data=visual_feat)
# print("Conversion completed.")
# h5_data.close()


# lmdb_path = "data/features/tvr_sub_pretrained_w_sub_query_max_cl-1.5"
# h5_path = "data/h5/features/tvr_sub_pretrained_w_sub_query_max_cl-1.5.h5"
# env = lmdb.open(lmdb_path, readonly=True, max_dbs=0, max_readers=4096 * 8, readahead=False)
# h5_data = h5py.File(h5_path, 'w')
# with env.begin(write=False, buffers=True) as txn:
#     cursor = txn.cursor()
#     for key, value in tqdm(cursor):
#         key_str = bytes(key).decode()
#         with io.BytesIO(value) as reader:
#             feat_dump = np.load(reader, allow_pickle=True)
#             sub_feat = feat_dump["features"]
#             h5_data.create_dataset(key_str, data=sub_feat)
# print("Conversion completed.")
# h5_data.close()