import h5py | |
import lmdb | |
import numpy as np | |
from tqdm import tqdm | |
import io | |
import msgpack_numpy | |
import msgpack | |
lmdb_path = "data/TVR_Ranking_val_top100_hero"
h5_path = "data/h5/TVR_Ranking_val_top100_hero.h5"

# Convert every (key, value) record of the LMDB store into one HDF5 dataset:
# the LMDB key (decoded to str) becomes the dataset name, the msgpack-decoded
# value becomes the dataset contents.
env = lmdb.open(lmdb_path, readonly=True, max_dbs=0, max_readers=4096 * 8, readahead=False)
try:
    # Context managers guarantee the HDF5 file and the read transaction are
    # closed even if decoding or dataset creation raises mid-loop.
    with h5py.File(h5_path, 'w') as h5_data, env.begin(write=False, buffers=True) as txn:
        total_entries = txn.stat()['entries']  # record count, for tqdm progress
        # Iterate the cursor directly: a single sequential pass over (key, value)
        # pairs, instead of materializing every key into a list and re-fetching
        # each value with cursor.get(key) (a redundant second lookup per record).
        for key, value in tqdm(txn.cursor(), total=total_entries, desc="Processing LMDB to HDF5"):
            # buffers=True yields memoryviews; copy the key bytes before decoding.
            key_str = bytes(key).decode()
            # NOTE(review): plain msgpack.loads assumes values were packed without
            # msgpack_numpy encoding; if arrays come back as dicts, switch to
            # msgpack_numpy.loads(value) — confirm against the producer side.
            _external_inference_vr_res = msgpack.loads(value)
            h5_data.create_dataset(key_str, data=_external_inference_vr_res)
    print("Conversion completed.")
finally:
    env.close()  # release the LMDB environment handle even on failure
# --- Disabled variant: visual features (resnet_slowfast, msgpack_numpy-encoded dicts) LMDB -> HDF5 ---
# lmdb_path = "data/features/resnet_slowfast_1.5"
# h5_path = "data/h5/features/resnet_slowfast_1.5.h5"
# env = lmdb.open(lmdb_path, readonly=True, max_dbs=0, max_readers=4096 * 8, readahead=False) | |
# h5_data = h5py.File(h5_path, 'w') | |
# with env.begin(write=False, buffers=True) as txn: | |
# cursor = txn.cursor() | |
# keys = list(cursor.iternext(values=False)) # List of keys for progress tracking | |
# for key in tqdm(keys, desc="Processing LMDB to HDF5"): | |
# key_str = bytes(key).decode() | |
# value = cursor.get(key) | |
# img_dump = {k: np.copy(v) for k, v in msgpack_numpy.loads(value, raw=False).items()} | |
# visual_feat = img_dump['features'] # Adjust if needed, like [:self.max_ctx_len] | |
# h5_data.create_dataset(key_str, data=visual_feat) | |
# print("Conversion completed.") | |
# h5_data.close() | |
# --- Disabled variant: subtitle features (numpy .npz-style values read via io.BytesIO) LMDB -> HDF5 ---
# lmdb_path = "data/features/tvr_sub_pretrained_w_sub_query_max_cl-1.5"
# h5_path = "data/h5/features/tvr_sub_pretrained_w_sub_query_max_cl-1.5.h5"
# env = lmdb.open(lmdb_path, readonly=True, max_dbs=0, max_readers=4096 * 8, readahead=False) | |
# h5_data = h5py.File(h5_path, 'w') | |
# with env.begin(write=False, buffers=True) as txn: | |
# cursor = txn.cursor() | |
# for key, value in tqdm(cursor): | |
# key_str = bytes(key).decode() | |
# with io.BytesIO(value) as reader: | |
# feat_dump = np.load(reader, allow_pickle=True) | |
# sub_feat = feat_dump["features"] | |
# h5_data.create_dataset(key_str, data=sub_feat) | |
# print("Conversion completed.") | |
# h5_data.close() | |