import h5py
import lmdb
import numpy as np
from tqdm import tqdm
import io
import msgpack_numpy
import msgpack
lmdb_path = "data/TVR_Ranking_val_top100_hero"
h5_path = "data/h5/TVR_Ranking_val_top100_hero.h5"


def convert_lmdb_to_h5(lmdb_path, h5_path):
    """Copy every (key, value) pair from an LMDB store into an HDF5 file.

    Each LMDB value is assumed to be a msgpack-encoded array-like; it is
    decoded with ``msgpack.loads`` and written as one HDF5 dataset whose
    name is the LMDB key decoded as text.

    Args:
        lmdb_path: Path to the source LMDB directory (opened read-only).
        h5_path: Destination HDF5 file path (opened in 'w' mode, i.e.
            truncated if it already exists).
    """
    env = lmdb.open(lmdb_path, readonly=True, max_dbs=0,
                    max_readers=4096 * 8, readahead=False)
    try:
        # Context managers guarantee the HDF5 file and the read transaction
        # are released even if decoding or dataset creation raises.
        with h5py.File(h5_path, 'w') as h5_data, \
                env.begin(write=False, buffers=True) as txn:
            cursor = txn.cursor()
            # Materialize the key list first so tqdm can show a total.
            keys = list(cursor.iternext(values=False))
            for key in tqdm(keys, desc="Processing LMDB to HDF5"):
                # With buffers=True, keys/values are memoryviews; copy the
                # key to bytes before decoding it to a dataset name.
                key_str = bytes(key).decode()
                value = cursor.get(key)
                _external_inference_vr_res = msgpack.loads(value)
                h5_data.create_dataset(key_str, data=_external_inference_vr_res)
    finally:
        # The original script never closed the environment; close it here
        # so readers/locks are released promptly.
        env.close()
    print("Conversion completed.")


if __name__ == "__main__":
    convert_lmdb_to_h5(lmdb_path, h5_path)
# --- Alternate conversion (kept for reference): visual features stored as
# --- msgpack_numpy dicts; only the 'features' entry is written to HDF5.
# lmdb_path = "data/features/resnet_slowfast_1.5"
# h5_path = "data/h5/features/resnet_slowfast_1.5.h5"
# env = lmdb.open(lmdb_path, readonly=True, max_dbs=0, max_readers=4096 * 8, readahead=False)
# h5_data = h5py.File(h5_path, 'w')
# with env.begin(write=False, buffers=True) as txn:
# cursor = txn.cursor()
# keys = list(cursor.iternext(values=False)) # List of keys for progress tracking
# for key in tqdm(keys, desc="Processing LMDB to HDF5"):
# key_str = bytes(key).decode()
# value = cursor.get(key)
# img_dump = {k: np.copy(v) for k, v in msgpack_numpy.loads(value, raw=False).items()}
# visual_feat = img_dump['features'] # Adjust if needed, like [:self.max_ctx_len]
# h5_data.create_dataset(key_str, data=visual_feat)
# print("Conversion completed.")
# h5_data.close()
# --- Alternate conversion (kept for reference): subtitle features stored as
# --- numpy .npz blobs; only the 'features' entry is written to HDF5.
# lmdb_path = "data/features/tvr_sub_pretrained_w_sub_query_max_cl-1.5"
# h5_path = "data/h5/features/tvr_sub_pretrained_w_sub_query_max_cl-1.5.h5"
# env = lmdb.open(lmdb_path, readonly=True, max_dbs=0, max_readers=4096 * 8, readahead=False)
# h5_data = h5py.File(h5_path, 'w')
# with env.begin(write=False, buffers=True) as txn:
# cursor = txn.cursor()
# for key, value in tqdm(cursor):
# key_str = bytes(key).decode()
# with io.BytesIO(value) as reader:
# feat_dump = np.load(reader, allow_pickle=True)
# sub_feat = feat_dump["features"]
# h5_data.create_dataset(key_str, data=sub_feat)
# print("Conversion completed.")
# h5_data.close()