"""Convert an LMDB store of msgpack-serialized values into an HDF5 file.

Every LMDB key becomes one HDF5 dataset whose name is the UTF-8 decoded key
and whose contents are the decoded LMDB value.
"""

import io  # used by the numpy-archive decoder in the reference invocations below

import h5py
import lmdb
import msgpack
import msgpack_numpy
import numpy as np
from tqdm import tqdm


def convert_lmdb_to_h5(lmdb_path, h5_path, decode_value=msgpack.loads):
    """Copy every (key, value) pair from an LMDB environment into a new HDF5 file.

    Parameters
    ----------
    lmdb_path : str
        Directory of the source LMDB environment (opened read-only).
    h5_path : str
        Destination HDF5 file; created fresh / overwritten ('w' mode).
    decode_value : callable, optional
        Maps the raw LMDB value buffer to something h5py can store as a
        dataset. Defaults to ``msgpack.loads``; pass a msgpack_numpy- or
        numpy-based loader for feature archives (see examples below).
        NOTE(review): with the default, values containing numpy arrays
        serialized via msgpack_numpy would need a msgpack_numpy-aware
        decoder instead — confirm against how the LMDB was written.
    """
    env = lmdb.open(lmdb_path, readonly=True, max_dbs=0,
                    max_readers=4096 * 8, readahead=False)
    try:
        # buffers=True yields zero-copy memoryviews for values; both
        # msgpack.loads and io.BytesIO accept buffer objects directly.
        with h5py.File(h5_path, "w") as h5_data, \
                env.begin(write=False, buffers=True) as txn:
            cursor = txn.cursor()
            # Materialize the key list first so tqdm can display a total.
            keys = list(cursor.iternext(values=False))
            for key in tqdm(keys, desc="Processing LMDB to HDF5"):
                key_str = bytes(key).decode()
                value = cursor.get(key)
                h5_data.create_dataset(key_str, data=decode_value(value))
    finally:
        # lmdb environments hold OS handles; close explicitly.
        env.close()
    print("Conversion completed.")


if __name__ == "__main__":
    convert_lmdb_to_h5("data/TVR_Ranking_val_top100_hero",
                       "data/h5/TVR_Ranking_val_top100_hero.h5")

    # Other conversions previously run with this script (kept for reference):
    #
    # convert_lmdb_to_h5(
    #     "data/features/resnet_slowfast_1.5",
    #     "data/h5/features/resnet_slowfast_1.5.h5",
    #     decode_value=lambda v: np.copy(
    #         msgpack_numpy.loads(v, raw=False)["features"]),
    # )
    #
    # convert_lmdb_to_h5(
    #     "data/features/tvr_sub_pretrained_w_sub_query_max_cl-1.5",
    #     "data/h5/features/tvr_sub_pretrained_w_sub_query_max_cl-1.5.h5",
    #     decode_value=lambda v: np.load(io.BytesIO(v),
    #                                    allow_pickle=True)["features"],
    # )