import h5py | |
import lmdb | |
import numpy as np | |
from tqdm import tqdm | |
import io | |
import msgpack_numpy | |
import msgpack | |
lmdb_path = "data/TVR_Ranking_val_top100_hero"
h5_path = "data/h5/TVR_Ranking_val_top100_hero.h5"

# Convert every (key, value) record of the LMDB store into one HDF5 dataset:
# the LMDB key (decoded to str) becomes the dataset name, the msgpack-decoded
# value becomes the dataset contents.
env = lmdb.open(lmdb_path, readonly=True, max_dbs=0, max_readers=4096 * 8, readahead=False)
try:
    # Context managers guarantee the HDF5 file and the read transaction are
    # closed even if decoding or dataset creation raises mid-loop.
    with h5py.File(h5_path, 'w') as h5_data, env.begin(write=False, buffers=True) as txn:
        total_entries = txn.stat()['entries']  # record count, for tqdm progress
        # Iterate the cursor directly: a single sequential pass over (key, value)
        # pairs, instead of materializing every key into a list and re-fetching
        # each value with cursor.get(key) (a redundant second lookup per record).
        for key, value in tqdm(txn.cursor(), total=total_entries, desc="Processing LMDB to HDF5"):
            # buffers=True yields memoryviews; copy the key bytes before decoding.
            key_str = bytes(key).decode()
            # NOTE(review): plain msgpack.loads assumes values were packed without
            # msgpack_numpy encoding; if arrays come back as dicts, switch to
            # msgpack_numpy.loads(value) — confirm against the producer side.
            _external_inference_vr_res = msgpack.loads(value)
            h5_data.create_dataset(key_str, data=_external_inference_vr_res)
    print("Conversion completed.")
finally:
    env.close()  # release the LMDB environment handle even on failure
# --- Disabled variant: visual features (resnet_slowfast, msgpack_numpy-encoded dicts) LMDB -> HDF5 ---
# lmdb_path = "data/features/resnet_slowfast_1.5"
# h5_path = "data/h5/features/resnet_slowfast_1.5.h5"
# env = lmdb.open(lmdb_path, readonly=True, max_dbs=0, max_readers=4096 * 8, readahead=False) | |
# h5_data = h5py.File(h5_path, 'w') | |
# with env.begin(write=False, buffers=True) as txn: | |
# cursor = txn.cursor() | |
# keys = list(cursor.iternext(values=False)) # List of keys for progress tracking | |
# for key in tqdm(keys, desc="Processing LMDB to HDF5"): | |
# key_str = bytes(key).decode() | |
# value = cursor.get(key) | |
# img_dump = {k: np.copy(v) for k, v in msgpack_numpy.loads(value, raw=False).items()} | |
# visual_feat = img_dump['features'] # Adjust if needed, like [:self.max_ctx_len] | |
# h5_data.create_dataset(key_str, data=visual_feat) | |
# print("Conversion completed.") | |
# h5_data.close() | |
# --- Disabled variant: subtitle features (numpy .npz-style values read via io.BytesIO) LMDB -> HDF5 ---
# lmdb_path = "data/features/tvr_sub_pretrained_w_sub_query_max_cl-1.5"
# h5_path = "data/h5/features/tvr_sub_pretrained_w_sub_query_max_cl-1.5.h5"
# env = lmdb.open(lmdb_path, readonly=True, max_dbs=0, max_readers=4096 * 8, readahead=False) | |
# h5_data = h5py.File(h5_path, 'w') | |
# with env.begin(write=False, buffers=True) as txn: | |
# cursor = txn.cursor() | |
# for key, value in tqdm(cursor): | |
# key_str = bytes(key).decode() | |
# with io.BytesIO(value) as reader: | |
# feat_dump = np.load(reader, allow_pickle=True) | |
# sub_feat = feat_dump["features"] | |
# h5_data.create_dataset(key_str, data=sub_feat) | |
# print("Conversion completed.") | |
# h5_data.close() | |