Spaces:

zjowowen
/

gomoku

Sleeping

App Files Files Community

gomoku / LightZero /lzero /entry /utils.py

zjowowen

init space

079c32c 11 months ago

raw

history blame

3.15 kB

	import os

	import psutil
	from pympler.asizeof import asizeof
	from tensorboardX import SummaryWriter
	from typing import Optional, Callable


	def random_collect(
	        policy_cfg: 'EasyDict',  # noqa
	        policy: 'Policy',  # noqa
	        RandomPolicy: 'Policy',  # noqa
	        collector: 'ISerialCollector',  # noqa
	        collector_env: 'BaseEnvManager',  # noqa
	        replay_buffer: 'IBuffer',  # noqa
	        postprocess_data_fn: Optional[Callable] = None
	) -> None:  # noqa
	    """
	    Overview:
	        Collect ``policy_cfg.random_collect_episode_num`` episodes with a random policy, push the \
	        collected data into the replay buffer, and then switch the collector back to ``policy``.
	    Arguments:
	        - policy_cfg (:obj:`EasyDict`): Policy config; ``random_collect_episode_num`` must be positive.
	        - policy (:obj:`Policy`): The policy whose ``collect_mode`` is restored on the collector afterwards.
	        - RandomPolicy (:obj:`Policy`): Callable invoked as ``RandomPolicy(cfg=..., action_space=...)`` \
	            to build the random policy.
	        - collector (:obj:`ISerialCollector`): The collector used to gather the episodes.
	        - collector_env (:obj:`BaseEnvManager`): Env manager; its ``env_ref.action_space`` is handed to \
	            the random policy.
	        - replay_buffer (:obj:`IBuffer`): The buffer that receives the collected data.
	        - postprocess_data_fn (:obj:`Optional[Callable]`): If given, it is applied to the collected data \
	            and its return value is what gets pushed into the buffer.
	    Raises:
	        - AssertionError: If ``policy_cfg.random_collect_episode_num`` is not greater than 0.
	    """
	    assert policy_cfg.random_collect_episode_num > 0, (
	        f"random_collect_episode_num must be > 0, got {policy_cfg.random_collect_episode_num}"
	    )

	    random_policy = RandomPolicy(cfg=policy_cfg, action_space=collector_env.env_ref.action_space)
	    # Temporarily make the collector act with the random policy.
	    collector.reset_policy(random_policy.collect_mode)

	    try:
	        # Fixed collection kwargs: temperature 1 and epsilon 0.0 (they do not depend on train_iter here).
	        collect_kwargs = {'temperature': 1, 'epsilon': 0.0}

	        # Collect exactly ``random_collect_episode_num`` episodes, reported as train_iter 0.
	        new_data = collector.collect(
	            n_episode=policy_cfg.random_collect_episode_num, train_iter=0, policy_kwargs=collect_kwargs
	        )

	        if postprocess_data_fn is not None:
	            new_data = postprocess_data_fn(new_data)

	        # Save the data returned by the collector.
	        replay_buffer.push_game_segments(new_data)
	        # Remove the oldest data if the replay buffer is full.
	        replay_buffer.remove_oldest_data_to_fit()
	    finally:
	        # Always restore the original policy, even if collection or the buffer update failed,
	        # so the collector is never left running the random policy.
	        collector.reset_policy(policy.collect_mode)


	def log_buffer_memory_usage(train_iter: int, buffer: "GameBuffer", writer: SummaryWriter) -> None:
	    """
	    Overview:
	        Write buffer statistics and memory footprints to the given writer, all tagged with ``train_iter``. \
	        Three counters are recorded first, followed by the size of ``buffer.game_segment_buffer`` and the \
	        resident set size of the current process, both converted from bytes to megabytes.
	    Arguments:
	        - train_iter (:obj:`int`): The current training iteration, used as the step of every scalar.
	        - buffer (:obj:`GameBuffer`): The game buffer whose statistics are reported.
	        - writer (:obj:`SummaryWriter`): The TensorBoard writer that receives the scalars.
	    """
	    bytes_per_mb = 1024 * 1024

	    def _record(tag, value):
	        # Every scalar in this function shares the same step.
	        writer.add_scalar(tag, value, train_iter)

	    # Counters describing how much data the buffer currently holds.
	    _record('Buffer/num_of_all_collected_episodes', buffer.num_of_collected_episodes)
	    _record('Buffer/num_of_game_segments', len(buffer.game_segment_buffer))
	    _record('Buffer/num_of_transitions', len(buffer.game_segment_game_pos_look_up))

	    # Size of the game segment buffer as measured by asizeof (bytes), reported in MB.
	    segments_bytes = asizeof(buffer.game_segment_buffer)
	    _record('Buffer/memory_usage/game_segment_buffer', segments_bytes / bytes_per_mb)

	    # Resident set size of the current process (bytes), reported in MB.
	    rss_bytes = psutil.Process(os.getpid()).memory_info().rss
	    _record('Buffer/memory_usage/process', rss_bytes / bytes_per_mb)