import os
import psutil
from pympler.asizeof import asizeof
from tensorboardX import SummaryWriter
from typing import Optional, Callable


def random_collect(
        policy_cfg: 'EasyDict',  # noqa
        policy: 'Policy',  # noqa
        RandomPolicy: 'Policy',  # noqa
        collector: 'ISerialCollector',  # noqa
        collector_env: 'BaseEnvManager',  # noqa
        replay_buffer: 'IBuffer',  # noqa
        postprocess_data_fn: Optional[Callable] = None
) -> None:  # noqa
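    """
    Overview:
        Collect ``policy_cfg.random_collect_episode_num`` episodes of warm-up data with a random policy,
        push them into the replay buffer, and then restore the original collect-mode policy on the collector.
    Arguments:
        - policy_cfg (:obj:`EasyDict`): The policy config, which must specify ``random_collect_episode_num``.
        - policy (:obj:`Policy`): The policy whose collect mode is restored after random collection.
        - RandomPolicy (:obj:`Policy`): The random policy class used to instantiate the temporary policy.
        - collector (:obj:`ISerialCollector`): The serial collector used to gather episodes.
        - collector_env (:obj:`BaseEnvManager`): The environment manager providing the action space.
        - replay_buffer (:obj:`IBuffer`): The buffer that stores the collected game segments.
        - postprocess_data_fn (:obj:`Optional[Callable]`): Optional hook applied to the collected data before it is pushed.
    """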
    assert policy_cfg.random_collect_episode_num > 0

    random_policy = RandomPolicy(cfg=policy_cfg, action_space=collector_env.env_ref.action_space)
    # Set the collector to use the random policy.
    collector.reset_policy(random_policy.collect_mode)
    # Set the temperature for visit-count distributions according to train_iter;
    # please refer to Appendix D in the MuZero paper for details.
    collect_kwargs = {'temperature': 1, 'epsilon': 0.0}
    # Collect ``random_collect_episode_num`` episodes of data with the random policy.
    new_data = collector.collect(n_episode=policy_cfg.random_collect_episode_num, train_iter=0, policy_kwargs=collect_kwargs)
    if postprocess_data_fn is not None:
        new_data = postprocess_data_fn(new_data)
    # Save the new_data collected by the collector to the replay buffer.
    replay_buffer.push_game_segments(new_data)
    # Remove the oldest data if the replay buffer is full.
    replay_buffer.remove_oldest_data_to_fit()
    # Restore the original policy.
    collector.reset_policy(policy.collect_mode)


def log_buffer_memory_usage(train_iter: int, buffer: "GameBuffer", writer: SummaryWriter) -> None:
    """
    Overview:
        Log the memory usage of the buffer and the current process to TensorBoard.
    Arguments:
        - train_iter (:obj:`int`): The current training iteration.
        - buffer (:obj:`GameBuffer`): The game buffer.
        - writer (:obj:`SummaryWriter`): The TensorBoard writer.
    """
    writer.add_scalar('Buffer/num_of_all_collected_episodes', buffer.num_of_collected_episodes, train_iter)
    writer.add_scalar('Buffer/num_of_game_segments', len(buffer.game_segment_buffer), train_iter)
    writer.add_scalar('Buffer/num_of_transitions', len(buffer.game_segment_game_pos_look_up), train_iter)
    game_segment_buffer = buffer.game_segment_buffer

    # Calculate the amount of memory occupied by buffer.game_segment_buffer (in bytes).
    buffer_memory_usage = asizeof(game_segment_buffer)
    # Convert buffer_memory_usage to megabytes (MB).
    buffer_memory_usage_mb = buffer_memory_usage / (1024 * 1024)
    # Record the memory usage of buffer.game_segment_buffer to TensorBoard.
    writer.add_scalar('Buffer/memory_usage/game_segment_buffer', buffer_memory_usage_mb, train_iter)

    # Get the amount of memory currently used by the process (in bytes).
    process = psutil.Process(os.getpid())
    process_memory_usage = process.memory_info().rss
    # Convert process_memory_usage to megabytes (MB).
    process_memory_usage_mb = process_memory_usage / (1024 * 1024)
    # Record the memory usage of the process to TensorBoard.
    writer.add_scalar('Buffer/memory_usage/process', process_memory_usage_mb, train_iter)
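

# Minimal usage sketch (illustrative only, not part of the original entry points): in a serial
# training pipeline these helpers would typically be wired up roughly as follows. The names
# `cfg`, `policy`, `collector`, `collector_env`, `replay_buffer`, `tb_logger`, and
# `max_train_iter` are hypothetical placeholders assumed to be created by the surrounding pipeline.
#
#   if cfg.policy.get('random_collect_episode_num', 0) > 0:
#       random_collect(cfg.policy, policy, RandomPolicy, collector, collector_env, replay_buffer)
#   for train_iter in range(max_train_iter):
#       ...  # collect new data and train the policy
#       log_buffer_memory_usage(train_iter, replay_buffer, tb_logger)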