|
import os |
|
|
|
import psutil |
|
from pympler.asizeof import asizeof |
|
from tensorboardX import SummaryWriter |
|
from typing import Optional, Callable |
|
|
|
|
|
def random_collect(
        policy_cfg: 'EasyDict',
        policy: 'Policy',
        RandomPolicy: 'Policy',
        collector: 'ISerialCollector',
        collector_env: 'BaseEnvManager',
        replay_buffer: 'IBuffer',
        postprocess_data_fn: Optional[Callable] = None
) -> None:
    """
    Overview:
        Collect ``policy_cfg.random_collect_episode_num`` episodes with a random policy, push them \
        into the replay buffer, and then switch the collector back to ``policy``.
    Arguments:
        - policy_cfg (:obj:`EasyDict`): Policy config; must provide a positive ``random_collect_episode_num``. \
            It is also forwarded to ``RandomPolicy`` as ``cfg``.
        - policy (:obj:`Policy`): The policy whose ``collect_mode`` is installed on the collector afterwards.
        - RandomPolicy (:obj:`Policy`): Callable building the random policy; called with ``cfg`` and ``action_space``.
        - collector (:obj:`ISerialCollector`): Collector providing ``reset_policy`` and ``collect``.
        - collector_env (:obj:`BaseEnvManager`): Env manager; ``env_ref.action_space`` is given to ``RandomPolicy``.
        - replay_buffer (:obj:`IBuffer`): Buffer providing ``push_game_segments`` and ``remove_oldest_data_to_fit``.
        - postprocess_data_fn (:obj:`Optional[Callable]`): If given, applied to the collected data before it \
            is pushed into the buffer.
    Raises:
        - AssertionError: If ``policy_cfg.random_collect_episode_num`` is not positive.
    """
    assert policy_cfg.random_collect_episode_num > 0, \
        "policy_cfg.random_collect_episode_num must be positive to run random collection"

    random_policy = RandomPolicy(cfg=policy_cfg, action_space=collector_env.env_ref.action_space)

    # Make the collector act with the random policy for the duration of this call.
    collector.reset_policy(random_policy.collect_mode)

    try:
        # Fixed policy kwargs for the random phase; train_iter is pinned to 0 below.
        collect_kwargs = {'temperature': 1, 'epsilon': 0.0}

        new_data = collector.collect(
            n_episode=policy_cfg.random_collect_episode_num, train_iter=0, policy_kwargs=collect_kwargs
        )

        if postprocess_data_fn is not None:
            new_data = postprocess_data_fn(new_data)

        # Store the freshly collected data, then let the buffer drop its oldest entries if needed.
        replay_buffer.push_game_segments(new_data)
        replay_buffer.remove_oldest_data_to_fit()
    finally:
        # Always hand the collector back to the real policy, even when collection,
        # post-processing or the buffer update raised; otherwise a caller that
        # recovers from the error would keep collecting with the random policy.
        collector.reset_policy(policy.collect_mode)
|
|
|
|
|
def log_buffer_memory_usage(train_iter: int, buffer: "GameBuffer", writer: SummaryWriter) -> None:
    """
    Overview:
        Write buffer statistics and memory figures to ``writer`` at step ``train_iter``: the buffer's \
        ``num_of_collected_episodes``, the lengths of ``game_segment_buffer`` and \
        ``game_segment_game_pos_look_up``, the ``asizeof`` size of ``game_segment_buffer`` in MB, and the \
        RSS of the current process in MB.
    Arguments:
        - train_iter (:obj:`int`): The current training iteration, used as the scalar step.
        - buffer (:obj:`GameBuffer`): The game buffer whose statistics are logged.
        - writer (:obj:`SummaryWriter`): The TensorBoard writer receiving the scalars.
    """
    # Number of bytes in one megabyte (MiB); both memory figures are reported in this unit.
    bytes_per_mb = 1024 * 1024

    # Size counters of the buffer.
    writer.add_scalar('Buffer/num_of_all_collected_episodes', buffer.num_of_collected_episodes, train_iter)
    writer.add_scalar('Buffer/num_of_game_segments', len(buffer.game_segment_buffer), train_iter)
    writer.add_scalar('Buffer/num_of_transitions', len(buffer.game_segment_game_pos_look_up), train_iter)

    # Memory taken by the stored game segments, as measured by pympler's asizeof.
    segments = buffer.game_segment_buffer
    segments_mb = asizeof(segments) / bytes_per_mb
    writer.add_scalar('Buffer/memory_usage/game_segment_buffer', segments_mb, train_iter)

    # Resident set size of the process running this code.
    rss_bytes = psutil.Process(os.getpid()).memory_info().rss
    rss_mb = rss_bytes / bytes_per_mb
    writer.add_scalar('Buffer/memory_usage/process', rss_mb, train_iter)
|
|