import datetime
from typing import Dict, NamedTuple, List, Any, Optional, Callable, Set
import cloudpickle
import enum
import time
from mlagents_envs.environment import UnityEnvironment
from mlagents_envs.exception import (
UnityCommunicationException,
UnityTimeOutException,
UnityEnvironmentException,
UnityCommunicatorStoppedException,
)
from multiprocessing import Process, Pipe, Queue
from multiprocessing.connection import Connection
from queue import Empty as EmptyQueueException
from mlagents_envs.base_env import BaseEnv, BehaviorName, BehaviorSpec
from mlagents_envs import logging_util
from mlagents.trainers.env_manager import EnvManager, EnvironmentStep, AllStepResult
from mlagents.trainers.settings import TrainerSettings
from mlagents_envs.timers import (
TimerNode,
timed,
hierarchical_timer,
reset_timers,
get_timer_root,
)
from mlagents.trainers.settings import ParameterRandomizationSettings, RunOptions
from mlagents.trainers.action_info import ActionInfo
from mlagents_envs.side_channel.environment_parameters_channel import (
EnvironmentParametersChannel,
)
from mlagents_envs.side_channel.engine_configuration_channel import (
EngineConfigurationChannel,
EngineConfig,
)
from mlagents_envs.side_channel.stats_side_channel import (
EnvironmentStats,
StatsSideChannel,
)
from mlagents.trainers.training_analytics_side_channel import (
TrainingAnalyticsSideChannel,
)
from mlagents_envs.side_channel.side_channel import SideChannel
logger = logging_util.get_logger(__name__)
WORKER_SHUTDOWN_TIMEOUT_S = 10
class EnvironmentCommand(enum.Enum):
STEP = 1
BEHAVIOR_SPECS = 2
ENVIRONMENT_PARAMETERS = 3
RESET = 4
CLOSE = 5
ENV_EXITED = 6
CLOSED = 7
TRAINING_STARTED = 8
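# Requests (EnvironmentRequest) travel from the main process to a worker over its Pipe.
# Responses (EnvironmentResponse) come back either over the same Pipe (BEHAVIOR_SPECS,
# RESET, ENV_EXITED) or over the shared step Queue (STEP, ENV_EXITED, CLOSED).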
class EnvironmentRequest(NamedTuple):
cmd: EnvironmentCommand
payload: Any = None
class EnvironmentResponse(NamedTuple):
cmd: EnvironmentCommand
worker_id: int
payload: Any
class StepResponse(NamedTuple):
all_step_result: AllStepResult
timer_root: Optional[TimerNode]
environment_stats: EnvironmentStats
class UnityEnvWorker:
def __init__(self, process: Process, worker_id: int, conn: Connection):
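        """
        Bookkeeping for a single environment worker process.
        :param process: The child process running the `worker` loop.
        :param worker_id: The index of this environment worker.
        :param conn: The parent end of the Pipe used to communicate with the child process.
        """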
self.process = process
self.worker_id = worker_id
self.conn = conn
self.previous_step: EnvironmentStep = EnvironmentStep.empty(worker_id)
self.previous_all_action_info: Dict[str, ActionInfo] = {}
self.waiting = False
self.closed = False
def send(self, cmd: EnvironmentCommand, payload: Any = None) -> None:
try:
req = EnvironmentRequest(cmd, payload)
self.conn.send(req)
except (BrokenPipeError, EOFError):
raise UnityCommunicationException("UnityEnvironment worker: send failed.")
def recv(self) -> EnvironmentResponse:
try:
response: EnvironmentResponse = self.conn.recv()
if response.cmd == EnvironmentCommand.ENV_EXITED:
env_exception: Exception = response.payload
raise env_exception
return response
except (BrokenPipeError, EOFError):
raise UnityCommunicationException("UnityEnvironment worker: recv failed.")
def request_close(self):
try:
self.conn.send(EnvironmentRequest(EnvironmentCommand.CLOSE))
except (BrokenPipeError, EOFError):
logger.debug(
f"UnityEnvWorker {self.worker_id} got exception trying to close."
)
def worker(
parent_conn: Connection,
step_queue: Queue,
pickled_env_factory: str,
worker_id: int,
run_options: RunOptions,
log_level: int = logging_util.INFO,
) -> None:
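    """
    Entry point for the child process. Unpickles the environment factory, creates the
    side channels, builds the UnityEnvironment, and then services EnvironmentRequests
    from the parent connection until a CLOSE command or an environment error arrives.
    """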
env_factory: Callable[
[int, List[SideChannel]], UnityEnvironment
] = cloudpickle.loads(pickled_env_factory)
env_parameters = EnvironmentParametersChannel()
engine_config = EngineConfig(
width=run_options.engine_settings.width,
height=run_options.engine_settings.height,
quality_level=run_options.engine_settings.quality_level,
time_scale=run_options.engine_settings.time_scale,
target_frame_rate=run_options.engine_settings.target_frame_rate,
capture_frame_rate=run_options.engine_settings.capture_frame_rate,
)
engine_configuration_channel = EngineConfigurationChannel()
engine_configuration_channel.set_configuration(engine_config)
stats_channel = StatsSideChannel()
training_analytics_channel: Optional[TrainingAnalyticsSideChannel] = None
if worker_id == 0:
training_analytics_channel = TrainingAnalyticsSideChannel()
env: UnityEnvironment = None
# Set log level. On some platforms, the logger isn't common with the
# main process, so we need to set it again.
logging_util.set_log_level(log_level)
def _send_response(cmd_name: EnvironmentCommand, payload: Any) -> None:
parent_conn.send(EnvironmentResponse(cmd_name, worker_id, payload))
def _generate_all_results() -> AllStepResult:
all_step_result: AllStepResult = {}
for brain_name in env.behavior_specs:
all_step_result[brain_name] = env.get_steps(brain_name)
return all_step_result
try:
side_channels = [env_parameters, engine_configuration_channel, stats_channel]
if training_analytics_channel is not None:
side_channels.append(training_analytics_channel)
env = env_factory(worker_id, side_channels)
if (
not env.academy_capabilities
or not env.academy_capabilities.trainingAnalytics
):
# Make sure we don't try to send training analytics if the environment doesn't know how to process
# them. This wouldn't be catastrophic, but would result in unknown SideChannel UUIDs being used.
training_analytics_channel = None
if training_analytics_channel:
training_analytics_channel.environment_initialized(run_options)
while True:
req: EnvironmentRequest = parent_conn.recv()
if req.cmd == EnvironmentCommand.STEP:
all_action_info = req.payload
for brain_name, action_info in all_action_info.items():
if len(action_info.agent_ids) > 0:
env.set_actions(brain_name, action_info.env_action)
env.step()
all_step_result = _generate_all_results()
                # The timers in this process are independent of those in the other worker
                # processes and the "main" process, so after we send back the root timer
                # we can safely clear them.
# Note that we could randomly return timers a fraction of the time if we wanted to reduce
# the data transferred.
# TODO get gauges from the workers and merge them in the main process too.
env_stats = stats_channel.get_and_reset_stats()
step_response = StepResponse(
all_step_result, get_timer_root(), env_stats
)
step_queue.put(
EnvironmentResponse(
EnvironmentCommand.STEP, worker_id, step_response
)
)
reset_timers()
elif req.cmd == EnvironmentCommand.BEHAVIOR_SPECS:
_send_response(EnvironmentCommand.BEHAVIOR_SPECS, env.behavior_specs)
elif req.cmd == EnvironmentCommand.ENVIRONMENT_PARAMETERS:
for k, v in req.payload.items():
if isinstance(v, ParameterRandomizationSettings):
v.apply(k, env_parameters)
elif req.cmd == EnvironmentCommand.TRAINING_STARTED:
behavior_name, trainer_config = req.payload
if training_analytics_channel:
training_analytics_channel.training_started(
behavior_name, trainer_config
)
elif req.cmd == EnvironmentCommand.RESET:
env.reset()
all_step_result = _generate_all_results()
_send_response(EnvironmentCommand.RESET, all_step_result)
elif req.cmd == EnvironmentCommand.CLOSE:
break
except (
KeyboardInterrupt,
UnityCommunicationException,
UnityTimeOutException,
UnityEnvironmentException,
UnityCommunicatorStoppedException,
) as ex:
logger.debug(f"UnityEnvironment worker {worker_id}: environment stopping.")
step_queue.put(
EnvironmentResponse(EnvironmentCommand.ENV_EXITED, worker_id, ex)
)
_send_response(EnvironmentCommand.ENV_EXITED, ex)
except Exception as ex:
logger.exception(
f"UnityEnvironment worker {worker_id}: environment raised an unexpected exception."
)
step_queue.put(
EnvironmentResponse(EnvironmentCommand.ENV_EXITED, worker_id, ex)
)
_send_response(EnvironmentCommand.ENV_EXITED, ex)
finally:
logger.debug(f"UnityEnvironment worker {worker_id} closing.")
if env is not None:
env.close()
logger.debug(f"UnityEnvironment worker {worker_id} done.")
parent_conn.close()
step_queue.put(EnvironmentResponse(EnvironmentCommand.CLOSED, worker_id, None))
step_queue.close()
class SubprocessEnvManager(EnvManager):
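    """
    EnvManager implementation that runs each UnityEnvironment in its own subprocess,
    steps them in parallel, and restarts workers that crash (subject to the configured
    restart limits).

    Minimal construction sketch (illustrative only; the factory below is an assumption,
    and `run_options` is whatever RunOptions the trainer was launched with):

        def factory(worker_id: int, side_channels: List[SideChannel]) -> UnityEnvironment:
            return UnityEnvironment(
                file_name="MyBuild", worker_id=worker_id, side_channels=side_channels
            )

        manager = SubprocessEnvManager(factory, run_options, n_env=4)
        ...  # drive it through the EnvManager interface (reset, stepping, policies)
        manager.close()
    """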
def __init__(
self,
env_factory: Callable[[int, List[SideChannel]], BaseEnv],
run_options: RunOptions,
n_env: int = 1,
):
super().__init__()
self.env_workers: List[UnityEnvWorker] = []
self.step_queue: Queue = Queue()
self.workers_alive = 0
self.env_factory = env_factory
self.run_options = run_options
self.env_parameters: Optional[Dict] = None
        # Each worker has a list of the timestamps at which it restarted within the current rate-limit window.
self.recent_restart_timestamps: List[List[datetime.datetime]] = [
[] for _ in range(n_env)
]
self.restart_counts: List[int] = [0] * n_env
for worker_idx in range(n_env):
self.env_workers.append(
self.create_worker(
worker_idx, self.step_queue, env_factory, run_options
)
)
self.workers_alive += 1
@staticmethod
def create_worker(
worker_id: int,
step_queue: Queue,
env_factory: Callable[[int, List[SideChannel]], BaseEnv],
run_options: RunOptions,
) -> UnityEnvWorker:
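        """
        Spawns a child process running the `worker` loop for the given worker_id and
        returns a UnityEnvWorker wrapping the parent end of the Pipe.
        """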
parent_conn, child_conn = Pipe()
# Need to use cloudpickle for the env factory function since function objects aren't picklable
# on Windows as of Python 3.6.
pickled_env_factory = cloudpickle.dumps(env_factory)
child_process = Process(
target=worker,
args=(
child_conn,
step_queue,
pickled_env_factory,
worker_id,
run_options,
logger.level,
),
)
child_process.start()
return UnityEnvWorker(child_process, worker_id, parent_conn)
def _queue_steps(self) -> None:
for env_worker in self.env_workers:
if not env_worker.waiting:
env_action_info = self._take_step(env_worker.previous_step)
env_worker.previous_all_action_info = env_action_info
env_worker.send(EnvironmentCommand.STEP, env_action_info)
env_worker.waiting = True
def _restart_failed_workers(self, first_failure: EnvironmentResponse) -> None:
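        """
        Handles an ENV_EXITED response: drains the step queue to collect any other
        concurrent failures, verifies each failed worker is allowed to restart, spawns
        replacement workers, and then resets all environments so no corrupted or
        outdated trajectories are used.
        """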
if first_failure.cmd != EnvironmentCommand.ENV_EXITED:
return
# Drain the step queue to make sure all workers are paused and we have found all concurrent errors.
# Pausing all training is needed since we need to reset all pending training steps as they could be corrupted.
other_failures: Dict[int, Exception] = self._drain_step_queue()
# TODO: Once we use python 3.9 switch to using the | operator to combine dicts.
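        # e.g. on 3.9+: failures = {first_failure.worker_id: first_failure.payload} | other_failures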
failures: Dict[int, Exception] = {
**{first_failure.worker_id: first_failure.payload},
**other_failures,
}
for worker_id, ex in failures.items():
self._assert_worker_can_restart(worker_id, ex)
logger.warning(f"Restarting worker[{worker_id}] after '{ex}'")
self.recent_restart_timestamps[worker_id].append(datetime.datetime.now())
self.restart_counts[worker_id] += 1
self.env_workers[worker_id] = self.create_worker(
worker_id, self.step_queue, self.env_factory, self.run_options
)
# The restarts were successful, clear all the existing training trajectories so we don't use corrupted or
# outdated data.
self.reset(self.env_parameters)
def _drain_step_queue(self) -> Dict[int, Exception]:
"""
Drains all steps out of the step queue and returns all exceptions from crashed workers.
This will effectively pause all workers so that they won't do anything until _queue_steps is called.
"""
all_failures = {}
workers_still_pending = {w.worker_id for w in self.env_workers if w.waiting}
deadline = datetime.datetime.now() + datetime.timedelta(minutes=1)
while workers_still_pending and deadline > datetime.datetime.now():
try:
while True:
step: EnvironmentResponse = self.step_queue.get_nowait()
if step.cmd == EnvironmentCommand.ENV_EXITED:
workers_still_pending.add(step.worker_id)
all_failures[step.worker_id] = step.payload
else:
workers_still_pending.remove(step.worker_id)
self.env_workers[step.worker_id].waiting = False
except EmptyQueueException:
pass
if deadline < datetime.datetime.now():
still_waiting = {w.worker_id for w in self.env_workers if w.waiting}
raise TimeoutError(f"Workers {still_waiting} stuck in waiting state")
return all_failures
def _assert_worker_can_restart(self, worker_id: int, exception: Exception) -> None:
"""
Checks if we can recover from an exception from a worker.
        If the restart limit is exceeded, or the exception is not a recoverable type,
        the exception is re-raised.
"""
if (
isinstance(exception, UnityCommunicationException)
or isinstance(exception, UnityTimeOutException)
or isinstance(exception, UnityEnvironmentException)
or isinstance(exception, UnityCommunicatorStoppedException)
):
if self._worker_has_restart_quota(worker_id):
return
else:
logger.error(
f"Worker {worker_id} exceeded the allowed number of restarts."
)
raise exception
raise exception
def _worker_has_restart_quota(self, worker_id: int) -> bool:
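        """
        Returns True if the worker is still within both restart limits: the lifetime
        cap (max_lifetime_restarts) and the rate limit (restarts_rate_limit_n restarts
        per restarts_rate_limit_period_s). A limit of -1 disables that check.
        """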
self._drop_old_restart_timestamps(worker_id)
max_lifetime_restarts = self.run_options.env_settings.max_lifetime_restarts
max_limit_check = (
max_lifetime_restarts == -1
or self.restart_counts[worker_id] < max_lifetime_restarts
)
rate_limit_n = self.run_options.env_settings.restarts_rate_limit_n
rate_limit_check = (
rate_limit_n == -1
or len(self.recent_restart_timestamps[worker_id]) < rate_limit_n
)
return rate_limit_check and max_limit_check
def _drop_old_restart_timestamps(self, worker_id: int) -> None:
"""
Drops environment restart timestamps that are outside of the current window.
"""
def _filter(t: datetime.datetime) -> bool:
return t > datetime.datetime.now() - datetime.timedelta(
seconds=self.run_options.env_settings.restarts_rate_limit_period_s
)
self.recent_restart_timestamps[worker_id] = list(
filter(_filter, self.recent_restart_timestamps[worker_id])
)
def _step(self) -> List[EnvironmentStep]:
# Queue steps for any workers which aren't in the "waiting" state.
self._queue_steps()
worker_steps: List[EnvironmentResponse] = []
step_workers: Set[int] = set()
# Poll the step queue for completed steps from environment workers until we retrieve
# 1 or more, which we will then return as StepInfos
while len(worker_steps) < 1:
try:
while True:
step: EnvironmentResponse = self.step_queue.get_nowait()
if step.cmd == EnvironmentCommand.ENV_EXITED:
# If even one env exits try to restart all envs that failed.
self._restart_failed_workers(step)
# Clear state and restart this function.
worker_steps.clear()
step_workers.clear()
self._queue_steps()
elif step.worker_id not in step_workers:
self.env_workers[step.worker_id].waiting = False
worker_steps.append(step)
step_workers.add(step.worker_id)
except EmptyQueueException:
pass
step_infos = self._postprocess_steps(worker_steps)
return step_infos
def _reset_env(self, config: Optional[Dict] = None) -> List[EnvironmentStep]:
while any(ew.waiting for ew in self.env_workers):
if not self.step_queue.empty():
step = self.step_queue.get_nowait()
self.env_workers[step.worker_id].waiting = False
# Send config to environment
self.set_env_parameters(config)
# First enqueue reset commands for all workers so that they reset in parallel
for ew in self.env_workers:
ew.send(EnvironmentCommand.RESET, config)
# Next (synchronously) collect the reset observations from each worker in sequence
for ew in self.env_workers:
ew.previous_step = EnvironmentStep(ew.recv().payload, ew.worker_id, {}, {})
return list(map(lambda ew: ew.previous_step, self.env_workers))
    def set_env_parameters(self, config: Optional[Dict] = None) -> None:
"""
Sends environment parameter settings to C# via the
        EnvironmentParametersSideChannel for each worker.
:param config: Dict of environment parameter keys and values
"""
self.env_parameters = config
for ew in self.env_workers:
ew.send(EnvironmentCommand.ENVIRONMENT_PARAMETERS, config)
def on_training_started(
self, behavior_name: str, trainer_settings: TrainerSettings
) -> None:
"""
        Handle training starting for a new behavior type. Generally nothing is necessary here.
:param behavior_name:
:param trainer_settings:
:return:
"""
for ew in self.env_workers:
ew.send(
EnvironmentCommand.TRAINING_STARTED, (behavior_name, trainer_settings)
)
@property
def training_behaviors(self) -> Dict[BehaviorName, BehaviorSpec]:
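        """
        Queries every worker for its behavior specs and merges them into a single
        mapping of BehaviorName to BehaviorSpec.
        """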
result: Dict[BehaviorName, BehaviorSpec] = {}
for worker in self.env_workers:
worker.send(EnvironmentCommand.BEHAVIOR_SPECS)
result.update(worker.recv().payload)
return result
def close(self) -> None:
logger.debug("SubprocessEnvManager closing.")
for env_worker in self.env_workers:
env_worker.request_close()
# Pull messages out of the queue until every worker has CLOSED or we time out.
deadline = time.time() + WORKER_SHUTDOWN_TIMEOUT_S
while self.workers_alive > 0 and time.time() < deadline:
try:
step: EnvironmentResponse = self.step_queue.get_nowait()
env_worker = self.env_workers[step.worker_id]
if step.cmd == EnvironmentCommand.CLOSED and not env_worker.closed:
env_worker.closed = True
self.workers_alive -= 1
# Discard all other messages.
except EmptyQueueException:
pass
self.step_queue.close()
# Sanity check to kill zombie workers and report an issue if they occur.
if self.workers_alive > 0:
logger.error("SubprocessEnvManager had workers that didn't signal shutdown")
for env_worker in self.env_workers:
if not env_worker.closed and env_worker.process.is_alive():
env_worker.process.terminate()
logger.error(
"A SubprocessEnvManager worker did not shut down correctly so it was forcefully terminated."
)
self.step_queue.join_thread()
def _postprocess_steps(
self, env_steps: List[EnvironmentResponse]
) -> List[EnvironmentStep]:
step_infos = []
timer_nodes = []
for step in env_steps:
payload: StepResponse = step.payload
env_worker = self.env_workers[step.worker_id]
new_step = EnvironmentStep(
payload.all_step_result,
step.worker_id,
env_worker.previous_all_action_info,
payload.environment_stats,
)
step_infos.append(new_step)
env_worker.previous_step = new_step
if payload.timer_root:
timer_nodes.append(payload.timer_root)
if timer_nodes:
with hierarchical_timer("workers") as main_timer_node:
for worker_timer_node in timer_nodes:
main_timer_node.merge(
worker_timer_node, root_name="worker_root", is_parallel=True
)
return step_infos
@timed
def _take_step(self, last_step: EnvironmentStep) -> Dict[BehaviorName, ActionInfo]:
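        """
        Asks the registered policy for each behavior to produce actions for the agents
        in this worker's previous step result.
        """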
all_action_info: Dict[str, ActionInfo] = {}
for brain_name, step_tuple in last_step.current_all_step_result.items():
if brain_name in self.policies:
all_action_info[brain_name] = self.policies[brain_name].get_action(
step_tuple[0], last_step.worker_id
)
return all_action_info