"# ML-Agents run with Stable Baselines 3\n",
"## Setup"
"#@title Install Rendering Dependencies { display-mode: \"form\" }\n",
"#@markdown (You only need to run this code when using Colab's hosted runtime)\n",
"import os\n",
"from IPython.display import HTML, display\n",
"def progress(value, max=100):\n",
" return HTML(\"\"\"\n",
" \n",
" \"\"\".format(value=value, max=max))\n",
"pro_bar = display(progress(0, 100), display_id=True)\n",
" import google.colab\n",
" INSTALL_XVFB = True\n",
"except ImportError:\n",
" with open('frame-buffer', 'w') as writefile:\n",
" writefile.write(\"\"\"#taken from https://gist.github.com/jterrace/2911875\n",
"XVFBARGS=\":1 -screen 0 1024x768x24 -ac +extension GLX +render -noreset\"\n",
"case \"$1\" in\n",
" start)\n",
" echo -n \"Starting virtual X frame buffer: Xvfb\"\n",
" /sbin/start-stop-daemon --start --quiet --pidfile $PIDFILE --make-pidfile --background --exec $XVFB -- $XVFBARGS\n",
" echo \".\"\n",
" ;;\n",
" stop)\n",
" echo -n \"Stopping virtual X frame buffer: Xvfb\"\n",
" /sbin/start-stop-daemon --stop --quiet --pidfile $PIDFILE\n",
" rm $PIDFILE\n",
" echo \".\"\n",
" ;;\n",
" restart)\n",
" $0 stop\n",
" $0 start\n",
" ;;\n",
" *)\n",
" echo \"Usage: /etc/init.d/xvfb {start|stop|restart}\"\n",
" exit 1\n",
"exit 0\n",
" \"\"\")\n",
" !sudo apt-get update\n",
" pro_bar.update(progress(10, 100))\n",
" !sudo DEBIAN_FRONTEND=noninteractive apt install -y daemon wget gdebi-core build-essential libfontenc1 libfreetype6 xorg-dev xorg\n",
" pro_bar.update(progress(20, 100))\n",
" !wget http://security.ubuntu.com/ubuntu/pool/main/libx/libxfont/libxfont1_1.5.1-1ubuntu0.16.04.4_amd64.deb 2>&1\n",
" pro_bar.update(progress(30, 100))\n",
" !wget --output-document xvfb.deb http://security.ubuntu.com/ubuntu/pool/universe/x/xorg-server/xvfb_1.18.4-0ubuntu0.12_amd64.deb 2>&1\n",
" pro_bar.update(progress(40, 100))\n",
" !sudo dpkg -i libxfont1_1.5.1-1ubuntu0.16.04.4_amd64.deb 2>&1\n",
" pro_bar.update(progress(50, 100))\n",
" !sudo dpkg -i xvfb.deb 2>&1\n",
" pro_bar.update(progress(70, 100))\n",
" !rm libxfont1_1.5.1-1ubuntu0.16.04.4_amd64.deb\n",
" pro_bar.update(progress(80, 100))\n",
" !rm xvfb.deb\n",
" pro_bar.update(progress(90, 100))\n",
" !bash frame-buffer start\n",
" os.environ[\"DISPLAY\"] = \":1\"\n",
"pro_bar.update(progress(100, 100))"
"### Installing ml-agents"
" import mlagents\n",
" print(\"ml-agents already installed\")\n",
"except ImportError:\n",
" !python -m pip install -q mlagents==0.30.0\n",
" print(\"Installed ml-agents\")"
"## Run the Environment"
"### Import dependencies and set some high level parameters."
"from dataclasses import dataclass\n",
"from pathlib import Path\n",
"from typing import Callable, Any\n",
"import gym\n",
"from gym import Env\n",
"from stable_baselines3 import PPO\n",
"from stable_baselines3.common.vec_env import VecMonitor, VecEnv, SubprocVecEnv\n",
"from supersuit import observation_lambda_v0\n",
"from mlagents_envs.environment import UnityEnvironment\n",
"from mlagents_envs.envs.unity_gym_env import UnityToGymWrapper\n",
"from mlagents_envs.registry import UnityEnvRegistry, default_registry\n",
"from mlagents_envs.side_channel.engine_configuration_channel import (\n",
" EngineConfig,\n",
" EngineConfigurationChannel,\n",
"NUM_ENVS = 8"
"### Environment and Engine Configurations"
"# Default values from CLI (See cli_utils.py)\n",
"DEFAULT_ENGINE_CONFIG = EngineConfig(\n",
" width=84,\n",
" height=84,\n",
" quality_level=4,\n",
" time_scale=20,\n",
" target_frame_rate=-1,\n",
" capture_frame_rate=60,\n",
"# Some config subset of an actual config.yaml file for MLA.\n",
"class LimitedConfig:\n",
" # The local path to a Unity executable or the name of an entry in the registry.\n",
" env_path_or_name: str\n",
" base_port: int\n",
" base_seed: int = 0\n",
" num_env: int = 1\n",
" engine_config: EngineConfig = DEFAULT_ENGINE_CONFIG\n",
" visual_obs: bool = False\n",
" # TODO: Decide if we should just tell users to always use MultiInputPolicy so we can simplify the user workflow.\n",
" # WARNING: Make sure to use MultiInputPolicy if you turn this on.\n",
" allow_multiple_obs: bool = False\n",
" env_registry: UnityEnvRegistry = default_registry"
"### Unity Environment SB3 Factory"
"def _unity_env_from_path_or_registry(\n",
"    env: str, registry: UnityEnvRegistry, **kwargs: Any\n",
") -> UnityEnvironment:\n",
"    \"\"\"Create a UnityEnvironment from a local build path or a registry entry name.\n",
"\n",
"    Args:\n",
"        env: Path to a local Unity executable, or the name of a registry entry.\n",
"        registry: Registry consulted when `env` is not an existing local path.\n",
"        **kwargs: Forwarded unchanged to `UnityEnvironment` or to the registry entry's `make`.\n",
"\n",
"    Returns:\n",
"        The environment built from the local path if it exists, otherwise from the registry.\n",
"\n",
"    Raises:\n",
"        ValueError: If `env` is neither an existing path nor a key of `registry`.\n",
"    \"\"\"\n",
"    # An existing local path takes precedence over a registry entry of the same name.\n",
"    local_build = Path(env).expanduser().absolute()\n",
"    if local_build.exists():\n",
"        # The caller's original string (not the expanded path) is what gets passed on.\n",
"        return UnityEnvironment(file_name=env, **kwargs)\n",
"    if env not in registry:\n",
"        raise ValueError(f\"Environment '{env}' wasn't a local path or registry entry\")\n",
"    return registry.get(env).make(**kwargs)\n",
" \n",
"def make_mla_sb3_env(config: LimitedConfig, **kwargs: Any) -> VecEnv:\n",
"    \"\"\"Build a vectorized Stable Baselines 3 environment from Unity ML-Agents environments.\n",
"\n",
"    One environment factory is created per worker id in `range(config.num_env)`. Each factory\n",
"    builds a `UnityEnvironment`, wraps it in `UnityToGymWrapper`, and adapts tuple observations\n",
"    so that Stable Baselines 3 can consume them.\n",
"\n",
"    Args:\n",
"        config: Environment name/path, port, seed, engine and observation settings.\n",
"        **kwargs: Extra keyword arguments forwarded to the Unity environment constructor.\n",
"            Any `side_channels` given here are kept, and an `EngineConfigurationChannel`\n",
"            configured from `config.engine_config` is appended for every environment.\n",
"\n",
"    Returns:\n",
"        A `SubprocVecEnv` built from the `config.num_env` environment factories.\n",
"    \"\"\"\n",
"\n",
"    def handle_obs(obs, space):\n",
"        # Convert an observation so that it matches the space returned by handle_obs_space.\n",
"        if isinstance(space, gym.spaces.Tuple):\n",
"            if len(space) == 1:\n",
"                # A single-element tuple is unwrapped to the bare observation.\n",
"                return obs[0]\n",
"            # Turn the tuple into a dict (stable baselines can handle spaces.Dict but not spaces.Tuple).\n",
"            return {str(i): v for i, v in enumerate(obs)}\n",
"        return obs\n",
"\n",
"    def handle_obs_space(space):\n",
"        # Convert the observation space with the same rules as handle_obs.\n",
"        if isinstance(space, gym.spaces.Tuple):\n",
"            if len(space) == 1:\n",
"                return space[0]\n",
"            # Turn the tuple into a dict (stable baselines can handle spaces.Dict but not spaces.Tuple).\n",
"            return gym.spaces.Dict({str(i): v for i, v in enumerate(space)})\n",
"        return space\n",
"\n",
"    def create_env(env: str, worker_id: int) -> Callable[[], Env]:\n",
"        # Return a zero-argument factory so the environment is only built when it is called.\n",
"        def _f() -> Env:\n",
"            engine_configuration_channel = EngineConfigurationChannel()\n",
"            engine_configuration_channel.set_configuration(config.engine_config)\n",
"            # Work on a per-environment copy: writing back into the shared `kwargs` would make\n",
"            # every factory called in the same process accumulate the channels created by the\n",
"            # previous calls.\n",
"            env_kwargs = dict(kwargs)\n",
"            env_kwargs[\"side_channels\"] = list(kwargs.get(\"side_channels\") or []) + [\n",
"                engine_configuration_channel\n",
"            ]\n",
"            unity_env = _unity_env_from_path_or_registry(\n",
"                env=env,\n",
"                registry=config.env_registry,\n",
"                worker_id=worker_id,\n",
"                base_port=config.base_port,\n",
"                # Offset the seed by the worker id so each environment gets a different seed.\n",
"                seed=config.base_seed + worker_id,\n",
"                **env_kwargs,\n",
"            )\n",
"            new_env = UnityToGymWrapper(\n",
"                unity_env=unity_env,\n",
"                uint8_visual=config.visual_obs,\n",
"                allow_multiple_obs=config.allow_multiple_obs,\n",
"            )\n",
"            new_env = observation_lambda_v0(new_env, handle_obs, handle_obs_space)\n",
"            return new_env\n",
"\n",
"        return _f\n",
"\n",
"    env_facts = [\n",
"        create_env(config.env_path_or_name, worker_id=x) for x in range(config.num_env)\n",
"    ]\n",
"    return SubprocVecEnv(env_facts)"
"### Start Environment from the registry"
"# -----------------\n",
"# This code is used to close an env that might not have been closed before\n",
" env.close()\n",
" pass\n",
"# -----------------\n",
"env = make_mla_sb3_env(\n",
" config=LimitedConfig(\n",
" env_path_or_name='Basic', # Can use any name from a registry or a path to your own unity build.\n",
" base_port=6006,\n",
" base_seed=42,\n",
" num_env=NUM_ENVS,\n",
" allow_multiple_obs=True,\n",
" ),\n",
" no_graphics=True, # Set to False if you are running locally and want to watch the environments move around as they train.\n",
"### Create the model"
"# 250K should train to a reward ~= 0.90 for the \"Basic\" environment.\n",
"# We set the value lower here to demonstrate just a small amount of training.\n",
"BATCH_SIZE = 32\n",
"BUFFER_SIZE = 256\n",
"UPDATES = 50\n",
"BETA = 0.0005\n",
"N_EPOCHS = 3 \n",
"# Helps gather stats for our eval() calls later so we can see reward stats.\n",
"env = VecMonitor(env)\n",
"# Policy and value function with 2 layers of 32 units each and no shared layers.\n",
"policy_kwargs = {\"net_arch\" : [{\"pi\": [32,32], \"vf\": [32,32]}]}\n",
"model = PPO(\n",
" \"MlpPolicy\",\n",
" env,\n",
" verbose=1,\n",
" learning_rate=lambda progress: 0.0003 * (1.0 - progress),\n",
" clip_range=lambda progress: 0.2 * (1.0 - progress),\n",
" clip_range_vf=lambda progress: 0.2 * (1.0 - progress),\n",
" # Uncomment this if you want to log tensorboard results when running this notebook locally.\n",
" # tensorboard_log=\"results\",\n",
" policy_kwargs=policy_kwargs,\n",
" n_steps=int(STEPS_PER_UPDATE),\n",
" batch_size=BATCH_SIZE,\n",
" n_epochs=N_EPOCHS,\n",
" ent_coef=BETA,\n",
"### Train the model"
"# 0.93 is considered solved for the Basic environment\n",
"for i in range(UPDATES):\n",
" print(f\"Training round {i + 1}/{UPDATES}\")\n",
" # NOTE: reset_num_timesteps should only be True on the first round so that TensorBoard logs stay consistent.\n",
" model.learn(total_timesteps=BUFFER_SIZE, reset_num_timesteps=(i == 0))\n",
" model.policy.eval()"
"### Close the environment\n",
"Frees up the ports being used."
"print(\"Closed environment\")\n"
