Spaces:

zjowowen
/

gomoku

Sleeping

File size: 70,855 Bytes

079c32c

import copy
import enum
from collections import namedtuple
from operator import attrgetter
from functools import reduce

import numpy as np
import math
import random
from ditk import logging
from easydict import EasyDict
import pysc2.env.sc2_env as sc2_env
from pysc2.env.sc2_env import SC2Env, Agent, MAX_STEP_COUNT, get_default, crop_and_deduplicate_names
from pysc2.lib import protocol
from s2clientprotocol import common_pb2 as sc_common
from s2clientprotocol import debug_pb2 as d_pb
from s2clientprotocol import sc2api_pb2 as sc_pb
from ding.envs import BaseEnv
from ding.envs.common.env_element import EnvElement, EnvElementInfo
from ding.utils import ENV_REGISTRY, deep_merge_dicts

from .smac_map import get_map_params
from .smac_action import SMACAction, distance
from .smac_reward import SMACReward

# Maps the single-letter race codes found in the map parameters ("a_race" / "b_race")
# to the corresponding s2clientprotocol race enum values.
races = {
    "R": sc_common.Random,
    "P": sc_common.Protoss,
    "T": sc_common.Terran,
    "Z": sc_common.Zerg,
}

# Keys used in the per-player dictionaries (actions, observations, rewards, infos).
ORIGINAL_AGENT = "me"
OPPONENT_AGENT = "opponent"

# Map paths that ``SMACEnv._create_join`` accepts when running in two-player mode.
SUPPORT_MAPS = [
    "SMAC_Maps_two_player/3s5z.SC2Map",
    "SMAC_Maps_two_player/3m.SC2Map",
    "GBU_Maps/infestor_viper.sc2map",
]

# Number of total environment steps between two forced full restarts of the game
# (checked against ``_total_steps`` in ``SMACEnv.reset``).
FORCE_RESTART_INTERVAL = 50000


class Direction(enum.IntEnum):
    """Integer codes (0-3) for the four compass directions."""

    NORTH, SOUTH, EAST, WEST = range(4)


@ENV_REGISTRY.register('smac')
class SMACEnv(SC2Env, BaseEnv):
    """
    This environment provides the interface for both single agent and multiple agents (two players) in
    SC2 environment.
    """

    # Record returned by ``step``.
    SMACTimestep = namedtuple('SMACTimestep', ['obs', 'reward', 'done', 'info', 'episode_steps'])
    # Static description of the environment (agent count, spaces and episode limit).
    SMACEnvInfo = namedtuple('SMACEnvInfo', ['agent_num', 'obs_space', 'act_space', 'rew_space', 'episode_limit'])
    # Default configuration; ``__init__`` deep-merges the user supplied cfg on top of it.
    # NOTE: ``map_name`` has no default here but is read by ``__init__``.
    config = dict(
        # True selects the "agent_vs_agent" game type, False selects "game_vs_bot"
        two_player=False,
        # when True, ``flatten_observation`` is switched off in ``__init__``
        mirror_opponent=False,
        # forwarded to SMACReward
        reward_type="original",
        # None, or an int denoting the number of replays to save
        save_replay_episodes=None,
        # difficulty handed to the built-in bot in ``_get_players``
        difficulty=7,
        # together with ``reward_win``, used to compute ``max_reward``
        reward_death_value=10,
        reward_win=200,
        # when True, observations also carry 'agent_alone_state' and 'agent_alone_padding_state'
        obs_alone=False,
        # forwarded to ``SC2Env.__init__``
        game_steps_per_episode=None,
        # forwarded to SMACReward
        reward_only_positive=True,
        death_mask=False,
        # when True, 'global_state' comes from ``get_global_special_state``
        special_global_state=False,
        # add map's center location point or not
        add_center_xy=True,
        # when True, 'global_state' is produced by ``get_obs`` instead of ``get_state``
        independent_obs=False,
        # add agent's id information or not in special global state
        state_agent_id=True,
    )

    def __init__(
        self,
        cfg,
    ):
        """
        Set up the environment's bookkeeping state from ``cfg``.

        ``cfg`` is deep-merged on top of the class-level ``config`` defaults. This method does not
        call ``_launch``; ``_launch_env_flag`` is initialised to True so that the first ``reset``
        launches the game.

        Arguments:
            - cfg: user configuration; must provide ``map_name`` (there is no default for it).
        """
        cfg = deep_merge_dicts(EasyDict(self.config), cfg)
        self.cfg = cfg
        self.save_replay_episodes = cfg.save_replay_episodes
        assert (self.save_replay_episodes is None) or isinstance(
            self.save_replay_episodes, int
        )  # Denote the number of replays to save
        self.two_player = cfg.two_player
        self.difficulty = cfg.difficulty
        self.obs_alone = cfg.obs_alone
        self.game_steps_per_episode = cfg.game_steps_per_episode

        map_name = cfg.map_name
        assert map_name is not None
        map_params = get_map_params(map_name)
        self.reward_only_positive = cfg.reward_only_positive
        # NOTE(review): the next two assignments repeat the ones made a few lines above.
        self.difficulty = cfg.difficulty
        self.obs_alone = cfg.obs_alone
        # ``_get_players`` reads ``self.difficulty``, so it must be set before this call.
        self.players, self.num_players = self._get_players(
            "agent_vs_agent" if self.two_player else "game_vs_bot",
            player1_race=map_params["a_race"],
            player2_race=map_params["b_race"]
        )
        self._map_name = map_name

        # SMAC used
        self.n_agents = map_params["n_agents"]
        self.n_enemies = map_params["n_enemies"]
        self.episode_limit = map_params["limit"]

        self._agent_race = map_params["a_race"]
        self._bot_race = map_params["b_race"]
        # One shield bit is used for a side only when that side's race code is "P" (Protoss).
        self.shield_bits_ally = 1 if self._agent_race == "P" else 0
        self.shield_bits_enemy = 1 if self._bot_race == "P" else 0
        self.unit_type_bits = map_params["unit_type_bits"]
        self.map_type = map_params["map_type"]

        # Unit dictionaries (index -> raw unit), filled by ``init_units``.
        self.agents = {}
        self.enemies = {}
        # Step / episode counters.
        self._episode_count = 0
        self._episode_steps = 0
        self._total_steps = 0
        self._next_reset_steps = FORCE_RESTART_INTERVAL

        self._obs = None
        # Running statistics.
        self.battles_won = 0
        self.battles_game = 0
        self.timeouts = 0
        self.force_restarts = 0
        self.last_stats = None

        # Unit type ids of the original agent's side; all start at 0.
        self._min_unit_type = 0
        self.marine_id = self.marauder_id = self.medivac_id = 0
        self.hydralisk_id = self.zergling_id = self.baneling_id = 0
        self.stalker_id = self.colossus_id = self.zealot_id = 0

        self.add_center_xy = cfg.add_center_xy
        self.state_agent_id = cfg.state_agent_id
        self.death_mask = cfg.death_mask
        self.special_global_state = cfg.special_global_state

        # reward
        self.reward_death_value = cfg.reward_death_value
        self.reward_win = cfg.reward_win
        self.reward_defeat = 0
        self.reward_negative_scale = 0.5
        self.reward_type = cfg.reward_type
        self.max_reward = (self.n_enemies * self.reward_death_value + self.reward_win)
        # Observation / state feature switches; these are hard-coded rather than read from cfg.
        self.obs_pathing_grid = False
        self.obs_own_health = True
        self.obs_all_health = True
        self.obs_instead_of_state = False
        self.obs_last_action = True
        self.obs_terrain_height = False
        self.obs_timestep_number = False
        self.state_last_action = True
        self.state_timestep_number = False
        if self.obs_all_health:
            self.obs_own_health = True
        self.n_obs_pathing = 8
        self.n_obs_height = 9
        self._move_amount = 2
        self.continuing_episode = False

        self._seed = None
        # True means the next ``reset`` has to launch the game.
        self._launch_env_flag = True
        self.just_force_restarts = False

        # Set to false if you need structured observation / state
        self.flatten_observation = True
        self.mirror_opponent = cfg.mirror_opponent
        if self.mirror_opponent:
            self.flatten_observation = False

        # Opponent related variables
        self.battles_won_opponent = 0
        self.battles_defeat = 0
        self._min_unit_type_opponent = 0
        self.marine_id_opponent = self.marauder_id_opponent = self.medivac_id_opponent = 0
        self.hydralisk_id_opponent = self.zergling_id_opponent = self.baneling_id_opponent = 0
        self.stalker_id_opponent = self.colossus_id_opponent = self.zealot_id_opponent = 0
        # Map geometry; overwritten with real values in ``_launch``.
        self.max_distance_x = 0
        self.max_distance_y = 0
        self.map_x = 0
        self.map_y = 0

        self.previous_ally_units = None
        self.previous_enemy_units = None

        self.independent_obs = cfg.independent_obs

        self.action_helper = SMACAction(self.n_agents, self.n_enemies, self.two_player, self.mirror_opponent)
        self.reward_helper = SMACReward(
            self.n_agents,
            self.n_enemies,
            self.two_player,
            self.reward_type,
            self.max_reward,
            reward_only_positive=self.reward_only_positive
        )

        self._observation_space = self.get_obs_space()
        # NOTE(review): the trailing commas below make ``_action_space`` and ``_reward_space``
        # one-element tuples wrapping the helper's info; this looks accidental - confirm against
        # the consumers of these attributes before changing it.
        self._action_space = self.action_helper.info(),
        self._reward_space = self.reward_helper.info(),

    def seed(self, seed, dynamic_seed=False):
        """
        Store ``seed``; ``_launch`` later passes it to ``SC2Env.__init__`` as ``random_seed``.

        ``dynamic_seed`` is accepted but not used by this method.
        """
        self._seed = seed

    def _create_join(self):
        """
        Create the game on the host controller and make every agent controller join it.

        According to the inline comment this copies and overwrites the original pysc2
        implementation. Differences visible here: in two-player mode every candidate map is
        redirected to the ``SMAC_Maps_two_player`` directory and checked against ``SUPPORT_MAPS``,
        and ``self._features`` is left as None instead of being built from the game info.

        Raises:
            AssertionError: in two-player mode, if a map path is not listed in ``SUPPORT_MAPS``.
        """
        if self.two_player:
            for m in self._maps:
                m.directory = "SMAC_Maps_two_player"
                map_path = m.path
                # The message used to contain a bare "{}" placeholder that was never filled in.
                assert map_path in SUPPORT_MAPS, (
                    "We only support the following maps: {}. Please move "
                    "the maps in evaluate/sources/SMAC_Maps_two_player "
                    "to the maps folder of SC2.".format(SUPPORT_MAPS)
                )
        # copy and overwrite original implementation
        map_inst = random.choice(self._maps)
        self._map_name = map_inst.name

        self._step_mul = max(1, self._default_step_mul or map_inst.step_mul)
        self._score_index = get_default(self._default_score_index, map_inst.score_index)
        self._score_multiplier = get_default(self._default_score_multiplier, map_inst.score_multiplier)
        self._episode_length = get_default(self._default_episode_length, map_inst.game_steps_per_episode)
        if self._episode_length <= 0 or self._episode_length > MAX_STEP_COUNT:
            self._episode_length = MAX_STEP_COUNT

        # Create the game. Set the first instance as the host.
        create = sc_pb.RequestCreateGame(disable_fog=self._disable_fog, realtime=self._realtime)

        if self._battle_net_map:
            create.battlenet_map_name = map_inst.battle_net
        else:
            create.local_map.map_path = map_inst.path
            map_data = map_inst.data(self._run_config)
            if self._num_agents == 1:
                create.local_map.map_data = map_data
            else:
                # Save the maps so they can access it. Don't do it in parallel since SC2
                # doesn't respect tmpdir on windows, which leads to a race condition:
                # https://github.com/Blizzard/s2client-proto/issues/102
                for c in self._controllers:
                    c.save_map(map_inst.path, map_data)
        if self._random_seed is not None:
            create.random_seed = self._random_seed
        for p in self._players:
            if isinstance(p, Agent):
                create.player_setup.add(type=sc_pb.Participant)
            else:
                create.player_setup.add(
                    type=sc_pb.Computer,
                    race=random.choice(p.race),
                    difficulty=p.difficulty,
                    ai_build=random.choice(p.build)
                )
        # With more than one agent the game is created through the second controller.
        if self._num_agents > 1:
            self._controllers[1].create_game(create)
        else:
            self._controllers[0].create_game(create)

        # Create the join requests.
        agent_players = [p for p in self._players if isinstance(p, Agent)]
        self.sanitized_names = crop_and_deduplicate_names(p.name for p in agent_players)
        join_reqs = []
        for p, name, interface in zip(agent_players, self.sanitized_names, self._interface_options):
            join = sc_pb.RequestJoinGame(options=interface)
            join.race = random.choice(p.race)
            join.player_name = name
            if self._ports:
                join.shared_port = 0  # unused
                join.server_ports.game_port = self._ports[0]
                join.server_ports.base_port = self._ports[1]
                for i in range(self._num_agents - 1):
                    join.client_ports.add(game_port=self._ports[i * 2 + 2], base_port=self._ports[i * 2 + 3])
            join_reqs.append(join)

        # Join the game. This must be run in parallel because Join is a blocking
        # call to the game that waits until all clients have joined.
        self._parallel.run((c.join_game, join) for c, join in zip(self._controllers, join_reqs))

        self._game_info = self._parallel.run(c.game_info for c in self._controllers)
        for g, interface in zip(self._game_info, self._interface_options):
            if g.options.render != interface.render:
                logging.warning(
                    "Actual interface options don't match requested options:\n"
                    "Requested:\n%s\n\nActual:\n%s", interface, g.options
                )

        # original pysc2 case
        # if require_features:
        #   self._features = [
        #        features.features_from_game_info(
        #            game_info=g, agent_interface_format=aif, map_name=self._map_name)
        #        for g, aif in zip(self._game_info, self._interface_formats)]
        # smac case
        self._features = None

    def _get_players(self, game_type, player1_race, player2_race):
        """
        Build the pysc2 player list for the requested game type.

        Arguments:
            - game_type: 'game_vs_bot' (one agent against the built-in bot at ``self.difficulty``)
              or 'agent_vs_agent' (two agents).
            - player1_race / player2_race: race codes, used as keys into ``races``.

        Returns:
            - (players, agent_num): the player list and the number of agents (1 or 2).

        Raises:
            KeyError: if ``game_type`` is neither of the two supported values.
        """
        if game_type not in ('game_vs_bot', 'agent_vs_agent'):
            raise KeyError("invalid game_type: {}".format(game_type))

        versus_bot = game_type == 'game_vs_bot'
        if versus_bot:
            print('difficulty', self.difficulty)

        first_player = sc2_env.Agent(races[player1_race])
        if versus_bot:
            second_player = sc2_env.Bot(races[player2_race], self.difficulty)
        else:
            second_player = sc2_env.Agent(races[player2_race])

        return [first_player, second_player], (1 if versus_bot else 2)

    def _launch(self):
        """
        Start StarCraft II by running ``SC2Env.__init__`` and cache the map geometry.

        After the game is up, the playable-area extent and the map size are read from the first
        controller's game info and handed to the action helper.
        """
        print("*****LAUNCH FUNCTION CALLED*****")

        # necessary for compatibility with pysc2
        from absl import flags
        flags.FLAGS(['smac'])
        agent_interface_format = sc2_env.parse_agent_interface_format(use_raw_units=True)

        saving_replays = self.save_replay_episodes is not None
        sc2_kwargs = dict(
            map_name=self.map_name,
            battle_net_map=False,
            players=self.players,
            agent_interface_format=agent_interface_format,
            discount=None,
            discount_zero_after_timeout=False,
            visualize=False,
            step_mul=8,
            realtime=False,
            save_replay_episodes=self.save_replay_episodes,
            replay_dir="." if saving_replays else None,
            replay_prefix=None,
            game_steps_per_episode=self.game_steps_per_episode,
            score_index=None,
            score_multiplier=None,
            random_seed=self._seed,
            disable_fog=False,
            ensure_available_actions=True,
            version=None,
        )
        SC2Env.__init__(self, **sc2_kwargs)

        # NOTE(review): the flag is set to True here, so a ``full_restart`` leaves it set and the
        # following ``reset`` calls ``_launch`` once more - confirm this is intended.
        self._launch_env_flag = True

        map_info = self._game_info[0].start_raw
        lower_corner = map_info.playable_area.p0
        upper_corner = map_info.playable_area.p1
        self.max_distance_x = upper_corner.x - lower_corner.x
        self.max_distance_y = upper_corner.y - lower_corner.y
        self.map_x, self.map_y = map_info.map_size.x, map_info.map_size.y

        self.action_helper.update(map_info, self.map_x, self.map_y)

    def _restart_episode(self):
        """Restart the episode in place by killing every living unit.

        There is a trigger in the SC2Map file which restarts the episode when no units are
        left. If the game connection fails while doing so, a full restart is performed instead.
        """
        try:
            living_tags = [ally.tag for ally in self.agents.values() if ally.health > 0]
            living_tags += [enemy.tag for enemy in self.enemies.values() if enemy.health > 0]
            kill_everything = d_pb.DebugCommand(kill_unit=d_pb.DebugKillUnit(tag=living_tags))
            run_commands = [(self._controllers[0].debug, kill_everything)]
            if self.two_player:
                # The second controller receives an empty kill request.
                kill_nothing = d_pb.DebugCommand(kill_unit=d_pb.DebugKillUnit(tag=[]))
                run_commands.append((self._controllers[1].debug, kill_nothing))
            # Kill all units on the map.
            self._parallel.run(run_commands)
            # Forward 2 step to make sure all units revive.
            self._parallel.run((c.step, 2) for c in self._controllers)
        except (protocol.ProtocolError, protocol.ConnectionError) as e:
            print("Error happen in _restart. Error: ", e)
            self.full_restart()

    def full_restart(self):
        """
        Close the running game, launch a fresh one and record that a forced restart happened.

        ``just_force_restarts`` is read by ``reset``, which then ignores the unit tags of the
        previous game.
        """
        self.close()
        self._launch()
        self.force_restarts += 1
        self.just_force_restarts = True

    def reset(self):
        """
        Start a new episode and return its first observation.

        The game is launched on the first call, fully restarted when ``_total_steps`` exceeds
        ``_next_reset_steps`` or replays are being saved, and otherwise restarted in place via
        ``_restart_episode``. Unit initialisation is attempted at most 5 times.

        Returns:
            - obs (dict): keys 'agent_state', 'global_state' and 'action_mask' (plus
              'agent_alone_state' and 'agent_alone_padding_state' when ``obs_alone`` is set).
              In two-player mode every value is itself a dict keyed by ``ORIGINAL_AGENT`` and
              ``OPPONENT_AGENT``.

        Raises:
            RuntimeError: if the units could not be initialised within 5 attempts.
        """
        self._episode_steps = 0
        self._final_eval_fake_reward = 0.
        # Tags of the previous episode's units, so that ``init_units`` can skip them.
        old_unit_tags = set(u.tag for u in self.agents.values()).union(set(u.tag for u in self.enemies.values()))

        if self.just_force_restarts:
            old_unit_tags = set()
            self.just_force_restarts = False

        if self._launch_env_flag:
            # Launch StarCraft II
            print("*************LAUNCH TOTAL GAME********************")
            self._launch()
            self._launch_env_flag = False
        elif (self._total_steps > self._next_reset_steps) or (self.save_replay_episodes is not None):
            # Avoid hitting the real episode limit of SC2 env
            print("We are full restarting the environment! save_replay_episodes: ", self.save_replay_episodes)
            self.full_restart()
            old_unit_tags = set()
            self._next_reset_steps += FORCE_RESTART_INTERVAL
        else:
            self._restart_episode()

        # Information kept for counting the reward
        self.win_counted = False
        self.defeat_counted = False

        self.action_helper.reset()

        self.previous_ally_units = None
        self.previous_enemy_units = None

        # if self.heuristic_ai:
        #     self.heuristic_targets = [None] * self.n_agents

        # Try up to 5 times. The previous ``while count <= 5`` / ``if count >= 5`` pair raised
        # even when the 5th attempt succeeded and allowed a useless 6th attempt.
        for _ in range(5):
            self._update_obs()
            if self.init_units(old_unit_tags):
                break
            # ``init_units`` only returns False after a full restart, so the old tags are stale.
            old_unit_tags = set()
        else:
            raise RuntimeError("reset 5 times error")

        self.reward_helper.reset(self.max_reward)

        assert all(u.health > 0 for u in self.agents.values())
        assert all(u.health > 0 for u in self.enemies.values())

        if not self.two_player:
            if self.obs_alone:
                agent_state, agent_alone_state, agent_alone_padding_state = self.get_obs()
                return {
                    'agent_state': agent_state,
                    'agent_alone_state': agent_alone_state,
                    'agent_alone_padding_state': agent_alone_padding_state,
                    'global_state': self.get_state(),
                    'action_mask': self.get_avail_actions()
                }
            elif self.independent_obs:
                return {
                    'agent_state': self.get_obs(),
                    'global_state': self.get_obs(),
                    'action_mask': self.get_avail_actions(),
                }
            elif self.special_global_state:
                return {
                    'agent_state': self.get_obs(),
                    'global_state': self.get_global_special_state(),
                    'action_mask': self.get_avail_actions(),
                }
            else:
                return {
                    'agent_state': self.get_obs(),
                    'global_state': self.get_state(),
                    'action_mask': self.get_avail_actions(),
                }

        return {
            'agent_state': {
                ORIGINAL_AGENT: self.get_obs(),
                OPPONENT_AGENT: self.get_obs(True)
            },
            'global_state': {
                ORIGINAL_AGENT: self.get_state(),
                OPPONENT_AGENT: self.get_state(True)
            },
            'action_mask': {
                ORIGINAL_AGENT: self.get_avail_actions(),
                OPPONENT_AGENT: self.get_avail_actions(True),
            },
        }

    def _submit_actions(self, actions):
        """
        Send the already-processed actions to the game, advance it and refresh ``self._obs``.

        Arguments:
            - actions: a sequence of raw actions in single-player mode, or a dict with the
              ``ORIGINAL_AGENT`` and ``OPPONENT_AGENT`` keys in two-player mode.

        Raises:
            ValueError: in two-player mode, if ``self._step_mul`` is not positive.
        """
        if not self.two_player:
            # actions is a sequence
            # Send action request
            request = sc_pb.RequestAction(actions=actions)
            self._controllers[0].actions(request)
            self._controllers[0].step(self._step_mul)
            self._update_obs()
            return

        # actions is a dict with 'me' and 'opponent' keys.
        actions_me = actions[ORIGINAL_AGENT]
        actions_opponent = actions[OPPONENT_AGENT]
        requests = [
            (self._controllers[0].actions, sc_pb.RequestAction(actions=actions_me)),
            (self._controllers[1].actions, sc_pb.RequestAction(actions=actions_opponent)),
        ]
        self._parallel.run(requests)

        step_mul = self._step_mul
        if step_mul <= 0:
            raise ValueError("step_mul should be positive, got {}".format(step_mul))

        game_already_over = any(c.status_ended for c in self._controllers)
        if not game_already_over:  # May already have ended.
            self._parallel.run((c.step, step_mul) for c in self._controllers)
        self._update_obs(target_game_loop=self._episode_steps + step_mul)

    def _get_empty_action(self, old_action):
        me_act = []
        for a_id in range(self.n_agents):
            no_op = self.action_helper.get_avail_agent_actions(a_id, self, is_opponent=False)[0]
            me_act.append(0 if no_op else 1)

        if isinstance(old_action, dict):
            op_act = []
            for a_id in range(self.n_enemies):
                no_op = self.action_helper.get_avail_agent_actions(a_id, self, is_opponent=False)[0]
                op_act.append(0 if no_op else 1)
            new_action = {ORIGINAL_AGENT: me_act, OPPONENT_AGENT: op_act}
        else:
            new_action = me_act
        return new_action

    def step(self, actions, force_return_two_player=False):
        """
        Apply ``actions``, advance the game and return the resulting timestep.

        If submitting the actions fails, the game is fully restarted and a timestep with
        ``done=True``, ``info={'abnormal': True}`` and no observation/reward is returned.

        Arguments:
            - actions: the agents' actions, converted through ``action_helper.get_action``.
            - force_return_two_player: requests the two-player return layout, which is not
              implemented.

        Returns:
            - SMACTimestep with obs, reward, done, info and episode_steps.

        Raises:
            NotImplementedError: in two-player mode or when ``force_return_two_player`` is set
                (raised only after the game has been advanced).
        """
        processed_actions = self.action_helper.get_action(actions, self)
        try:
            self._submit_actions(processed_actions)
        except (protocol.ProtocolError, protocol.ConnectionError, ValueError) as e:
            print("Error happen in step! Error: ", e)
            self.full_restart()
            return self.SMACTimestep(
                obs=None, reward=None, done=True, info={'abnormal': True}, episode_steps=self._episode_steps
            )

        # Update units
        game_end_code = self.update_units()
        rewards, terminates, infos = self._collect_step_data(game_end_code, actions)

        nobody_won = not (infos["me"]["battle_won"] or infos["opponent"]["battle_won"])
        infos["draw"] = int(nobody_won)

        if self.two_player or force_return_two_player:
            raise NotImplementedError

        # Single-player view: keep only the original agent's reward / done / info.
        rewards = rewards[ORIGINAL_AGENT]
        terminates = terminates[ORIGINAL_AGENT]
        new_infos = infos[ORIGINAL_AGENT]
        self._final_eval_fake_reward += rewards
        new_infos["battle_lost"] = infos[OPPONENT_AGENT]["battle_won"]
        new_infos["draw"] = infos["draw"]
        new_infos['eval_episode_return'] = infos['eval_episode_return']
        if 'episode_info' in infos:
            new_infos['episode_info'] = infos['episode_info']
        new_infos['fake_eval_episode_return'] = infos['fake_eval_episode_return']
        infos = new_infos

        if self.obs_alone:
            agent_state, agent_alone_state, agent_alone_padding_state = self.get_obs()
            obs = {
                'agent_state': agent_state,
                'agent_alone_state': agent_alone_state,
                'agent_alone_padding_state': agent_alone_padding_state,
            }
            obs['global_state'] = self.get_state()
            obs['action_mask'] = self.get_avail_actions()
        else:
            # Pick the producer of the global state; the remaining entries are identical.
            if self.independent_obs:
                global_state_fn = self.get_obs
            elif self.special_global_state:
                global_state_fn = self.get_global_special_state
            else:
                global_state_fn = self.get_state
            obs = {
                'agent_state': self.get_obs(),
                'global_state': global_state_fn(),
                'action_mask': self.get_avail_actions(),
            }

        return self.SMACTimestep(
            obs=copy.deepcopy(obs), reward=rewards, done=terminates, info=infos, episode_steps=self._episode_steps
        )

    def _collect_step_data(self, game_end_code, action):
        """This function is called only once at each step, no matter whether you take opponent as agent.
        We already return dicts for each term, as in Multi-agent scenario.

        Arguments:
            - game_end_code: None while the battle is running; 1 is handled as a win of the
              original agent and -1 as its defeat.
            - action: the actions of this step, forwarded to the reward helper.

        Returns:
            - reward (dict): per-player rewards converted to float32 numpy arrays.
            - done (dict): termination flag under ``ORIGINAL_AGENT``, ``OPPONENT_AGENT`` and "__all__".
            - info (dict): per-player "battle_won" flags, 'eval_episode_return',
              'fake_eval_episode_return' and, on termination, 'episode_info'.
        """
        self._total_steps += 1
        self._episode_steps += 1

        terminated = False

        reward = self.reward_helper.get_reward(self, action, game_end_code, self.win_counted, self.defeat_counted)
        for k in reward:
            reward[k] = np.array(reward[k]).astype(np.float32)

        info = {
            ORIGINAL_AGENT: {
                "battle_won": False
            },
            OPPONENT_AGENT: {
                "battle_won": False
            },
            'eval_episode_return': 0.,
            'fake_eval_episode_return': 0.
        }

        if game_end_code is not None:
            # Battle is over
            terminated = True
            self.battles_game += 1
            if game_end_code == 1 and not self.win_counted:
                # The original agent win the game.
                self.battles_won += 1
                self.win_counted = True
                info[ORIGINAL_AGENT]["battle_won"] = True
                info[OPPONENT_AGENT]["battle_won"] = False
                info['eval_episode_return'] = 1.
            elif game_end_code == -1 and not self.defeat_counted:
                self.defeat_counted = True
                info[ORIGINAL_AGENT]["battle_won"] = False
                info[OPPONENT_AGENT]["battle_won"] = True

        elif self._episode_steps >= self.episode_limit:
            # Episode limit reached
            terminated = True
            if self.continuing_episode:
                info[ORIGINAL_AGENT]["episode_limit"] = True
                info[OPPONENT_AGENT]["episode_limit"] = True
            self.battles_game += 1
            self.timeouts += 1
            # info['eval_episode_return'] = -0.5

            # if sum(u.health + u.shield for u in self.agents.values()) >= \
            #         sum(u.health + u.shield for u in self.enemies.values()):
            #     # lj fix
            #     reward[ORIGINAL_AGENT] += 1
            #     reward[OPPONENT_AGENT] += -1
            # else:
            #     reward[ORIGINAL_AGENT] += -1
            #     reward[OPPONENT_AGENT] += 1

        if terminated:
            self._episode_count += 1
            # Count the units that have died (health == 0) on each side.
            dead_allies = sum(1 for al_unit in self.agents.values() if al_unit.health == 0)
            dead_enemies = sum(1 for e_unit in self.enemies.values() if e_unit.health == 0)

            # 1-dim to 0-dim. The accumulator is the plain float 0. until ``step`` has added a
            # reward array to it, so index through ``np.ravel`` instead of ``[0]``: an episode
            # ending on its very first step would otherwise fail with a TypeError.
            info['episode_info'] = {
                'final_eval_fake_reward': np.ravel(self._final_eval_fake_reward)[0],
                'dead_allies': dead_allies,
                'dead_enemies': dead_enemies
            }
            self._final_eval_fake_reward = 0.

        # PZH: Zero at first step
        if self._episode_steps == 1:
            for k in reward.keys():
                reward[k] *= 0.0
            if terminated:
                print("WARNNING! Should not terminate at the first step!")

        # Test purpose
        # reward = {k: 0 * v + 100 for k, v in reward.items()}
        info['fake_eval_episode_return'] = reward[ORIGINAL_AGENT]
        return reward, {ORIGINAL_AGENT: terminated, OPPONENT_AGENT: terminated, "__all__": terminated}, info

    def close(self):
        """Close the environment by delegating to ``SC2Env.close``."""
        SC2Env.close(self)

    def init_units(self, old_unit_tags):
        count = 0
        while count < 10:
            # Sometimes not all units have yet been created by SC2
            self.agents = {}
            self.enemies = {}

            ally_units = [
                unit for unit in self._obs.observation.raw_data.units
                if (unit.owner == 1) and (unit.tag not in old_unit_tags)
            ]
            ally_units_sorted = sorted(
                ally_units,
                key=attrgetter("unit_type", "pos.x", "pos.y"),
                reverse=False,
            )

            for i in range(len(ally_units_sorted)):
                self.agents[i] = ally_units_sorted[i]

            self.max_reward = self.n_enemies * self.reward_death_value + self.reward_win
            for unit in self._obs.observation.raw_data.units:
                if (unit.owner == 2) and (unit.tag not in old_unit_tags):
                    self.enemies[len(self.enemies)] = unit
                    # if self._episode_count == 0:
                    self.max_reward += unit.health_max + unit.shield_max

            all_agents_created = (len(self.agents) == self.n_agents)
            all_enemies_created = (len(self.enemies) == self.n_enemies)

            all_agents_health = all(u.health > 0 for u in self.agents.values())
            all_enemies_health = all(u.health > 0 for u in self.enemies.values())

            if all_agents_created and all_enemies_created \
                    and all_agents_health and all_enemies_health:  # all good
                if self._episode_count == 0:
                    min_unit_type = min(unit.unit_type for unit in self.agents.values())
                    min_unit_type_opponent = min(unit.unit_type for unit in self.enemies.values())
                    self._init_ally_unit_types(min_unit_type)
                    self._init_enemy_unit_types(min_unit_type_opponent)
                return True
            else:
                print(
                    "***ALL GOOD FAIL***", all_agents_created, all_enemies_created, all_agents_health,
                    all_enemies_health, len(self._obs.observation.raw_data.units)
                )
                print(
                    (len(self.agents) == self.n_agents), (len(self.enemies) == self.n_enemies), len(self.agents),
                    self.n_agents, len(self.enemies), self.n_enemies
                )
                self._restart_episode()
                count += 1

            try:
                self._parallel.run((c.step, 1) for c in self._controllers)
                self._update_obs()

            except (protocol.ProtocolError, protocol.ConnectionError) as e:
                print("Error happen in init_units.", e)
                self.full_restart()
                return False
        if count >= 10:
            self.full_restart()
            return False

    def _init_enemy_unit_types(self, min_unit_type_opponent):
        """Initialise ally unit types. Should be called once from the
        init_units function.
        """
        self._min_unit_type_opponent = min_unit_type_opponent
        if self.map_type == "marines":
            self.marine_id_opponent = min_unit_type_opponent
        elif self.map_type == "stalkers_and_zealots":
            self.stalker_id_opponent = min_unit_type_opponent
            self.zealot_id_opponent = min_unit_type_opponent + 1
        elif self.map_type == "colossi_stalkers_zealots":
            self.colossus_id_opponent = min_unit_type_opponent
            self.stalker_id_opponent = min_unit_type_opponent + 1
            self.zealot_id_opponent = min_unit_type_opponent + 2
        elif self.map_type == "MMM":
            self.marauder_id_opponent = min_unit_type_opponent
            self.marine_id_opponent = min_unit_type_opponent + 1
            self.medivac_id_opponent = min_unit_type_opponent + 2
        elif self.map_type == "zealots":
            self.zealot_id_opponent = min_unit_type_opponent
        elif self.map_type == "hydralisks":
            self.hydralisk_id_opponent = min_unit_type_opponent
        elif self.map_type == "stalkers":
            self.stalker_id_opponent = min_unit_type_opponent
        elif self.map_type == "colossus":
            self.colossus_id_opponent = min_unit_type_opponent
        elif self.map_type == "bane":
            self.baneling_id_opponent = min_unit_type_opponent
            self.zergling_id_opponent = min_unit_type_opponent + 1

    # ================
    def unit_max_shield(self, unit, is_opponent=False):
        """Returns maximal shield for a given unit.

        The unit type is matched against the default SC2 ids (74, 73, 4) and
        against the map-specific ids of the selected side (``is_opponent``
        picks the ``*_id_opponent`` attributes). Units that match neither a
        Stalker, a Zealot nor a Colossus yield ``None``.
        """
        if is_opponent:
            custom_ids = (self.stalker_id_opponent, self.zealot_id_opponent, self.colossus_id_opponent)
        else:
            custom_ids = (self.stalker_id, self.zealot_id, self.colossus_id)

        # (default SC2 unit type id, maximal shield) for Stalker, Zealot, Colossus.
        protoss_shields = ((74, 80), (73, 50), (4, 150))
        for custom_id, (default_id, max_shield) in zip(custom_ids, protoss_shields):
            if unit.unit_type == default_id or unit.unit_type == custom_id:
                return max_shield
        return None

    def get_unit_type_id(self, unit, ally, is_opponent=False):
        """Map a unit's raw ``unit_type`` to a small index used for one-hot type features.

        With ``ally=True`` the map-specific unit types are used: the index is
        the offset from the side's minimal unit type id (the opponent's one when
        ``is_opponent`` is set), except on "infestor_viper" maps for the
        non-opponent side, where type 393 maps to 0 and anything else to 1.

        With ``ally=False`` the default SC2 unit type ids are used and the
        index depends on ``self.map_type``; unsupported map types raise
        ``ValueError``.
        """
        if ally:
            # Map-specific ("new") SC2 unit types.
            if is_opponent:
                return unit.unit_type - self._min_unit_type_opponent
            if self.map_type == "infestor_viper":
                return 0 if unit.unit_type == 393 else 1
            return unit.unit_type - self._min_unit_type

        # Default SC2 unit types.
        map_type = self.map_type
        if map_type == "stalkers_and_zealots":
            # id(Stalker) = 74, id(Zealot) = 73
            return unit.unit_type - 73
        if map_type == "colossi_stalkers_zealots":
            # id(Stalker) = 74, id(Zealot) = 73, id(Colossus) = 4
            if unit.unit_type == 4:
                return 0
            return 1 if unit.unit_type == 74 else 2
        if map_type == "bane":
            return 0 if unit.unit_type == 9 else 1
        if map_type == "MMM":
            if unit.unit_type == 51:
                return 0
            return 1 if unit.unit_type == 48 else 2
        if map_type == "infestor_viper":
            return 0 if unit.unit_type == 393 else 1
        raise ValueError()

    def _update_obs(self, target_game_loop=0):
        # Transform in the thread so it runs while waiting for other observations.
        # def parallel_observe(c, f):

        if self.two_player:

            def parallel_observe(c):
                obs = c.observe(target_game_loop=target_game_loop)
                # agent_obs = f.transform_obs(obs)
                return obs

            # with self._metrics.measure_observation_time():
            self._obses = self._parallel.run((parallel_observe, c) for c in self._controllers)
        else:
            self._obses = [self._controllers[0].observe()]

        self._obs = self._obses[0]

    def _init_ally_unit_types(self, min_unit_type):
        """Initialise ally unit types. Should be called once from the
        init_units function.
        """
        self._min_unit_type = min_unit_type
        if self.map_type == "marines":
            self.marine_id = min_unit_type
        elif self.map_type == "stalkers_and_zealots":
            self.stalker_id = min_unit_type
            self.zealot_id = min_unit_type + 1
        elif self.map_type == "colossi_stalkers_zealots":
            self.colossus_id = min_unit_type
            self.stalker_id = min_unit_type + 1
            self.zealot_id = min_unit_type + 2
        elif self.map_type == "MMM":
            self.marauder_id = min_unit_type
            self.marine_id = min_unit_type + 1
            self.medivac_id = min_unit_type + 2
        elif self.map_type == "zealots":
            self.zealot_id = min_unit_type
        elif self.map_type == "hydralisks":
            self.hydralisk_id = min_unit_type
        elif self.map_type == "stalkers":
            self.stalker_id = min_unit_type
        elif self.map_type == "colossus":
            self.colossus_id = min_unit_type
        elif self.map_type == "bane":
            self.baneling_id = min_unit_type
            self.zergling_id = min_unit_type + 1

    def get_obs(self, is_opponent=False):
        """Returns all agent observations stacked into float32 array(s).

        NOTE: Agents should have access only to their local observations
        during decentralised execution.

        One observation is built per index in ``range(self.n_agents)`` via
        ``get_obs_agent`` (the same count is used for the opponent side).

        When ``self.mirror_opponent`` is set and ``is_opponent`` is True, the
        dict observations are mirrored along the x axis: entries 2 and 3 of
        ``move_feats`` are swapped and column 2 (relative x) of
        ``enemy_feats`` / ``ally_feats`` is negated. This requires
        ``self.flatten_observation`` to be False.

        Returns:
            - If ``self.obs_alone`` is set: a tuple of three float32 arrays
              (observation, observation without ally features, observation with
              zero-padded ally features).
            - Otherwise: a single float32 array of per-agent observations.
        """
        agents_obs_list = [self.get_obs_agent(i, is_opponent) for i in range(self.n_agents)]

        if self.mirror_opponent and is_opponent:
            assert not self.flatten_observation
            mirrored_obs_list = []
            for agent_obs in agents_obs_list:
                mirrored_obs = {}
                for key, feat in agent_obs.items():
                    # Scalar entries (e.g. "obs_timestep_number" is a plain float) have no
                    # ``copy`` method and need no mirroring, so they are passed through.
                    if hasattr(feat, "copy"):
                        feat = feat.copy()

                    if key == "move_feats":
                        # Swap the two horizontal movement flags.
                        feat[2], feat[3] = feat[3], feat[2]
                    elif key == "enemy_feats" or key == "ally_feats":
                        # Negate relative x of every unit row.
                        feat[:, 2] = -feat[:, 2]

                    mirrored_obs[key] = feat
                mirrored_obs_list.append(mirrored_obs)
            agents_obs_list = mirrored_obs_list

        if not self.flatten_observation:
            agents_obs_list = self._flatten_obs(agents_obs_list)
        if self.obs_alone:
            agents_obs_list, agents_obs_alone_list, agents_obs_alone_padding_list = list(zip(*agents_obs_list))
            return np.array(agents_obs_list).astype(np.float32), np.array(agents_obs_alone_list).astype(
                np.float32
            ), np.array(agents_obs_alone_padding_list).astype(np.float32)
        else:
            return np.array(agents_obs_list).astype(np.float32)

    def get_obs_agent(self, agent_id, is_opponent=False):
        """Build the local observation of one unit.

        The observation consists of movement features (from
        ``self.action_helper``), an enemy feature matrix, an ally feature matrix,
        the unit's own feature vector and a one-hot agent id. If the unit is dead
        (``health`` is not positive) the enemy, ally and own features stay all
        zeros; movement features and the agent id are still filled in.

        Per-row layout of the enemy matrix: available-to-attack flag, distance,
        relative x, relative y, then optionally health (+ shield) and a one-hot
        unit type. Per-row layout of the ally matrix: visible flag, distance,
        relative x, relative y, then optionally health (+ shield) and a one-hot
        unit type. Own features: optionally health (+ shield), one-hot unit type
        and the last action.

        Arguments:
            - agent_id: index of the unit within its own side.
            - is_opponent: if True the unit is taken from ``self.enemies`` and
              ``self.agents`` are treated as its enemies.

        Returns:
            - If ``self.flatten_observation`` is True: a 1-D array (with the
              normalised timestep appended when ``self.obs_timestep_number`` is
              set); when ``self.obs_alone`` is also set, a tuple of
              (observation, observation without ally features, observation with
              zeroed ally features).
            - Otherwise: a dict with keys ``move_feats``, ``enemy_feats``,
              ``ally_feats``, ``own_feats``, ``agent_id_feats`` and optionally
              ``obs_timestep_number``.
        """
        unit = self.get_unit_by_id(agent_id, is_opponent=is_opponent)

        # TODO All these function should have an opponent version
        # NOTE(review): the feature sizes below do not depend on is_opponent, so this
        # presumably assumes both sides have the same number of units -- confirm.
        enemy_feats_dim = self.get_obs_enemy_feats_size()
        ally_feats_dim = self.get_obs_ally_feats_size()
        own_feats_dim = self.get_obs_own_feats_size()

        enemy_feats = np.zeros(enemy_feats_dim, dtype=np.float32)
        ally_feats = np.zeros(ally_feats_dim, dtype=np.float32)
        own_feats = np.zeros(own_feats_dim, dtype=np.float32)

        move_feats = self.action_helper.get_movement_features(agent_id, self, is_opponent)

        if unit.health > 0:  # otherwise dead, return all zeros
            x = unit.pos.x
            y = unit.pos.y
            sight_range = self.unit_sight_range(agent_id)
            avail_actions = self.action_helper.get_avail_agent_actions(agent_id, self, is_opponent)

            # Enemy features
            # From the opponent's point of view, our own agents are the enemies.
            if is_opponent:
                enemy_items = self.agents.items()
            else:
                enemy_items = self.enemies.items()
            for e_id, e_unit in enemy_items:
                e_x = e_unit.pos.x
                e_y = e_unit.pos.y
                dist = distance(x, y, e_x, e_y)

                if (dist < sight_range and e_unit.health > 0):  # visible and alive
                    # Sight range > shoot range
                    # Attack actions follow the non-attack actions in the availability vector.
                    enemy_feats[e_id, 0] = avail_actions[self.action_helper.n_actions_no_attack + e_id]  # available
                    enemy_feats[e_id, 1] = dist / sight_range  # distance
                    enemy_feats[e_id, 2] = (e_x - x) / sight_range  # relative X
                    enemy_feats[e_id, 3] = (e_y - y) / sight_range  # relative Y

                    ind = 4
                    if self.obs_all_health:
                        enemy_feats[e_id, ind] = (e_unit.health / e_unit.health_max)  # health
                        ind += 1
                        if self.shield_bits_enemy > 0:
                            # The enemy belongs to the other side, hence the negated flag.
                            max_shield = self.unit_max_shield(e_unit, not is_opponent)
                            enemy_feats[e_id, ind] = (e_unit.shield / max_shield)  # shield
                            ind += 1

                    if self.unit_type_bits > 0:
                        # If enemy is computer, than use ally=False, but since now we use
                        #  agent for enemy, ally=True
                        if self.two_player:
                            type_id = self.get_unit_type_id(e_unit, True, not is_opponent)
                        else:
                            type_id = self.get_unit_type_id(e_unit, False, False)
                        enemy_feats[e_id, ind + type_id] = 1  # unit type

            # Ally features
            # All units of the own side except the observing unit itself.
            al_ids = [
                al_id for al_id in range((self.n_agents if not is_opponent else self.n_enemies)) if al_id != agent_id
            ]
            for i, al_id in enumerate(al_ids):

                al_unit = self.get_unit_by_id(al_id, is_opponent=is_opponent)
                al_x = al_unit.pos.x
                al_y = al_unit.pos.y
                dist = distance(x, y, al_x, al_y)

                if (dist < sight_range and al_unit.health > 0):  # visible and alive
                    ally_feats[i, 0] = 1  # visible
                    ally_feats[i, 1] = dist / sight_range  # distance
                    ally_feats[i, 2] = (al_x - x) / sight_range  # relative X
                    ally_feats[i, 3] = (al_y - y) / sight_range  # relative Y

                    ind = 4
                    if self.obs_all_health:
                        ally_feats[i, ind] = (al_unit.health / al_unit.health_max)  # health
                        ind += 1
                        if self.shield_bits_ally > 0:
                            max_shield = self.unit_max_shield(al_unit, is_opponent)
                            ally_feats[i, ind] = (al_unit.shield / max_shield)  # shield
                            ind += 1

                    if self.unit_type_bits > 0:
                        type_id = self.get_unit_type_id(al_unit, True, is_opponent)
                        ally_feats[i, ind + type_id] = 1
                        ind += self.unit_type_bits

                    # LJ fix
                    # if self.obs_last_action:
                    #     ally_feats[i, ind:] = self.action_helper.get_last_action(is_opponent)[al_id]

            # Own features
            ind = 0
            if self.obs_own_health:
                own_feats[ind] = unit.health / unit.health_max
                ind += 1
                if self.shield_bits_ally > 0:
                    max_shield = self.unit_max_shield(unit, is_opponent)
                    own_feats[ind] = unit.shield / max_shield
                    ind += 1

            if self.unit_type_bits > 0:
                type_id = self.get_unit_type_id(unit, True, is_opponent)
                own_feats[ind + type_id] = 1
                ind += self.unit_type_bits
            if self.obs_last_action:
                # The last action fills the remainder of the own-feature vector.
                own_feats[ind:] = self.action_helper.get_last_action(is_opponent)[agent_id]

        # One-hot encoding of the unit's index within its own side.
        if is_opponent:
            agent_id_feats = np.zeros(self.n_enemies)
        else:
            agent_id_feats = np.zeros(self.n_agents)
        agent_id_feats[agent_id] = 1
        # Only set to false by outside wrapper
        if self.flatten_observation:
            agent_obs = np.concatenate(
                (
                    move_feats.flatten(),
                    enemy_feats.flatten(),
                    ally_feats.flatten(),
                    own_feats.flatten(),
                    agent_id_feats,
                )
            )
            if self.obs_timestep_number:
                agent_obs = np.append(agent_obs, self._episode_steps / self.episode_limit)
            if self.obs_alone:
                # Variant without any ally features.
                agent_obs_alone = np.concatenate(
                    (
                        move_feats.flatten(),
                        enemy_feats.flatten(),
                        own_feats.flatten(),
                        agent_id_feats,
                    )
                )
                # Variant with the ally features zeroed out, keeping the full observation length.
                agent_obs_alone_padding = np.concatenate(
                    (
                        move_feats.flatten(),
                        enemy_feats.flatten(),
                        np.zeros_like(ally_feats.flatten()),
                        own_feats.flatten(),
                        agent_id_feats,
                    )
                )
                if self.obs_timestep_number:
                    agent_obs_alone = np.append(agent_obs_alone, self._episode_steps / self.episode_limit)
                    agent_obs_alone_padding = np.append(
                        agent_obs_alone_padding, self._episode_steps / self.episode_limit
                    )
                return agent_obs, agent_obs_alone, agent_obs_alone_padding
            else:
                return agent_obs
        else:
            # Structured (non-flattened) observation; obs_alone is not handled in this branch.
            agent_obs = dict(
                move_feats=move_feats,
                enemy_feats=enemy_feats,
                ally_feats=ally_feats,
                own_feats=own_feats,
                agent_id_feats=agent_id_feats
            )
            if self.obs_timestep_number:
                # Stored as a plain Python number, not an array.
                agent_obs["obs_timestep_number"] = self._episode_steps / self.episode_limit

        return agent_obs

    def get_unit_by_id(self, a_id, is_opponent=False):
        """Look up a unit by its index; the opponent side is stored in ``self.enemies``."""
        side_units = self.enemies if is_opponent else self.agents
        return side_units[a_id]

    def get_obs_enemy_feats_size(self):
        """Returns the dimensions of the enemy feature matrix of an observation.

        Size is n_enemies x n_features, where each row holds 4 base features
        plus the unit type bits and, when ``obs_all_health`` is set, one health
        entry plus the enemy shield bits.
        """
        health_width = 1 + self.shield_bits_enemy if self.obs_all_health else 0
        return self.n_enemies, 4 + self.unit_type_bits + health_width

    def get_obs_ally_feats_size(self):
        """Returns the dimensions of the ally feature matrix of an observation.

        Size is n_allies x n_features with n_allies = ``n_agents - 1``. Each row
        holds 4 base features plus the unit type bits and, when
        ``obs_all_health`` is set, one health entry plus the ally shield bits.
        The allies' last actions are not part of the row.
        """
        health_width = 1 + self.shield_bits_ally if self.obs_all_health else 0
        return self.n_agents - 1, 4 + self.unit_type_bits + health_width

    def get_obs_own_feats_size(self):
        """Returns the size of the vector containing the agents' own features.

        The size is the unit type bits plus, depending on the configuration,
        one health entry with the ally shield bits (``obs_own_health``), one
        timestep entry (``obs_timestep_number``) and ``n_actions`` entries for
        the last action (``obs_last_action``).
        """
        health_width = 1 + self.shield_bits_ally if self.obs_own_health else 0
        timestep_width = 1 if self.obs_timestep_number else 0
        last_action_width = self.n_actions if self.obs_last_action else 0
        return self.unit_type_bits + health_width + timestep_width + last_action_width

    def get_obs_move_feats_size(self):
        """Returns the size of the movement feature vector, as reported by ``self.action_helper``."""
        helper = self.action_helper
        return helper.get_obs_move_feats_size()

    def get_state_size(self, is_opponent=False):
        """Returns the size of the global state.

        With ``obs_instead_of_state`` the state is the observation size times
        ``n_agents``. Otherwise it is the flattened ally and enemy state
        matrices, plus the last actions of the requested side when
        ``state_last_action`` is set, plus one entry when
        ``state_timestep_number`` is set.
        """
        if self.obs_instead_of_state:
            return self.get_obs_size(is_opponent) * self.n_agents

        ally_width = 4 + self.shield_bits_ally + self.unit_type_bits
        enemy_width = 3 + self.shield_bits_enemy + self.unit_type_bits
        total = self.n_enemies * enemy_width + self.n_agents * ally_width

        if self.state_last_action:
            if is_opponent:
                total += self.n_enemies * self.n_actions_opponent
            else:
                total += self.n_agents * self.n_actions
        if self.state_timestep_number:
            total += 1
        return total

    def get_obs_size(self, is_opponent=False):
        # TODO suppose the agents formation are same for both opponent and me. This can be extended in future.
        """Returns the size of the observation.

        The size is the sum of the movement, enemy, ally and own feature sizes
        plus the length of the agent id one-hot (``n_enemies`` for the opponent
        side, ``n_agents`` otherwise).
        """
        own_width = self.get_obs_own_feats_size()
        move_width = self.get_obs_move_feats_size()

        enemy_rows, enemy_cols = self.get_obs_enemy_feats_size()
        ally_rows, ally_cols = self.get_obs_ally_feats_size()

        id_width = self.n_enemies if is_opponent else self.n_agents
        return move_width + enemy_rows * enemy_cols + ally_rows * ally_cols + own_width + id_width

    def get_obs_alone_size(self, is_opponent=False):
        # TODO suppose the agents formation are same for both opponent and me. This can be extended in future.
        """Returns the size of the observation without ally features.

        The size is the sum of the movement, enemy and own feature sizes plus
        the length of the agent id one-hot (``n_enemies`` for the opponent side,
        ``n_agents`` otherwise).
        """
        own_width = self.get_obs_own_feats_size()
        move_width = self.get_obs_move_feats_size()

        enemy_rows, enemy_cols = self.get_obs_enemy_feats_size()

        id_width = self.n_enemies if is_opponent else self.n_agents
        return move_width + enemy_rows * enemy_cols + own_width + id_width

    def get_state(self, is_opponent=False):
        """Returns the global state as seen by one side, as a float32 array.

        If ``self.obs_instead_of_state`` is set, the state is the concatenation
        of that side's agent observations. Otherwise it consists of:

            - an ally matrix with per-unit health, energy (MMM medivac) or weapon
              cooldown, x and y relative to the map centre, then optionally
              shield and a one-hot unit type;
            - an enemy matrix with per-unit health, x and y relative to the map
              centre, then optionally shield and a one-hot unit type;
            - optionally the last actions (``state_last_action``) and the
              normalised timestep (``state_timestep_number``).

        Rows of dead units stay zero. For ``is_opponent=True`` the roles of
        ``self.agents`` and ``self.enemies`` are swapped; if additionally
        ``self.mirror_opponent`` is set, the x coordinates are negated (this
        requires ``self.flatten_observation`` to be False).

        Arguments:
            - is_opponent: build the state from the opponent's point of view.

        Returns:
            - the state converted to a float32 numpy array; when
              ``self.flatten_observation`` is False the structured state is
              first passed through ``self._flatten_state``.
        """
        if self.obs_instead_of_state:
            # Use the requested side's observations, matching get_state_size(is_opponent).
            return np.concatenate(self.get_obs(is_opponent), axis=0).astype(np.float32)

        nf_al = 4 + self.shield_bits_ally + self.unit_type_bits
        nf_en = 3 + self.shield_bits_enemy + self.unit_type_bits

        ally_state = np.zeros((self.n_agents, nf_al))
        enemy_state = np.zeros((self.n_enemies, nf_en))

        center_x = self.map_x / 2
        center_y = self.map_y / 2

        # From the opponent's point of view the stored "enemies" are its allies.
        if is_opponent:
            ally_units, enemy_units = self.enemies, self.agents
        else:
            ally_units, enemy_units = self.agents, self.enemies

        for al_id, al_unit in ally_units.items():
            if al_unit.health > 0:
                x = al_unit.pos.x
                y = al_unit.pos.y
                max_cd = self.unit_max_cooldown(al_unit, is_opponent=is_opponent)

                ally_state[al_id, 0] = (al_unit.health / al_unit.health_max)  # health
                if (self.map_type == "MMM"
                        and al_unit.unit_type == (self.medivac_id_opponent if is_opponent else self.medivac_id)):
                    ally_state[al_id, 1] = al_unit.energy / max_cd  # energy
                else:
                    ally_state[al_id, 1] = (al_unit.weapon_cooldown / max_cd)  # cooldown
                ally_state[al_id, 2] = (x - center_x) / self.max_distance_x  # relative X
                ally_state[al_id, 3] = (y - center_y) / self.max_distance_y  # relative Y

                ind = 4
                if self.shield_bits_ally > 0:
                    max_shield = self.unit_max_shield(al_unit, is_opponent=is_opponent)
                    ally_state[al_id, ind] = (al_unit.shield / max_shield)  # shield
                    ind += 1

                if self.unit_type_bits > 0:
                    type_id = self.get_unit_type_id(al_unit, True, is_opponent=is_opponent)
                    ally_state[al_id, ind + type_id] = 1

        # In two-player mode the enemy units belong to the *other* side, so their unit type
        # ids must be resolved with the flipped flag (as get_obs_agent does). Against the
        # built-in AI the enemies use default SC2 unit types and the flag stays False.
        enemy_is_opponent = bool(self.two_player) and not is_opponent

        for e_id, e_unit in enemy_units.items():
            if e_unit.health > 0:
                x = e_unit.pos.x
                y = e_unit.pos.y

                enemy_state[e_id, 0] = (e_unit.health / e_unit.health_max)  # health
                enemy_state[e_id, 1] = (x - center_x) / self.max_distance_x  # relative X
                enemy_state[e_id, 2] = (y - center_y) / self.max_distance_y  # relative Y

                ind = 3
                if self.shield_bits_enemy > 0:
                    max_shield = self.unit_max_shield(e_unit, is_opponent=enemy_is_opponent)
                    enemy_state[e_id, ind] = (e_unit.shield / max_shield)  # shield
                    ind += 1

                if self.unit_type_bits > 0:
                    type_id = self.get_unit_type_id(
                        e_unit, True if self.two_player else False, is_opponent=enemy_is_opponent
                    )
                    enemy_state[e_id, ind + type_id] = 1

        last_action = self.action_helper.get_last_action(is_opponent)
        if self.flatten_observation:
            state = np.append(ally_state.flatten(), enemy_state.flatten())
            if self.state_last_action:
                state = np.append(state, last_action.flatten())
            if self.state_timestep_number:
                state = np.append(state, self._episode_steps / self.episode_limit)
            state = state.astype(dtype=np.float32)
        else:
            state = dict(ally_state=ally_state, enemy_state=enemy_state)
            if self.state_last_action:
                state["last_action"] = last_action
            if self.state_timestep_number:
                state["state_timestep_number"] = self._episode_steps / self.episode_limit

        if self.mirror_opponent and is_opponent:
            assert not self.flatten_observation

            mirrored_state = {}
            for key, value in state.items():
                # "state_timestep_number" is a plain float without ``copy``; pass it through.
                if hasattr(value, "copy"):
                    value = value.copy()

                if key == "ally_state":
                    value[:, 2] = -value[:, 2]  # relative x
                elif key == "enemy_state":
                    value[:, 1] = -value[:, 1]  # relative x

                # key == "last_action" is processed in SMACAction
                mirrored_state[key] = value
            state = mirrored_state

        if not self.flatten_observation:
            state = self._flatten_state(state)
        return np.array(state).astype(np.float32)

    def get_global_special_state(self, is_opponent=False):
        """Returns the agent-specific global states of all agents as one float32 array.

        One state is built per index in ``range(self.n_agents)`` via
        ``get_state_agent`` and the results are stacked.
        """
        per_agent_states = []
        for agent_id in range(self.n_agents):
            per_agent_states.append(self.get_state_agent(agent_id, is_opponent))
        stacked = np.array(per_agent_states)
        return stacked.astype(np.float32)

    def get_global_special_state_size(self, is_opponent=False):
        """Returns the length of one agent-specific global state vector.

        The size mirrors what ``get_state_agent`` concatenates: the flattened
        enemy and ally feature matrices, the own feature vector, the agent id
        one-hot (only when ``self.state_agent_id`` is set) and one timestep
        entry (only when ``self.state_timestep_number`` is set).

        Arguments:
            - is_opponent: accepted for interface symmetry; it does not affect the size.
        """
        enemy_rows, enemy_cols = self.get_state_enemy_feats_size()
        ally_rows, ally_cols = self.get_state_ally_feats_size()
        own_feats_dim = self.get_state_own_feats_size()

        size = enemy_rows * enemy_cols + ally_rows * ally_cols + own_feats_dim
        # The agent id one-hot is appended by get_state_agent only when state_agent_id is
        # enabled; counting it unconditionally over-reports the size by n_agents.
        # A missing attribute keeps the previous behaviour of always counting it.
        if getattr(self, "state_agent_id", True):
            size += self.n_agents
        if self.state_timestep_number:
            size += 1
        return size

    def get_state_agent(self, agent_id, is_opponent=False):
        """Returns the agent-specific global state for agent_id. The state is composed of:

           - ally features (visible, distance, relative_x, relative_y, energy/cooldown,
             then optionally health, shield, centre-relative x/y, unit_type, last action)
           - enemy features (available_to_attack, distance, relative_x, relative_y, visible,
             then optionally health, shield, unit_type, centre-relative x/y)
           - agent unit features (four fixed entries 1, 0, 0, 0, then optionally health,
             shield, centre-relative x/y, unit_type, last action)
           - optionally a one-hot agent id (``state_agent_id``) and the normalised
             timestep (``state_timestep_number``)

           All of this information is flattened and concatenated into one
           vector, in the aforementioned order. To know the sizes of each of the
           feature groups, take a look at the functions
           ``get_state_ally_feats_size()``, ``get_state_enemy_feats_size()`` and
           ``get_state_own_feats_size()``.

           Unlike the local observation, features of allies and enemies are
           filled in for every living unit regardless of sight range; only the
           "visible" flag depends on the distance. When ``death_mask`` is set and
           the agent is dead, the ally, enemy and own features stay all zeros.

           If ``obs_instead_of_state`` is set, the concatenation of all agent
           observations from ``get_obs()`` is returned instead.

           NOTE: apart from the last-action lookup, ``is_opponent`` is not used
           here: the unit, its allies and its enemies are always taken from
           ``self.agents`` / ``self.enemies`` as for the non-opponent side.
        """
        if self.obs_instead_of_state:
            obs_concat = np.concatenate(self.get_obs(), axis=0).astype(np.float32)
            return obs_concat

        unit = self.get_unit_by_id(agent_id)

        enemy_feats_dim = self.get_state_enemy_feats_size()
        ally_feats_dim = self.get_state_ally_feats_size()
        own_feats_dim = self.get_state_own_feats_size()

        enemy_feats = np.zeros(enemy_feats_dim, dtype=np.float32)
        ally_feats = np.zeros(ally_feats_dim, dtype=np.float32)
        own_feats = np.zeros(own_feats_dim, dtype=np.float32)
        agent_id_feats = np.zeros(self.n_agents, dtype=np.float32)

        center_x = self.map_x / 2
        center_y = self.map_y / 2

        if (self.death_mask and unit.health > 0) or (not self.death_mask):  # otherwise dead, return all zeros
            x = unit.pos.x
            y = unit.pos.y
            sight_range = self.unit_sight_range(agent_id)
            last_action = self.action_helper.get_last_action(is_opponent)

            # Available actions; used below for the enemy "available to attack" flag
            avail_actions = self.get_avail_agent_actions(agent_id)

            # Enemy features
            for e_id, e_unit in self.enemies.items():
                e_x = e_unit.pos.x
                e_y = e_unit.pos.y
                dist = self.distance(x, y, e_x, e_y)

                if e_unit.health > 0:  # alive (visibility is recorded as a separate flag below)
                    # Sight range > shoot range
                    # Agent-relative entries are only filled in while the agent itself is alive.
                    if unit.health > 0:
                        enemy_feats[e_id, 0] = avail_actions[self.action_helper.n_actions_no_attack + e_id]  # available
                        enemy_feats[e_id, 1] = dist / sight_range  # distance
                        enemy_feats[e_id, 2] = (e_x - x) / sight_range  # relative X
                        enemy_feats[e_id, 3] = (e_y - y) / sight_range  # relative Y
                        if dist < sight_range:
                            enemy_feats[e_id, 4] = 1  # visible

                    ind = 5
                    if self.obs_all_health:
                        enemy_feats[e_id, ind] = (e_unit.health / e_unit.health_max)  # health
                        ind += 1
                        if self.shield_bits_enemy > 0:
                            max_shield = self.unit_max_shield(e_unit)
                            enemy_feats[e_id, ind] = (e_unit.shield / max_shield)  # shield
                            ind += 1

                    if self.unit_type_bits > 0:
                        type_id = self.get_unit_type_id(e_unit, False)
                        enemy_feats[e_id, ind + type_id] = 1  # unit type
                        ind += self.unit_type_bits

                    if self.add_center_xy:
                        enemy_feats[e_id, ind] = (e_x - center_x) / self.max_distance_x  # center X
                        enemy_feats[e_id, ind + 1] = (e_y - center_y) / self.max_distance_y  # center Y

            # Ally features
            # Every agent except the one the state is built for.
            al_ids = [al_id for al_id in range(self.n_agents) if al_id != agent_id]
            for i, al_id in enumerate(al_ids):

                al_unit = self.get_unit_by_id(al_id)
                al_x = al_unit.pos.x
                al_y = al_unit.pos.y
                dist = self.distance(x, y, al_x, al_y)
                max_cd = self.unit_max_cooldown(al_unit)

                if al_unit.health > 0:  # alive (visibility is recorded as a separate flag below)
                    # Agent-relative entries are only filled in while the agent itself is alive.
                    if unit.health > 0:
                        if dist < sight_range:
                            ally_feats[i, 0] = 1  # visible
                        ally_feats[i, 1] = dist / sight_range  # distance
                        ally_feats[i, 2] = (al_x - x) / sight_range  # relative X
                        ally_feats[i, 3] = (al_y - y) / sight_range  # relative Y

                    if (self.map_type == "MMM" and al_unit.unit_type == self.medivac_id):
                        ally_feats[i, 4] = al_unit.energy / max_cd  # energy
                    else:
                        ally_feats[i, 4] = (al_unit.weapon_cooldown / max_cd)  # cooldown

                    ind = 5
                    if self.obs_all_health:
                        ally_feats[i, ind] = (al_unit.health / al_unit.health_max)  # health
                        ind += 1
                        if self.shield_bits_ally > 0:
                            max_shield = self.unit_max_shield(al_unit)
                            ally_feats[i, ind] = (al_unit.shield / max_shield)  # shield
                            ind += 1

                    if self.add_center_xy:
                        ally_feats[i, ind] = (al_x - center_x) / self.max_distance_x  # center X
                        ally_feats[i, ind + 1] = (al_y - center_y) / self.max_distance_y  # center Y
                        ind += 2

                    if self.unit_type_bits > 0:
                        type_id = self.get_unit_type_id(al_unit, True)
                        ally_feats[i, ind + type_id] = 1
                        ind += self.unit_type_bits

                    if self.state_last_action:
                        # The last action fills the remainder of the ally row.
                        ally_feats[i, ind:] = last_action[al_id]

            # Own features
            # The first four entries use the same slots as (visible, distance, X, Y) of an ally row.
            ind = 0
            own_feats[0] = 1  # visible
            own_feats[1] = 0  # distance
            own_feats[2] = 0  # X
            own_feats[3] = 0  # Y
            ind = 4
            if self.obs_own_health:
                own_feats[ind] = unit.health / unit.health_max
                ind += 1
                if self.shield_bits_ally > 0:
                    max_shield = self.unit_max_shield(unit)
                    own_feats[ind] = unit.shield / max_shield
                    ind += 1

            if self.add_center_xy:
                own_feats[ind] = (x - center_x) / self.max_distance_x  # center X
                own_feats[ind + 1] = (y - center_y) / self.max_distance_y  # center Y
                ind += 2

            if self.unit_type_bits > 0:
                type_id = self.get_unit_type_id(unit, True)
                own_feats[ind + type_id] = 1
                ind += self.unit_type_bits

            if self.state_last_action:
                # The last action fills the remainder of the own-feature vector.
                own_feats[ind:] = last_action[agent_id]

        state = np.concatenate((ally_feats.flatten(), enemy_feats.flatten(), own_feats.flatten()))

        # Agent id features
        if self.state_agent_id:
            agent_id_feats[agent_id] = 1.
            state = np.append(state, agent_id_feats.flatten())

        if self.state_timestep_number:
            state = np.append(state, self._episode_steps / self.episode_limit)

        return state

    def get_state_enemy_feats_size(self):
        """Returns the dimensions of the enemy feature matrix of an agent-specific state.

        Size is n_enemies x n_features, where each row holds 5 base features
        plus the unit type bits, one health entry with the enemy shield bits
        when ``obs_all_health`` is set, and two centre coordinates when
        ``add_center_xy`` is set.
        """
        health_width = 1 + self.shield_bits_enemy if self.obs_all_health else 0
        center_width = 2 if self.add_center_xy else 0
        return self.n_enemies, 5 + self.unit_type_bits + health_width + center_width

    def get_state_ally_feats_size(self):
        """Returns the dimensions of the ally feature matrix of an agent-specific state.

        Size is n_allies x n_features with n_allies = ``n_agents - 1``. Each row
        holds 5 base features plus the unit type bits, one health entry with the
        ally shield bits when ``obs_all_health`` is set, ``n_actions`` entries
        when ``state_last_action`` is set, and two centre coordinates when
        ``add_center_xy`` is set.
        """
        health_width = 1 + self.shield_bits_ally if self.obs_all_health else 0
        last_action_width = self.n_actions if self.state_last_action else 0
        center_width = 2 if self.add_center_xy else 0
        row_width = 5 + self.unit_type_bits + health_width + last_action_width + center_width
        return self.n_agents - 1, row_width

    def get_state_own_feats_size(self):
        """Returns the size of the agent's own feature vector in an agent-specific state.

        The size is 4 base entries plus the unit type bits, one health entry
        with the ally shield bits when ``obs_own_health`` is set, ``n_actions``
        entries when ``state_last_action`` is set, and two centre coordinates
        when ``add_center_xy`` is set.
        """
        health_width = 1 + self.shield_bits_ally if self.obs_own_health else 0
        last_action_width = self.n_actions if self.state_last_action else 0
        center_width = 2 if self.add_center_xy else 0
        return 4 + self.unit_type_bits + health_width + last_action_width + center_width

    @staticmethod
    def distance(x1, y1, x2, y2):
        """Distance between two points."""
        return math.hypot(x2 - x1, y2 - y1)

    def unit_max_cooldown(self, unit, is_opponent=False):
        """Returns the maximal cooldown for a unit.

        The unit type is looked up among the unit type ids of the selected side
        (``is_opponent`` picks the ``*_id_opponent`` attributes). For the
        medivac the value is its maximal energy. Unknown unit types fall back
        to 15. If several ids share a value, the entry listed last wins.
        """
        suffix = "_opponent" if is_opponent else ""
        # (unit kind, maximal cooldown); the order matters when ids coincide.
        cooldown_by_kind = (
            ("marine", 15),
            ("marauder", 25),
            ("medivac", 200),  # max energy
            ("stalker", 35),
            ("zealot", 22),
            ("colossus", 24),
            ("hydralisk", 10),
            ("zergling", 11),
            ("baneling", 1),
        )
        switcher = {getattr(self, kind + "_id" + suffix): cooldown for kind, cooldown in cooldown_by_kind}
        return switcher.get(unit.unit_type, 15)

    def update_units(self):
        """Update units after an environment step and report the battle outcome.

        This function assumes that ``self._obs`` is up-to-date.

        The current ``self.agents`` / ``self.enemies`` are first deep-copied
        into ``self.previous_ally_units`` / ``self.previous_enemy_units``.
        Every tracked unit whose tag appears in the observation is then
        replaced by the observed unit; a tracked unit whose tag is missing is
        treated as dead and its ``health`` is set to ``0``.

        Returns:
            ``-1`` when the battle is lost (no ally alive while an enemy is,
            or ``only_medivac_left(ally=True)``), ``1`` when it is won (an
            ally alive while no enemy is, or ``only_medivac_left(ally=False)``),
            ``0`` when nobody is alive on either side, and ``None`` otherwise.
        """
        # Store previous state
        self.previous_ally_units = copy.deepcopy(self.agents)
        self.previous_enemy_units = copy.deepcopy(self.enemies)

        # Index the observed units by tag once instead of rescanning the whole
        # observation for every tracked unit. ``setdefault`` keeps the first
        # unit seen for a tag, matching the former "first match, then break".
        units_by_tag = {}
        for unit in self._obs.observation.raw_data.units:
            units_by_tag.setdefault(unit.tag, unit)

        alive_counts = []
        for tracked in (self.agents, self.enemies):
            n_alive = 0
            # Only existing keys are reassigned, so iterating ``items()`` is safe.
            for unit_id, old_unit in tracked.items():
                fresh = units_by_tag.get(old_unit.tag)
                if fresh is None:  # dead
                    old_unit.health = 0
                else:
                    tracked[unit_id] = fresh
                    n_alive += 1
            alive_counts.append(n_alive)
        n_ally_alive, n_enemy_alive = alive_counts

        if (n_ally_alive == 0 and n_enemy_alive > 0) or self.only_medivac_left(ally=True):
            return -1  # lost
        if (n_ally_alive > 0 and n_enemy_alive == 0) or self.only_medivac_left(ally=False):
            return 1  # won
        if n_ally_alive == 0 and n_enemy_alive == 0:
            return 0

        return None

    def only_medivac_left(self, ally):
        """Check if only Medivac units are left on one side.

        Args:
            ally: ``True`` to inspect ``self.agents``, ``False`` to inspect
                ``self.enemies``.

        Returns:
            Always ``False`` unless ``self.map_type == "MMM"``.
            For allies: ``True`` when no living non-Medivac ally remains
            (this includes the case where no ally is alive at all).
            For enemies: ``True`` when at least one enemy is alive and every
            living enemy is a Medivac.
        """
        if self.map_type != "MMM":
            return False

        custom_medivac_types = (self.medivac_id, self.medivac_id_opponent)

        if ally:
            return not any(
                a.health > 0 and a.unit_type not in custom_medivac_types for a in self.agents.values()
            )

        # The enemy check used to drop Medivacs from the list and then require
        # the single remaining unit to be a Medivac (type 54), so a side left
        # with only ``medivac_id_opponent`` units was never detected. Accept
        # the map-specific ids as well as 54, the stock Medivac unit type id.
        stock_medivac_type = 54
        enemy_medivac_types = custom_medivac_types + (stock_medivac_type, )
        alive_enemies = [e for e in self.enemies.values() if e.health > 0]
        return bool(alive_enemies) and all(e.unit_type in enemy_medivac_types for e in alive_enemies)

    @property
    def n_actions(self):
        """Number of actions, as reported by ``self.action_helper.n_actions``."""
        return self.action_helper.n_actions

    @property
    def n_actions_opponent(self):
        """Number of actions for the opponent; always equal to ``self.n_actions``."""
        return self.n_actions

    # Workaround
    def get_avail_agent_actions(self, agent_id, is_opponent=False):
        """Return the available actions of one agent.

        Thin pass-through to ``self.action_helper.get_avail_agent_actions``,
        which receives ``agent_id``, this environment and ``is_opponent``.
        """
        helper = self.action_helper
        return helper.get_avail_agent_actions(agent_id, self, is_opponent)

    def unit_sight_range(self, agent_id=None):
        """Returns the sight range for an agent.

        The value is the constant ``9``; ``agent_id`` is accepted but not used.
        """
        return 9

    @staticmethod
    def _flatten_obs(obs):
        """Flatten every per-agent observation dict in ``obs`` via ``_flatten``.

        The feature groups are concatenated in a fixed order;
        ``"obs_timestep_number"`` is appended last when the agent's dict
        contains it.
        """
        base_keys = ("move_feats", "enemy_feats", "ally_feats", "own_feats", "agent_id_feats")

        def _select_keys(agent_obs):
            if "obs_timestep_number" in agent_obs:
                return [*base_keys, "obs_timestep_number"]
            return list(base_keys)

        return _flatten(obs, _select_keys)

    @staticmethod
    def _flatten_state(state):
        """Flatten a single state dict via ``_flatten`` and return the result.

        ``"ally_state"`` and ``"enemy_state"`` are always used; ``"last_action"``
        and ``"state_timestep_number"`` follow, in that order, when present in
        the dict.
        """
        optional_keys = ("last_action", "state_timestep_number")

        def _select_keys(s):
            return ["ally_state", "enemy_state"] + [key for key in optional_keys if key in s]

        flattened = _flatten([state], _select_keys)
        return flattened[0]

    def get_avail_actions(self, is_opponent=False):
        """Return the available-action mask as a ``float32`` array.

        The mask comes from ``self.action_helper.get_avail_actions``, which
        receives this environment and ``is_opponent``.
        """
        mask = self.action_helper.get_avail_actions(self, is_opponent)
        return np.array(mask).astype(np.float32)

    def get_obs_space(self, is_opponent=False):
        """Describe the shapes of the observation entries for one side.

        Args:
            is_opponent: When true the agent count is ``self.n_enemies``
                instead of ``self.n_agents``; the flag is also forwarded to
                every size helper.

        Returns:
            ``EnvElementInfo(shapes, None)`` where ``shapes`` maps entry names
            to shape tuples. ``'agent_state'``, ``'global_state'`` and
            ``'action_mask'`` are always present; the two ``'agent_alone_*'``
            entries are added when ``self.obs_alone`` is set. The per-agent
            ``'global_state'`` variant is used only when ``self.obs_alone`` is
            unset and ``self.special_global_state`` is set.
        """
        agent_num = self.n_enemies if is_opponent else self.n_agents
        alone = self.obs_alone

        # Keys are inserted in the same order as the former per-branch literals.
        shapes = {'agent_state': (agent_num, self.get_obs_size(is_opponent))}
        if alone:
            shapes['agent_alone_state'] = (agent_num, self.get_obs_alone_size(is_opponent))
            shapes['agent_alone_padding_state'] = (agent_num, self.get_obs_size(is_opponent))

        if not alone and self.special_global_state:
            shapes['global_state'] = (agent_num, self.get_global_special_state_size(is_opponent))
        else:
            shapes['global_state'] = (self.get_state_size(is_opponent), )

        shapes['action_mask'] = (agent_num, *self.action_helper.info().shape)
        return EnvElementInfo(shapes, None)

    @property
    def observation_space(self):
        """Return the stored ``self._observation_space``."""
        return self._observation_space

    @property
    def action_space(self):
        """Return the stored ``self._action_space``."""
        return self._action_space

    @property
    def reward_space(self):
        """Return the stored ``self._reward_space``."""
        return self._reward_space

    def __repr__(self):
        """Return a fixed, human-readable name for this environment."""
        return "DI-engine SMAC Env"


def _flatten(obs, get_keys):
    new_obs = list()
    for agent_obs in obs:
        keys = get_keys(agent_obs)
        new_agent_obs = np.concatenate([agent_obs[feat_key].flatten() for feat_key in keys])
        new_obs.append(new_agent_obs)
    return new_obs


# Expose the namedtuple types defined on SMACEnv under module-level names.
SMACTimestep = SMACEnv.SMACTimestep
SMACEnvInfo = SMACEnv.SMACEnvInfo