Spaces:

zjowowen
/

gomoku

Sleeping

File size: 7,249 Bytes

079c32c

from minigrid.core.grid import Grid
from minigrid.core.mission import MissionSpace
from minigrid.minigrid_env import *
from minigrid.utils.rendering import *
from minigrid.core.world_object import WorldObj
import random


class NoisyTVEnv(MiniGridEnv):
    """
    ### Description

    Classic four room reinforcement learning environment with random noise. The agent must
    navigate in a maze composed of four rooms interconnected by 4 gaps in the
    walls. To obtain a reward, the agent must reach the green goal square. Both
    the agent and the goal square are randomly placed in any of the four rooms.

    ### Mission Space

    "reach the goal"

    ### Action Space

    | Num | Name         | Action       |
    |-----|--------------|--------------|
    | 0   | left         | Turn left    |
    | 1   | right        | Turn right   |
    | 2   | forward      | Move forward |
    | 3   | pickup       | Unused       |
    | 4   | drop         | Unused       |
    | 5   | toggle       | Unused       |
    | 6   | done         | Unused       |

    ### Observation Encoding

    - Each tile is encoded as a 3 dimensional tuple:
        `(OBJECT_IDX, COLOR_IDX, STATE)`
    - `OBJECT_TO_IDX` and `COLOR_TO_IDX` mapping can be found in
        [minigrid/minigrid.py](minigrid/minigrid.py)
    - `STATE` refers to the door state with 0=open, 1=closed and 2=locked

    ### Rewards

    A reward of '1' is given for success, and '0' for failure.
    Noisy reward are given upon reaching a noisy tile. Noise obeys Gaussian distribution.

    ### Termination

    The episode ends if any one of the following conditions is met:

    1. The agent reaches the goal.
    2. Timeout (see `max_steps`).

    ### Registered Configurations

    - `MiniGrid-NoisyTV-v0`

    """

    def __init__(self, agent_pos=None, goal_pos=None, noisy_tile_num=4, **kwargs):
        self._agent_default_pos = agent_pos
        self._goal_default_pos = goal_pos
        self.size = 19
        self._noisy_tile_num = noisy_tile_num
        self._noisy_tile_pos = []
        for i in range(self._noisy_tile_num):
            pos2 = (self._rand_int(1, self.size - 1), self._rand_int(1, self.size - 1))
            while pos2 in self._noisy_tile_pos:
                pos2 = (self._rand_int(1, self.size - 1), self._rand_int(1, self.size - 1))
            self._noisy_tile_pos.append(pos2)
        mission_space = MissionSpace(mission_func=lambda: "reach the goal")

        super().__init__(mission_space=mission_space, width=self.size, height=self.size, max_steps=100, **kwargs)

    def _reward_noise(self):
        """
        Compute the reward to be given upon reach a noisy tile
        """
        return self._rand_float(0.05, 0.1)

    def _add_noise(self, obs):
        """
        Add noise to obs['image']
        """
        image = obs['image'].astype(float)
        for pos in self._noisy_tile_pos:
            if self.in_view(pos[0], pos[1]):  # if noisy tile is in the view of agent, the view of agent is 7*7.
                relative_pos = self.relative_coords(pos[0], pos[1])
                image[relative_pos][1] += 0.5
                obs['image'] = image
        return obs

    def _gen_grid(self, width, height):
        # Create the grid
        self.grid = Grid(width, height)

        # Generate the surrounding walls
        self.grid.horz_wall(0, 0)
        self.grid.horz_wall(0, height - 1)
        self.grid.vert_wall(0, 0)
        self.grid.vert_wall(width - 1, 0)

        room_w = width // 2
        room_h = height // 2

        # For each row of rooms
        for j in range(0, 2):

            # For each column
            for i in range(0, 2):
                xL = i * room_w
                yT = j * room_h
                xR = xL + room_w
                yB = yT + room_h

                # Bottom wall and door
                if i + 1 < 2:
                    self.grid.vert_wall(xR, yT, room_h)
                    pos = (xR, self._rand_int(yT + 1, yB))
                    self.grid.set(*pos, None)

                # Bottom wall and door
                if j + 1 < 2:
                    self.grid.horz_wall(xL, yB, room_w)
                    pos = (self._rand_int(xL + 1, xR), yB)
                    self.grid.set(*pos, None)

        # Randomize the player start position and orientation
        if self._agent_default_pos is not None:
            self.agent_pos = self._agent_default_pos
            self.grid.set(*self._agent_default_pos, None)
            # assuming random start direction
            self.agent_dir = self._rand_int(0, 4)
        else:
            self.place_agent()

        if self._goal_default_pos is not None:
            goal = Goal()
            self.put_obj(goal, *self._goal_default_pos)
            goal.init_pos, goal.cur_pos = self._goal_default_pos
        else:
            self.place_obj(Goal())

    def step(self, action):
        self.step_count += 1

        reward = 0
        terminated = False
        truncated = False

        # Get the position in front of the agent
        fwd_pos = self.front_pos

        # Get the contents of the cell in front of the agent
        fwd_cell = self.grid.get(*fwd_pos)

        # Rotate left
        if action == self.actions.left:
            self.agent_dir -= 1
            if self.agent_dir < 0:
                self.agent_dir += 4

        # Rotate right
        elif action == self.actions.right:
            self.agent_dir = (self.agent_dir + 1) % 4

        # Move forward
        elif action == self.actions.forward:
            if fwd_cell is None or fwd_cell.can_overlap():
                self.agent_pos = tuple(fwd_pos)
            if fwd_cell is not None and fwd_cell.type == "goal":
                terminated = True
                reward = self._reward()
            if fwd_cell is not None and fwd_cell.type == "lava":
                terminated = True
            # if agent reach noisy tile, return noisy reward.
            if self.agent_pos in self._noisy_tile_pos:
                reward = self._reward_noise()

        # Pick up an object
        elif action == self.actions.pickup:
            if fwd_cell and fwd_cell.can_pickup():
                if self.carrying is None:
                    self.carrying = fwd_cell
                    self.carrying.cur_pos = np.array([-1, -1])
                    self.grid.set(fwd_pos[0], fwd_pos[1], None)

        # Drop an object
        elif action == self.actions.drop:
            if not fwd_cell and self.carrying:
                self.grid.set(fwd_pos[0], fwd_pos[1], self.carrying)
                self.carrying.cur_pos = fwd_pos
                self.carrying = None

        # Toggle/activate an object
        elif action == self.actions.toggle:
            if fwd_cell:
                fwd_cell.toggle(self, fwd_pos)

        # Done action (not used by default)
        elif action == self.actions.done:
            pass

        else:
            raise ValueError(f"Unknown action: {action}")

        if self.step_count >= self.max_steps:
            truncated = True

        if self.render_mode == "human":
            self.render()

        obs = self.gen_obs()
        obs = self._add_noise(obs)

        return obs, reward, terminated, truncated, {}