File size: 7,249 Bytes
079c32c |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 |
from minigrid.core.grid import Grid
from minigrid.core.mission import MissionSpace
from minigrid.minigrid_env import *
from minigrid.utils.rendering import *
from minigrid.core.world_object import WorldObj
import random
class NoisyTVEnv(MiniGridEnv):
    """
    ### Description
    Classic four room reinforcement learning environment with random noise.
    The agent must navigate in a maze composed of four rooms interconnected
    by 4 gaps in the walls. To obtain a reward, the agent must reach the
    green goal square. Both the agent and the goal square are randomly
    placed in any of the four rooms unless explicit positions are given.

    A fixed set of "noisy" tiles is sampled once at construction time.
    Moving forward while ending up on such a tile yields a small random
    reward, and noisy tiles inside the agent's view perturb the observation.

    ### Mission Space
    "reach the goal"

    ### Action Space
    | Num | Name         | Action       |
    |-----|--------------|--------------|
    | 0   | left         | Turn left    |
    | 1   | right        | Turn right   |
    | 2   | forward      | Move forward |
    | 3   | pickup       | Unused       |
    | 4   | drop         | Unused       |
    | 5   | toggle       | Unused       |
    | 6   | done         | Unused       |

    ### Observation Encoding
    - Each tile is encoded as a 3 dimensional tuple:
      `(OBJECT_IDX, COLOR_IDX, STATE)`
    - `OBJECT_TO_IDX` and `COLOR_TO_IDX` mapping can be found in
      [minigrid/minigrid.py](minigrid/minigrid.py)
    - `STATE` refers to the door state with 0=open, 1=closed and 2=locked
    - Observations returned by `step` have `obs['image']` converted to
      float, with 0.5 added to the second channel of every noisy tile that
      is in view.

    ### Rewards
    `self._reward()` is given for reaching the goal, and '0' otherwise.
    A noisy reward drawn with `self._rand_float(0.05, 0.1)` is given when a
    forward action leaves the agent on a noisy tile.
    NOTE(review): this docstring used to describe the noise as Gaussian;
    `_rand_float` is presumably a uniform draw - confirm against MiniGridEnv.

    ### Termination
    The episode ends if any one of the following conditions is met:
    1. The agent reaches the goal.
    2. Timeout (see `max_steps`).

    ### Registered Configurations
    - `MiniGrid-NoisyTV-v0`
    """

    def __init__(self, agent_pos=None, goal_pos=None, noisy_tile_num=4, max_steps=100, **kwargs):
        """
        Build the environment and sample the noisy tile positions.

        Args:
            agent_pos: optional fixed `(x, y)` start cell for the agent;
                a random cell is used when `None`.
            goal_pos: optional fixed `(x, y)` cell for the goal; a random
                cell is used when `None`.
            noisy_tile_num: number of distinct noisy tiles to sample.
            max_steps: step budget after which `step` reports truncation.
            **kwargs: forwarded unchanged to `MiniGridEnv.__init__`.

        Raises:
            ValueError: if `noisy_tile_num` exceeds the number of distinct
                cells that can be sampled (the sampling loop would never end).
        """
        self._agent_default_pos = agent_pos
        self._goal_default_pos = goal_pos
        self.size = 19
        self._noisy_tile_num = noisy_tile_num

        # Coordinates are drawn from [1, size - 1) on each axis (assuming the
        # upper bound of `_rand_int` is exclusive), i.e. the non-border cells.
        candidate_cells = (self.size - 2) ** 2
        if noisy_tile_num > candidate_cells:
            raise ValueError(
                f"noisy_tile_num={noisy_tile_num} exceeds the {candidate_cells} "
                "cells available for noisy tiles"
            )

        # Rejection-sample distinct positions. Tiles may land on interior
        # walls; those can never be stepped on but still affect observations.
        self._noisy_tile_pos = []
        while len(self._noisy_tile_pos) < noisy_tile_num:
            candidate = (
                self._rand_int(1, self.size - 1),
                self._rand_int(1, self.size - 1),
            )
            if candidate not in self._noisy_tile_pos:
                self._noisy_tile_pos.append(candidate)

        mission_space = MissionSpace(mission_func=lambda: "reach the goal")
        super().__init__(
            mission_space=mission_space,
            width=self.size,
            height=self.size,
            max_steps=max_steps,
            **kwargs,
        )

    def _reward_noise(self):
        """
        Compute the reward to be given upon reaching a noisy tile.
        """
        return self._rand_float(0.05, 0.1)

    def _add_noise(self, obs):
        """
        Add noise to obs['image'].

        The image is converted to float and 0.5 is added to channel 1 of
        every noisy tile that is currently in the agent's view. The `obs`
        dict is updated in place and also returned.
        """
        image = obs['image'].astype(float)
        for tile_x, tile_y in self._noisy_tile_pos:
            if not self.in_view(tile_x, tile_y):
                continue
            view_coords = self.relative_coords(tile_x, tile_y)
            if view_coords is None:
                # Defensive: nothing to perturb without view coordinates.
                continue
            view_x, view_y = view_coords
            image[view_x, view_y, 1] += 0.5
        obs['image'] = image
        return obs

    def _gen_grid(self, width, height):
        """
        Build the four-room layout, then place the agent and the goal.
        """
        # Imported here explicitly: the module-level star imports are not
        # guaranteed to re-export `Goal`.
        from minigrid.core.world_object import Goal

        # Create the grid
        self.grid = Grid(width, height)

        # Generate the surrounding walls
        self.grid.horz_wall(0, 0)
        self.grid.horz_wall(0, height - 1)
        self.grid.vert_wall(0, 0)
        self.grid.vert_wall(width - 1, 0)

        room_w = width // 2
        room_h = height // 2

        # For each row of rooms
        for j in range(2):
            # For each column
            for i in range(2):
                x_left = i * room_w
                y_top = j * room_h
                x_right = x_left + room_w
                y_bottom = y_top + room_h

                # Right wall and door (only for the left column of rooms)
                if i + 1 < 2:
                    self.grid.vert_wall(x_right, y_top, room_h)
                    gap = (x_right, self._rand_int(y_top + 1, y_bottom))
                    self.grid.set(*gap, None)

                # Bottom wall and door (only for the top row of rooms)
                if j + 1 < 2:
                    self.grid.horz_wall(x_left, y_bottom, room_w)
                    gap = (self._rand_int(x_left + 1, x_right), y_bottom)
                    self.grid.set(*gap, None)

        # Randomize the player start position and orientation
        if self._agent_default_pos is not None:
            self.agent_pos = self._agent_default_pos
            self.grid.set(*self._agent_default_pos, None)
            # assuming random start direction
            self.agent_dir = self._rand_int(0, 4)
        else:
            self.place_agent()

        if self._goal_default_pos is not None:
            goal = Goal()
            self.put_obj(goal, *self._goal_default_pos)
            # Both attributes hold the full (x, y) position. Tuple-unpacking
            # the position here would store x in init_pos and y in cur_pos.
            goal.init_pos = goal.cur_pos = tuple(self._goal_default_pos)
        else:
            self.place_obj(Goal())

    def step(self, action):
        """
        Advance the environment by one action.

        Returns:
            A tuple `(obs, reward, terminated, truncated, info)` where `obs`
            has been passed through `_add_noise` and `info` is an empty dict.

        Raises:
            ValueError: if `action` is not one of `self.actions`.
        """
        self.step_count += 1

        reward = 0
        terminated = False
        truncated = False

        # Get the position in front of the agent
        fwd_pos = self.front_pos

        # Get the contents of the cell in front of the agent
        fwd_cell = self.grid.get(*fwd_pos)

        # Rotate left
        if action == self.actions.left:
            self.agent_dir = (self.agent_dir - 1) % 4

        # Rotate right
        elif action == self.actions.right:
            self.agent_dir = (self.agent_dir + 1) % 4

        # Move forward
        elif action == self.actions.forward:
            if fwd_cell is None or fwd_cell.can_overlap():
                self.agent_pos = tuple(fwd_pos)
            if fwd_cell is not None:
                if fwd_cell.type == "goal":
                    terminated = True
                    reward = self._reward()
                if fwd_cell.type == "lava":
                    terminated = True
            # If the agent is on a noisy tile after the forward action,
            # return the noisy reward. tuple() keeps the membership test
            # well-defined even if agent_pos is stored as an array.
            # NOTE(review): this overwrites the goal reward when the goal
            # sits on a noisy tile - confirm that is intended.
            if tuple(self.agent_pos) in self._noisy_tile_pos:
                reward = self._reward_noise()

        # Pick up an object
        elif action == self.actions.pickup:
            if fwd_cell and fwd_cell.can_pickup():
                if self.carrying is None:
                    self.carrying = fwd_cell
                    self.carrying.cur_pos = np.array([-1, -1])
                    self.grid.set(fwd_pos[0], fwd_pos[1], None)

        # Drop an object
        elif action == self.actions.drop:
            if not fwd_cell and self.carrying:
                self.grid.set(fwd_pos[0], fwd_pos[1], self.carrying)
                self.carrying.cur_pos = fwd_pos
                self.carrying = None

        # Toggle/activate an object
        elif action == self.actions.toggle:
            if fwd_cell:
                fwd_cell.toggle(self, fwd_pos)

        # Done action (not used by default)
        elif action == self.actions.done:
            pass

        else:
            raise ValueError(f"Unknown action: {action}")

        if self.step_count >= self.max_steps:
            truncated = True

        if self.render_mode == "human":
            self.render()

        # NOTE(review): only step() observations are noised; reset() is not
        # overridden here, so its observation is returned unmodified.
        obs = self._add_noise(self.gen_obs())

        return obs, reward, terminated, truncated, {}
|