Spaces:

zjowowen
/

gomoku

Sleeping

App Files Files Community

gomoku / DI-engine /dizoo /minigrid /envs /noisy_tv.py

zjowowen

init space

079c32c 11 months ago

raw

history blame

7.25 kB

	from minigrid.core.grid import Grid
	from minigrid.core.mission import MissionSpace
	from minigrid.minigrid_env import *
	from minigrid.utils.rendering import *
	from minigrid.core.world_object import WorldObj
	import random


	class NoisyTVEnv(MiniGridEnv):
	    """
	    ### Description

	    Classic four room reinforcement learning environment with random noise. The agent must
	    navigate in a maze composed of four rooms interconnected by 4 gaps in the
	    walls. To obtain a reward, the agent must reach the green goal square. Both
	    the agent and the goal square are randomly placed in any of the four rooms
	    unless `agent_pos` / `goal_pos` are given to the constructor.

	    In addition, `noisy_tile_num` distinct "noisy" tile positions are drawn once
	    when the environment is constructed. They perturb both the observation
	    (see `_add_noise`) and the reward (see `_reward_noise`).

	    ### Mission Space

	    "reach the goal"

	    ### Action Space

	    | Num | Name         | Action       |
	    |-----|--------------|--------------|
	    | 0   | left         | Turn left    |
	    | 1   | right        | Turn right   |
	    | 2   | forward      | Move forward |
	    | 3   | pickup       | Unused       |
	    | 4   | drop         | Unused       |
	    | 5   | toggle       | Unused       |
	    | 6   | done         | Unused       |

	    ### Observation Encoding

	    - Each tile is encoded as a 3 dimensional tuple:
	      `(OBJECT_IDX, COLOR_IDX, STATE)`
	    - `OBJECT_TO_IDX` and `COLOR_TO_IDX` mapping can be found in
	      [minigrid/minigrid.py](minigrid/minigrid.py)
	    - `STATE` refers to the door state with 0=open, 1=closed and 2=locked
	    - Observations returned by `step` have `obs['image']` converted to float,
	      with 0.5 added to the `COLOR_IDX` channel of every noisy tile in view.

	    ### Rewards

	    The value of `self._reward()` is given for reaching the goal, and '0' otherwise.
	    A noisy reward drawn with `self._rand_float(0.05, 0.1)` is given after a
	    forward action that leaves the agent on a noisy tile. Reaching the goal
	    always yields the success reward, even when the goal lies on a noisy tile.

	    ### Termination

	    The episode ends if any one of the following conditions is met:

	    1. The agent reaches the goal.
	    2. Timeout (see `max_steps`).

	    ### Registered Configurations

	    - `MiniGrid-NoisyTV-v0`

	    """

	    def __init__(self, agent_pos=None, goal_pos=None, noisy_tile_num=4, max_steps=100, **kwargs):
	        """
	        Build the 19x19 four-room grid and draw the noisy tile positions.

	        Arguments:
	            - agent_pos: fixed (x, y) start cell of the agent, or None for a random start.
	            - goal_pos: fixed (x, y) cell of the goal, or None for a random goal.
	            - noisy_tile_num: number of distinct noisy tile positions to draw.
	            - max_steps: step budget after which `step` reports truncation.
	            - kwargs: forwarded unchanged to `MiniGridEnv.__init__`.

	        Raises:
	            - ValueError: if `noisy_tile_num` exceeds the number of distinct positions.
	        """
	        self._agent_default_pos = agent_pos
	        self._goal_default_pos = goal_pos
	        self.size = 19
	        self._noisy_tile_num = noisy_tile_num

	        # `_rand_int(low, high)` is used as a half-open draw elsewhere in this class
	        # (e.g. `_rand_int(0, 4)` for the four directions), so each coordinate has
	        # `size - 2` possible values. Asking for more distinct tiles than that would
	        # make the rejection loop below spin forever.
	        max_tiles = (self.size - 2) ** 2
	        if noisy_tile_num > max_tiles:
	            raise ValueError(
	                f"noisy_tile_num={noisy_tile_num} exceeds the {max_tiles} distinct positions available"
	            )

	        # NOTE(review): positions are drawn once here, before the base class is
	        # initialised, and are never redrawn in `_gen_grid`. Whether these draws
	        # honour a seed supplied later depends on `MiniGridEnv._rand_int` - confirm.
	        # Positions may coincide with inner walls, in which case the tile only
	        # affects the observation, never the reward.
	        self._noisy_tile_pos = []
	        for _ in range(noisy_tile_num):
	            while True:
	                candidate = (self._rand_int(1, self.size - 1), self._rand_int(1, self.size - 1))
	                if candidate not in self._noisy_tile_pos:
	                    break
	            self._noisy_tile_pos.append(candidate)

	        mission_space = MissionSpace(mission_func=lambda: "reach the goal")

	        super().__init__(
	            mission_space=mission_space,
	            width=self.size,
	            height=self.size,
	            max_steps=max_steps,
	            **kwargs
	        )

	    def _reward_noise(self):
	        """
	        Compute the reward to be given upon reaching a noisy tile.

	        The value is drawn with `self._rand_float(0.05, 0.1)`.
	        """
	        return self._rand_float(0.05, 0.1)

	    def _add_noise(self, obs):
	        """
	        Add noise to obs['image'].

	        The image is converted to float and 0.5 is added to channel 1 (the
	        `COLOR_IDX` channel per the class docstring) of every noisy tile that
	        is currently in the agent's view. `obs` is modified in place and returned.
	        """
	        image = obs['image'].astype(float)
	        for pos in self._noisy_tile_pos:
	            # Only tiles inside the agent's view appear in the image
	            # (the original comment states the view is 7*7).
	            if self.in_view(pos[0], pos[1]):
	                # Unpack and index directly so the write always lands in `image`,
	                # whatever sequence type `relative_coords` returns.
	                vx, vy = self.relative_coords(pos[0], pos[1])
	                image[vx, vy, 1] += 0.5
	        # Assigned unconditionally so the returned image dtype does not depend on
	        # whether a noisy tile happens to be visible.
	        obs['image'] = image
	        return obs

	    def _gen_grid(self, width, height):
	        """
	        Create the four-room layout, then place the agent and the goal.

	        Arguments:
	            - width: grid width in cells.
	            - height: grid height in cells.
	        """
	        # Create the grid
	        self.grid = Grid(width, height)

	        # Generate the surrounding walls
	        self.grid.horz_wall(0, 0)
	        self.grid.horz_wall(0, height - 1)
	        self.grid.vert_wall(0, 0)
	        self.grid.vert_wall(width - 1, 0)

	        room_w = width // 2
	        room_h = height // 2

	        # For each row of rooms
	        for j in range(0, 2):

	            # For each column
	            for i in range(0, 2):
	                xL = i * room_w
	                yT = j * room_h
	                xR = xL + room_w
	                yB = yT + room_h

	                # Right wall and door (only for rooms that have a neighbour to the right)
	                if i + 1 < 2:
	                    self.grid.vert_wall(xR, yT, room_h)
	                    pos = (xR, self._rand_int(yT + 1, yB))
	                    self.grid.set(*pos, None)

	                # Bottom wall and door (only for rooms that have a neighbour below)
	                if j + 1 < 2:
	                    self.grid.horz_wall(xL, yB, room_w)
	                    pos = (self._rand_int(xL + 1, xR), yB)
	                    self.grid.set(*pos, None)

	        # Randomize the player start position and orientation
	        if self._agent_default_pos is not None:
	            self.agent_pos = self._agent_default_pos
	            # Clear whatever occupies the requested start cell
	            self.grid.set(*self._agent_default_pos, None)
	            # assuming random start direction
	            self.agent_dir = self._rand_int(0, 4)
	        else:
	            self.place_agent()

	        if self._goal_default_pos is not None:
	            goal = Goal()
	            self.put_obj(goal, *self._goal_default_pos)
	            # Both attributes hold the full (x, y) position. The previous tuple
	            # unpacking stored x in init_pos and y in cur_pos.
	            goal.init_pos = goal.cur_pos = self._goal_default_pos
	        else:
	            self.place_obj(Goal())

	    def step(self, action):
	        """
	        Advance the environment by one action.

	        Arguments:
	            - action: one of the members of `self.actions`.

	        Returns:
	            - (obs, reward, terminated, truncated, info), where `obs` has gone
	              through `_add_noise` and `info` is an empty dict.

	        Raises:
	            - ValueError: if `action` is not a known action.
	        """
	        self.step_count += 1

	        reward = 0
	        terminated = False
	        truncated = False

	        # Get the position in front of the agent
	        fwd_pos = self.front_pos

	        # Get the contents of the cell in front of the agent
	        fwd_cell = self.grid.get(*fwd_pos)

	        # Rotate left
	        if action == self.actions.left:
	            self.agent_dir -= 1
	            if self.agent_dir < 0:
	                self.agent_dir += 4

	        # Rotate right
	        elif action == self.actions.right:
	            self.agent_dir = (self.agent_dir + 1) % 4

	        # Move forward
	        elif action == self.actions.forward:
	            reached_goal = False
	            if fwd_cell is None or fwd_cell.can_overlap():
	                self.agent_pos = tuple(fwd_pos)
	            if fwd_cell is not None and fwd_cell.type == "goal":
	                terminated = True
	                reached_goal = True
	                reward = self._reward()
	            if fwd_cell is not None and fwd_cell.type == "lava":
	                terminated = True
	            # If the agent is on a noisy tile, return a noisy reward. The success
	            # reward takes precedence so that reaching a goal which happens to lie
	            # on a noisy tile is still rewarded as a success. The position is
	            # normalised to a tuple because `_noisy_tile_pos` stores tuples and
	            # `agent_pos` may have been supplied as a list or an array.
	            if not reached_goal and tuple(self.agent_pos) in self._noisy_tile_pos:
	                reward = self._reward_noise()

	        # Pick up an object
	        elif action == self.actions.pickup:
	            if fwd_cell and fwd_cell.can_pickup():
	                if self.carrying is None:
	                    self.carrying = fwd_cell
	                    self.carrying.cur_pos = np.array([-1, -1])
	                    self.grid.set(fwd_pos[0], fwd_pos[1], None)

	        # Drop an object
	        elif action == self.actions.drop:
	            if not fwd_cell and self.carrying:
	                self.grid.set(fwd_pos[0], fwd_pos[1], self.carrying)
	                self.carrying.cur_pos = fwd_pos
	                self.carrying = None

	        # Toggle/activate an object
	        elif action == self.actions.toggle:
	            if fwd_cell:
	                fwd_cell.toggle(self, fwd_pos)

	        # Done action (not used by default)
	        elif action == self.actions.done:
	            pass

	        else:
	            raise ValueError(f"Unknown action: {action}")

	        if self.step_count >= self.max_steps:
	            truncated = True

	        if self.render_mode == "human":
	            self.render()

	        obs = self.gen_obs()
	        obs = self._add_noise(obs)

	        return obs, reward, terminated, truncated, {}