# Copyright 2019 The dm_control Authors.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#    http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ============================================================================
"""Quadruped Domain."""

import collections
import os

from dm_control import mujoco
from dm_control.mujoco.wrapper import mjbindings
from dm_control.rl import control
from dm_control.suite import base
from dm_control.suite import common
from dm_control.suite import quadruped
from dm_control.utils import containers
from dm_control.utils import io as resources
from dm_control.utils import rewards
from dm_control.utils import xml_tools
from lxml import etree
import numpy as np
from scipy import ndimage

enums = mjbindings.enums
mjlib = mjbindings.mjlib

_DEFAULT_TIME_LIMIT = 20
_CONTROL_TIMESTEP = .02

# Horizontal speeds above which the move reward is 1.
_RUN_SPEED = 5
_WALK_SPEED = 0.5

# Height thresholds used by the posture-based rewards.
_JUMP_HEIGHT = 1.0  # Centre-of-mass height above which the jump reward is 1.
_LIE_DOWN_HEIGHT = 0.2
_FOOT_DOWN_HEIGHT = 0.2
_FOOT_UP_HEIGHT = 0.8

# Constants related to terrain generation.
_HEIGHTFIELD_ID = 0
_TERRAIN_SMOOTHNESS = 0.15  # 0.0: maximally bumpy; 1.0: completely smooth.
_TERRAIN_BUMP_SCALE = 2  # Spatial scale of terrain bumps (in meters).

# Named model elements.
_TOES = ['toe_front_left', 'toe_back_left', 'toe_back_right', 'toe_front_right']
_WALLS = ['wall_px', 'wall_py', 'wall_nx', 'wall_ny']

# Registry of tasks; `make()` below looks tasks up here by name.
SUITE = containers.TaggedTasks()


def make(task,
         task_kwargs=None,
         environment_kwargs=None,
         visualize_reward=False):
    """Returns an environment for the named task, looked up in `SUITE`."""
    task_kwargs = task_kwargs or {}
    if environment_kwargs is not None:
        task_kwargs = task_kwargs.copy()
        task_kwargs['environment_kwargs'] = environment_kwargs
    env = SUITE[task](**task_kwargs)
    env.task.visualize_reward = visualize_reward
    return env


def get_model_and_assets():
    """Returns a tuple containing the model XML string and a dict of assets."""
    root_dir = os.path.dirname(os.path.dirname(__file__))
    xml = resources.GetResource(
        os.path.join(root_dir, 'custom_dmc_tasks', 'quadruped.xml'))
    return xml, common.ASSETS


def make_model(floor_size=None, terrain=False, rangefinders=False,
               walls_and_ball=False):
    """Returns the model XML string."""
    root_dir = os.path.dirname(os.path.dirname(__file__))
    xml_string = common.read_model(
        os.path.join(root_dir, 'custom_dmc_tasks', 'quadruped.xml'))
    parser = etree.XMLParser(remove_blank_text=True)
    mjcf = etree.XML(xml_string, parser)

    # Set floor size.
    if floor_size is not None:
        floor_geom = mjcf.find('.//geom[@name=\'floor\']')
        floor_geom.attrib['size'] = f'{floor_size} {floor_size} .5'

    # Remove walls, ball and target.
    if not walls_and_ball:
        for wall in _WALLS:
            wall_geom = xml_tools.find_element(mjcf, 'geom', wall)
            wall_geom.getparent().remove(wall_geom)

        # Remove ball.
        ball_body = xml_tools.find_element(mjcf, 'body', 'ball')
        ball_body.getparent().remove(ball_body)

        # Remove target.
        target_site = xml_tools.find_element(mjcf, 'site', 'target')
        target_site.getparent().remove(target_site)

    # Remove terrain.
    if not terrain:
        terrain_geom = xml_tools.find_element(mjcf, 'geom', 'terrain')
        terrain_geom.getparent().remove(terrain_geom)

    # Remove rangefinders if they're not used, as range computations can be
    # expensive, especially in a scene with heightfields.
    if not rangefinders:
        rangefinder_sensors = mjcf.findall('.//rangefinder')
        for rf in rangefinder_sensors:
            rf.getparent().remove(rf)

    return etree.tostring(mjcf, pretty_print=True)
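

# Example (a minimal sketch, not part of the original module): `make_model`
# can be used to customize the arena before loading it into `Physics`. The
# keyword values below are illustrative, not canonical defaults.
#
#   xml_string = make_model(floor_size=10, terrain=True, rangefinders=True)
#   physics = Physics.from_xml_string(xml_string, common.ASSETS)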


@SUITE.add()
def lie_down(time_limit=_DEFAULT_TIME_LIMIT, random=None,
             environment_kwargs=None):
    """Returns the Lie Down task."""
    xml_string = make_model(floor_size=_DEFAULT_TIME_LIMIT * _WALK_SPEED)
    physics = Physics.from_xml_string(xml_string, common.ASSETS)
    task = Stand(goal='lie_down', random=random)
    environment_kwargs = environment_kwargs or {}
    return control.Environment(physics, task, time_limit=time_limit,
                               control_timestep=_CONTROL_TIMESTEP,
                               **environment_kwargs)


@SUITE.add()
def two_legs(time_limit=_DEFAULT_TIME_LIMIT, random=None,
             environment_kwargs=None):
    """Returns the Two Legs task."""
    xml_string = make_model(floor_size=_DEFAULT_TIME_LIMIT * _WALK_SPEED)
    physics = Physics.from_xml_string(xml_string, common.ASSETS)
    task = Stand(goal='two_legs', random=random)
    environment_kwargs = environment_kwargs or {}
    return control.Environment(physics, task, time_limit=time_limit,
                               control_timestep=_CONTROL_TIMESTEP,
                               **environment_kwargs)


@SUITE.add()
def stand(time_limit=_DEFAULT_TIME_LIMIT, random=None,
          environment_kwargs=None):
    """Returns the Stand task."""
    xml_string = make_model(floor_size=_DEFAULT_TIME_LIMIT * _WALK_SPEED)
    physics = Physics.from_xml_string(xml_string, common.ASSETS)
    task = Stand(goal='stand', random=random)
    environment_kwargs = environment_kwargs or {}
    return control.Environment(physics, task, time_limit=time_limit,
                               control_timestep=_CONTROL_TIMESTEP,
                               **environment_kwargs)


@SUITE.add()
def jump(time_limit=_DEFAULT_TIME_LIMIT, random=None,
         environment_kwargs=None):
    """Returns the Jump task."""
    xml_string = make_model(floor_size=_DEFAULT_TIME_LIMIT * _WALK_SPEED)
    physics = Physics.from_xml_string(xml_string, common.ASSETS)
    task = Jump(desired_height=_JUMP_HEIGHT, random=random)
    environment_kwargs = environment_kwargs or {}
    return control.Environment(physics, task, time_limit=time_limit,
                               control_timestep=_CONTROL_TIMESTEP,
                               **environment_kwargs)


@SUITE.add()
def roll(time_limit=_DEFAULT_TIME_LIMIT, random=None,
         environment_kwargs=None):
    """Returns the Roll task."""
    xml_string = make_model(floor_size=_DEFAULT_TIME_LIMIT * _WALK_SPEED)
    physics = Physics.from_xml_string(xml_string, common.ASSETS)
    task = Roll(desired_speed=_WALK_SPEED, random=random)
    environment_kwargs = environment_kwargs or {}
    return control.Environment(physics, task, time_limit=time_limit,
                               control_timestep=_CONTROL_TIMESTEP,
                               **environment_kwargs)


@SUITE.add()
def roll_fast(time_limit=_DEFAULT_TIME_LIMIT, random=None,
              environment_kwargs=None):
    """Returns the Roll Fast task."""
    xml_string = make_model(floor_size=_DEFAULT_TIME_LIMIT * _WALK_SPEED)
    physics = Physics.from_xml_string(xml_string, common.ASSETS)
    task = Roll(desired_speed=_RUN_SPEED, random=random)
    environment_kwargs = environment_kwargs or {}
    return control.Environment(physics, task, time_limit=time_limit,
                               control_timestep=_CONTROL_TIMESTEP,
                               **environment_kwargs)


class Physics(mujoco.Physics):
    """Physics simulation with additional features for the Quadruped domain."""

    def _reload_from_data(self, data):
        super()._reload_from_data(data)
        # Clear cached sensor names when the physics is reloaded.
        self._sensor_types_to_names = {}
        self._hinge_names = []

    def _get_sensor_names(self, *sensor_types):
        try:
            sensor_names = self._sensor_types_to_names[sensor_types]
        except KeyError:
            [sensor_ids] = np.where(np.in1d(self.model.sensor_type, sensor_types))
            sensor_names = [self.model.id2name(s_id, 'sensor')
                            for s_id in sensor_ids]
            self._sensor_types_to_names[sensor_types] = sensor_names
        return sensor_names

    def torso_upright(self):
        """Returns the dot-product of the torso z-axis and the global z-axis."""
        return np.asarray(self.named.data.xmat['torso', 'zz'])

    def torso_velocity(self):
        """Returns the velocity of the torso, in the local frame."""
        return self.named.data.sensordata['velocimeter'].copy()

    def com_height(self):
        """Returns the height (z-coordinate) of the center-of-mass sensor."""
        return self.named.data.sensordata['center_of_mass'].copy()[2]

    def egocentric_state(self):
        """Returns the state without global orientation or position."""
        if not self._hinge_names:
            [hinge_ids] = np.nonzero(self.model.jnt_type ==
                                     enums.mjtJoint.mjJNT_HINGE)
            self._hinge_names = [self.model.id2name(j_id, 'joint')
                                 for j_id in hinge_ids]
        return np.hstack((self.named.data.qpos[self._hinge_names],
                          self.named.data.qvel[self._hinge_names],
                          self.data.act))

    def toe_positions(self):
        """Returns toe positions in the egocentric frame."""
        torso_frame = self.named.data.xmat['torso'].reshape(3, 3)
        torso_pos = self.named.data.xpos['torso']
        torso_to_toe = self.named.data.xpos[_TOES] - torso_pos
        return torso_to_toe.dot(torso_frame)

    def force_torque(self):
        """Returns scaled force/torque sensor readings at the toes."""
        force_torque_sensors = self._get_sensor_names(
            enums.mjtSensor.mjSENS_FORCE,
            enums.mjtSensor.mjSENS_TORQUE)
        return np.arcsinh(self.named.data.sensordata[force_torque_sensors])

    def imu(self):
        """Returns IMU-like sensor readings."""
        imu_sensors = self._get_sensor_names(
            enums.mjtSensor.mjSENS_GYRO,
            enums.mjtSensor.mjSENS_ACCELEROMETER)
        return self.named.data.sensordata[imu_sensors]

    def rangefinder(self):
        """Returns scaled rangefinder sensor readings."""
        rf_sensors = self._get_sensor_names(enums.mjtSensor.mjSENS_RANGEFINDER)
        rf_readings = self.named.data.sensordata[rf_sensors]
        no_intersection = -1.0
        return np.where(rf_readings == no_intersection, 1.0, np.tanh(rf_readings))

    def origin_distance(self):
        """Returns the distance from the origin to the workspace."""
        return np.asarray(np.linalg.norm(self.named.data.site_xpos['workspace']))

    def origin(self):
        """Returns origin position in the torso frame."""
        torso_frame = self.named.data.xmat['torso'].reshape(3, 3)
        torso_pos = self.named.data.xpos['torso']
        return -torso_pos.dot(torso_frame)

    def ball_state(self):
        """Returns ball position and velocity relative to the torso frame."""
        data = self.named.data
        torso_frame = data.xmat['torso'].reshape(3, 3)
        ball_rel_pos = data.xpos['ball'] - data.xpos['torso']
        ball_rel_vel = data.qvel['ball_root'][:3] - data.qvel['root'][:3]
        ball_rot_vel = data.qvel['ball_root'][3:]
        ball_state = np.vstack((ball_rel_pos, ball_rel_vel, ball_rot_vel))
        return ball_state.dot(torso_frame).ravel()

    def target_position(self):
        """Returns target position in the torso frame."""
        torso_frame = self.named.data.xmat['torso'].reshape(3, 3)
        torso_pos = self.named.data.xpos['torso']
        torso_to_target = self.named.data.site_xpos['target'] - torso_pos
        return torso_to_target.dot(torso_frame)

    def ball_to_target_distance(self):
        """Returns horizontal distance from the ball to the target."""
        ball_to_target = (self.named.data.site_xpos['target'] -
                          self.named.data.xpos['ball'])
        return np.linalg.norm(ball_to_target[:2])

    def self_to_ball_distance(self):
        """Returns horizontal distance from the quadruped workspace to the ball."""
        self_to_ball = (self.named.data.site_xpos['workspace'] -
                        self.named.data.xpos['ball'])
        return np.linalg.norm(self_to_ball[:2])
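

# Example (a sketch, not part of the original module): once a model is loaded,
# the `Physics` accessors above can be queried directly, independently of any
# task. The values noted in the comments are illustrative.
#
#   physics = Physics.from_xml_string(make_model(), common.ASSETS)
#   physics.reset()
#   upright = physics.torso_upright()  # ~1.0 when the torso z-axis points up.
#   state = physics.egocentric_state()  # Hinge positions, velocities, actuations.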


def _find_non_contacting_height(physics, orientation, x_pos=0.0, y_pos=0.0):
    """Find a height with no contacts given a body orientation.

    Args:
      physics: An instance of `Physics`.
      orientation: A quaternion.
      x_pos: A float. Position along global x-axis.
      y_pos: A float. Position along global y-axis.

    Raises:
      RuntimeError: If a non-contacting configuration has not been found after
        10,000 attempts.
    """
    z_pos = 0.0  # Start embedded in the floor.
    num_contacts = 1
    num_attempts = 0
    # Move up in 1cm increments until no contacts.
    while num_contacts > 0:
        try:
            with physics.reset_context():
                physics.named.data.qpos['root'][:3] = x_pos, y_pos, z_pos
                physics.named.data.qpos['root'][3:] = orientation
        except control.PhysicsError:
            # We may encounter a PhysicsError here due to filling the contact
            # buffer, in which case we simply increment the height and continue.
            pass
        num_contacts = physics.data.ncon
        z_pos += 0.01
        num_attempts += 1
        if num_attempts > 10000:
            raise RuntimeError('Failed to find a non-contacting configuration.')


def _common_observations(physics):
    """Returns the observations common to all tasks."""
    obs = collections.OrderedDict()
    obs['egocentric_state'] = physics.egocentric_state()
    obs['torso_velocity'] = physics.torso_velocity()
    obs['torso_upright'] = physics.torso_upright()
    obs['imu'] = physics.imu()
    obs['force_torque'] = physics.force_torque()
    return obs


def _lie_down_reward(physics):
    """Returns a reward for lying down.

    Args:
      physics: an instance of `Physics`.

    Returns:
      A reward that is 1 when the torso height is below `_LIE_DOWN_HEIGHT`,
      falling off smoothly over a margin above that threshold.
    """
    torso_height = physics.named.data.xpos['torso', 'z']
    return rewards.tolerance(
        torso_height,
        bounds=(-float('inf'), _LIE_DOWN_HEIGHT),
        margin=_LIE_DOWN_HEIGHT * 1.5)


def _two_legs_reward(physics):
    """Returns a reward for balancing on two legs.

    The reward is the product of two terms: one requiring the mean height of
    the two highest toes to be above `_FOOT_UP_HEIGHT`, and one requiring the
    mean height of the two lowest toes to stay below `_FOOT_DOWN_HEIGHT`.

    Args:
      physics: an instance of `Physics`.
    """
    toe_heights = sorted(physics.named.data.xpos[toe, 'z'] for toe in _TOES)
    min_toes = sum(toe_heights[:2]) / 2
    max_toes = sum(toe_heights[2:]) / 2
    toes_up = rewards.tolerance(
        max_toes,
        bounds=(_FOOT_UP_HEIGHT, float('inf')),
        margin=_FOOT_UP_HEIGHT // 2)
    toes_down = rewards.tolerance(
        min_toes,
        bounds=(-float('inf'), _FOOT_DOWN_HEIGHT),
        margin=_FOOT_DOWN_HEIGHT * 1.5)
    return toes_down * toes_up


def _upright_reward(physics, deviation_angle=0):
    """Returns a reward proportional to how upright the torso is.

    Args:
      physics: an instance of `Physics`.
      deviation_angle: A float, in degrees. The reward is 0 when the torso is
        exactly upside-down and 1 when the torso's z-axis is less than
        `deviation_angle` away from the global z-axis.
    """
    deviation = np.cos(np.deg2rad(deviation_angle))
    return rewards.tolerance(
        physics.torso_upright(),
        bounds=(deviation, float('inf')),
        sigmoid='linear',
        margin=1 + deviation,
        value_at_margin=0)


class Move(base.Task):
    """A quadruped task solved by moving forward at a designated speed."""

    def __init__(self, desired_speed, random=None):
        """Initializes an instance of `Move`.

        Args:
          desired_speed: A float. If this value is zero, reward is given simply
            for standing upright. Otherwise this specifies the horizontal velocity
            at which the velocity-dependent reward component is maximized.
          random: Optional, either a `numpy.random.RandomState` instance, an
            integer seed for creating a new `RandomState`, or None to select a seed
            automatically (default).
        """
        self._desired_speed = desired_speed
        super().__init__(random=random)

    def initialize_episode(self, physics):
        """Sets the state of the environment at the start of each episode.

        Args:
          physics: An instance of `Physics`.
        """
        # Initial configuration.
        orientation = self.random.randn(4)
        orientation /= np.linalg.norm(orientation)
        _find_non_contacting_height(physics, orientation)
        super().initialize_episode(physics)

    def get_observation(self, physics):
        """Returns an observation to the agent."""
        return _common_observations(physics)

    def get_reward(self, physics):
        """Returns a reward to the agent."""
        # Move reward term.
        move_reward = rewards.tolerance(
            physics.torso_velocity()[0],
            bounds=(self._desired_speed, float('inf')),
            margin=self._desired_speed,
            value_at_margin=0.5,
            sigmoid='linear')
        return _upright_reward(physics) * move_reward


class Stand(base.Task):
    """A quadruped task solved by reaching a target posture.

    Depending on `goal`, the agent is rewarded for standing upright ('stand'),
    lying down ('lie_down'), or balancing on two legs ('two_legs').
    """

    def __init__(self, random=None, goal='stand'):
        """Initializes an instance of `Stand`.

        Args:
          random: Optional, either a `numpy.random.RandomState` instance, an
            integer seed for creating a new `RandomState`, or None to select a seed
            automatically (default).
          goal: A string, one of 'stand', 'lie_down' or 'two_legs', selecting the
            posture that is rewarded.
        """
        super().__init__(random=random)
        self._goal = goal

    def initialize_episode(self, physics):
        """Sets the state of the environment at the start of each episode.

        Args:
          physics: An instance of `Physics`.
        """
        # Initial configuration.
        orientation = self.random.randn(4)
        orientation /= np.linalg.norm(orientation)
        _find_non_contacting_height(physics, orientation)
        super().initialize_episode(physics)

    def get_observation(self, physics):
        """Returns an observation to the agent."""
        return _common_observations(physics)

    def get_reward(self, physics):
        """Returns a reward to the agent."""
        if self._goal == 'stand':
            return _upright_reward(physics)
        elif self._goal == 'lie_down':
            return _lie_down_reward(physics)
        elif self._goal == 'two_legs':
            return _two_legs_reward(physics)
        raise ValueError(f'Unknown goal: {self._goal!r}')


class Jump(base.Task):
    """A quadruped task solved by jumping above a desired height."""

    def __init__(self, desired_height, random=None):
        """Initializes an instance of `Jump`.

        Args:
          desired_height: A float. The centre-of-mass height at which the
            height-dependent reward component is maximized.
          random: Optional, either a `numpy.random.RandomState` instance, an
            integer seed for creating a new `RandomState`, or None to select a seed
            automatically (default).
        """
        self._desired_height = desired_height
        super().__init__(random=random)

    def initialize_episode(self, physics):
        """Sets the state of the environment at the start of each episode.

        Args:
          physics: An instance of `Physics`.
        """
        # Initial configuration.
        orientation = self.random.randn(4)
        orientation /= np.linalg.norm(orientation)
        _find_non_contacting_height(physics, orientation)
        super().initialize_episode(physics)

    def get_observation(self, physics):
        """Returns an observation to the agent."""
        return _common_observations(physics)

    def get_reward(self, physics):
        """Returns a reward to the agent."""
        # Jump reward term.
        jump_up = rewards.tolerance(
            physics.com_height(),
            bounds=(self._desired_height, float('inf')),
            margin=self._desired_height,
            value_at_margin=0.5,
            sigmoid='linear')
        return _upright_reward(physics) * jump_up


class Roll(base.Task):
    """A quadruped task solved by moving at a designated speed in any direction."""

    def __init__(self, desired_speed, random=None):
        """Initializes an instance of `Roll`.

        Args:
          desired_speed: A float. If this value is zero, reward is given simply
            for standing upright. Otherwise this specifies the torso speed at
            which the velocity-dependent reward component is maximized.
          random: Optional, either a `numpy.random.RandomState` instance, an
            integer seed for creating a new `RandomState`, or None to select a seed
            automatically (default).
        """
        self._desired_speed = desired_speed
        super().__init__(random=random)

    def initialize_episode(self, physics):
        """Sets the state of the environment at the start of each episode.

        Args:
          physics: An instance of `Physics`.
        """
        # Initial configuration.
        orientation = self.random.randn(4)
        orientation /= np.linalg.norm(orientation)
        _find_non_contacting_height(physics, orientation)
        super().initialize_episode(physics)

    def get_observation(self, physics):
        """Returns an observation to the agent."""
        return _common_observations(physics)

    def get_reward(self, physics):
        """Returns a reward to the agent."""
        # Move reward term, based on the magnitude of the torso velocity.
        move_reward = rewards.tolerance(
            np.linalg.norm(physics.torso_velocity()),
            bounds=(self._desired_speed, float('inf')),
            margin=self._desired_speed,
            value_at_margin=0.5,
            sigmoid='linear')
        return _upright_reward(physics) * move_reward


class Escape(base.Task):
    """A quadruped task solved by escaping a bowl-shaped terrain."""

    def initialize_episode(self, physics):
        """Sets the state of the environment at the start of each episode.

        Args:
          physics: An instance of `Physics`.
        """
        # Get heightfield resolution, assert that it is square.
        res = physics.model.hfield_nrow[_HEIGHTFIELD_ID]
        assert res == physics.model.hfield_ncol[_HEIGHTFIELD_ID]
        # Sinusoidal bowl shape.
        row_grid, col_grid = np.ogrid[-1:1:res*1j, -1:1:res*1j]
        radius = np.clip(np.sqrt(col_grid**2 + row_grid**2), .04, 1)
        bowl_shape = .5 - np.cos(2*np.pi*radius)/2
        # Random smooth bumps.
        terrain_size = 2 * physics.model.hfield_size[_HEIGHTFIELD_ID, 0]
        bump_res = int(terrain_size / _TERRAIN_BUMP_SCALE)
        bumps = self.random.uniform(_TERRAIN_SMOOTHNESS, 1, (bump_res, bump_res))
        smooth_bumps = ndimage.zoom(bumps, res / float(bump_res))
        # Terrain is elementwise product.
        terrain = bowl_shape * smooth_bumps
        start_idx = physics.model.hfield_adr[_HEIGHTFIELD_ID]
        physics.model.hfield_data[start_idx:start_idx+res**2] = terrain.ravel()
        super().initialize_episode(physics)

        # If we have a rendering context, we need to re-upload the modified
        # heightfield data.
        if physics.contexts:
            with physics.contexts.gl.make_current() as ctx:
                ctx.call(mjlib.mjr_uploadHField,
                         physics.model.ptr,
                         physics.contexts.mujoco.ptr,
                         _HEIGHTFIELD_ID)

        # Initial configuration.
        orientation = self.random.randn(4)
        orientation /= np.linalg.norm(orientation)
        _find_non_contacting_height(physics, orientation)

    def get_observation(self, physics):
        """Returns an observation to the agent."""
        obs = _common_observations(physics)
        obs['origin'] = physics.origin()
        obs['rangefinder'] = physics.rangefinder()
        return obs

    def get_reward(self, physics):
        """Returns a reward to the agent."""
        # Escape reward term.
        terrain_size = physics.model.hfield_size[_HEIGHTFIELD_ID, 0]
        escape_reward = rewards.tolerance(
            physics.origin_distance(),
            bounds=(terrain_size, float('inf')),
            margin=terrain_size,
            value_at_margin=0,
            sigmoid='linear')
        return _upright_reward(physics, deviation_angle=20) * escape_reward


class Fetch(base.Task):
    """A quadruped task solved by bringing a ball to the origin."""

    def initialize_episode(self, physics):
        """Sets the state of the environment at the start of each episode.

        Args:
          physics: An instance of `Physics`.
        """
        # Initial configuration, random azimuth and horizontal position.
        azimuth = self.random.uniform(0, 2*np.pi)
        orientation = np.array((np.cos(azimuth/2), 0, 0, np.sin(azimuth/2)))
        spawn_radius = 0.9 * physics.named.model.geom_size['floor', 0]
        x_pos, y_pos = self.random.uniform(-spawn_radius, spawn_radius, size=(2,))
        _find_non_contacting_height(physics, orientation, x_pos, y_pos)

        # Initial ball state.
        physics.named.data.qpos['ball_root'][:2] = self.random.uniform(
            -spawn_radius, spawn_radius, size=(2,))
        physics.named.data.qpos['ball_root'][2] = 2
        physics.named.data.qvel['ball_root'][:2] = 5*self.random.randn(2)
        super().initialize_episode(physics)

    def get_observation(self, physics):
        """Returns an observation to the agent."""
        obs = _common_observations(physics)
        obs['ball_state'] = physics.ball_state()
        obs['target_position'] = physics.target_position()
        return obs

    def get_reward(self, physics):
        """Returns a reward to the agent."""
        # Reward for moving close to the ball.
        arena_radius = physics.named.model.geom_size['floor', 0] * np.sqrt(2)
        workspace_radius = physics.named.model.site_size['workspace', 0]
        ball_radius = physics.named.model.geom_size['ball', 0]
        reach_reward = rewards.tolerance(
            physics.self_to_ball_distance(),
            bounds=(0, workspace_radius + ball_radius),
            sigmoid='linear',
            margin=arena_radius, value_at_margin=0)
        # Reward for bringing the ball to the target.
        target_radius = physics.named.model.site_size['target', 0]
        fetch_reward = rewards.tolerance(
            physics.ball_to_target_distance(),
            bounds=(0, target_radius),
            sigmoid='linear',
            margin=arena_radius, value_at_margin=0)
        reach_then_fetch = reach_reward * (0.5 + 0.5*fetch_reward)
        return _upright_reward(physics) * reach_then_fetch
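

if __name__ == '__main__':
    # Minimal smoke test (a sketch, not part of the original module): build the
    # 'stand' task via the SUITE registry above and run a few zero-action steps.
    # Assumes a working MuJoCo installation and that 'quadruped.xml' is present
    # under 'custom_dmc_tasks/' as expected by `make_model`.
    env = make('stand')
    action_spec = env.action_spec()
    time_step = env.reset()
    for _ in range(10):
        time_step = env.step(np.zeros(action_spec.shape))
        print('reward:', time_step.reward)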