# Spaces: mazpie/genrl (Running on Zero)
# File size: 8,397 Bytes
# 2d9a728

# Copyright 2019 The dm_control Authors.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#    http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ============================================================================

"""A task where the goal is to move the hand close to a target prop or site."""

import collections

from dm_control import composer
from dm_control.composer import initializers
from dm_control.composer.observation import observable
from dm_control.composer.variation import distributions
from dm_control.entities import props
from dm_control.manipulation.shared import arenas
from dm_control.manipulation.shared import cameras
from dm_control.manipulation.shared import constants
from dm_control.manipulation.shared import observations
from dm_control.manipulation.shared import registry
from dm_control.manipulation.shared import robots
from dm_control.manipulation.shared import tags
from dm_control.manipulation.shared import workspaces
from dm_control.utils import rewards
import numpy as np


# Placement description for a reach scene: the box in which the target may
# spawn, the box in which the tool center point (TCP) may spawn, and the offset
# at which the arm is attached to the arena.
_ReachWorkspace = collections.namedtuple(
    '_ReachWorkspace', 'target_bbox tcp_bbox arm_offset')

# Lifts props slightly so that they are not touching the table before settling.
_PROP_Z_OFFSET = 0.001

# Workspace for the Duplo-prop variant: a flat target box at `_PROP_Z_OFFSET`
# and a TCP spawn box above it.
_DUPLO_WORKSPACE = _ReachWorkspace(
    target_bbox=workspaces.BoundingBox(
        lower=(-0.1, -0.1, _PROP_Z_OFFSET), upper=(0.1, 0.1, _PROP_Z_OFFSET)),
    tcp_bbox=workspaces.BoundingBox(
        lower=(-0.1, -0.1, 0.2), upper=(0.1, 0.1, 0.4)),
    arm_offset=robots.ARM_OFFSET)

# Workspace for the site variant: target and TCP share the same spawn box.
_SITE_BBOX = workspaces.BoundingBox(
    lower=(-0.2, -0.2, 0.02), upper=(0.2, 0.2, 0.4))
_SITE_WORKSPACE = _ReachWorkspace(
    target_bbox=_SITE_BBOX, tcp_bbox=_SITE_BBOX, arm_offset=robots.ARM_OFFSET)

# Radius of the target site; also the bound and margin of the reach reward.
_TARGET_RADIUS = 0.05
# Default episode time limit handed to `composer.Environment` by `make`.
_TIME_LIMIT = 10

def _fixed_target(x, y):
  """Returns a bounding box whose lower and upper corners coincide at (x, y).

  The z coordinate of both corners is `_PROP_Z_OFFSET`.
  """
  corner = (x, y, _PROP_Z_OFFSET)
  return workspaces.BoundingBox(lower=corner, upper=corner)


# Maps each task id to the (degenerate) box from which its target position is
# sampled; one task per corner of the table area.
TASKS = {
    'reach_top_left': _fixed_target(-0.09, 0.09),
    'reach_top_right': _fixed_target(0.09, 0.09),
    'reach_bottom_left': _fixed_target(-0.09, -0.09),
    'reach_bottom_right': _fixed_target(0.09, -0.09),
}


def make(task_id, obs_type, seed, img_size=64):
  """Builds a `composer.Environment` for one of the `TASKS` reach variants.

  Args:
    task_id: Key of the module-level `TASKS` dict selecting the target
      location.
    obs_type: 'pixels' selects `observations.VISION`; any other value selects
      `observations.PERFECT_FEATURES`. 'states' additionally uses a slightly
      longer time limit (see the comment in the body).
    seed: Forwarded to `composer.Environment` as `random_state`.
    img_size: Value used for both the width and the height of the camera
      observation spec.

  Returns:
    A `composer.Environment` wrapping the task built by `_reach` with
    `use_site=False`.
  """
  base_settings = (
      observations.VISION if obs_type == 'pixels'
      else observations.PERFECT_FEATURES)
  # Address the camera spec by name rather than by position in the tuple.
  camera_spec = base_settings.camera._replace(width=img_size, height=img_size)
  obs_settings = base_settings._replace(camera=camera_spec)

  # With a limit of exactly 10 the 'states' setup ended up with 249 steps at
  # action repeat = 1; 10.04 works around that. The value is kept local so
  # that one 'states' call cannot change the time limit of later calls
  # (the previous code reassigned the module-level `_TIME_LIMIT`).
  time_limit = 10.04 if obs_type == 'states' else _TIME_LIMIT

  task = _reach(task_id, obs_settings=obs_settings, use_site=False)
  return composer.Environment(task, time_limit=time_limit, random_state=seed)


class MTReach(composer.Task):
  """Multi-task reach: bring the hand close to a target prop or site.

  The target location is selected by `task_id`, a key of the module-level
  `TASKS` dict.
  """

  def __init__(
      self, task_id, arena, arm, hand, prop, obs_settings, workspace, control_timestep):
    """Initializes a new `MTReach` task.

    Args:
      task_id: Key of the module-level `TASKS` dict. The bounding box stored
        under it is the range from which the target position is sampled.
      arena: `composer.Entity` instance.
      arm: `robot_base.RobotArm` instance.
      hand: `robot_base.RobotHand` instance.
      prop: `composer.Entity` instance specifying the prop to reach to, or None,
        in which case the target is a visible site attached to the arena that
        is repositioned at the start of every episode.
      obs_settings: `observations.ObservationSettings` instance.
      workspace: `_ReachWorkspace` providing the arm offset, the TCP spawn box
        and the two boxes added as visualization sites. Its `target_bbox` is
        only visualized; target placement comes from `TASKS[task_id]`.
      control_timestep: Float specifying the control timestep in seconds.
    """
    self._task_id = task_id
    self._arena = arena
    self._arm = arm
    self._hand = hand

    # Assemble the robot: hand onto arm, arm onto arena.
    arm.attach(hand)
    arena.attach_offset(arm, offset=workspace.arm_offset)
    self.control_timestep = control_timestep

    tcp_position = distributions.Uniform(*workspace.tcp_bbox)
    self._tcp_initializer = initializers.ToolCenterPointInitializer(
        hand, arm,
        position=tcp_position,
        quaternion=workspaces.DOWN_QUATERNION)

    # Camera observables are the only task-level observables.
    self._task_observables = cameras.add_camera_observables(
        arena, obs_settings, cameras.FRONT_CLOSE)

    target_position = distributions.Uniform(*TASKS[task_id])
    self._prop = prop
    if not prop:
      # No prop: a visible site on the arena marks the target. The
      # `target_position` observable is deliberately not registered, to match
      # EXORL.
      self._target = self._make_target_site(parent_entity=arena, visible=True)
      self._target_placer = target_position
    else:
      # The prop itself visualizes the target, so its site stays hidden.
      self._make_target_site(parent_entity=prop, visible=False)
      self._target = arena.add_free_entity(prop)
      self._prop_placer = initializers.PropPlacer(
          props=[prop],
          position=target_position,
          quaternion=workspaces.uniform_z_rotation,
          settle_physics=True)

    # Sites that visualize the TCP and target bounding boxes of the workspace.
    worldbody = self.root_entity.mjcf_model.worldbody
    bbox_sites = (
        (workspace.tcp_bbox, constants.GREEN, 'tcp_spawn_area'),
        (workspace.target_bbox, constants.BLUE, 'target_spawn_area'),
    )
    for bbox, rgba, site_name in bbox_sites:
      workspaces.add_bbox_site(
          body=worldbody, lower=bbox.lower, upper=bbox.upper,
          rgba=rgba, name=site_name)

  def _make_target_site(self, parent_entity, visible):
    """Adds a red target site of radius `_TARGET_RADIUS` to `parent_entity`."""
    return workspaces.add_target_site(
        body=parent_entity.mjcf_model.worldbody,
        radius=_TARGET_RADIUS,
        visible=visible,
        rgba=constants.RED,
        name='target_site')

  @property
  def root_entity(self):
    """The arena passed to the constructor."""
    return self._arena

  @property
  def arm(self):
    """The arm passed to the constructor."""
    return self._arm

  @property
  def hand(self):
    """The hand passed to the constructor."""
    return self._hand

  @property
  def task_observables(self):
    """Observables returned by `cameras.add_camera_observables`."""
    return self._task_observables

  def get_reward(self, physics):
    """Returns `rewards.tolerance` of the TCP-to-target Euclidean distance."""
    tcp_xpos = physics.bind(self._hand.tool_center_point).xpos
    target_xpos = physics.bind(self._target).xpos
    # NOTE: for the PT reward predictor experiments the target position was
    # instead sampled from `TASKS[self._task_id]`.
    gap = np.linalg.norm(tcp_xpos - target_xpos)
    return rewards.tolerance(
        gap, bounds=(0, _TARGET_RADIUS), margin=_TARGET_RADIUS)

  def initialize_episode(self, physics, random_state):
    """Randomizes the grasp and TCP pose, then places the target."""
    self._hand.set_grasp(physics, close_factors=random_state.uniform())
    self._tcp_initializer(physics, random_state)
    if not self._prop:
      # Site target: write the freshly sampled position onto the site.
      sampled_pos = self._target_placer(random_state=random_state)
      physics.bind(self._target).pos = sampled_pos
      return
    self._prop_placer(physics, random_state)


def _reach(task_id, obs_settings, use_site):
  """Configure and instantiate an `MTReach` task.

  Args:
    task_id: Key of the module-level `TASKS` dict, forwarded to `MTReach`.
    obs_settings: An `observations.ObservationSettings` instance.
    use_site: Boolean, if True then the target will be a fixed site, otherwise
      it will be a moveable Duplo brick.

  Returns:
    An instance of `MTReach`.
  """
  arena = arenas.Standard()
  arm = robots.make_arm(obs_settings=obs_settings)
  hand = robots.make_hand(obs_settings=obs_settings)

  # Start from the site configuration and switch to the Duplo one if needed.
  prop = None
  workspace = _SITE_WORKSPACE
  if not use_site:
    workspace = _DUPLO_WORKSPACE
    prop_options = observations.make_options(
        obs_settings, observations.FREEPROP_OBSERVABLES)
    prop = props.Duplo(observable_options=prop_options)

  return MTReach(
      task_id,
      arena=arena,
      arm=arm,
      hand=hand,
      prop=prop,
      obs_settings=obs_settings,
      workspace=workspace,
      control_timestep=constants.CONTROL_TIMESTEP)