# Copyright 2017 The dm_control Authors.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ============================================================================
"""Cheetah Domain."""
import collections
import os
from dm_control.suite import cheetah
from dm_control import mujoco
from dm_control.rl import control
from dm_control.suite import base
from dm_control.suite import common
from dm_control.utils import containers
from dm_control.utils import rewards
from dm_control.utils import io as resources

# How long the simulation will run, in seconds.
_DEFAULT_TIME_LIMIT = 10

# Height thresholds (torso and feet z-positions) used by the posture rewards.
_DOWN_HEIGHT = 0.15
_HIGH_HEIGHT = 1.00
_MID_HEIGHT = 0.45

# Running speed above which the run reward is 1.
_RUN_SPEED = 10
# Angular momentum above which the flip reward is 1.
_SPIN_SPEED = 5


def make(task,
         task_kwargs=None,
         environment_kwargs=None,
         visualize_reward=False):
  """Returns an environment for one of the custom cheetah tasks below."""
  task_kwargs = task_kwargs or {}
  if environment_kwargs is not None:
    task_kwargs = task_kwargs.copy()
    task_kwargs['environment_kwargs'] = environment_kwargs
  env = cheetah.SUITE[task](**task_kwargs)
  env.task.visualize_reward = visualize_reward
  return env
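
# A minimal usage sketch (assuming the tasks below have been registered at
# import time): `make` looks the task name up in the shared cheetah SUITE.
#
#   env = make('flip_backward')
#   timestep = env.reset()
#   timestep = env.step(env.action_spec().generate_value())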


def get_model_and_assets():
  """Returns a tuple containing the model XML string and a dict of assets."""
  root_dir = os.path.dirname(os.path.dirname(__file__))
  xml = resources.GetResource(
      os.path.join(root_dir, 'custom_dmc_tasks', 'cheetah.xml'))
  return xml, common.ASSETS


@cheetah.SUITE.add('custom')
def flipping(time_limit=_DEFAULT_TIME_LIMIT,
             random=None,
             environment_kwargs=None):
  """Returns the Flipping task."""
  physics = Physics.from_xml_string(*get_model_and_assets())
  task = Cheetah(forward=False, flip=False, random=random, goal='flipping')
  environment_kwargs = environment_kwargs or {}
  return control.Environment(physics,
                             task,
                             time_limit=time_limit,
                             **environment_kwargs)


@cheetah.SUITE.add('custom')
def standing(time_limit=_DEFAULT_TIME_LIMIT,
             random=None,
             environment_kwargs=None):
  """Returns the Standing task."""
  physics = Physics.from_xml_string(*get_model_and_assets())
  task = Cheetah(forward=False, flip=False, random=random, goal='standing')
  environment_kwargs = environment_kwargs or {}
  return control.Environment(physics,
                             task,
                             time_limit=time_limit,
                             **environment_kwargs)


@cheetah.SUITE.add('custom')
def lying_down(time_limit=_DEFAULT_TIME_LIMIT,
               random=None,
               environment_kwargs=None):
  """Returns the Lying Down task."""
  physics = Physics.from_xml_string(*get_model_and_assets())
  task = Cheetah(forward=False, flip=False, random=random, goal='lying_down')
  environment_kwargs = environment_kwargs or {}
  return control.Environment(physics,
                             task,
                             time_limit=time_limit,
                             **environment_kwargs)


@cheetah.SUITE.add('custom')
def run_backward(time_limit=_DEFAULT_TIME_LIMIT,
                 random=None,
                 environment_kwargs=None):
  """Returns the Run Backward task."""
  physics = Physics.from_xml_string(*get_model_and_assets())
  task = Cheetah(forward=False, flip=False, random=random, goal='run_backward')
  environment_kwargs = environment_kwargs or {}
  return control.Environment(physics,
                             task,
                             time_limit=time_limit,
                             **environment_kwargs)


@cheetah.SUITE.add('custom')
def flip(time_limit=_DEFAULT_TIME_LIMIT,
         random=None,
         environment_kwargs=None):
  """Returns the Flip task."""
  physics = Physics.from_xml_string(*get_model_and_assets())
  task = Cheetah(forward=True, flip=True, random=random, goal='flip')
  environment_kwargs = environment_kwargs or {}
  return control.Environment(physics,
                             task,
                             time_limit=time_limit,
                             **environment_kwargs)


@cheetah.SUITE.add('custom')
def flip_backward(time_limit=_DEFAULT_TIME_LIMIT,
                  random=None,
                  environment_kwargs=None):
  """Returns the Flip Backward task."""
  physics = Physics.from_xml_string(*get_model_and_assets())
  task = Cheetah(forward=False, flip=True, random=random, goal='flip_backward')
  environment_kwargs = environment_kwargs or {}
  return control.Environment(physics,
                             task,
                             time_limit=time_limit,
                             **environment_kwargs)
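
# The factories above are registered under the 'custom' tag of the standard
# `cheetah.SUITE`, so they can be enumerated alongside the built-in tasks,
# e.g. (a sketch): `sorted(cheetah.SUITE.tagged('custom'))`.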


class Physics(mujoco.Physics):
  """Physics simulation with additional features for the Cheetah domain."""

  def speed(self):
    """Returns the horizontal speed of the Cheetah."""
    return self.named.data.sensordata['torso_subtreelinvel'][0]

  def angmomentum(self):
    """Returns the angular momentum of the Cheetah's torso about the y axis."""
    return self.named.data.subtree_angmom['torso'][1]
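
# Usage sketch for the helpers above, assuming the custom model XML sits next
# to this module as expected by `get_model_and_assets`:
#
#   physics = Physics.from_xml_string(*get_model_and_assets())
#   physics.speed()        # horizontal (x) torso velocity
#   physics.angmomentum()  # torso angular momentum about the y axis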


class Cheetah(base.Task):
  """A `Task` to train a running Cheetah."""

  def __init__(self, goal=None, forward=True, flip=False, random=None):
    """Initializes an instance of `Cheetah`.

    Args:
      goal: String, the name of the reward to compute; see `get_reward`.
      forward: Boolean, whether to reward forward (True) or backward motion.
      flip: Boolean, whether to reward flipping instead of running.
      random: Optional, a seed or `numpy.random.RandomState` instance.
    """
    self._forward = 1 if forward else -1
    self._flip = flip
    self._goal = goal
    super().__init__(random=random)

  def initialize_episode(self, physics):
    """Sets the state of the environment at the start of each episode."""
    # The indexing below assumes that all joints have a single DOF.
    assert physics.model.nq == physics.model.njnt
    is_limited = physics.model.jnt_limited == 1
    lower, upper = physics.model.jnt_range[is_limited].T
    physics.data.qpos[is_limited] = self.random.uniform(lower, upper)

    # Stabilize the model before the actual simulation.
    for _ in range(200):
      physics.step()
    physics.data.time = 0

    self._timeout_progress = 0
    super().initialize_episode(physics)

  def _get_lying_down_reward(self, physics):
    """Rewards a low torso while both feet are held high."""
    torso = physics.named.data.xpos['torso', 'z']
    torso_down = rewards.tolerance(torso,
                                   bounds=(-float('inf'), _DOWN_HEIGHT),
                                   margin=_DOWN_HEIGHT * 1.5)
    feet = (physics.named.data.xpos['bfoot', 'z'] +
            physics.named.data.xpos['ffoot', 'z'])
    feet_up = rewards.tolerance(feet,
                                bounds=(_MID_HEIGHT, float('inf')),
                                margin=_MID_HEIGHT / 2)
    return (torso_down + feet_up) / 2

  def _get_standing_reward(self, physics):
    """Rewards one foot held high while the other stays near the ground."""
    bfoot = physics.named.data.xpos['bfoot', 'z']
    ffoot = physics.named.data.xpos['ffoot', 'z']
    max_foot = max(bfoot, ffoot)
    min_foot = min(bfoot, ffoot)
    low_foot_low = rewards.tolerance(min_foot,
                                     bounds=(-float('inf'), _DOWN_HEIGHT),
                                     margin=_DOWN_HEIGHT * 1.5)
    high_foot_high = rewards.tolerance(max_foot,
                                       bounds=(_HIGH_HEIGHT, float('inf')),
                                       margin=_HIGH_HEIGHT / 2)
    return high_foot_high * low_foot_low

  def _get_flip_reward(self, physics):
    """Rewards spinning (about y) in the direction selected by `_forward`."""
    return rewards.tolerance(self._forward * physics.angmomentum(),
                             bounds=(_SPIN_SPEED, float('inf')),
                             margin=_SPIN_SPEED,
                             value_at_margin=0,
                             sigmoid='linear')
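
  # Worked example of the linear sigmoid above: with _SPIN_SPEED = 5 and
  # value_at_margin=0, an angular momentum of 0 scores 0, 2.5 scores 0.5,
  # and anything at or above _SPIN_SPEED scores 1; spinning the wrong way
  # (negative values) also scores 0.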

  def get_observation(self, physics):
    """Returns an observation of the state, ignoring horizontal position."""
    obs = collections.OrderedDict()
    # Ignores horizontal position to maintain translational invariance.
    obs['position'] = physics.data.qpos[1:].copy()
    obs['velocity'] = physics.velocity()
    return obs

  def get_reward(self, physics):
    """Returns a reward to the agent."""
    if self._goal in ['run', 'flip', 'run_backward', 'flip_backward']:
      if self._flip:
        return self._get_flip_reward(physics)
      else:
        return rewards.tolerance(self._forward * physics.speed(),
                                 bounds=(_RUN_SPEED, float('inf')),
                                 margin=_RUN_SPEED,
                                 value_at_margin=0,
                                 sigmoid='linear')
    elif self._goal == 'lying_down':
      return self._get_lying_down_reward(physics)
    elif self._goal == 'flipping':
      # Reward spinning in either direction: evaluate the flip reward both
      # ways and keep the larger. `_forward` must be +/-1 (not a boolean),
      # since it multiplies the angular momentum.
      self._forward = 1
      fwd_reward = self._get_flip_reward(physics)
      self._forward = -1
      back_reward = self._get_flip_reward(physics)
      return max(fwd_reward, back_reward)
    elif self._goal == 'standing':
      return self._get_standing_reward(physics)
    else:
      raise NotImplementedError(self._goal)
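

# A minimal smoke-test sketch (not part of the original module): constructs
# each registered task and takes one step with a placeholder action generated
# from the environment's action spec.
if __name__ == '__main__':
  for name in ['flipping', 'standing', 'lying_down',
               'run_backward', 'flip', 'flip_backward']:
    env = make(name)
    env.reset()
    action = env.action_spec().generate_value()
    timestep = env.step(action)
    print(name, timestep.reward)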