# Source: gomoku / DI-engine / dizoo / mujoco / envs / test / test_mujoco_env.py
# Last commit: "init space" (079c32c) by zjowowen
import os
import pytest
import numpy as np
from easydict import EasyDict
from ding.utils import set_pkg_seed
from dizoo.mujoco.envs import MujocoEnv
@pytest.mark.envtest
@pytest.mark.parametrize('delay_reward_step', [1, 10])
def test_mujoco_env_delay_reward(delay_reward_step):
    """Step an ``Ant-v3`` ``MujocoEnv`` 25 times and check the reward shape.

    The env is built with the parametrized ``delay_reward_step`` (1 and 10),
    seeded with 1234 and reset. After each of the 25 steps, the reward in the
    returned timestep must have shape ``(1, )``.
    """
    set_pkg_seed(1234, use_cuda=False)

    env_cfg = EasyDict(
        {
            'env_id': 'Ant-v3',
            'action_clip': False,
            'delay_reward_step': delay_reward_step,
            'save_replay_gif': False,
            'replay_path_gif': None
        }
    )
    env = MujocoEnv(env_cfg)
    env.seed(1234)
    env.reset()

    action_shape = env.action_space.shape
    for step_idx in range(25):
        # Two ways of producing a legal random action are exercised here:
        # the first 10 steps sample with ``np.random`` using the action space
        # shape, the remaining steps call ``env.random_action()``.
        action = np.random.random(size=action_shape) if step_idx < 10 else env.random_action()
        timestep = env.step(action)
        reward = timestep.reward
        print(reward)
        assert reward.shape == (1, ), reward.shape
@pytest.mark.envtest
def test_mujoco_env_eval_episode_return():
    """Check the reported episode return against a manually accumulated one.

    An ``Ant-v3`` ``MujocoEnv`` with ``delay_reward_step=4`` is stepped with
    random actions until ``timestep.done``. The sum of every ``timestep.reward``
    must agree with ``timestep.info['eval_episode_return']`` of the final step
    to within a relative error of ``1e-5``.
    """
    set_pkg_seed(1234, use_cuda=False)

    env_cfg = EasyDict(
        {
            'env_id': 'Ant-v3',
            'action_clip': False,
            'delay_reward_step': 4,
            'save_replay_gif': False,
            'replay_path_gif': None
        }
    )
    env = MujocoEnv(env_cfg)
    env.seed(1234)
    env.reset()

    action_shape = env.action_space.shape
    eval_episode_return = np.array([0.], dtype=np.float32)

    # Roll out one full episode, summing the per-step rewards in place.
    done = False
    while not done:
        timestep = env.step(np.random.random(size=action_shape))
        eval_episode_return += timestep.reward
        done = timestep.done

    reported_return = timestep.info['eval_episode_return']
    print("{}({}), {}({})".format(reported_return, type(reported_return), eval_episode_return, type(eval_episode_return)))

    # timestep.reward and the cumulative reward in wrapper EvalEpisodeReturn are not the same,
    # so the two totals are compared with a relative tolerance instead of exact equality.
    reported_value = reported_return.item()
    accumulated_value = eval_episode_return.item()
    relative_error = abs(reported_value - accumulated_value) / abs(reported_value)
    assert relative_error < 1e-5