# File: gomoku/LightZero/lzero/envs/tests/test_lightzero_env_wrapper.py
# Hugging Face file-viewer header: last commit 079c32c ("init space") by zjowowen; file size 4.6 kB.
import pytest
from ding.envs import DingEnvWrapper
from lzero.envs.wrappers import ActionDiscretizationEnvWrapper, LightZeroEnvWrapper
from easydict import EasyDict
import gym
import numpy as np
@pytest.mark.unittest
class TestLightZeroEnvWrapper:
    """Smoke tests for ``LightZeroEnvWrapper`` on top of ``DingEnvWrapper``.

    Each test builds a gym environment, wraps it (optionally with
    ``ActionDiscretizationEnvWrapper`` first), resets it and checks the
    structure of the returned observation dict: ``observation``,
    ``action_mask`` and ``to_play``.
    """

    @staticmethod
    def _build_env(env_cfg):
        """Create the wrapped environment described by ``env_cfg``.

        ``ActionDiscretizationEnvWrapper`` is applied before
        ``LightZeroEnvWrapper`` only when ``env_cfg.manually_discretization``
        is true; otherwise ``LightZeroEnvWrapper`` is the sole wrapper.
        """
        wrappers = []
        if env_cfg.manually_discretization:
            wrappers.append(lambda env: ActionDiscretizationEnvWrapper(env, env_cfg))
        wrappers.append(lambda env: LightZeroEnvWrapper(env, env_cfg))
        return DingEnvWrapper(gym.make(env_cfg.env_name), cfg={'env_wrapper': wrappers})

    def _check_reset(self, env_cfg, obs_shape, action_mask_sum):
        """Reset the wrapped env and verify the observation dict.

        Arguments:
            - env_cfg: config passed to the wrappers; ``env_name`` selects the gym env.
            - obs_shape: expected shape of ``obs['observation']``.
            - action_mask_sum: expected ``obs['action_mask'].sum()``, or ``None``
              when the mask itself is expected to be ``None``.
        """
        lightzero_env = self._build_env(env_cfg)
        try:
            obs = lightzero_env.reset()
            print("obs: ", obs)
            print(lightzero_env.observation_space, lightzero_env.action_space, lightzero_env.reward_space)

            assert isinstance(obs, dict)
            assert isinstance(obs['observation'], np.ndarray)
            assert obs['observation'].shape == obs_shape
            if action_mask_sum is None:
                assert obs['action_mask'] is None
            else:
                assert obs['action_mask'].sum() == action_mask_sum
            assert obs['to_play'] == -1

            action = lightzero_env.random_action()
            print('random_action: {}, action_space: {}'.format(action.shape, lightzero_env.action_space))
        finally:
            # Always release the underlying gym env, even if an assertion
            # fails, so that environments do not leak across tests.
            lightzero_env.close()

    def test_continuous_pendulum(self):
        """Pendulum-v1 with continuous actions: 3-dim observation, no action mask."""
        env_cfg = EasyDict(
            dict(
                env_name='Pendulum-v1',
                manually_discretization=False,
                continuous=True,
                each_dim_disc_size=None,
                is_train=True,
            )
        )
        self._check_reset(env_cfg, obs_shape=(3, ), action_mask_sum=None)

    def test_discretization_pendulum(self):
        """Pendulum-v1 with discretized actions: the action mask sums to 11."""
        env_cfg = EasyDict(
            dict(
                env_name='Pendulum-v1',
                manually_discretization=True,
                continuous=False,
                each_dim_disc_size=11,
                is_train=True,
            )
        )
        # NOTE(review): 11 presumably equals each_dim_disc_size ** 1 action dim -- confirm.
        self._check_reset(env_cfg, obs_shape=(3, ), action_mask_sum=11)

    def test_continuous_bipedalwalker(self):
        """BipedalWalker-v3 with continuous actions: 24-dim observation, no action mask."""
        env_cfg = EasyDict(
            dict(
                env_name='BipedalWalker-v3',
                manually_discretization=False,
                continuous=True,
                each_dim_disc_size=4,
                is_train=True,
            )
        )
        self._check_reset(env_cfg, obs_shape=(24, ), action_mask_sum=None)

    def test_discretization_bipedalwalker(self):
        """BipedalWalker-v3 with discretized actions: the action mask sums to 256."""
        env_cfg = EasyDict(
            dict(
                env_name='BipedalWalker-v3',
                manually_discretization=True,
                continuous=False,
                each_dim_disc_size=4,
                is_train=True,
            )
        )
        # NOTE(review): 256 presumably equals each_dim_disc_size ** 4 action dims -- confirm.
        self._check_reset(env_cfg, obs_shape=(24, ), action_mask_sum=256)