|
import pytest |
|
import os |
|
import numpy as np |
|
from dizoo.minigrid.envs import MiniGridEnv |
|
from easydict import EasyDict |
|
import copy |
|
|
|
|
|
# Settings for the 13x13 AKTDT MiniGrid task, with ``flat_obs`` enabled.
config = {
    'env_id': 'MiniGrid-AKTDT-13x13-v0',
    'flat_obs': True,
}

# Wrap a deep copy so that changes made to ``cfg`` never leak back into ``config``.
cfg = EasyDict(copy.deepcopy(config))
cfg.cfg_type = 'MiniGridEnvDict'
|
|
|
# Settings for the 7x7-1 AKTDT MiniGrid task, with ``flat_obs`` enabled.
config2 = {
    'env_id': 'MiniGrid-AKTDT-7x7-1-v0',
    'flat_obs': True,
}

# Wrap a deep copy so that changes made to ``cfg2`` never leak back into ``config2``.
cfg2 = EasyDict(copy.deepcopy(config2))
cfg2.cfg_type = 'MiniGridEnvDict'
|
|
|
|
|
@pytest.mark.envtest
class TestMiniGridEnv:
    """Random-action smoke test for ``MiniGridEnv`` built from its default config."""

    def test_naive(self):
        """Step the default env for ``env._max_step`` random actions and validate each timestep.

        Every returned timestep must carry an ndarray observation of shape
        ``(2739, )``, a plain ``bool`` done flag, and a shape-``(1, )`` reward
        lying inside the bounds reported by ``env.info().rew_space``.
        """
        env = MiniGridEnv(MiniGridEnv.default_config())
        # try/finally guarantees the env is closed even when an assertion fails.
        try:
            env.seed(314)
            # Directory handed to enable_save_replay; exist_ok avoids failing if
            # it already exists or is created concurrently by another test.
            path = './video'
            os.makedirs(path, exist_ok=True)
            env.enable_save_replay(path)
            assert env._seed == 314
            env.reset()

            act_val = env.info().act_space.value
            min_val, max_val = act_val['min'], act_val['max']
            for _ in range(env._max_step):
                random_action = np.random.randint(min_val, max_val, size=(1, ))
                timestep = env.step(random_action)
                print(timestep)
                print(timestep.obs.max())
                assert isinstance(timestep.obs, np.ndarray)
                assert isinstance(timestep.done, bool)
                assert timestep.obs.shape == (2739, )
                assert timestep.reward.shape == (1, )
                rew_val = env.info().rew_space.value
                assert timestep.reward >= rew_val['min']
                assert timestep.reward <= rew_val['max']
                # Start a fresh episode so the remaining steps stay valid.
                if timestep.done:
                    env.reset()
            print(env.info())
        finally:
            env.close()
|
|
|
|
|
@pytest.mark.envtest
class TestMiniGridAKTDTnv:
    """Random-action smoke tests for the AKTDT MiniGrid configs defined in this module."""

    def _check_random_rollout(self, env_cfg, expected_obs_shape):
        """Step a ``MiniGridEnv`` built from ``env_cfg`` with random actions and validate each timestep.

        Args:
            env_cfg: Config object passed straight to ``MiniGridEnv``.
            expected_obs_shape: Shape that every ``timestep.obs`` must have.

        The leading underscore keeps pytest from collecting this helper as a test.
        """
        env = MiniGridEnv(env_cfg)
        # try/finally guarantees the env is closed even when an assertion fails.
        try:
            env.seed(314)
            # Directory handed to enable_save_replay; exist_ok avoids failing if
            # it already exists or is created concurrently by another test.
            path = './video'
            os.makedirs(path, exist_ok=True)
            env.enable_save_replay(path)
            assert env._seed == 314
            env.reset()

            act_val = env.info().act_space.value
            min_val, max_val = act_val['min'], act_val['max']
            for _ in range(env._max_step):
                random_action = np.random.randint(min_val, max_val, size=(1, ))
                timestep = env.step(random_action)
                print(timestep)
                print(timestep.obs.max())
                assert isinstance(timestep.obs, np.ndarray)
                assert isinstance(timestep.done, bool)
                assert timestep.obs.shape == expected_obs_shape
                assert timestep.reward.shape == (1, )
                rew_val = env.info().rew_space.value
                assert timestep.reward >= rew_val['min']
                assert timestep.reward <= rew_val['max']
                # Start a fresh episode so the remaining steps stay valid.
                if timestep.done:
                    env.reset()
            print(env.info())
        finally:
            env.close()

    def test_adtkt_13(self):
        """Roll out the 13x13 AKTDT env (``cfg``) and expect observations of shape ``(2667, )``."""
        # NOTE(review): this test previously built the env from ``cfg2`` exactly
        # like test_adtkt_7, which left ``cfg`` unused and made the two shape
        # assertions contradict each other. Confirm 2667 is the 13x13 shape.
        self._check_random_rollout(cfg, (2667, ))

    def test_adtkt_7(self):
        """Roll out the 7x7-1 AKTDT env (``cfg2``) and expect observations of shape ``(2619, )``."""
        self._check_random_rollout(cfg2, (2619, ))
|
|