Spaces:

zjowowen
/

gomoku

Sleeping

File size: 1,597 Bytes

079c32c

import pytest
import numpy as np
from easydict import EasyDict

from ding.utils import set_pkg_seed
from dizoo.mujoco.envs import MujocoDiscEnv


@pytest.mark.envtest
def test_mujoco_env_eval_episode_return():
    """Check that summed step rewards match the env's reported episode return.

    A ``MujocoDiscEnv`` is built for ``Ant-v3`` with ``each_dim_disc_size``
    set to 2 and ``delay_reward_step`` set to 4, then stepped with random
    discrete actions until ``timestep.done`` is set. The rewards returned by
    ``env.step`` are accumulated locally and compared, with a relative
    tolerance of 1e-5, against ``timestep.info['eval_episode_return']`` from
    the final step.

    The environment is always closed, even when an assertion fails, so the
    underlying simulator is not leaked into later tests.
    """
    set_pkg_seed(1234, use_cuda=False)
    each_dim_disc_size = 2
    env = MujocoDiscEnv(
        EasyDict(
            {
                'env_id': 'Ant-v3',
                'action_clip': False,
                'each_dim_disc_size': each_dim_disc_size,
                'delay_reward_step': 4,
                'save_replay_gif': False,
                'replay_path_gif': None
            }
        )
    )
    try:
        env.seed(1234)
        env.reset()
        action_dim = env._raw_action_space.shape
        # Upper bound (exclusive) of the sampled action index; it does not
        # change between steps, so compute it once outside the loop.
        num_disc_actions = each_dim_disc_size ** action_dim[0]
        eval_episode_return = np.array([0.], dtype=np.float32)
        while True:
            action = np.random.randint(0, num_disc_actions, 1)
            timestep = env.step(action)
            eval_episode_return += timestep.reward
            if timestep.done:
                break

        reported_return = timestep.info['eval_episode_return']
        print(
            "{}({}), {}({})".format(
                reported_return, type(reported_return), eval_episode_return, type(eval_episode_return)
            )
        )
        # The original author notes that timestep.reward and the cumulative
        # reward kept by the EvalEpisodeReturn wrapper are not the same, so
        # the two totals are compared with a relative tolerance instead of
        # exact equality. The bound is written multiplicatively so that a
        # reported return of exactly 0 cannot raise ZeroDivisionError.
        reported_value = reported_return.item()
        accumulated_value = eval_episode_return.item()
        assert abs(reported_value - accumulated_value) <= 1e-5 * abs(reported_value), (
            "accumulated return {} differs from reported return {}".format(accumulated_value, reported_value)
        )
    finally:
        # Release the environment regardless of the test outcome.
        env.close()