# zjowowen's picture
# init space
# 079c32c
from typing import Any, Union
import gym
import numpy as np
from ding.envs.env import BaseEnv, BaseEnvTimestep
class DemoEnv(BaseEnv):
    """
    Overview:
        Minimal demonstration environment. It wraps no real simulator: observations,
        rewards and random actions are all sampled from the gym spaces declared in
        ``__init__``, and an episode is reported as done after ``_EPISODE_LENGTH`` steps.
    """

    # Number of ``step`` calls after which an episode is reported as done.
    _EPISODE_LENGTH = 30

    def __init__(self, cfg: dict) -> None:
        """
        Overview:
            Declare the observation/action/reward spaces and initialise all bookkeeping state.
        Arguments:
            - cfg (:obj:`dict`): Environment config. Accepted but not read by this demo env.
        """
        self._closed = True
        # Every attribute is created here so that ``step``/``seed``/``close`` are usable
        # (and fail predictably) even if ``reset`` has not been called yet.
        self._seed = None
        self._env = None
        self._step_count = 0
        # It is highly recommended to implement these three spaces
        self._observation_space = gym.spaces.Dict(
            {
                "demo_dict": gym.spaces.Tuple(
                    [
                        gym.spaces.Box(low=-10., high=10., shape=(4, ), dtype=np.float32),
                        gym.spaces.Box(low=-100., high=100., shape=(1, ), dtype=np.float32)
                    ]
                )
            }
        )
        self._action_space = gym.spaces.Discrete(5)
        self._reward_space = gym.spaces.Box(low=0.0, high=1.0, shape=(1, ), dtype=np.float32)
        # Running sum of the rewards returned by ``step`` in the current episode; it has the
        # same shape and dtype as a single reward sample.
        self._episode_return = np.zeros(self._reward_space.shape, dtype=self._reward_space.dtype)

    @property
    def observation_space(self) -> gym.spaces.Space:
        """
        Overview:
            The space every observation returned by ``reset`` and ``step`` is sampled from.
        """
        return self._observation_space

    @property
    def action_space(self) -> gym.spaces.Space:
        """
        Overview:
            The space ``random_action`` samples from.
        """
        return self._action_space

    @property
    def reward_space(self) -> gym.spaces.Space:
        """
        Overview:
            The space every per-step reward is sampled from.
        """
        return self._reward_space

    def reset(self) -> Any:
        """
        Overview:
            Resets the env to an initial state and returns an initial observation.
        Returns:
            - obs (:obj:`Any`): A sample drawn from ``observation_space``.
        """
        self._step_count = 0
        self._episode_return = np.zeros_like(self._episode_return)
        self._env = "A real environment"
        self._closed = False
        return self.observation_space.sample()

    def close(self) -> None:
        """
        Overview:
            Mark the env as closed. Calling it repeatedly is harmless.
        """
        self._closed = True

    def step(self, action: Any) -> BaseEnvTimestep:
        """
        Overview:
            Advance the episode by one step. ``action`` is ignored; the observation and the
            reward are sampled from their spaces.
        Arguments:
            - action (:obj:`Any`): The action to execute (unused by this demo env).
        Returns:
            - timestep (:obj:`BaseEnvTimestep`): ``(obs, rew, done, info)``. On the final step
              of an episode ``info['eval_episode_return']`` holds the sum of the rewards
              returned during that episode.
        """
        self._step_count += 1
        obs = self.observation_space.sample()
        rew = self.reward_space.sample()
        # Out-of-place addition: an array handed out in an earlier ``info`` is never mutated.
        self._episode_return = self._episode_return + rew
        done = self._step_count >= self._EPISODE_LENGTH
        info = {}
        if done:
            info['eval_episode_return'] = self._episode_return
            # Restart the counters right away so stepping can continue without ``reset``.
            self._step_count = 0
            self._episode_return = np.zeros_like(self._episode_return)
        return BaseEnvTimestep(obs, rew, done, info)

    def seed(self, seed: int) -> None:
        """
        Overview:
            Record the seed and apply it to the three spaces, so that subsequent observation,
            reward and random-action samples are reproducible.
        Arguments:
            - seed (:obj:`int`): The random seed.
        """
        self._seed = seed
        # All randomness of this env comes from ``Space.sample``; seeding the spaces is
        # therefore what makes the stored seed take effect.
        for space in (self._observation_space, self._action_space, self._reward_space):
            space.seed(seed)

    def random_action(self) -> Union[np.ndarray, int]:
        """
        Overview:
            Sample an action from ``action_space``.
        """
        return self.action_space.sample()

    def __repr__(self) -> str:
        return "Demo Env for env_implementation_test.py"