# Hyperparameter dump serialized as a Python collections.OrderedDict.
# The document is a one-element argument list whose single argument is an
# ordered list of [key, value] pairs.
#
# SECURITY: the `!!python/object/apply` tag makes the loader call a Python
# callable. `yaml.safe_load` rejects this tag; loading requires PyYAML's
# unsafe loader, so only load this file from a trusted source.
!!python/object/apply:collections.OrderedDict
- - - batch_size
    - 256
  - - buffer_size
    - 200000
  # Each callback entry is either a dotted class path or a one-key mapping
  # of class path -> keyword arguments.
  - - callback
    - - rl_zoo3.callbacks.ParallelTrainCallback:
          gradient_steps: 200
      - rl_zoo3.callbacks.LapTimeCallback
  - - ent_coef
    - auto
  # Wrapper entries use the same shape as callbacks: a dotted path, or a
  # one-key mapping of path -> keyword arguments. List order is preserved.
  - - env_wrapper
    - - gym.wrappers.time_limit.TimeLimit:
          max_episode_steps: 10000
      - ae.wrapper.AutoencoderWrapper
      - rl_zoo3.wrappers.HistoryWrapper:
          horizon: 2
  - - gamma
    - 0.99
  # NOTE(review): this top-level gradient_steps (256) differs from the
  # gradient_steps (200) given to ParallelTrainCallback above -- confirm
  # which value is intended to take effect.
  - - gradient_steps
    - 256
  - - learning_rate
    - 0.00073
  - - learning_starts
    - 500
  # Stored as a float (2000000.0), not an integer.
  - - n_timesteps
    - 2000000.0
  # Stored as a quoted string holding a Python dict literal, not as a YAML
  # mapping; the consumer is presumably expected to evaluate it -- confirm.
  - - normalize
    - '{''norm_obs'': True, ''norm_reward'': False}'
  - - policy
    - MlpPolicy
  # Stored as a plain string holding a Python expression, not as a YAML
  # mapping; the consumer is presumably expected to evaluate it -- confirm.
  - - policy_kwargs
    - dict(log_std_init=-3, net_arch=[256, 256], n_critics=2, use_expln=True)
  - - sde_sample_freq
    - 16
  - - tau
    - 0.02
  - - train_freq
    - 200
  - - use_sde
    - true
  - - use_sde_at_warmup
    - true