File size: 2,612 Bytes
3d6ce6f |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 |
# Baseline CartPole settings. The mapping is anchored as `cartpole-defaults`
# so that CartPole-v0 can merge it in instead of repeating the values.
CartPole-v1: &cartpole-defaults
  # Explicit `!!float` tag: exponent notation without a decimal point is not
  # resolved as a float by every YAML loader.
  n_timesteps: !!float 5e5
  env_hyperparams:
    n_envs: 8
# Same configuration as CartPole-v1: every key comes from the
# `cartpole-defaults` anchor through the merge key; nothing is overridden.
CartPole-v0:
  <<: *cartpole-defaults
# Only the step budget and environment settings are specified here; no
# policy_hyperparams or algo_hyperparams overrides are given for this entry.
MountainCar-v0:
  # `!!float` forces the exponent-notation literal to load as a float.
  n_timesteps: !!float 1e6
  env_hyperparams:
    n_envs: 16
    normalize: true
# Continuous-action MountainCar entry. The policy overrides are currently
# disabled (commented out) and kept only for reference.
MountainCarContinuous-v0:
  # `!!float` forces the exponent-notation literal to load as a float.
  n_timesteps: !!float 1e5
  env_hyperparams:
    n_envs: 4
    normalize: true
  # Disabled policy overrides:
  # policy_hyperparams:
  #   use_sde: true
  #   log_std_init: 0.0
  #   init_layers_orthogonal: false
  algo_hyperparams:
    n_steps: 100
    # NOTE(review): `use_sde` is commented out above, so this setting may
    # have no effect as written — confirm against the consumer.
    sde_sample_freq: 16
# Only the step budget and environment settings are specified here; no
# policy_hyperparams or algo_hyperparams overrides are given for this entry.
Acrobot-v1:
  # `!!float` forces the exponent-notation literal to load as a float.
  n_timesteps: !!float 5e5
  env_hyperparams:
    # Keys listed in the same order as the sibling entries (mapping order
    # carries no meaning in YAML).
    n_envs: 16
    normalize: true
# LunarLander entry: environment settings plus algorithm overrides; no
# policy_hyperparams are given.
LunarLander-v2:
  # `!!float` forces the exponent-notation literals in this entry to load as
  # floats rather than depending on the loader's implicit typing.
  n_timesteps: !!float 1e6
  env_hyperparams:
    n_envs: 8
    normalize: true
  algo_hyperparams:
    n_steps: 5
    gamma: 0.995
    learning_rate: !!float 8.3e-4
    learning_rate_decay: linear
    ent_coef: !!float 1e-5
# BipedalWalker entry: overrides all three hyperparameter groups
# (environment, policy, algorithm).
BipedalWalker-v3:
  # `!!float` forces the exponent-notation literals in this entry to load as
  # floats rather than depending on the loader's implicit typing.
  n_timesteps: !!float 5e6
  env_hyperparams:
    n_envs: 16
    normalize: true
  policy_hyperparams:
    use_sde: true
    # Written as the integer -2, not -2.0.
    log_std_init: -2
    init_layers_orthogonal: false
  algo_hyperparams:
    # Written as the integer 0, not 0.0.
    ent_coef: 0
    max_grad_norm: 0.5
    n_steps: 8
    gae_lambda: 0.9
    vf_coef: 0.4
    gamma: 0.99
    learning_rate: !!float 9.6e-4
    learning_rate_decay: linear
# Shared PyBullet configuration. The whole mapping is anchored as
# `pybullet-defaults` and merged unchanged into AntBulletEnv-v0,
# Walker2DBulletEnv-v0 and HopperBulletEnv-v0.
HalfCheetahBulletEnv-v0: &pybullet-defaults
  # `!!float` forces the exponent-notation literals in this entry to load as
  # floats rather than depending on the loader's implicit typing.
  n_timesteps: !!float 2e6
  env_hyperparams:
    n_envs: 4
    normalize: true
  policy_hyperparams:
    use_sde: true
    # Written as the integer -2, not -2.0.
    log_std_init: -2
    init_layers_orthogonal: false
  # The key is spelled `algo_hyperparams`, exactly as in every other
  # environment entry. The merge key copies key names verbatim, so the
  # environments that alias `pybullet-defaults` inherit this spelling too.
  # The sub-mapping carries its own anchor, `pybullet-algo-defaults`, for
  # reuse of the algorithm settings alone.
  algo_hyperparams: &pybullet-algo-defaults
    n_steps: 8
    # Written as the integer 0, not 0.0.
    ent_coef: 0
    max_grad_norm: 0.5
    gae_lambda: 0.9
    gamma: 0.99
    vf_coef: 0.4
    learning_rate: !!float 9.6e-4
    learning_rate_decay: linear
# Takes every setting from the `pybullet-defaults` anchor (defined on
# HalfCheetahBulletEnv-v0) through the merge key; nothing is overridden.
AntBulletEnv-v0:
  <<: *pybullet-defaults
# Takes every setting from the `pybullet-defaults` anchor (defined on
# HalfCheetahBulletEnv-v0) through the merge key; nothing is overridden.
Walker2DBulletEnv-v0:
  <<: *pybullet-defaults
# Takes every setting from the `pybullet-defaults` anchor (defined on
# HalfCheetahBulletEnv-v0) through the merge key; nothing is overridden.
HopperBulletEnv-v0:
  <<: *pybullet-defaults
# CarRacing entry: overrides all three hyperparameter groups. Unlike most
# other entries, env_hyperparams sets `frame_stack` and does not set
# `normalize`.
CarRacing-v0:
  # `!!float` forces the exponent-notation literals in this entry to load as
  # floats rather than depending on the loader's implicit typing.
  n_timesteps: !!float 4e6
  env_hyperparams:
    n_envs: 8
    frame_stack: 4
  policy_hyperparams:
    use_sde: true
    # Written as the integer -2, not -2.0.
    log_std_init: -2
    init_layers_orthogonal: false
    activation_fn: relu
    share_features_extractor: false
    cnn_feature_dim: 256
    # Single-element list, kept in flow style as a short leaf value.
    hidden_sizes: [256]
  algo_hyperparams:
    n_steps: 8
    learning_rate: !!float 5e-5
    learning_rate_decay: linear
    gamma: 0.99
    gae_lambda: 0.95
    # Written as the integer 0, not 0.0.
    ent_coef: 0
    sde_sample_freq: 4
# Atari template entry. It defines three anchors for reuse: `atari-defaults`
# (the whole entry), `atari-env-defaults` (env_hyperparams) and
# `atari-policy-defaults` (policy_hyperparams). algo_hyperparams has no
# anchor of its own.
# NOTE(review): the leading underscore suggests this is a template rather
# than a runnable environment id — confirm how the consumer treats it.
_atari: &atari-defaults
  # `!!float` forces the exponent-notation literal to load as a float.
  n_timesteps: !!float 1e7
  env_hyperparams: &atari-env-defaults
    n_envs: 16
    frame_stack: 4
    no_reward_timeout_steps: 1000
    no_reward_fire_steps: 500
    vec_env_class: async
  policy_hyperparams: &atari-policy-defaults
    activation_fn: relu
  algo_hyperparams:
    ent_coef: 0.01
    vf_coef: 0.25
|