File size: 3,121 Bytes
8bf4dee
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
0e936e1
8bf4dee
 
 
b638440
8bf4dee
 
 
 
 
 
b638440
 
0e936e1
 
8bf4dee
0e936e1
8bf4dee
 
b638440
 
 
 
 
 
 
 
 
0e936e1
b638440
8bf4dee
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
# Settings for CartPole-v1. The mapping is anchored as `cartpole-defaults`
# so that other entries in this file can pull it in with a merge key (`<<`).
CartPole-v1: &cartpole-defaults
  # The explicit `!!float` tag types the scalar as a float regardless of how
  # the loader would resolve a bare `5e5` (no decimal point in the mantissa).
  n_timesteps: !!float 5e5
  env_hyperparams:
    # presumably the number of parallel environment copies; confirm against the loader
    n_envs: 8

# CartPole-v0 has no settings of its own: the merge key copies every key from
# the mapping anchored as `cartpole-defaults`.
CartPole-v0:
  <<: *cartpole-defaults

# Settings for MountainCar-v0. Only the timestep budget and environment options
# are given; no policy_hyperparams or algo_hyperparams keys are defined here.
MountainCar-v0:
  # `!!float` forces a float; a bare `1e6` has no decimal point.
  n_timesteps: !!float 1e6
  env_hyperparams:
    n_envs: 16
    # NOTE(review): presumably enables an observation/reward normalization
    # wrapper; confirm what the consumer does with this flag.
    normalize: true

# Settings for MountainCarContinuous-v0.
MountainCarContinuous-v0:
  # `!!float` forces a float; a bare `1e5` has no decimal point.
  n_timesteps: !!float 1e5
  env_hyperparams:
    n_envs: 4
    normalize: true
  # The policy overrides below are disabled (commented out), so this entry
  # defines no policy_hyperparams key at all.
  # policy_hyperparams:
  #   use_sde: true
  #   log_std_init: 0.0
  #   init_layers_orthogonal: false
  algo_hyperparams:
    n_steps: 100
    # NOTE(review): sde_sample_freq is still set even though the `use_sde`
    # override above is commented out; confirm whether this value has any
    # effect in that configuration or is a leftover.
    sde_sample_freq: 16

# Settings for Acrobot-v1. Only the timestep budget and environment options
# are given; no policy_hyperparams or algo_hyperparams keys are defined here.
Acrobot-v1:
  # `!!float` forces a float; a bare `5e5` has no decimal point.
  n_timesteps: !!float 5e5
  env_hyperparams:
    normalize: true
    n_envs: 16

# Tuned
# Settings for LunarLander-v2. The long-precision values look like the output
# of an automated search rather than hand-picked numbers; verify provenance
# before rounding or editing them.
LunarLander-v2:
  # The only entry in the visible part of this file that sets `device`.
  # NOTE(review): presumably pins training to the CPU; confirm with the consumer.
  device: cpu
  n_timesteps: !!float 1e6
  env_hyperparams:
    n_envs: 4
    normalize: true
  algo_hyperparams:
    n_steps: 2
    gamma: 0.9955517404308908
    gae_lambda: 0.9875340918797773
    learning_rate: 0.0013814130817068916
    learning_rate_decay: linear
    # Explicit `!!float` tag: guarantees a float even if the loader would not
    # resolve this exponent-notation scalar as one on its own.
    ent_coef: !!float 3.388369146384422e-7
    # `none` is a plain string here, not YAML null (null is spelled `null` or `~`),
    # so the consumer receives the text "none".
    ent_coef_decay: none
    max_grad_norm: 3.33982095073364
    normalize_advantage: true
    vf_coef: 0.1667838310548184

# Settings for BipedalWalker-v3.
# The policy_hyperparams and algo_hyperparams values here are currently the same
# as those under HalfCheetahBulletEnv-v0 further down (key order differs);
# only n_timesteps and n_envs differ. They are spelled out rather than merged
# because an alias cannot refer to an anchor that is defined later in the file.
BipedalWalker-v3:
  # `!!float` forces a float; a bare `5e6` has no decimal point.
  n_timesteps: !!float 5e6
  env_hyperparams:
    n_envs: 16
    normalize: true
  policy_hyperparams:
    use_sde: true
    log_std_init: -2
    init_layers_orthogonal: false
  algo_hyperparams:
    ent_coef: 0
    max_grad_norm: 0.5
    n_steps: 8
    gae_lambda: 0.9
    vf_coef: 0.4
    gamma: 0.99
    learning_rate: !!float 9.6e-4
    learning_rate_decay: linear

# Settings for HalfCheetahBulletEnv-v0. The whole mapping is anchored as
# `pybullet-defaults`; AntBulletEnv-v0, Walker2DBulletEnv-v0 and
# HopperBulletEnv-v0 merge it unchanged, so any edit here changes all four.
HalfCheetahBulletEnv-v0: &pybullet-defaults
  # `!!float` forces a float; a bare `2e6` has no decimal point.
  n_timesteps: !!float 2e6
  env_hyperparams:
    n_envs: 4
    normalize: true
  policy_hyperparams:
    use_sde: true
    log_std_init: -2
    init_layers_orthogonal: false
  # Anchored separately as `pybullet-algo-defaults` so an entry could reuse just
  # the algorithm settings; no alias to it appears in the visible part of the file.
  algo_hyperparams: &pybullet-algo-defaults
    n_steps: 8
    ent_coef: 0
    max_grad_norm: 0.5
    gae_lambda: 0.9
    gamma: 0.99
    vf_coef: 0.4
    learning_rate: !!float 9.6e-4
    learning_rate_decay: linear

# AntBulletEnv-v0 takes every key from the `pybullet-defaults` anchor; no overrides.
AntBulletEnv-v0:
  <<: *pybullet-defaults

# Walker2DBulletEnv-v0 takes every key from the `pybullet-defaults` anchor; no overrides.
Walker2DBulletEnv-v0:
  <<: *pybullet-defaults

# HopperBulletEnv-v0 takes every key from the `pybullet-defaults` anchor; no overrides.
HopperBulletEnv-v0:
  <<: *pybullet-defaults

# Tuned
# Settings for CarRacing-v0. The long-precision values look like the output of
# an automated search rather than hand-picked numbers; verify provenance
# before rounding or editing them.
CarRacing-v0:
  # `!!float` forces a float; a bare `4e6` has no decimal point.
  n_timesteps: !!float 4e6
  env_hyperparams:
    n_envs: 4
    frame_stack: 4
    normalize: true
    # Extra options passed alongside `normalize`.
    # NOTE(review): reads as "normalize rewards but leave observations alone";
    # confirm against the code that consumes normalize_kwargs.
    normalize_kwargs:
      norm_obs: false
      norm_reward: true
  policy_hyperparams:
    use_sde: true
    log_std_init: -4.839609092563
    init_layers_orthogonal: true
    activation_fn: tanh
    share_features_extractor: false
    cnn_flatten_dim: 256
    # A one-element list, not a scalar.
    hidden_sizes: [256]
  algo_hyperparams:
    n_steps: 64
    learning_rate: 0.000018971962220405576
    gamma: 0.9942776405534832
    gae_lambda: 0.9549244758833236
    ent_coef: 0.0000015666550584860516
    ent_coef_decay: linear
    vf_coef: 0.12164696385898476
    max_grad_norm: 2.2574480552177127
    normalize_advantage: false
    use_rms_prop: false
    sde_sample_freq: 16

# Shared Atari settings, exposed through three anchors: `atari-defaults` (the
# whole mapping), `atari-env-defaults` and `atari-policy-defaults`.
# NOTE(review): the underscore-prefixed key does not look like an environment id,
# so this is presumably a template that is only merged into other entries;
# confirm the loader never treats `_atari` as an environment to train.
# No alias to any of these anchors appears in the visible part of the file.
_atari: &atari-defaults
  # `!!float` forces a float; a bare `1e7` has no decimal point.
  n_timesteps: !!float 1e7
  env_hyperparams: &atari-env-defaults
    n_envs: 16
    frame_stack: 4
    no_reward_timeout_steps: 1000
    no_reward_fire_steps: 500
    # NOTE(review): presumably selects an asynchronous vectorized-environment
    # implementation; confirm the accepted values with the consumer.
    vec_env_class: async
  policy_hyperparams: &atari-policy-defaults
    activation_fn: relu
  algo_hyperparams:
    ent_coef: 0.01
    vf_coef: 0.25