File size: 12,379 Bytes
a82259d
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
63244f8
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
diff --git a/data/envs/metaworld/README.md b/data/envs/metaworld/README.md
index c8f64f6..2b2d72e 100644
--- a/data/envs/metaworld/README.md
+++ b/data/envs/metaworld/README.md
@@ -10,13 +10,59 @@ Command lines:
 Train:
 
 ```sh
-python train.py --env pick-place-v2 --with_wandb=True --wandb_user=qgallouedec --wandb_project sample_facotry_metaworld
+python train.py --env pick-place-v2 --experiment=pick-place-v2 --with_wandb=True --wandb_user=qgallouedec --wandb_project sample_facotry_metaworld --train_for_env_steps=10000000 --learning_rate 0.00005 --restart_behavior overwrite
 ```
 
 Push to hub:
 
 ```sh
-python enjoy.py --algo=APPO --env=pick-place-v2 --experiment=default_experiment --train_dir=./train_dir --max_num_episodes=10 --push_to_hub --hf_repository=qgallouedec/pick-place-v2-sf --save_video --no_render --enjoy_script=enjoy --train_script=train --load_checkpoint_kind best
+python enjoy.py --algo=PPO --env=pick-place-v2 --experiment=pick-place-v2 --train_dir=./train_dir --max_num_episodes=10 --push_to_hub --hf_repository=qgallouedec/pick-place-v2-sf --save_video --no_render --enjoy_script=enjoy --train_script=train --load_checkpoint_kind best
 ```
 
-Generate dataset:
\ No newline at end of file
+Generate dataset:
+
+
+
+usage: train.py [-h] [--algo ALGO] --env ENV [--experiment EXPERIMENT] [--train_dir TRAIN_DIR] [--restart_behavior {resume,restart,overwrite}]
+                [--device {gpu,cpu}] [--seed SEED] [--num_policies NUM_POLICIES] [--async_rl ASYNC_RL] [--serial_mode SERIAL_MODE]
+                [--batched_sampling BATCHED_SAMPLING] [--num_batches_to_accumulate NUM_BATCHES_TO_ACCUMULATE]
+                [--worker_num_splits WORKER_NUM_SPLITS] [--policy_workers_per_policy POLICY_WORKERS_PER_POLICY]
+                [--max_policy_lag MAX_POLICY_LAG] [--num_workers NUM_WORKERS] [--num_envs_per_worker NUM_ENVS_PER_WORKER]
+                [--batch_size BATCH_SIZE] [--num_batches_per_epoch NUM_BATCHES_PER_EPOCH] [--num_epochs NUM_EPOCHS] [--rollout ROLLOUT]
+                [--recurrence RECURRENCE] [--shuffle_minibatches SHUFFLE_MINIBATCHES] [--gamma GAMMA] [--reward_scale REWARD_SCALE]
+                [--reward_clip REWARD_CLIP] [--value_bootstrap VALUE_BOOTSTRAP] [--normalize_returns NORMALIZE_RETURNS]
+                [--exploration_loss_coeff EXPLORATION_LOSS_COEFF] [--value_loss_coeff VALUE_LOSS_COEFF] [--kl_loss_coeff KL_LOSS_COEFF]
+                [--exploration_loss {entropy,symmetric_kl}] [--gae_lambda GAE_LAMBDA] [--ppo_clip_ratio PPO_CLIP_RATIO]
+                [--ppo_clip_value PPO_CLIP_VALUE] [--with_vtrace WITH_VTRACE] [--vtrace_rho VTRACE_RHO] [--vtrace_c VTRACE_C]
+                [--optimizer {adam,lamb}] [--adam_eps ADAM_EPS] [--adam_beta1 ADAM_BETA1] [--adam_beta2 ADAM_BETA2]
+                [--max_grad_norm MAX_GRAD_NORM] [--learning_rate LEARNING_RATE]
+                [--lr_schedule {constant,kl_adaptive_minibatch,kl_adaptive_epoch}] [--lr_schedule_kl_threshold LR_SCHEDULE_KL_THRESHOLD]
+                [--lr_adaptive_min LR_ADAPTIVE_MIN] [--lr_adaptive_max LR_ADAPTIVE_MAX] [--obs_subtract_mean OBS_SUBTRACT_MEAN]
+                [--obs_scale OBS_SCALE] [--normalize_input NORMALIZE_INPUT] [--normalize_input_keys [NORMALIZE_INPUT_KEYS ...]]
+                [--decorrelate_experience_max_seconds DECORRELATE_EXPERIENCE_MAX_SECONDS]
+                [--decorrelate_envs_on_one_worker DECORRELATE_ENVS_ON_ONE_WORKER] [--actor_worker_gpus [ACTOR_WORKER_GPUS ...]]
+                [--set_workers_cpu_affinity SET_WORKERS_CPU_AFFINITY] [--force_envs_single_thread FORCE_ENVS_SINGLE_THREAD]
+                [--default_niceness DEFAULT_NICENESS] [--log_to_file LOG_TO_FILE]
+                [--experiment_summaries_interval EXPERIMENT_SUMMARIES_INTERVAL] [--flush_summaries_interval FLUSH_SUMMARIES_INTERVAL]
+                [--stats_avg STATS_AVG] [--summaries_use_frameskip SUMMARIES_USE_FRAMESKIP] [--heartbeat_interval HEARTBEAT_INTERVAL]
+                [--heartbeat_reporting_interval HEARTBEAT_REPORTING_INTERVAL] [--train_for_env_steps TRAIN_FOR_ENV_STEPS]
+                [--train_for_seconds TRAIN_FOR_SECONDS] [--save_every_sec SAVE_EVERY_SEC] [--keep_checkpoints KEEP_CHECKPOINTS]
+                [--load_checkpoint_kind {latest,best}] [--save_milestones_sec SAVE_MILESTONES_SEC] [--save_best_every_sec SAVE_BEST_EVERY_SEC]
+                [--save_best_metric SAVE_BEST_METRIC] [--save_best_after SAVE_BEST_AFTER] [--benchmark BENCHMARK]
+                [--encoder_mlp_layers [ENCODER_MLP_LAYERS ...]]
+                [--encoder_conv_architecture {convnet_simple,convnet_impala,convnet_atari,resnet_impala}]
+                [--encoder_conv_mlp_layers [ENCODER_CONV_MLP_LAYERS ...]] [--use_rnn USE_RNN] [--rnn_size RNN_SIZE] [--rnn_type {gru,lstm}]
+                [--rnn_num_layers RNN_NUM_LAYERS] [--decoder_mlp_layers [DECODER_MLP_LAYERS ...]] [--nonlinearity {elu,relu,tanh}]
+                [--policy_initialization {orthogonal,xavier_uniform,torch_default}] [--policy_init_gain POLICY_INIT_GAIN]
+                [--actor_critic_share_weights ACTOR_CRITIC_SHARE_WEIGHTS] [--adaptive_stddev ADAPTIVE_STDDEV]
+                [--continuous_tanh_scale CONTINUOUS_TANH_SCALE] [--initial_stddev INITIAL_STDDEV] [--use_env_info_cache USE_ENV_INFO_CACHE]
+                [--env_gpu_actions ENV_GPU_ACTIONS] [--env_gpu_observations ENV_GPU_OBSERVATIONS] [--env_frameskip ENV_FRAMESKIP]
+                [--env_framestack ENV_FRAMESTACK] [--pixel_format PIXEL_FORMAT]
+                [--use_record_episode_statistics USE_RECORD_EPISODE_STATISTICS] [--with_wandb WITH_WANDB] [--wandb_user WANDB_USER]
+                [--wandb_project WANDB_PROJECT] [--wandb_group WANDB_GROUP] [--wandb_job_type WANDB_JOB_TYPE] [--wandb_tags [WANDB_TAGS ...]]
+                [--with_pbt WITH_PBT] [--pbt_mix_policies_in_one_env PBT_MIX_POLICIES_IN_ONE_ENV]
+                [--pbt_period_env_steps PBT_PERIOD_ENV_STEPS] [--pbt_start_mutation PBT_START_MUTATION]
+                [--pbt_replace_fraction PBT_REPLACE_FRACTION] [--pbt_mutation_rate PBT_MUTATION_RATE]
+                [--pbt_replace_reward_gap PBT_REPLACE_REWARD_GAP] [--pbt_replace_reward_gap_absolute PBT_REPLACE_REWARD_GAP_ABSOLUTE]
+                [--pbt_optimize_gamma PBT_OPTIMIZE_GAMMA] [--pbt_target_objective PBT_TARGET_OBJECTIVE] [--pbt_perturb_min PBT_PERTURB_MIN]
+                [--pbt_perturb_max PBT_PERTURB_MAX]
\ No newline at end of file
diff --git a/data/envs/metaworld/enjoy.py b/data/envs/metaworld/enjoy.py
index 56f08f7..c33c4db 100644
--- a/data/envs/metaworld/enjoy.py
+++ b/data/envs/metaworld/enjoy.py
@@ -1,34 +1,81 @@
 import sys
 from typing import Optional
-from sample_factory.enjoy import enjoy
 
-import metaworld
 import gym
-
+import metaworld
 from sample_factory.cfg.arguments import parse_full_cfg, parse_sf_args
+from sample_factory.enjoy import enjoy
 from sample_factory.envs.env_utils import register_env
-from sample_factory.train import run_rl
 
-def parse_args(argv=None, evaluation=False):
-    # parse the command line arguments to build
-    parser, partial_cfg = parse_sf_args(argv=argv, evaluation=evaluation)
-#    add_custom_env_args(partial_cfg.env, parser, evaluation=evaluation)
-#    custom_env_override_defaults(partial_cfg.env, parser)
-    final_cfg = parse_full_cfg(parser, argv)
-    return final_cfg
+ENV_NAMES = [
+    "assembly-v2",
+    "basketball-v2",
+    "bin-picking-v2",
+    "box-close-v2",
+    "button-press-topdown-v2",
+    "button-press-topdown-wall-v2",
+    "button-press-v2",
+    "button-press-wall-v2",
+    "coffee-button-v2",
+    "coffee-pull-v2",
+    "coffee-push-v2",
+    "dial-turn-v2",
+    "disassemble-v2",
+    "door-close-v2",
+    "door-lock-v2",
+    "door-open-v2",
+    "door-unlock-v2",
+    "hand-insert-v2",
+    "drawer-close-v2",
+    "drawer-open-v2",
+    "faucet-open-v2",
+    "faucet-close-v2",
+    "hammer-v2",
+    "handle-press-side-v2",
+    "handle-press-v2",
+    "handle-pull-side-v2",
+    "handle-pull-v2",
+    "lever-pull-v2",
+    "peg-insert-side-v2",
+    "pick-place-wall-v2",
+    "pick-out-of-hole-v2",
+    "reach-v2",
+    "push-back-v2",
+    "push-v2",
+    "pick-place-v2",
+    "plate-slide-v2",
+    "plate-slide-side-v2",
+    "plate-slide-back-v2",
+    "plate-slide-back-side-v2",
+    "peg-insert-side-v2",
+    "peg-unplug-side-v2",
+    "soccer-v2",
+    "stick-push-v2",
+    "stick-pull-v2",
+    "push-wall-v2",
+    "push-v2",
+    "reach-wall-v2",
+    "reach-v2",
+    "shelf-place-v2",
+    "sweep-into-v2",
+    "sweep-v2",
+    "window-open-v2",
+    "window-close-v2",
+]
 
 
 def make_custom_env(full_env_name: str, cfg=None, env_config=None, render_mode: Optional[str] = None):
-    # see the section below explaining arguments
-    return gym.make("pick-place-v2")
+    return gym.make(full_env_name, render_mode=render_mode)
+
 
 def main():
-    """Script entry point."""
-    register_env("pick-place-v2", make_custom_env)
-    cfg = parse_args(evaluation=True)
+    for env_name in ENV_NAMES:
+        register_env(env_name, make_custom_env)
+    parser, _ = parse_sf_args(argv=None, evaluation=True)
+    cfg = parse_full_cfg(parser)
     status = enjoy(cfg)
     return status
 
 
 if __name__ == "__main__":
-    sys.exit(main())
\ No newline at end of file
+    sys.exit(main())
diff --git a/data/envs/metaworld/train.py b/data/envs/metaworld/train.py
index e01df51..f0d2bb9 100644
--- a/data/envs/metaworld/train.py
+++ b/data/envs/metaworld/train.py
@@ -1,30 +1,78 @@
-from typing import Optional
-import argparse
 import sys
-import metaworld
-import gym
+from typing import Optional
 
+import gym
+import metaworld
 from sample_factory.cfg.arguments import parse_full_cfg, parse_sf_args
 from sample_factory.envs.env_utils import register_env
 from sample_factory.train import run_rl
 
-
-def make_custom_env(full_env_name: str, cfg=None, env_config=None, render_mode: Optional[str] = None):
-    # see the section below explaining arguments
-    return gym.make("pick-place-v2", render_mode=render_mode)
+ENV_NAMES = [
+    "assembly-v2",
+    "basketball-v2",
+    "bin-picking-v2",
+    "box-close-v2",
+    "button-press-topdown-v2",
+    "button-press-topdown-wall-v2",
+    "button-press-v2",
+    "button-press-wall-v2",
+    "coffee-button-v2",
+    "coffee-pull-v2",
+    "coffee-push-v2",
+    "dial-turn-v2",
+    "disassemble-v2",
+    "door-close-v2",
+    "door-lock-v2",
+    "door-open-v2",
+    "door-unlock-v2",
+    "hand-insert-v2",
+    "drawer-close-v2",
+    "drawer-open-v2",
+    "faucet-open-v2",
+    "faucet-close-v2",
+    "hammer-v2",
+    "handle-press-side-v2",
+    "handle-press-v2",
+    "handle-pull-side-v2",
+    "handle-pull-v2",
+    "lever-pull-v2",
+    "peg-insert-side-v2",
+    "pick-place-wall-v2",
+    "pick-out-of-hole-v2",
+    "reach-v2",
+    "push-back-v2",
+    "push-v2",
+    "pick-place-v2",
+    "plate-slide-v2",
+    "plate-slide-side-v2",
+    "plate-slide-back-v2",
+    "plate-slide-back-side-v2",
+    "peg-insert-side-v2",
+    "peg-unplug-side-v2",
+    "soccer-v2",
+    "stick-push-v2",
+    "stick-pull-v2",
+    "push-wall-v2",
+    "push-v2",
+    "reach-wall-v2",
+    "reach-v2",
+    "shelf-place-v2",
+    "sweep-into-v2",
+    "sweep-v2",
+    "window-open-v2",
+    "window-close-v2",
+]
 
 
-def parse_args(argv=None, evaluation=False):
-    # parse the command line arguments to build
-    parser, partial_cfg = parse_sf_args(argv=argv, evaluation=evaluation)
-    final_cfg = parse_full_cfg(parser, argv)
-    return final_cfg
+def make_custom_env(full_env_name: str, cfg=None, env_config=None, render_mode: Optional[str] = None):
+    return gym.make(full_env_name, render_mode=render_mode)
 
 
 def main():
-    """Script entry point."""
-    register_env("pick-place-v2", make_custom_env)
-    cfg = parse_args()
+    for env_name in ENV_NAMES:
+        register_env(env_name, make_custom_env)
+    parser, _ = parse_sf_args(argv=None, evaluation=False)
+    cfg = parse_full_cfg(parser)
     status = run_rl(cfg)
     return status
 
diff --git a/setup.py b/setup.py
index 4cbcc4b..b35d687 100644
--- a/setup.py
+++ b/setup.py
@@ -65,7 +65,7 @@ REQUIRED_PKGS = [
     "huggingface_hub>=0.10",  # For sharing objects, environments & trained RL policies
     "gym==0.26.2",  # For RL action spaces and API
     "hydra-core",
-    "envpool",
+    # "envpool",
 ]