File size: 12,379 Bytes
a82259d 63244f8 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 |
diff --git a/data/envs/metaworld/README.md b/data/envs/metaworld/README.md
index c8f64f6..2b2d72e 100644
--- a/data/envs/metaworld/README.md
+++ b/data/envs/metaworld/README.md
@@ -10,13 +10,59 @@ Command lines:
Train:
```sh
-python train.py --env pick-place-v2 --with_wandb=True --wandb_user=qgallouedec --wandb_project sample_facotry_metaworld
+python train.py --env pick-place-v2 --experiment=pick-place-v2 --with_wandb=True --wandb_user=qgallouedec --wandb_project sample_facotry_metaworld --train_for_env_steps=10000000 --learning_rate 0.00005 --restart_behavior overwrite
```
Push to hub:
```sh
-python enjoy.py --algo=APPO --env=pick-place-v2 --experiment=default_experiment --train_dir=./train_dir --max_num_episodes=10 --push_to_hub --hf_repository=qgallouedec/pick-place-v2-sf --save_video --no_render --enjoy_script=enjoy --train_script=train --load_checkpoint_kind best
+python enjoy.py --algo=PPO --env=pick-place-v2 --experiment=pick-place-v2 --train_dir=./train_dir --max_num_episodes=10 --push_to_hub --hf_repository=qgallouedec/pick-place-v2-sf --save_video --no_render --enjoy_script=enjoy --train_script=train --load_checkpoint_kind best
```
-Generate dataset:
\ No newline at end of file
+Generate dataset:
+
+`train.py` command-line usage:
+
+```text
+usage: train.py [-h] [--algo ALGO] --env ENV [--experiment EXPERIMENT] [--train_dir TRAIN_DIR] [--restart_behavior {resume,restart,overwrite}]
+ [--device {gpu,cpu}] [--seed SEED] [--num_policies NUM_POLICIES] [--async_rl ASYNC_RL] [--serial_mode SERIAL_MODE]
+ [--batched_sampling BATCHED_SAMPLING] [--num_batches_to_accumulate NUM_BATCHES_TO_ACCUMULATE]
+ [--worker_num_splits WORKER_NUM_SPLITS] [--policy_workers_per_policy POLICY_WORKERS_PER_POLICY]
+ [--max_policy_lag MAX_POLICY_LAG] [--num_workers NUM_WORKERS] [--num_envs_per_worker NUM_ENVS_PER_WORKER]
+ [--batch_size BATCH_SIZE] [--num_batches_per_epoch NUM_BATCHES_PER_EPOCH] [--num_epochs NUM_EPOCHS] [--rollout ROLLOUT]
+ [--recurrence RECURRENCE] [--shuffle_minibatches SHUFFLE_MINIBATCHES] [--gamma GAMMA] [--reward_scale REWARD_SCALE]
+ [--reward_clip REWARD_CLIP] [--value_bootstrap VALUE_BOOTSTRAP] [--normalize_returns NORMALIZE_RETURNS]
+ [--exploration_loss_coeff EXPLORATION_LOSS_COEFF] [--value_loss_coeff VALUE_LOSS_COEFF] [--kl_loss_coeff KL_LOSS_COEFF]
+ [--exploration_loss {entropy,symmetric_kl}] [--gae_lambda GAE_LAMBDA] [--ppo_clip_ratio PPO_CLIP_RATIO]
+ [--ppo_clip_value PPO_CLIP_VALUE] [--with_vtrace WITH_VTRACE] [--vtrace_rho VTRACE_RHO] [--vtrace_c VTRACE_C]
+ [--optimizer {adam,lamb}] [--adam_eps ADAM_EPS] [--adam_beta1 ADAM_BETA1] [--adam_beta2 ADAM_BETA2]
+ [--max_grad_norm MAX_GRAD_NORM] [--learning_rate LEARNING_RATE]
+ [--lr_schedule {constant,kl_adaptive_minibatch,kl_adaptive_epoch}] [--lr_schedule_kl_threshold LR_SCHEDULE_KL_THRESHOLD]
+ [--lr_adaptive_min LR_ADAPTIVE_MIN] [--lr_adaptive_max LR_ADAPTIVE_MAX] [--obs_subtract_mean OBS_SUBTRACT_MEAN]
+ [--obs_scale OBS_SCALE] [--normalize_input NORMALIZE_INPUT] [--normalize_input_keys [NORMALIZE_INPUT_KEYS ...]]
+ [--decorrelate_experience_max_seconds DECORRELATE_EXPERIENCE_MAX_SECONDS]
+ [--decorrelate_envs_on_one_worker DECORRELATE_ENVS_ON_ONE_WORKER] [--actor_worker_gpus [ACTOR_WORKER_GPUS ...]]
+ [--set_workers_cpu_affinity SET_WORKERS_CPU_AFFINITY] [--force_envs_single_thread FORCE_ENVS_SINGLE_THREAD]
+ [--default_niceness DEFAULT_NICENESS] [--log_to_file LOG_TO_FILE]
+ [--experiment_summaries_interval EXPERIMENT_SUMMARIES_INTERVAL] [--flush_summaries_interval FLUSH_SUMMARIES_INTERVAL]
+ [--stats_avg STATS_AVG] [--summaries_use_frameskip SUMMARIES_USE_FRAMESKIP] [--heartbeat_interval HEARTBEAT_INTERVAL]
+ [--heartbeat_reporting_interval HEARTBEAT_REPORTING_INTERVAL] [--train_for_env_steps TRAIN_FOR_ENV_STEPS]
+ [--train_for_seconds TRAIN_FOR_SECONDS] [--save_every_sec SAVE_EVERY_SEC] [--keep_checkpoints KEEP_CHECKPOINTS]
+ [--load_checkpoint_kind {latest,best}] [--save_milestones_sec SAVE_MILESTONES_SEC] [--save_best_every_sec SAVE_BEST_EVERY_SEC]
+ [--save_best_metric SAVE_BEST_METRIC] [--save_best_after SAVE_BEST_AFTER] [--benchmark BENCHMARK]
+ [--encoder_mlp_layers [ENCODER_MLP_LAYERS ...]]
+ [--encoder_conv_architecture {convnet_simple,convnet_impala,convnet_atari,resnet_impala}]
+ [--encoder_conv_mlp_layers [ENCODER_CONV_MLP_LAYERS ...]] [--use_rnn USE_RNN] [--rnn_size RNN_SIZE] [--rnn_type {gru,lstm}]
+ [--rnn_num_layers RNN_NUM_LAYERS] [--decoder_mlp_layers [DECODER_MLP_LAYERS ...]] [--nonlinearity {elu,relu,tanh}]
+ [--policy_initialization {orthogonal,xavier_uniform,torch_default}] [--policy_init_gain POLICY_INIT_GAIN]
+ [--actor_critic_share_weights ACTOR_CRITIC_SHARE_WEIGHTS] [--adaptive_stddev ADAPTIVE_STDDEV]
+ [--continuous_tanh_scale CONTINUOUS_TANH_SCALE] [--initial_stddev INITIAL_STDDEV] [--use_env_info_cache USE_ENV_INFO_CACHE]
+ [--env_gpu_actions ENV_GPU_ACTIONS] [--env_gpu_observations ENV_GPU_OBSERVATIONS] [--env_frameskip ENV_FRAMESKIP]
+ [--env_framestack ENV_FRAMESTACK] [--pixel_format PIXEL_FORMAT]
+ [--use_record_episode_statistics USE_RECORD_EPISODE_STATISTICS] [--with_wandb WITH_WANDB] [--wandb_user WANDB_USER]
+ [--wandb_project WANDB_PROJECT] [--wandb_group WANDB_GROUP] [--wandb_job_type WANDB_JOB_TYPE] [--wandb_tags [WANDB_TAGS ...]]
+ [--with_pbt WITH_PBT] [--pbt_mix_policies_in_one_env PBT_MIX_POLICIES_IN_ONE_ENV]
+ [--pbt_period_env_steps PBT_PERIOD_ENV_STEPS] [--pbt_start_mutation PBT_START_MUTATION]
+ [--pbt_replace_fraction PBT_REPLACE_FRACTION] [--pbt_mutation_rate PBT_MUTATION_RATE]
+ [--pbt_replace_reward_gap PBT_REPLACE_REWARD_GAP] [--pbt_replace_reward_gap_absolute PBT_REPLACE_REWARD_GAP_ABSOLUTE]
+ [--pbt_optimize_gamma PBT_OPTIMIZE_GAMMA] [--pbt_target_objective PBT_TARGET_OBJECTIVE] [--pbt_perturb_min PBT_PERTURB_MIN]
+ [--pbt_perturb_max PBT_PERTURB_MAX]
+```
\ No newline at end of file
diff --git a/data/envs/metaworld/enjoy.py b/data/envs/metaworld/enjoy.py
index 56f08f7..c33c4db 100644
--- a/data/envs/metaworld/enjoy.py
+++ b/data/envs/metaworld/enjoy.py
@@ -1,34 +1,81 @@
import sys
from typing import Optional
-from sample_factory.enjoy import enjoy
-import metaworld
import gym
-
+import metaworld
from sample_factory.cfg.arguments import parse_full_cfg, parse_sf_args
+from sample_factory.enjoy import enjoy
from sample_factory.envs.env_utils import register_env
-from sample_factory.train import run_rl
-def parse_args(argv=None, evaluation=False):
- # parse the command line arguments to build
- parser, partial_cfg = parse_sf_args(argv=argv, evaluation=evaluation)
-# add_custom_env_args(partial_cfg.env, parser, evaluation=evaluation)
-# custom_env_override_defaults(partial_cfg.env, parser)
- final_cfg = parse_full_cfg(parser, argv)
- return final_cfg
+# Environment ids that main() registers with Sample Factory; make_custom_env
+# passes each id unchanged to gym.make(). Every id is listed exactly once so
+# that no environment is registered twice.
+ENV_NAMES = [
+ "assembly-v2",
+ "basketball-v2",
+ "bin-picking-v2",
+ "box-close-v2",
+ "button-press-topdown-v2",
+ "button-press-topdown-wall-v2",
+ "button-press-v2",
+ "button-press-wall-v2",
+ "coffee-button-v2",
+ "coffee-pull-v2",
+ "coffee-push-v2",
+ "dial-turn-v2",
+ "disassemble-v2",
+ "door-close-v2",
+ "door-lock-v2",
+ "door-open-v2",
+ "door-unlock-v2",
+ "hand-insert-v2",
+ "drawer-close-v2",
+ "drawer-open-v2",
+ "faucet-open-v2",
+ "faucet-close-v2",
+ "hammer-v2",
+ "handle-press-side-v2",
+ "handle-press-v2",
+ "handle-pull-side-v2",
+ "handle-pull-v2",
+ "lever-pull-v2",
+ "peg-insert-side-v2",
+ "pick-place-wall-v2",
+ "pick-out-of-hole-v2",
+ "reach-v2",
+ "push-back-v2",
+ "push-v2",
+ "pick-place-v2",
+ "plate-slide-v2",
+ "plate-slide-side-v2",
+ "plate-slide-back-v2",
+ "plate-slide-back-side-v2",
+ "peg-unplug-side-v2",
+ "soccer-v2",
+ "stick-push-v2",
+ "stick-pull-v2",
+ "push-wall-v2",
+ "reach-wall-v2",
+ "shelf-place-v2",
+ "sweep-into-v2",
+ "sweep-v2",
+ "window-open-v2",
+ "window-close-v2",
+]
def make_custom_env(full_env_name: str, cfg=None, env_config=None, render_mode: Optional[str] = None):
- # see the section below explaining arguments
- return gym.make("pick-place-v2")
+ """Create the gym environment whose id is ``full_env_name``.
+
+ ``cfg`` and ``env_config`` are accepted but not used by this function;
+ ``render_mode`` is forwarded to ``gym.make`` unchanged.
+ """
+ return gym.make(full_env_name, render_mode=render_mode)
+
def main():
- """Script entry point."""
- register_env("pick-place-v2", make_custom_env)
- cfg = parse_args(evaluation=True)
+ """Register every id in ENV_NAMES, build the evaluation config and run ``enjoy``.
+
+ Returns the status value produced by ``enjoy(cfg)``.
+ """
+ # All ids share the same factory; the id itself selects the environment.
+ for env_name in ENV_NAMES:
+ register_env(env_name, make_custom_env)
+ # Only the parser is needed here; the second return value is discarded.
+ parser, _ = parse_sf_args(argv=None, evaluation=True)
+ cfg = parse_full_cfg(parser)
status = enjoy(cfg)
return status
if __name__ == "__main__":
- sys.exit(main())
\ No newline at end of file
+ sys.exit(main())
diff --git a/data/envs/metaworld/train.py b/data/envs/metaworld/train.py
index e01df51..f0d2bb9 100644
--- a/data/envs/metaworld/train.py
+++ b/data/envs/metaworld/train.py
@@ -1,30 +1,78 @@
-from typing import Optional
-import argparse
import sys
-import metaworld
-import gym
+from typing import Optional
+import gym
+import metaworld
from sample_factory.cfg.arguments import parse_full_cfg, parse_sf_args
from sample_factory.envs.env_utils import register_env
from sample_factory.train import run_rl
-
-def make_custom_env(full_env_name: str, cfg=None, env_config=None, render_mode: Optional[str] = None):
- # see the section below explaining arguments
- return gym.make("pick-place-v2", render_mode=render_mode)
+# Environment ids that main() registers with Sample Factory; make_custom_env
+# passes each id unchanged to gym.make(). Every id is listed exactly once so
+# that no environment is registered twice.
+ENV_NAMES = [
+ "assembly-v2",
+ "basketball-v2",
+ "bin-picking-v2",
+ "box-close-v2",
+ "button-press-topdown-v2",
+ "button-press-topdown-wall-v2",
+ "button-press-v2",
+ "button-press-wall-v2",
+ "coffee-button-v2",
+ "coffee-pull-v2",
+ "coffee-push-v2",
+ "dial-turn-v2",
+ "disassemble-v2",
+ "door-close-v2",
+ "door-lock-v2",
+ "door-open-v2",
+ "door-unlock-v2",
+ "hand-insert-v2",
+ "drawer-close-v2",
+ "drawer-open-v2",
+ "faucet-open-v2",
+ "faucet-close-v2",
+ "hammer-v2",
+ "handle-press-side-v2",
+ "handle-press-v2",
+ "handle-pull-side-v2",
+ "handle-pull-v2",
+ "lever-pull-v2",
+ "peg-insert-side-v2",
+ "pick-place-wall-v2",
+ "pick-out-of-hole-v2",
+ "reach-v2",
+ "push-back-v2",
+ "push-v2",
+ "pick-place-v2",
+ "plate-slide-v2",
+ "plate-slide-side-v2",
+ "plate-slide-back-v2",
+ "plate-slide-back-side-v2",
+ "peg-unplug-side-v2",
+ "soccer-v2",
+ "stick-push-v2",
+ "stick-pull-v2",
+ "push-wall-v2",
+ "reach-wall-v2",
+ "shelf-place-v2",
+ "sweep-into-v2",
+ "sweep-v2",
+ "window-open-v2",
+ "window-close-v2",
+]
-def parse_args(argv=None, evaluation=False):
- # parse the command line arguments to build
- parser, partial_cfg = parse_sf_args(argv=argv, evaluation=evaluation)
- final_cfg = parse_full_cfg(parser, argv)
- return final_cfg
+def make_custom_env(full_env_name: str, cfg=None, env_config=None, render_mode: Optional[str] = None):
+ """Create the gym environment whose id is ``full_env_name``.
+
+ ``cfg`` and ``env_config`` are accepted but not used by this function;
+ ``render_mode`` is forwarded to ``gym.make`` unchanged.
+ """
+ return gym.make(full_env_name, render_mode=render_mode)
def main():
- """Script entry point."""
- register_env("pick-place-v2", make_custom_env)
- cfg = parse_args()
+ """Register every id in ENV_NAMES, build the training config and run ``run_rl``.
+
+ Returns the status value produced by ``run_rl(cfg)``.
+ """
+ # All ids share the same factory; the id itself selects the environment.
+ for env_name in ENV_NAMES:
+ register_env(env_name, make_custom_env)
+ # Only the parser is needed here; the second return value is discarded.
+ parser, _ = parse_sf_args(argv=None, evaluation=False)
+ cfg = parse_full_cfg(parser)
status = run_rl(cfg)
return status
diff --git a/setup.py b/setup.py
index 4cbcc4b..b35d687 100644
--- a/setup.py
+++ b/setup.py
@@ -65,7 +65,7 @@ REQUIRED_PKGS = [
"huggingface_hub>=0.10", # For sharing objects, environments & trained RL policies
"gym==0.26.2", # For RL action spaces and API
"hydra-core",
- "envpool",
+ # "envpool",
]
|