|
|
|
|
|
|
|
|
|
@@ -10,13 +10,59 @@ Command lines: |
|
Train: |
|
|
|
```sh |
|
-python train.py --env pick-place-v2 --with_wandb=True --wandb_user=qgallouedec --wandb_project sample_facotry_metaworld |
|
+python train.py --env pick-place-v2 --experiment=pick-place-v2 --with_wandb=True --wandb_user=qgallouedec --wandb_project sample_facotry_metaworld --train_for_env_steps=10000000 --learning_rate 0.00005 --restart_behavior overwrite |
|
``` |
|
|
|
Push to hub: |
|
|
|
```sh |
|
-python enjoy.py --algo=APPO --env=pick-place-v2 --experiment=default_experiment --train_dir=./train_dir --max_num_episodes=10 --push_to_hub --hf_repository=qgallouedec/pick-place-v2-sf --save_video --no_render --enjoy_script=enjoy --train_script=train --load_checkpoint_kind best |
|
+python enjoy.py --algo=PPO --env=pick-place-v2 --experiment=pick-place-v2 --train_dir=./train_dir --max_num_episodes=10 --push_to_hub --hf_repository=qgallouedec/pick-place-v2-sf --save_video --no_render --enjoy_script=enjoy --train_script=train --load_checkpoint_kind best |
|
``` |
|
|
|
-Generate dataset: |
|
\ No newline at end of file |
|
+Generate dataset: |
|
+ |
|
+ |
|
+ |
|
+usage: train.py [-h] [--algo ALGO] --env ENV [--experiment EXPERIMENT] [--train_dir TRAIN_DIR] [--restart_behavior {resume,restart,overwrite}] |
|
+ [--device {gpu,cpu}] [--seed SEED] [--num_policies NUM_POLICIES] [--async_rl ASYNC_RL] [--serial_mode SERIAL_MODE] |
|
+ [--batched_sampling BATCHED_SAMPLING] [--num_batches_to_accumulate NUM_BATCHES_TO_ACCUMULATE] |
|
+ [--worker_num_splits WORKER_NUM_SPLITS] [--policy_workers_per_policy POLICY_WORKERS_PER_POLICY] |
|
+ [--max_policy_lag MAX_POLICY_LAG] [--num_workers NUM_WORKERS] [--num_envs_per_worker NUM_ENVS_PER_WORKER] |
|
+ [--batch_size BATCH_SIZE] [--num_batches_per_epoch NUM_BATCHES_PER_EPOCH] [--num_epochs NUM_EPOCHS] [--rollout ROLLOUT] |
|
+ [--recurrence RECURRENCE] [--shuffle_minibatches SHUFFLE_MINIBATCHES] [--gamma GAMMA] [--reward_scale REWARD_SCALE] |
|
+ [--reward_clip REWARD_CLIP] [--value_bootstrap VALUE_BOOTSTRAP] [--normalize_returns NORMALIZE_RETURNS] |
|
+ [--exploration_loss_coeff EXPLORATION_LOSS_COEFF] [--value_loss_coeff VALUE_LOSS_COEFF] [--kl_loss_coeff KL_LOSS_COEFF] |
|
+ [--exploration_loss {entropy,symmetric_kl}] [--gae_lambda GAE_LAMBDA] [--ppo_clip_ratio PPO_CLIP_RATIO] |
|
+ [--ppo_clip_value PPO_CLIP_VALUE] [--with_vtrace WITH_VTRACE] [--vtrace_rho VTRACE_RHO] [--vtrace_c VTRACE_C] |
|
+ [--optimizer {adam,lamb}] [--adam_eps ADAM_EPS] [--adam_beta1 ADAM_BETA1] [--adam_beta2 ADAM_BETA2] |
|
+ [--max_grad_norm MAX_GRAD_NORM] [--learning_rate LEARNING_RATE] |
|
+ [--lr_schedule {constant,kl_adaptive_minibatch,kl_adaptive_epoch}] [--lr_schedule_kl_threshold LR_SCHEDULE_KL_THRESHOLD] |
|
+ [--lr_adaptive_min LR_ADAPTIVE_MIN] [--lr_adaptive_max LR_ADAPTIVE_MAX] [--obs_subtract_mean OBS_SUBTRACT_MEAN] |
|
+ [--obs_scale OBS_SCALE] [--normalize_input NORMALIZE_INPUT] [--normalize_input_keys [NORMALIZE_INPUT_KEYS ...]] |
|
+ [--decorrelate_experience_max_seconds DECORRELATE_EXPERIENCE_MAX_SECONDS] |
|
+ [--decorrelate_envs_on_one_worker DECORRELATE_ENVS_ON_ONE_WORKER] [--actor_worker_gpus [ACTOR_WORKER_GPUS ...]] |
|
+ [--set_workers_cpu_affinity SET_WORKERS_CPU_AFFINITY] [--force_envs_single_thread FORCE_ENVS_SINGLE_THREAD] |
|
+ [--default_niceness DEFAULT_NICENESS] [--log_to_file LOG_TO_FILE] |
|
+ [--experiment_summaries_interval EXPERIMENT_SUMMARIES_INTERVAL] [--flush_summaries_interval FLUSH_SUMMARIES_INTERVAL] |
|
+ [--stats_avg STATS_AVG] [--summaries_use_frameskip SUMMARIES_USE_FRAMESKIP] [--heartbeat_interval HEARTBEAT_INTERVAL] |
|
+ [--heartbeat_reporting_interval HEARTBEAT_REPORTING_INTERVAL] [--train_for_env_steps TRAIN_FOR_ENV_STEPS] |
|
+ [--train_for_seconds TRAIN_FOR_SECONDS] [--save_every_sec SAVE_EVERY_SEC] [--keep_checkpoints KEEP_CHECKPOINTS] |
|
+ [--load_checkpoint_kind {latest,best}] [--save_milestones_sec SAVE_MILESTONES_SEC] [--save_best_every_sec SAVE_BEST_EVERY_SEC] |
|
+ [--save_best_metric SAVE_BEST_METRIC] [--save_best_after SAVE_BEST_AFTER] [--benchmark BENCHMARK] |
|
+ [--encoder_mlp_layers [ENCODER_MLP_LAYERS ...]] |
|
+ [--encoder_conv_architecture {convnet_simple,convnet_impala,convnet_atari,resnet_impala}] |
|
+ [--encoder_conv_mlp_layers [ENCODER_CONV_MLP_LAYERS ...]] [--use_rnn USE_RNN] [--rnn_size RNN_SIZE] [--rnn_type {gru,lstm}] |
|
+ [--rnn_num_layers RNN_NUM_LAYERS] [--decoder_mlp_layers [DECODER_MLP_LAYERS ...]] [--nonlinearity {elu,relu,tanh}] |
|
+ [--policy_initialization {orthogonal,xavier_uniform,torch_default}] [--policy_init_gain POLICY_INIT_GAIN] |
|
+ [--actor_critic_share_weights ACTOR_CRITIC_SHARE_WEIGHTS] [--adaptive_stddev ADAPTIVE_STDDEV] |
|
+ [--continuous_tanh_scale CONTINUOUS_TANH_SCALE] [--initial_stddev INITIAL_STDDEV] [--use_env_info_cache USE_ENV_INFO_CACHE] |
|
+ [--env_gpu_actions ENV_GPU_ACTIONS] [--env_gpu_observations ENV_GPU_OBSERVATIONS] [--env_frameskip ENV_FRAMESKIP] |
|
+ [--env_framestack ENV_FRAMESTACK] [--pixel_format PIXEL_FORMAT] |
|
+ [--use_record_episode_statistics USE_RECORD_EPISODE_STATISTICS] [--with_wandb WITH_WANDB] [--wandb_user WANDB_USER] |
|
+ [--wandb_project WANDB_PROJECT] [--wandb_group WANDB_GROUP] [--wandb_job_type WANDB_JOB_TYPE] [--wandb_tags [WANDB_TAGS ...]] |
|
+ [--with_pbt WITH_PBT] [--pbt_mix_policies_in_one_env PBT_MIX_POLICIES_IN_ONE_ENV] |
|
+ [--pbt_period_env_steps PBT_PERIOD_ENV_STEPS] [--pbt_start_mutation PBT_START_MUTATION] |
|
+ [--pbt_replace_fraction PBT_REPLACE_FRACTION] [--pbt_mutation_rate PBT_MUTATION_RATE] |
|
+ [--pbt_replace_reward_gap PBT_REPLACE_REWARD_GAP] [--pbt_replace_reward_gap_absolute PBT_REPLACE_REWARD_GAP_ABSOLUTE] |
|
+ [--pbt_optimize_gamma PBT_OPTIMIZE_GAMMA] [--pbt_target_objective PBT_TARGET_OBJECTIVE] [--pbt_perturb_min PBT_PERTURB_MIN] |
|
+ [--pbt_perturb_max PBT_PERTURB_MAX] |
|
\ No newline at end of file |
|
|
|
|
|
|
|
|
|
@@ -1,34 +1,81 @@ |
|
import sys |
|
from typing import Optional |
|
-from sample_factory.enjoy import enjoy |
|
|
|
-import metaworld |
|
import gym |
|
- |
|
+import metaworld |
|
from sample_factory.cfg.arguments import parse_full_cfg, parse_sf_args |
|
+from sample_factory.enjoy import enjoy |
|
from sample_factory.envs.env_utils import register_env |
|
-from sample_factory.train import run_rl |
|
|
|
-def parse_args(argv=None, evaluation=False): |
|
- # parse the command line arguments to build |
|
- parser, partial_cfg = parse_sf_args(argv=argv, evaluation=evaluation) |
|
-# add_custom_env_args(partial_cfg.env, parser, evaluation=evaluation) |
|
-# custom_env_override_defaults(partial_cfg.env, parser) |
|
- final_cfg = parse_full_cfg(parser, argv) |
|
- return final_cfg |
|
+ENV_NAMES = [ |
|
+ "assembly-v2", |
|
+ "basketball-v2", |
|
+ "bin-picking-v2", |
|
+ "box-close-v2", |
|
+ "button-press-topdown-v2", |
|
+ "button-press-topdown-wall-v2", |
|
+ "button-press-v2", |
|
+ "button-press-wall-v2", |
|
+ "coffee-button-v2", |
|
+ "coffee-pull-v2", |
|
+ "coffee-push-v2", |
|
+ "dial-turn-v2", |
|
+ "disassemble-v2", |
|
+ "door-close-v2", |
|
+ "door-lock-v2", |
|
+ "door-open-v2", |
|
+ "door-unlock-v2", |
|
+ "hand-insert-v2", |
|
+ "drawer-close-v2", |
|
+ "drawer-open-v2", |
|
+ "faucet-open-v2", |
|
+ "faucet-close-v2", |
|
+ "hammer-v2", |
|
+ "handle-press-side-v2", |
|
+ "handle-press-v2", |
|
+ "handle-pull-side-v2", |
|
+ "handle-pull-v2", |
|
+ "lever-pull-v2", |
|
+ "peg-insert-side-v2", |
|
+ "pick-place-wall-v2", |
|
+ "pick-out-of-hole-v2", |
|
+ "reach-v2", |
|
+ "push-back-v2", |
|
+ "push-v2", |
|
+ "pick-place-v2", |
|
+ "plate-slide-v2", |
|
+ "plate-slide-side-v2", |
|
+ "plate-slide-back-v2", |
|
+ "plate-slide-back-side-v2", |
|
+ "peg-insert-side-v2", |
|
+ "peg-unplug-side-v2", |
|
+ "soccer-v2", |
|
+ "stick-push-v2", |
|
+ "stick-pull-v2", |
|
+ "push-wall-v2", |
|
+ "push-v2", |
|
+ "reach-wall-v2", |
|
+ "reach-v2", |
|
+ "shelf-place-v2", |
|
+ "sweep-into-v2", |
|
+ "sweep-v2", |
|
+ "window-open-v2", |
|
+ "window-close-v2", |
|
+] |
|
|
|
|
|
def make_custom_env(full_env_name: str, cfg=None, env_config=None, render_mode: Optional[str] = None): |
|
- # see the section below explaining arguments |
|
- return gym.make("pick-place-v2") |
|
+ return gym.make(full_env_name, render_mode=render_mode) |
|
+ |
|
|
|
def main(): |
|
- """Script entry point.""" |
|
- register_env("pick-place-v2", make_custom_env) |
|
- cfg = parse_args(evaluation=True) |
|
+ for env_name in ENV_NAMES: |
|
+ register_env(env_name, make_custom_env) |
|
+ parser, _ = parse_sf_args(argv=None, evaluation=True) |
|
+ cfg = parse_full_cfg(parser) |
|
status = enjoy(cfg) |
|
return status |
|
|
|
|
|
if __name__ == "__main__": |
|
- sys.exit(main()) |
|
\ No newline at end of file |
|
+ sys.exit(main()) |
|
|
|
|
|
|
|
|
|
@@ -1,30 +1,78 @@ |
|
-from typing import Optional |
|
-import argparse |
|
import sys |
|
-import metaworld |
|
-import gym |
|
+from typing import Optional |
|
|
|
+import gym |
|
+import metaworld |
|
from sample_factory.cfg.arguments import parse_full_cfg, parse_sf_args |
|
from sample_factory.envs.env_utils import register_env |
|
from sample_factory.train import run_rl |
|
|
|
- |
|
-def make_custom_env(full_env_name: str, cfg=None, env_config=None, render_mode: Optional[str] = None): |
|
- # see the section below explaining arguments |
|
- return gym.make("pick-place-v2", render_mode=render_mode) |
|
+ENV_NAMES = [ |
|
+ "assembly-v2", |
|
+ "basketball-v2", |
|
+ "bin-picking-v2", |
|
+ "box-close-v2", |
|
+ "button-press-topdown-v2", |
|
+ "button-press-topdown-wall-v2", |
|
+ "button-press-v2", |
|
+ "button-press-wall-v2", |
|
+ "coffee-button-v2", |
|
+ "coffee-pull-v2", |
|
+ "coffee-push-v2", |
|
+ "dial-turn-v2", |
|
+ "disassemble-v2", |
|
+ "door-close-v2", |
|
+ "door-lock-v2", |
|
+ "door-open-v2", |
|
+ "door-unlock-v2", |
|
+ "hand-insert-v2", |
|
+ "drawer-close-v2", |
|
+ "drawer-open-v2", |
|
+ "faucet-open-v2", |
|
+ "faucet-close-v2", |
|
+ "hammer-v2", |
|
+ "handle-press-side-v2", |
|
+ "handle-press-v2", |
|
+ "handle-pull-side-v2", |
|
+ "handle-pull-v2", |
|
+ "lever-pull-v2", |
|
+ "peg-insert-side-v2", |
|
+ "pick-place-wall-v2", |
|
+ "pick-out-of-hole-v2", |
|
+ "reach-v2", |
|
+ "push-back-v2", |
|
+ "push-v2", |
|
+ "pick-place-v2", |
|
+ "plate-slide-v2", |
|
+ "plate-slide-side-v2", |
|
+ "plate-slide-back-v2", |
|
+ "plate-slide-back-side-v2", |
|
+ "peg-insert-side-v2", |
|
+ "peg-unplug-side-v2", |
|
+ "soccer-v2", |
|
+ "stick-push-v2", |
|
+ "stick-pull-v2", |
|
+ "push-wall-v2", |
|
+ "push-v2", |
|
+ "reach-wall-v2", |
|
+ "reach-v2", |
|
+ "shelf-place-v2", |
|
+ "sweep-into-v2", |
|
+ "sweep-v2", |
|
+ "window-open-v2", |
|
+ "window-close-v2", |
|
+] |
|
|
|
|
|
-def parse_args(argv=None, evaluation=False): |
|
- # parse the command line arguments to build |
|
- parser, partial_cfg = parse_sf_args(argv=argv, evaluation=evaluation) |
|
- final_cfg = parse_full_cfg(parser, argv) |
|
- return final_cfg |
|
+def make_custom_env(full_env_name: str, cfg=None, env_config=None, render_mode: Optional[str] = None): |
|
+ return gym.make(full_env_name, render_mode=render_mode) |
|
|
|
|
|
def main(): |
|
- """Script entry point.""" |
|
- register_env("pick-place-v2", make_custom_env) |
|
- cfg = parse_args() |
|
+ for env_name in ENV_NAMES: |
|
+ register_env(env_name, make_custom_env) |
|
+ parser, _ = parse_sf_args(argv=None, evaluation=False) |
|
+ cfg = parse_full_cfg(parser) |
|
status = run_rl(cfg) |
|
return status |
|
|
|
|
|
|
|
|
|
|
|
@@ -65,7 +65,7 @@ REQUIRED_PKGS = [ |
|
"huggingface_hub>=0.10", # For sharing objects, environments & trained RL policies |
|
"gym==0.26.2", # For RL action spaces and API |
|
"hydra-core", |
|
- "envpool", |
|
+ # "envpool", |
|
] |
|
|
|
|
|
|