Commit a82259d
Parent(s): aff049e

Upload . with huggingface_hub

Files changed:
- .summary/0/events.out.tfevents.1677846920.MacBook-Pro-de-Quentin.local +3 -0
- README.md +5 -5
- checkpoint_p0/best_000003537_3621888_reward_121.338.pth +3 -0
- checkpoint_p0/checkpoint_000008899_9112576.pth +3 -0
- checkpoint_p0/checkpoint_000008996_9211904.pth +3 -0
- config.json +7 -4
- git.diff +173 -0
- replay.mp4 +2 -2
- sf_log.txt +0 -0
.summary/0/events.out.tfevents.1677846920.MacBook-Pro-de-Quentin.local ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:19fd4e4e1c82de8301ce4c9c412f0b6dad426311d352ef041c523247b688caac
+size 2132614
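Each binary added in this commit is stored as a Git LFS pointer: a spec version line, the SHA-256 `oid` of the real payload, and its `size` in bytes. As a minimal sketch (the path and expected digest are copied from the pointer above; nothing else here is part of the commit), a downloaded artifact can be checked against its pointer like this:

```python
# Minimal sketch: verify a fetched LFS artifact against the oid recorded in
# its pointer file. Path and expected digest come from the diff above.
import hashlib

def sha256_of(path: str) -> str:
    h = hashlib.sha256()
    with open(path, "rb") as f:
        for chunk in iter(lambda: f.read(1 << 20), b""):
            h.update(chunk)
    return h.hexdigest()

expected = "19fd4e4e1c82de8301ce4c9c412f0b6dad426311d352ef041c523247b688caac"
path = ".summary/0/events.out.tfevents.1677846920.MacBook-Pro-de-Quentin.local"
assert sha256_of(path) == expected, "payload does not match LFS pointer"
```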
README.md CHANGED
@@ -5,7 +5,7 @@ tags:
 - reinforcement-learning
 - sample-factory
 model-index:
-- name:
+- name: APPO
   results:
   - task:
       type: reinforcement-learning
@@ -15,12 +15,12 @@ model-index:
       type: pick-place-v2
     metrics:
     - type: mean_reward
-      value:
+      value: 33.42 +/- 41.15
       name: mean_reward
       verified: false
 ---
 
-A(n) **
+A(n) **APPO** model trained on the **pick-place-v2** environment.
 
 This model was trained using Sample-Factory 2.0: https://github.com/alex-petrenko/sample-factory.
 Documentation for how to use Sample-Factory can be found at https://www.samplefactory.dev/
@@ -38,7 +38,7 @@ python -m sample_factory.huggingface.load_from_hub -r qgallouedec/pick-place-v2-
 
 To run the model after download, use the `enjoy` script corresponding to this environment:
 ```
-python -m enjoy --algo=
+python -m enjoy --algo=APPO --env=pick-place-v2 --train_dir=./train_dir --experiment=pick-place-v2-sf
 ```
 
 
@@ -49,7 +49,7 @@ See https://www.samplefactory.dev/10-huggingface/huggingface/ for more details
 
 To continue training with this model, use the `train` script corresponding to this environment:
 ```
-python -m train --algo=
+python -m train --algo=APPO --env=pick-place-v2 --train_dir=./train_dir --experiment=pick-place-v2-sf --restart_behavior=resume --train_for_env_steps=10000000000
 ```
 
 Note, you may have to adjust `--train_for_env_steps` to a suitably high number as the experiment will resume at the number of steps it concluded at.
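Before the `enjoy`/`train` commands in the model card can find the checkpoint, the repo has to exist locally under `--train_dir`. A hedged sketch using `huggingface_hub` directly; the `train_dir/<experiment>` layout is an assumption, and the model card's own route is the `sample_factory.huggingface.load_from_hub` helper referenced in the hunk header above:

```python
# Sketch: download this repo so --train_dir=./train_dir and
# --experiment=pick-place-v2-sf resolve to it. The local_dir layout is an
# assumption, not something this commit specifies.
from huggingface_hub import snapshot_download

local_path = snapshot_download(
    repo_id="qgallouedec/pick-place-v2-sf",
    local_dir="./train_dir/pick-place-v2-sf",
)
print("checkpoint files under:", local_path)
```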
checkpoint_p0/best_000003537_3621888_reward_121.338.pth ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:b5b2ed7bf97959a5fdfc962efaac7fe2b852d149752912a06bf5f92d9f45c12c
+size 22381299
checkpoint_p0/checkpoint_000008899_9112576.pth ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:ddaf2819e3a93915f2fd4f5be171075022bece916e7a6a791fee7ccba4ed9b50
+size 22381699
checkpoint_p0/checkpoint_000008996_9211904.pth ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:e7626cb0fa9fccdef19cbb55a3ccfd5023aafea6402b1edf3937497fa8f403dd
+size 22381699
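The `.pth` files above are ordinary PyTorch serialized objects (the `best_*` file corresponds to the run's best reward, 121.338). A hedged sketch for peeking inside one locally; the internal layout of a Sample-Factory checkpoint is an assumption here, so the code only prints whatever it finds:

```python
# Sketch: open a checkpoint on CPU and list its top-level contents without
# assuming any particular key layout.
import torch

ckpt = torch.load(
    "checkpoint_p0/best_000003537_3621888_reward_121.338.pth",
    map_location="cpu",
)
if isinstance(ckpt, dict):
    for key in ckpt:
        print(key)
else:
    print(type(ckpt))
```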
config.json CHANGED
@@ -4,7 +4,7 @@
     "env": "pick-place-v2",
     "experiment": "pick-place-v2",
     "train_dir": "/Users/quentingallouedec/gia/data/envs/metaworld/train_dir",
-    "restart_behavior": "
+    "restart_behavior": "overwrite",
     "device": "cpu",
     "seed": null,
     "num_policies": 1,
@@ -65,7 +65,7 @@
     "summaries_use_frameskip": true,
     "heartbeat_interval": 20,
     "heartbeat_reporting_interval": 180,
-    "train_for_env_steps":
+    "train_for_env_steps": 10000000,
     "train_for_seconds": 10000000000,
     "save_every_sec": 120,
     "keep_checkpoints": 2,
@@ -120,15 +120,18 @@
     "pbt_target_objective": "true_objective",
     "pbt_perturb_min": 1.1,
     "pbt_perturb_max": 1.5,
-    "command_line": "--env pick-place-v2 --experiment=pick-place-v2 --with_wandb=True --wandb_user=qgallouedec --wandb_project sample_facotry_metaworld",
+    "command_line": "--env pick-place-v2 --experiment=pick-place-v2 --with_wandb=True --wandb_user=qgallouedec --wandb_project sample_facotry_metaworld --train_for_env_steps=10000000 --restart_behavior overwrite --device cpu",
     "cli_args": {
         "env": "pick-place-v2",
         "experiment": "pick-place-v2",
+        "restart_behavior": "overwrite",
+        "device": "cpu",
+        "train_for_env_steps": 10000000,
         "with_wandb": true,
         "wandb_user": "qgallouedec",
         "wandb_project": "sample_facotry_metaworld"
     },
     "git_hash": "2fd3320ea3064145b39fc5f05f1959e8efbd2051",
     "git_repo_name": "https://github.com/huggingface/gia.git",
-    "wandb_unique_id": "pick-place-
+    "wandb_unique_id": "pick-place-v2_20230303_133519_050568"
 }
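The config diff records the same launch flags twice: flattened into "command_line" and structured under "cli_args", with every other key being a Sample-Factory default captured at launch time. A small sketch reading back the values this commit actually changed:

```python
# Sketch: read back the settings this commit added to config.json.
import json

with open("config.json") as f:
    cfg = json.load(f)

print(cfg["restart_behavior"])       # "overwrite"
print(cfg["train_for_env_steps"])    # 10000000
print(cfg["cli_args"]["device"])     # "cpu"
print(cfg["wandb_unique_id"])        # "pick-place-v2_20230303_133519_050568"
```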
git.diff CHANGED
@@ -1,3 +1,176 @@
+diff --git a/data/envs/metaworld/README.md b/data/envs/metaworld/README.md
+index c8f64f6..2b2d72e 100644
+--- a/data/envs/metaworld/README.md
++++ b/data/envs/metaworld/README.md
+@@ -10,13 +10,59 @@ Command lines:
+ Train:
+ 
+ ```sh
+-python train.py --env pick-place-v2 --with_wandb=True --wandb_user=qgallouedec --wandb_project sample_facotry_metaworld
++python train.py --env pick-place-v2 --experiment=pick-place-v2 --with_wandb=True --wandb_user=qgallouedec --wandb_project sample_facotry_metaworld --train_for_env_steps=10000000 --learning_rate 0.00005 --restart_behavior overwrite
+ ```
+ 
+ Push to hub:
+ 
+ ```sh
+-python enjoy.py --algo=APPO --env=pick-place-v2 --experiment=default_experiment --train_dir=./train_dir --max_num_episodes=10 --push_to_hub --hf_repository=qgallouedec/pick-place-v2-sf --save_video --no_render --enjoy_script=enjoy --train_script=train --load_checkpoint_kind best
++python enjoy.py --algo=PPO --env=pick-place-v2 --experiment=pick-place-v2 --train_dir=./train_dir --max_num_episodes=10 --push_to_hub --hf_repository=qgallouedec/pick-place-v2-sf --save_video --no_render --enjoy_script=enjoy --train_script=train --load_checkpoint_kind best
+ ```
+ 
+-Generate dataset:
+ 
++Generate dataset:
++
++
++
++usage: train.py [-h] [--algo ALGO] --env ENV [--experiment EXPERIMENT] [--train_dir TRAIN_DIR] [--restart_behavior {resume,restart,overwrite}]
++                [--device {gpu,cpu}] [--seed SEED] [--num_policies NUM_POLICIES] [--async_rl ASYNC_RL] [--serial_mode SERIAL_MODE]
++                [--batched_sampling BATCHED_SAMPLING] [--num_batches_to_accumulate NUM_BATCHES_TO_ACCUMULATE]
++                [--worker_num_splits WORKER_NUM_SPLITS] [--policy_workers_per_policy POLICY_WORKERS_PER_POLICY]
++                [--max_policy_lag MAX_POLICY_LAG] [--num_workers NUM_WORKERS] [--num_envs_per_worker NUM_ENVS_PER_WORKER]
++                [--batch_size BATCH_SIZE] [--num_batches_per_epoch NUM_BATCHES_PER_EPOCH] [--num_epochs NUM_EPOCHS] [--rollout ROLLOUT]
++                [--recurrence RECURRENCE] [--shuffle_minibatches SHUFFLE_MINIBATCHES] [--gamma GAMMA] [--reward_scale REWARD_SCALE]
++                [--reward_clip REWARD_CLIP] [--value_bootstrap VALUE_BOOTSTRAP] [--normalize_returns NORMALIZE_RETURNS]
++                [--exploration_loss_coeff EXPLORATION_LOSS_COEFF] [--value_loss_coeff VALUE_LOSS_COEFF] [--kl_loss_coeff KL_LOSS_COEFF]
++                [--exploration_loss {entropy,symmetric_kl}] [--gae_lambda GAE_LAMBDA] [--ppo_clip_ratio PPO_CLIP_RATIO]
++                [--ppo_clip_value PPO_CLIP_VALUE] [--with_vtrace WITH_VTRACE] [--vtrace_rho VTRACE_RHO] [--vtrace_c VTRACE_C]
++                [--optimizer {adam,lamb}] [--adam_eps ADAM_EPS] [--adam_beta1 ADAM_BETA1] [--adam_beta2 ADAM_BETA2]
++                [--max_grad_norm MAX_GRAD_NORM] [--learning_rate LEARNING_RATE]
++                [--lr_schedule {constant,kl_adaptive_minibatch,kl_adaptive_epoch}] [--lr_schedule_kl_threshold LR_SCHEDULE_KL_THRESHOLD]
++                [--lr_adaptive_min LR_ADAPTIVE_MIN] [--lr_adaptive_max LR_ADAPTIVE_MAX] [--obs_subtract_mean OBS_SUBTRACT_MEAN]
++                [--obs_scale OBS_SCALE] [--normalize_input NORMALIZE_INPUT] [--normalize_input_keys [NORMALIZE_INPUT_KEYS ...]]
++                [--decorrelate_experience_max_seconds DECORRELATE_EXPERIENCE_MAX_SECONDS]
++                [--decorrelate_envs_on_one_worker DECORRELATE_ENVS_ON_ONE_WORKER] [--actor_worker_gpus [ACTOR_WORKER_GPUS ...]]
++                [--set_workers_cpu_affinity SET_WORKERS_CPU_AFFINITY] [--force_envs_single_thread FORCE_ENVS_SINGLE_THREAD]
++                [--default_niceness DEFAULT_NICENESS] [--log_to_file LOG_TO_FILE]
++                [--experiment_summaries_interval EXPERIMENT_SUMMARIES_INTERVAL] [--flush_summaries_interval FLUSH_SUMMARIES_INTERVAL]
++                [--stats_avg STATS_AVG] [--summaries_use_frameskip SUMMARIES_USE_FRAMESKIP] [--heartbeat_interval HEARTBEAT_INTERVAL]
++                [--heartbeat_reporting_interval HEARTBEAT_REPORTING_INTERVAL] [--train_for_env_steps TRAIN_FOR_ENV_STEPS]
++                [--train_for_seconds TRAIN_FOR_SECONDS] [--save_every_sec SAVE_EVERY_SEC] [--keep_checkpoints KEEP_CHECKPOINTS]
++                [--load_checkpoint_kind {latest,best}] [--save_milestones_sec SAVE_MILESTONES_SEC] [--save_best_every_sec SAVE_BEST_EVERY_SEC]
++                [--save_best_metric SAVE_BEST_METRIC] [--save_best_after SAVE_BEST_AFTER] [--benchmark BENCHMARK]
++                [--encoder_mlp_layers [ENCODER_MLP_LAYERS ...]]
++                [--encoder_conv_architecture {convnet_simple,convnet_impala,convnet_atari,resnet_impala}]
++                [--encoder_conv_mlp_layers [ENCODER_CONV_MLP_LAYERS ...]] [--use_rnn USE_RNN] [--rnn_size RNN_SIZE] [--rnn_type {gru,lstm}]
++                [--rnn_num_layers RNN_NUM_LAYERS] [--decoder_mlp_layers [DECODER_MLP_LAYERS ...]] [--nonlinearity {elu,relu,tanh}]
++                [--policy_initialization {orthogonal,xavier_uniform,torch_default}] [--policy_init_gain POLICY_INIT_GAIN]
++                [--actor_critic_share_weights ACTOR_CRITIC_SHARE_WEIGHTS] [--adaptive_stddev ADAPTIVE_STDDEV]
++                [--continuous_tanh_scale CONTINUOUS_TANH_SCALE] [--initial_stddev INITIAL_STDDEV] [--use_env_info_cache USE_ENV_INFO_CACHE]
++                [--env_gpu_actions ENV_GPU_ACTIONS] [--env_gpu_observations ENV_GPU_OBSERVATIONS] [--env_frameskip ENV_FRAMESKIP]
++                [--env_framestack ENV_FRAMESTACK] [--pixel_format PIXEL_FORMAT]
++                [--use_record_episode_statistics USE_RECORD_EPISODE_STATISTICS] [--with_wandb WITH_WANDB] [--wandb_user WANDB_USER]
++                [--wandb_project WANDB_PROJECT] [--wandb_group WANDB_GROUP] [--wandb_job_type WANDB_JOB_TYPE] [--wandb_tags [WANDB_TAGS ...]]
++                [--with_pbt WITH_PBT] [--pbt_mix_policies_in_one_env PBT_MIX_POLICIES_IN_ONE_ENV]
++                [--pbt_period_env_steps PBT_PERIOD_ENV_STEPS] [--pbt_start_mutation PBT_START_MUTATION]
++                [--pbt_replace_fraction PBT_REPLACE_FRACTION] [--pbt_mutation_rate PBT_MUTATION_RATE]
++                [--pbt_replace_reward_gap PBT_REPLACE_REWARD_GAP] [--pbt_replace_reward_gap_absolute PBT_REPLACE_REWARD_GAP_ABSOLUTE]
++                [--pbt_optimize_gamma PBT_OPTIMIZE_GAMMA] [--pbt_target_objective PBT_TARGET_OBJECTIVE] [--pbt_perturb_min PBT_PERTURB_MIN]
++                [--pbt_perturb_max PBT_PERTURB_MAX]
+ 
+diff --git a/data/envs/metaworld/enjoy.py b/data/envs/metaworld/enjoy.py
+index 56f08f7..c33c4db 100644
+--- a/data/envs/metaworld/enjoy.py
++++ b/data/envs/metaworld/enjoy.py
+@@ -1,34 +1,81 @@
+ import sys
+ from typing import Optional
+-from sample_factory.enjoy import enjoy
+ 
+-import metaworld
+ import gym
+-
++import metaworld
+ from sample_factory.cfg.arguments import parse_full_cfg, parse_sf_args
++from sample_factory.enjoy import enjoy
+ from sample_factory.envs.env_utils import register_env
+-from sample_factory.train import run_rl
+ 
+-def parse_args(argv=None, evaluation=False):
+-    # parse the command line arguments to build
+-    parser, partial_cfg = parse_sf_args(argv=argv, evaluation=evaluation)
+-# add_custom_env_args(partial_cfg.env, parser, evaluation=evaluation)
+-# custom_env_override_defaults(partial_cfg.env, parser)
+-    final_cfg = parse_full_cfg(parser, argv)
+-    return final_cfg
++ENV_NAMES = [
++    "assembly-v2",
++    "basketball-v2",
++    "bin-picking-v2",
++    "box-close-v2",
++    "button-press-topdown-v2",
++    "button-press-topdown-wall-v2",
++    "button-press-v2",
++    "button-press-wall-v2",
++    "coffee-button-v2",
++    "coffee-pull-v2",
++    "coffee-push-v2",
++    "dial-turn-v2",
++    "disassemble-v2",
++    "door-close-v2",
++    "door-lock-v2",
++    "door-open-v2",
++    "door-unlock-v2",
++    "hand-insert-v2",
++    "drawer-close-v2",
++    "drawer-open-v2",
++    "faucet-open-v2",
++    "faucet-close-v2",
++    "hammer-v2",
++    "handle-press-side-v2",
++    "handle-press-v2",
++    "handle-pull-side-v2",
++    "handle-pull-v2",
++    "lever-pull-v2",
++    "peg-insert-side-v2",
++    "pick-place-wall-v2",
++    "pick-out-of-hole-v2",
++    "reach-v2",
++    "push-back-v2",
++    "push-v2",
++    "pick-place-v2",
++    "plate-slide-v2",
++    "plate-slide-side-v2",
++    "plate-slide-back-v2",
++    "plate-slide-back-side-v2",
++    "peg-insert-side-v2",
++    "peg-unplug-side-v2",
++    "soccer-v2",
++    "stick-push-v2",
++    "stick-pull-v2",
++    "push-wall-v2",
++    "push-v2",
++    "reach-wall-v2",
++    "reach-v2",
++    "shelf-place-v2",
++    "sweep-into-v2",
++    "sweep-v2",
++    "window-open-v2",
++    "window-close-v2",
++]
+ 
+ 
+ def make_custom_env(full_env_name: str, cfg=None, env_config=None, render_mode: Optional[str] = None):
+-    # see the section below explaining arguments
+-    return gym.make("pick-place-v2")
++    return gym.make(full_env_name, render_mode=render_mode)
++
+ 
+ def main():
+-    """Script entry point."""
+-    register_env("pick-place-v2", make_custom_env)
+-    cfg = parse_args(evaluation=True)
++    for env_name in ENV_NAMES:
++        register_env(env_name, make_custom_env)
++    parser, _ = parse_sf_args(argv=None, evaluation=True)
++    cfg = parse_full_cfg(parser)
+     status = enjoy(cfg)
+     return status
+ 
+ 
+ if __name__ == "__main__":
+-    sys.exit(main())
+ 
++    sys.exit(main())
 diff --git a/data/envs/metaworld/train.py b/data/envs/metaworld/train.py
 index e01df51..f0d2bb9 100644
 --- a/data/envs/metaworld/train.py
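The substantive change in the `enjoy.py` hunk above is that env registration no longer hard-codes `pick-place-v2`: every Meta-World task name is registered against a single factory, and the config is parsed with `parse_sf_args`/`parse_full_cfg` directly instead of the removed `parse_args` wrapper. A condensed sketch of that pattern, with a two-task `ENV_NAMES` subset standing in for the full list in the diff:

```python
# Sketch of the registration pattern from the enjoy.py diff: one factory
# serves every registered env name, then Sample-Factory parses the full cfg.
from typing import Optional

import gym
from sample_factory.cfg.arguments import parse_full_cfg, parse_sf_args
from sample_factory.envs.env_utils import register_env

ENV_NAMES = ["pick-place-v2", "reach-v2"]  # illustrative subset of the diff's list

def make_custom_env(full_env_name: str, cfg=None, env_config=None, render_mode: Optional[str] = None):
    # full_env_name is whatever name the env was registered under
    return gym.make(full_env_name, render_mode=render_mode)

for env_name in ENV_NAMES:
    register_env(env_name, make_custom_env)

parser, _ = parse_sf_args(argv=None, evaluation=True)
cfg = parse_full_cfg(parser)
```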
replay.mp4 CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:
-size
+oid sha256:0a5406d414d56ac16bcfc09b9daa07718d5e10d6c34e17831a93dd18d4236dc3
+size 3386466
sf_log.txt CHANGED
The diff for this file is too large to render. See raw diff.