qgallouedec committed
Commit a82259d
1 Parent(s): aff049e

Upload . with huggingface_hub
.summary/0/events.out.tfevents.1677846920.MacBook-Pro-de-Quentin.local ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:19fd4e4e1c82de8301ce4c9c412f0b6dad426311d352ef041c523247b688caac
+size 2132614
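Each binary added in this commit is stored via Git LFS, so the diff shows only a three-line pointer file: the spec version, the blob's sha256 `oid`, and its `size` in bytes. As a minimal sketch (not part of the commit), a downloaded blob can be checked against such a pointer; the helper name is an illustrative assumption:

```python
# Sketch: verify a downloaded blob against a Git LFS pointer's oid and size.
import hashlib
from pathlib import Path

def matches_lfs_pointer(blob_path: str, oid: str, size: int) -> bool:
    data = Path(blob_path).read_bytes()
    return len(data) == size and hashlib.sha256(data).hexdigest() == oid

# Check the tfevents blob added above against its pointer values.
print(matches_lfs_pointer(
    ".summary/0/events.out.tfevents.1677846920.MacBook-Pro-de-Quentin.local",
    "19fd4e4e1c82de8301ce4c9c412f0b6dad426311d352ef041c523247b688caac",
    2132614,
))
```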
README.md CHANGED
@@ -5,7 +5,7 @@ tags:
 - reinforcement-learning
 - sample-factory
 model-index:
-- name: PPO
+- name: APPO
   results:
   - task:
       type: reinforcement-learning
@@ -15,12 +15,12 @@ model-index:
       type: pick-place-v2
     metrics:
     - type: mean_reward
-      value: 24.42 +/- 7.77
+      value: 33.42 +/- 41.15
       name: mean_reward
       verified: false
 ---
 
-A(n) **PPO** model trained on the **pick-place-v2** environment.
+A(n) **APPO** model trained on the **pick-place-v2** environment.
 
 This model was trained using Sample-Factory 2.0: https://github.com/alex-petrenko/sample-factory.
 Documentation for how to use Sample-Factory can be found at https://www.samplefactory.dev/
@@ -38,7 +38,7 @@ python -m sample_factory.huggingface.load_from_hub -r qgallouedec/pick-place-v2-
 
 To run the model after download, use the `enjoy` script corresponding to this environment:
 ```
-python -m enjoy --algo=PPO --env=pick-place-v2 --train_dir=./train_dir --experiment=pick-place-v2-sf
+python -m enjoy --algo=APPO --env=pick-place-v2 --train_dir=./train_dir --experiment=pick-place-v2-sf
 ```
 
 
@@ -49,7 +49,7 @@ See https://www.samplefactory.dev/10-huggingface/huggingface/ for more details
 
 To continue training with this model, use the `train` script corresponding to this environment:
 ```
-python -m train --algo=PPO --env=pick-place-v2 --train_dir=./train_dir --experiment=pick-place-v2-sf --restart_behavior=resume --train_for_env_steps=10000000000
+python -m train --algo=APPO --env=pick-place-v2 --train_dir=./train_dir --experiment=pick-place-v2-sf --restart_behavior=resume --train_for_env_steps=10000000000
 ```
 
 Note, you may have to adjust `--train_for_env_steps` to a suitably high number as the experiment will resume at the number of steps it concluded at.
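The README's `load_from_hub` command line is truncated in this view. Since the commit itself was made with `huggingface_hub`, that library can also fetch the files directly; a minimal sketch, where the `local_dir` value is an arbitrary choice, not something the README prescribes:

```python
# Sketch: download this repository's files (config.json, checkpoints,
# replay.mp4) with huggingface_hub instead of the truncated CLI command.
from huggingface_hub import snapshot_download

path = snapshot_download(
    repo_id="qgallouedec/pick-place-v2-sf",
    local_dir="./train_dir/pick-place-v2-sf",  # assumption: any writable dir works
)
print("downloaded to", path)
```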
checkpoint_p0/best_000003537_3621888_reward_121.338.pth ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:b5b2ed7bf97959a5fdfc962efaac7fe2b852d149752912a06bf5f92d9f45c12c
+size 22381299
checkpoint_p0/checkpoint_000008899_9112576.pth ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:ddaf2819e3a93915f2fd4f5be171075022bece916e7a6a791fee7ccba4ed9b50
+size 22381699
checkpoint_p0/checkpoint_000008996_9211904.pth ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:e7626cb0fa9fccdef19cbb55a3ccfd5023aafea6402b1edf3937497fa8f403dd
+size 22381699
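The three `.pth` files appear to follow Sample-Factory's checkpoint naming, where the best checkpoint encodes the training step, the env-step count, and the reward at save time (`best_000003537_3621888_reward_121.338.pth`), while the rolling checkpoints encode step counts only. A minimal sketch for peeking inside one after download; the dict layout is not guaranteed by this view, so the sketch just lists the keys:

```python
# Sketch: inspect a downloaded Sample-Factory checkpoint on a CPU-only machine.
# map_location avoids needing a GPU; weights_only=False is required because
# these checkpoints contain more than bare tensors (an assumption here).
import torch

ckpt = torch.load(
    "checkpoint_p0/best_000003537_3621888_reward_121.338.pth",
    map_location="cpu",
    weights_only=False,
)
print(sorted(ckpt.keys()))
```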
config.json CHANGED
@@ -4,7 +4,7 @@
     "env": "pick-place-v2",
     "experiment": "pick-place-v2",
     "train_dir": "/Users/quentingallouedec/gia/data/envs/metaworld/train_dir",
-    "restart_behavior": "resume",
+    "restart_behavior": "overwrite",
     "device": "cpu",
     "seed": null,
     "num_policies": 1,
@@ -65,7 +65,7 @@
     "summaries_use_frameskip": true,
     "heartbeat_interval": 20,
     "heartbeat_reporting_interval": 180,
-    "train_for_env_steps": 10000000000,
+    "train_for_env_steps": 10000000,
     "train_for_seconds": 10000000000,
     "save_every_sec": 120,
     "keep_checkpoints": 2,
@@ -120,15 +120,18 @@
     "pbt_target_objective": "true_objective",
     "pbt_perturb_min": 1.1,
     "pbt_perturb_max": 1.5,
-    "command_line": "--env pick-place-v2 --experiment=pick-place-v2 --with_wandb=True --wandb_user=qgallouedec --wandb_project sample_facotry_metaworld",
+    "command_line": "--env pick-place-v2 --experiment=pick-place-v2 --with_wandb=True --wandb_user=qgallouedec --wandb_project sample_facotry_metaworld --train_for_env_steps=10000000 --restart_behavior overwrite --device cpu",
     "cli_args": {
         "env": "pick-place-v2",
         "experiment": "pick-place-v2",
+        "restart_behavior": "overwrite",
+        "device": "cpu",
+        "train_for_env_steps": 10000000,
         "with_wandb": true,
         "wandb_user": "qgallouedec",
         "wandb_project": "sample_facotry_metaworld"
     },
     "git_hash": "2fd3320ea3064145b39fc5f05f1959e8efbd2051",
     "git_repo_name": "https://github.com/huggingface/gia.git",
-    "wandb_unique_id": "pick-place-v2_20230303_111825_996649"
+    "wandb_unique_id": "pick-place-v2_20230303_133519_050568"
 }
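Taken together, these config changes show the run was redone as a fresh 10M-step CPU run (`restart_behavior: "overwrite"`, `train_for_env_steps: 10000000`) rather than a resumed 10B-step one. A minimal sketch for confirming the overrides from the committed file, assuming `config.json` has been downloaded to the working directory:

```python
# Sketch: load the committed config.json and print the keys this commit changed.
import json

with open("config.json") as f:
    cfg = json.load(f)

for key in ("restart_behavior", "device", "train_for_env_steps"):
    print(key, "=", cfg[key])
print("cli_args:", cfg["cli_args"])
```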
git.diff CHANGED
@@ -1,3 +1,176 @@
+diff --git a/data/envs/metaworld/README.md b/data/envs/metaworld/README.md
+index c8f64f6..2b2d72e 100644
+--- a/data/envs/metaworld/README.md
++++ b/data/envs/metaworld/README.md
+@@ -10,13 +10,59 @@ Command lines:
+ Train:
+ 
+ ```sh
+-python train.py --env pick-place-v2 --with_wandb=True --wandb_user=qgallouedec --wandb_project sample_facotry_metaworld
++python train.py --env pick-place-v2 --experiment=pick-place-v2 --with_wandb=True --wandb_user=qgallouedec --wandb_project sample_facotry_metaworld --train_for_env_steps=10000000 --learning_rate 0.00005 --restart_behavior overwrite
+ ```
+ 
+ Push to hub:
+ 
+ ```sh
+-python enjoy.py --algo=APPO --env=pick-place-v2 --experiment=default_experiment --train_dir=./train_dir --max_num_episodes=10 --push_to_hub --hf_repository=qgallouedec/pick-place-v2-sf --save_video --no_render --enjoy_script=enjoy --train_script=train --load_checkpoint_kind best
++python enjoy.py --algo=PPO --env=pick-place-v2 --experiment=pick-place-v2 --train_dir=./train_dir --max_num_episodes=10 --push_to_hub --hf_repository=qgallouedec/pick-place-v2-sf --save_video --no_render --enjoy_script=enjoy --train_script=train --load_checkpoint_kind best
+ ```
+ 
+-Generate dataset:
+ 
++Generate dataset:
++
++
++
++usage: train.py [-h] [--algo ALGO] --env ENV [--experiment EXPERIMENT] [--train_dir TRAIN_DIR] [--restart_behavior {resume,restart,overwrite}]
++                [--device {gpu,cpu}] [--seed SEED] [--num_policies NUM_POLICIES] [--async_rl ASYNC_RL] [--serial_mode SERIAL_MODE]
++                [--batched_sampling BATCHED_SAMPLING] [--num_batches_to_accumulate NUM_BATCHES_TO_ACCUMULATE]
++                [--worker_num_splits WORKER_NUM_SPLITS] [--policy_workers_per_policy POLICY_WORKERS_PER_POLICY]
++                [--max_policy_lag MAX_POLICY_LAG] [--num_workers NUM_WORKERS] [--num_envs_per_worker NUM_ENVS_PER_WORKER]
++                [--batch_size BATCH_SIZE] [--num_batches_per_epoch NUM_BATCHES_PER_EPOCH] [--num_epochs NUM_EPOCHS] [--rollout ROLLOUT]
++                [--recurrence RECURRENCE] [--shuffle_minibatches SHUFFLE_MINIBATCHES] [--gamma GAMMA] [--reward_scale REWARD_SCALE]
++                [--reward_clip REWARD_CLIP] [--value_bootstrap VALUE_BOOTSTRAP] [--normalize_returns NORMALIZE_RETURNS]
++                [--exploration_loss_coeff EXPLORATION_LOSS_COEFF] [--value_loss_coeff VALUE_LOSS_COEFF] [--kl_loss_coeff KL_LOSS_COEFF]
++                [--exploration_loss {entropy,symmetric_kl}] [--gae_lambda GAE_LAMBDA] [--ppo_clip_ratio PPO_CLIP_RATIO]
++                [--ppo_clip_value PPO_CLIP_VALUE] [--with_vtrace WITH_VTRACE] [--vtrace_rho VTRACE_RHO] [--vtrace_c VTRACE_C]
++                [--optimizer {adam,lamb}] [--adam_eps ADAM_EPS] [--adam_beta1 ADAM_BETA1] [--adam_beta2 ADAM_BETA2]
++                [--max_grad_norm MAX_GRAD_NORM] [--learning_rate LEARNING_RATE]
++                [--lr_schedule {constant,kl_adaptive_minibatch,kl_adaptive_epoch}] [--lr_schedule_kl_threshold LR_SCHEDULE_KL_THRESHOLD]
++                [--lr_adaptive_min LR_ADAPTIVE_MIN] [--lr_adaptive_max LR_ADAPTIVE_MAX] [--obs_subtract_mean OBS_SUBTRACT_MEAN]
++                [--obs_scale OBS_SCALE] [--normalize_input NORMALIZE_INPUT] [--normalize_input_keys [NORMALIZE_INPUT_KEYS ...]]
++                [--decorrelate_experience_max_seconds DECORRELATE_EXPERIENCE_MAX_SECONDS]
++                [--decorrelate_envs_on_one_worker DECORRELATE_ENVS_ON_ONE_WORKER] [--actor_worker_gpus [ACTOR_WORKER_GPUS ...]]
++                [--set_workers_cpu_affinity SET_WORKERS_CPU_AFFINITY] [--force_envs_single_thread FORCE_ENVS_SINGLE_THREAD]
++                [--default_niceness DEFAULT_NICENESS] [--log_to_file LOG_TO_FILE]
++                [--experiment_summaries_interval EXPERIMENT_SUMMARIES_INTERVAL] [--flush_summaries_interval FLUSH_SUMMARIES_INTERVAL]
++                [--stats_avg STATS_AVG] [--summaries_use_frameskip SUMMARIES_USE_FRAMESKIP] [--heartbeat_interval HEARTBEAT_INTERVAL]
++                [--heartbeat_reporting_interval HEARTBEAT_REPORTING_INTERVAL] [--train_for_env_steps TRAIN_FOR_ENV_STEPS]
++                [--train_for_seconds TRAIN_FOR_SECONDS] [--save_every_sec SAVE_EVERY_SEC] [--keep_checkpoints KEEP_CHECKPOINTS]
++                [--load_checkpoint_kind {latest,best}] [--save_milestones_sec SAVE_MILESTONES_SEC] [--save_best_every_sec SAVE_BEST_EVERY_SEC]
++                [--save_best_metric SAVE_BEST_METRIC] [--save_best_after SAVE_BEST_AFTER] [--benchmark BENCHMARK]
++                [--encoder_mlp_layers [ENCODER_MLP_LAYERS ...]]
++                [--encoder_conv_architecture {convnet_simple,convnet_impala,convnet_atari,resnet_impala}]
++                [--encoder_conv_mlp_layers [ENCODER_CONV_MLP_LAYERS ...]] [--use_rnn USE_RNN] [--rnn_size RNN_SIZE] [--rnn_type {gru,lstm}]
++                [--rnn_num_layers RNN_NUM_LAYERS] [--decoder_mlp_layers [DECODER_MLP_LAYERS ...]] [--nonlinearity {elu,relu,tanh}]
++                [--policy_initialization {orthogonal,xavier_uniform,torch_default}] [--policy_init_gain POLICY_INIT_GAIN]
++                [--actor_critic_share_weights ACTOR_CRITIC_SHARE_WEIGHTS] [--adaptive_stddev ADAPTIVE_STDDEV]
++                [--continuous_tanh_scale CONTINUOUS_TANH_SCALE] [--initial_stddev INITIAL_STDDEV] [--use_env_info_cache USE_ENV_INFO_CACHE]
++                [--env_gpu_actions ENV_GPU_ACTIONS] [--env_gpu_observations ENV_GPU_OBSERVATIONS] [--env_frameskip ENV_FRAMESKIP]
++                [--env_framestack ENV_FRAMESTACK] [--pixel_format PIXEL_FORMAT]
++                [--use_record_episode_statistics USE_RECORD_EPISODE_STATISTICS] [--with_wandb WITH_WANDB] [--wandb_user WANDB_USER]
++                [--wandb_project WANDB_PROJECT] [--wandb_group WANDB_GROUP] [--wandb_job_type WANDB_JOB_TYPE] [--wandb_tags [WANDB_TAGS ...]]
++                [--with_pbt WITH_PBT] [--pbt_mix_policies_in_one_env PBT_MIX_POLICIES_IN_ONE_ENV]
++                [--pbt_period_env_steps PBT_PERIOD_ENV_STEPS] [--pbt_start_mutation PBT_START_MUTATION]
++                [--pbt_replace_fraction PBT_REPLACE_FRACTION] [--pbt_mutation_rate PBT_MUTATION_RATE]
++                [--pbt_replace_reward_gap PBT_REPLACE_REWARD_GAP] [--pbt_replace_reward_gap_absolute PBT_REPLACE_REWARD_GAP_ABSOLUTE]
++                [--pbt_optimize_gamma PBT_OPTIMIZE_GAMMA] [--pbt_target_objective PBT_TARGET_OBJECTIVE] [--pbt_perturb_min PBT_PERTURB_MIN]
++                [--pbt_perturb_max PBT_PERTURB_MAX]
+ 
+diff --git a/data/envs/metaworld/enjoy.py b/data/envs/metaworld/enjoy.py
+index 56f08f7..c33c4db 100644
+--- a/data/envs/metaworld/enjoy.py
++++ b/data/envs/metaworld/enjoy.py
+@@ -1,34 +1,81 @@
+ import sys
+ from typing import Optional
+-from sample_factory.enjoy import enjoy
+ 
+-import metaworld
+ import gym
+-
++import metaworld
+ from sample_factory.cfg.arguments import parse_full_cfg, parse_sf_args
++from sample_factory.enjoy import enjoy
+ from sample_factory.envs.env_utils import register_env
+-from sample_factory.train import run_rl
+ 
+-def parse_args(argv=None, evaluation=False):
+-    # parse the command line arguments to build
+-    parser, partial_cfg = parse_sf_args(argv=argv, evaluation=evaluation)
+-# add_custom_env_args(partial_cfg.env, parser, evaluation=evaluation)
+-# custom_env_override_defaults(partial_cfg.env, parser)
+-    final_cfg = parse_full_cfg(parser, argv)
+-    return final_cfg
++ENV_NAMES = [
++    "assembly-v2",
++    "basketball-v2",
++    "bin-picking-v2",
++    "box-close-v2",
++    "button-press-topdown-v2",
++    "button-press-topdown-wall-v2",
++    "button-press-v2",
++    "button-press-wall-v2",
++    "coffee-button-v2",
++    "coffee-pull-v2",
++    "coffee-push-v2",
++    "dial-turn-v2",
++    "disassemble-v2",
++    "door-close-v2",
++    "door-lock-v2",
++    "door-open-v2",
++    "door-unlock-v2",
++    "hand-insert-v2",
++    "drawer-close-v2",
++    "drawer-open-v2",
++    "faucet-open-v2",
++    "faucet-close-v2",
++    "hammer-v2",
++    "handle-press-side-v2",
++    "handle-press-v2",
++    "handle-pull-side-v2",
++    "handle-pull-v2",
++    "lever-pull-v2",
++    "peg-insert-side-v2",
++    "pick-place-wall-v2",
++    "pick-out-of-hole-v2",
++    "reach-v2",
++    "push-back-v2",
++    "push-v2",
++    "pick-place-v2",
++    "plate-slide-v2",
++    "plate-slide-side-v2",
++    "plate-slide-back-v2",
++    "plate-slide-back-side-v2",
++    "peg-insert-side-v2",
++    "peg-unplug-side-v2",
++    "soccer-v2",
++    "stick-push-v2",
++    "stick-pull-v2",
++    "push-wall-v2",
++    "push-v2",
++    "reach-wall-v2",
++    "reach-v2",
++    "shelf-place-v2",
++    "sweep-into-v2",
++    "sweep-v2",
++    "window-open-v2",
++    "window-close-v2",
++]
+ 
+ 
+ def make_custom_env(full_env_name: str, cfg=None, env_config=None, render_mode: Optional[str] = None):
+-    # see the section below explaining arguments
+-    return gym.make("pick-place-v2")
++    return gym.make(full_env_name, render_mode=render_mode)
++
+ 
+ def main():
+-    """Script entry point."""
+-    register_env("pick-place-v2", make_custom_env)
+-    cfg = parse_args(evaluation=True)
++    for env_name in ENV_NAMES:
++        register_env(env_name, make_custom_env)
++    parser, _ = parse_sf_args(argv=None, evaluation=True)
++    cfg = parse_full_cfg(parser)
+     status = enjoy(cfg)
+     return status
+ 
+ 
+ if __name__ == "__main__":
+-    sys.exit(main())
+ 
++    sys.exit(main())
 diff --git a/data/envs/metaworld/train.py b/data/envs/metaworld/train.py
 index e01df51..f0d2bb9 100644
 --- a/data/envs/metaworld/train.py
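The enjoy.py rewrite above registers every Meta-World v2 task and lets `--env` select one, instead of hard-coding pick-place-v2. The matching train.py diff is cut off in this view (only its header survives), but a training entry point following the same pattern would presumably swap `enjoy` for `run_rl`; the sketch below is an assumption built from the imports visible in git.diff, not the repository's actual train.py:

```python
# Sketch of a train.py entry point mirroring the enjoy.py shown above.
import sys
from typing import Optional

import gym
import metaworld  # noqa: F401  (registers Meta-World tasks with gym)
from sample_factory.cfg.arguments import parse_full_cfg, parse_sf_args
from sample_factory.envs.env_utils import register_env
from sample_factory.train import run_rl


def make_custom_env(full_env_name: str, cfg=None, env_config=None, render_mode: Optional[str] = None):
    return gym.make(full_env_name, render_mode=render_mode)


def main():
    # Registering just the one task trained here; the full ENV_NAMES loop
    # from enjoy.py would work equally well.
    register_env("pick-place-v2", make_custom_env)
    parser, _ = parse_sf_args(argv=None, evaluation=False)
    cfg = parse_full_cfg(parser)
    return run_rl(cfg)


if __name__ == "__main__":
    sys.exit(main())
```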
replay.mp4 CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:8f9f8146633325df4e663a71d66827f81598ffa38b7bee2b3b8305e1f5810e00
-size 3419609
+oid sha256:0a5406d414d56ac16bcfc09b9daa07718d5e10d6c34e17831a93dd18d4236dc3
+size 3386466
sf_log.txt CHANGED
The diff for this file is too large to render. See raw diff