benjipeng committed on
Commit
b26580a
1 Parent(s): a0e8e2b

Upload folder using huggingface_hub

Browse files
.DS_Store ADDED
Binary file (6.15 kB). View file
 
.gitattributes CHANGED
@@ -25,7 +25,6 @@
25
  *.safetensors filter=lfs diff=lfs merge=lfs -text
26
  saved_model/**/* filter=lfs diff=lfs merge=lfs -text
27
  *.tar.* filter=lfs diff=lfs merge=lfs -text
28
- *.tar filter=lfs diff=lfs merge=lfs -text
29
  *.tflite filter=lfs diff=lfs merge=lfs -text
30
  *.tgz filter=lfs diff=lfs merge=lfs -text
31
  *.wasm filter=lfs diff=lfs merge=lfs -text
 
25
  *.safetensors filter=lfs diff=lfs merge=lfs -text
26
  saved_model/**/* filter=lfs diff=lfs merge=lfs -text
27
  *.tar.* filter=lfs diff=lfs merge=lfs -text
 
28
  *.tflite filter=lfs diff=lfs merge=lfs -text
29
  *.tgz filter=lfs diff=lfs merge=lfs -text
30
  *.wasm filter=lfs diff=lfs merge=lfs -text
README.md CHANGED
@@ -1,10 +1,11 @@
1
  ---
2
- library_name: stable-baselines3
3
  tags:
4
  - LunarLander-v2
 
5
  - deep-reinforcement-learning
6
  - reinforcement-learning
7
- - stable-baselines3
 
8
  model-index:
9
  - name: PPO
10
  results:
@@ -16,22 +17,45 @@ model-index:
16
  type: LunarLander-v2
17
  metrics:
18
  - type: mean_reward
19
- value: 270.83 +/- 18.63
20
  name: mean_reward
21
  verified: false
22
  ---
23
 
24
- # **PPO** Agent playing **LunarLander-v2**
25
- This is a trained model of a **PPO** agent playing **LunarLander-v2**
26
- using the [stable-baselines3 library](https://github.com/DLR-RM/stable-baselines3).
27
 
28
- ## Usage (with Stable-baselines3)
29
- TODO: Add your code
30
-
31
-
32
- ```python
33
- from stable_baselines3 import ...
34
- from huggingface_sb3 import load_from_hub
35
-
36
- ...
37
- ```
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
  ---
 
2
  tags:
3
  - LunarLander-v2
4
+ - ppo
5
  - deep-reinforcement-learning
6
  - reinforcement-learning
7
+ - custom-implementation
8
+ - deep-rl-course
9
  model-index:
10
  - name: PPO
11
  results:
 
17
  type: LunarLander-v2
18
  metrics:
19
  - type: mean_reward
20
+ value: -163.76 +/- 70.87
21
  name: mean_reward
22
  verified: false
23
  ---
24
 
25
+ # PPO Agent Playing LunarLander-v2
 
 
26
 
27
+ This is a trained model of a PPO agent playing LunarLander-v2.
28
+
29
+ # Hyperparameters
30
+ ```python
31
+ {'exp_name': 'ppo'
32
+ 'seed': 1
33
+ 'torch_deterministic': True
34
+ 'cuda': True
35
+ 'track': False
36
+ 'wandb_project_name': 'cleanRL'
37
+ 'wandb_entity': None
38
+ 'capture_video': False
39
+ 'env_id': 'LunarLander-v2'
40
+ 'total_timesteps': 20000
41
+ 'learning_rate': 0.0001
42
+ 'num_envs': 4
43
+ 'num_steps': 128
44
+ 'anneal_lr': True
45
+ 'gae': True
46
+ 'gamma': 0.99
47
+ 'gae_lambda': 0.95
48
+ 'num_minibatches': 4
49
+ 'update_epochs': 10
50
+ 'norm_adv': True
51
+ 'clip_coef': 0.2
52
+ 'clip_vloss': True
53
+ 'ent_coef': 0.01
54
+ 'vf_coef': 0.5
55
+ 'max_grad_norm': 0.5
56
+ 'target_kl': None
57
+ 'repo_id': 'caioiglesias/ppo-LunarLander-v2'
58
+ 'batch_size': 512
59
+ 'minibatch_size': 128}
60
+ ```
61
+
logs/events.out.tfevents.1677778172.6c4e5c51f044.5969.0 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b8112361cc6956e7fecc20dfa6aec42694ec4fc064d2dc457c843ec53998eaf0
3
+ size 131
logs/events.out.tfevents.1677778358.6c4e5c51f044.6795.0 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:256bebb1129e3b69bd8bca443b2341118781084f0607dc1c7f128004f4c6d850
3
+ size 130
logs/events.out.tfevents.1677778804.6c4e5c51f044.8674.0 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:8ade4164e0dc73c967b7802f8d4cc9c78d9f5d6a32563dd6145e95792bb3f3a8
3
+ size 130
logs/events.out.tfevents.1677778921.6c4e5c51f044.9210.0 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:de9ddc2fbb8552404d8d920271b54c934f72dc0fd0ef526bffe4d8cd71fdf2a6
3
+ size 131
logs/events.out.tfevents.1677779087.6c4e5c51f044.9950.0 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:7f48bf409ca78f8ca1ccb4399ca5a61d9533acac9ac5ac6a060e9799bf19cc4f
3
+ size 130
logs/events.out.tfevents.1677779308.6c4e5c51f044.10895.0 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:9292ec812a9448fd51628afa0e2e574f767dc3c2095c446031598eda281d9c75
3
+ size 128
logs/events.out.tfevents.1677779398.6c4e5c51f044.11326.0 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:3e4f1487e70ca8be7b451048c5182c317ddc29ae5463310d540eb9aa3edffc85
3
+ size 129
logs/events.out.tfevents.1677779454.6c4e5c51f044.11620.0 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f3746f17b6ad9976c84475bd53bdf5fb20c2fda64319124eda9cdf5c5b9da47b
3
+ size 129
logs/events.out.tfevents.1677779487.6c4e5c51f044.11814.0 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:833d9d4966c09756cf92cedf71f4060ad01c3711369f4c735e018b4b3d121a6c
3
+ size 130
logs/events.out.tfevents.1677780239.6c4e5c51f044.14981.0 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:4f42828263f51b4c97f8ad655af2b49020d6188ff71fbbfecb544d47e8632536
3
+ size 130
logs/events.out.tfevents.1677780304.6c4e5c51f044.15313.0 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:70c8370f5109d29992438b1d4d001bbec64c29687dd20c75d0ec229a6c6c2456
3
+ size 130
logs/events.out.tfevents.1677780356.6c4e5c51f044.15591.0 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:cc057ad3d8c930c4223e76de1a5cce27fe294d81f50e08905b22100db531ceba
3
+ size 130
logs/events.out.tfevents.1677780408.6c4e5c51f044.15873.0 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c0521fa0dc7433d6bb220946e5e20073fda5df07b2f9f6377bc45fae47a1ad3f
3
+ size 130
logs/events.out.tfevents.1677780462.6c4e5c51f044.16151.0 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e1a16bdcdbb3c14c58b3624d0844bd4b8d41aab07e9ebdbac1869bff5984c07d
3
+ size 130
logs/events.out.tfevents.1677780524.6c4e5c51f044.16477.0 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:6db67b6757ae274dbb0e93d08feaec1124fc8b02faa34c364d8b7cd69f6af293
3
+ size 130
logs/events.out.tfevents.1677780717.6c4e5c51f044.17340.0 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:07fc8fc6cb35096271a520146109dd6c74dd99f93e834107b48cc4c8fca625a2
3
+ size 130
model.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:2ffaa3dd30af56b56d88d87494df24536cdc01d8efd82fb83ff72bf5b9ad3348
3
+ size 130
replay.mp4 CHANGED
Binary files a/replay.mp4 and b/replay.mp4 differ
 
results.json CHANGED
@@ -1 +1 @@
1
- {"mean_reward": 270.8287894204931, "std_reward": 18.62932463836783, "is_deterministic": true, "n_eval_episodes": 10, "eval_datetime": "2023-10-28T01:58:23.835709"}
 
1
+ {"env_id": "LunarLander-v2", "mean_reward": -163.75834850473012, "std_reward": 70.87311858332026, "n_evaluation_episodes": 10, "eval_datetime": "2023-03-02T18:12:24.436630"}