johnjim0816
commited on
Commit
·
7e0d2ec
1
Parent(s):
ccb908b
update CartPole-v1 PPO
Browse files- CartPole-v1/Test_CartPole-v1_PPO-KL_20221217-204214/config.yaml +0 -35
- CartPole-v1/Test_CartPole-v1_PPO-KL_20221217-204214/logs/log.txt +0 -51
- CartPole-v1/Test_CartPole-v1_PPO-KL_20221217-204214/results/learning_curve.png +0 -0
- CartPole-v1/Test_CartPole-v1_PPO-KL_20221217-204214/results/res.csv +0 -11
- CartPole-v1/Test_CartPole-v1_PPO_20230220-213153/config.yaml +0 -31
- CartPole-v1/Test_CartPole-v1_PPO_20230220-213153/logs/log.txt +0 -52
- CartPole-v1/Test_CartPole-v1_PPO_20230220-213153/models/actor.pth +0 -3
- CartPole-v1/Test_CartPole-v1_PPO_20230220-213153/models/critic.pth +0 -3
- CartPole-v1/Test_CartPole-v1_PPO_20230220-213153/results/learning_curve.png +0 -0
- CartPole-v1/Test_CartPole-v1_PPO_20230220-213153/results/res.csv +0 -11
- CartPole-v1/Test_CartPole-v1_PPO_20230401-223412/config.yaml +0 -32
- CartPole-v1/Test_CartPole-v1_PPO_20230401-223412/logs/log.txt +0 -53
- CartPole-v1/Test_CartPole-v1_PPO_20230401-223412/models/actor.pth +0 -3
- CartPole-v1/Test_CartPole-v1_PPO_20230401-223412/models/critic.pth +0 -3
- CartPole-v1/Test_CartPole-v1_PPO_20230401-223412/results/learning_curve.png +0 -0
- CartPole-v1/Test_CartPole-v1_PPO_20230401-223412/results/res.csv +0 -11
- CartPole-v1/Test_single_CartPole-v1_PPO_20230517-134853/config.yaml +65 -0
- CartPole-v1/Test_single_CartPole-v1_PPO_20230517-134853/logs/log.txt +69 -0
- CartPole-v1/{Train_CartPole-v1_mp_PPO_20230401-223204/tb_logs/events.out.tfevents.1680359524.dell-Precision-5820-Tower.31414.0 → Test_single_CartPole-v1_PPO_20230517-134853/tb_logs/interact/events.out.tfevents.1684302533.JMac.local.61381.0} +2 -2
- CartPole-v1/{Test_CartPole-v1_PPO_20230401-223412/tb_logs/events.out.tfevents.1680359652.dell-Precision-5820-Tower.4337.0 → Test_single_CartPole-v1_PPO_20230517-134853/tb_logs/model/events.out.tfevents.1684302533.JMac.local.61381.1} +1 -1
- CartPole-v1/Train_CartPole-v1_PPO_20230220-212959/config.yaml +0 -31
- CartPole-v1/Train_CartPole-v1_PPO_20230220-212959/logs/log.txt +0 -252
- CartPole-v1/Train_CartPole-v1_PPO_20230220-212959/models/actor.pth +0 -3
- CartPole-v1/Train_CartPole-v1_PPO_20230220-212959/models/critic.pth +0 -3
- CartPole-v1/Train_CartPole-v1_PPO_20230220-212959/results/learning_curve.png +0 -0
- CartPole-v1/Train_CartPole-v1_PPO_20230220-212959/results/res.csv +0 -201
- CartPole-v1/Train_CartPole-v1_mp_PPO_20230401-223204/config.yaml +0 -32
- CartPole-v1/Train_CartPole-v1_mp_PPO_20230401-223204/logs/log.txt +0 -43
- CartPole-v1/Train_CartPole-v1_mp_PPO_20230401-223204/models/actor.pth +0 -3
- CartPole-v1/Train_CartPole-v1_mp_PPO_20230401-223204/models/critic.pth +0 -3
- CartPole-v1/Train_CartPole-v1_mp_PPO_20230401-223204/results/learning_curve.png +0 -0
- CartPole-v1/Train_CartPole-v1_mp_PPO_20230401-223204/results/res.csv +0 -302
- CartPole-v1/Train_single_CartPole-v1_PPO_20230517-134440/config.yaml +65 -0
- CartPole-v1/Train_single_CartPole-v1_PPO_20230517-134440/logs/log.txt +270 -0
- CartPole-v1/Train_single_CartPole-v1_PPO_20230517-134440/models/10 +0 -0
- CartPole-v1/Train_single_CartPole-v1_PPO_20230517-134440/models/20 +0 -0
- CartPole-v1/Train_single_CartPole-v1_PPO_20230517-134440/models/30 +0 -0
- CartPole-v1/Train_single_CartPole-v1_PPO_20230517-134440/models/40 +0 -0
- CartPole-v1/Train_single_CartPole-v1_PPO_20230517-134440/models/50 +0 -0
- CartPole-v1/Train_single_CartPole-v1_PPO_20230517-134440/models/60 +0 -0
- CartPole-v1/Train_single_CartPole-v1_PPO_20230517-134440/models/70 +0 -0
- CartPole-v1/Train_single_CartPole-v1_PPO_20230517-134440/models/80 +0 -0
- CartPole-v1/Train_single_CartPole-v1_PPO_20230517-134440/models/best +0 -0
- CartPole-v1/{Test_CartPole-v1_PPO-KL_20221217-204214/models/actor.pth → Train_single_CartPole-v1_PPO_20230517-134440/tb_logs/interact/events.out.tfevents.1684302280.JMac.local.60840.0} +2 -2
- CartPole-v1/{Test_CartPole-v1_PPO-KL_20221217-204214/models/critic.pth → Train_single_CartPole-v1_PPO_20230517-134440/tb_logs/model/events.out.tfevents.1684302280.JMac.local.60840.1} +2 -2
CartPole-v1/Test_CartPole-v1_PPO-KL_20221217-204214/config.yaml
DELETED
@@ -1,35 +0,0 @@
|
|
1 |
-
general_cfg:
|
2 |
-
algo_name: PPO
|
3 |
-
device: cuda
|
4 |
-
env_name: CartPole-v1
|
5 |
-
eval_eps: 10
|
6 |
-
eval_per_episode: 5
|
7 |
-
load_checkpoint: true
|
8 |
-
load_path: Train_CartPole-v1_PPO_20221217-204003
|
9 |
-
max_steps: 200
|
10 |
-
mode: test
|
11 |
-
new_step_api: true
|
12 |
-
render: false
|
13 |
-
save_fig: true
|
14 |
-
seed: 1
|
15 |
-
show_fig: false
|
16 |
-
test_eps: 10
|
17 |
-
train_eps: 200
|
18 |
-
wrapper: null
|
19 |
-
algo_cfg:
|
20 |
-
actor_hidden_dim: 256
|
21 |
-
actor_lr: 0.0003
|
22 |
-
continuous: false
|
23 |
-
critic_hidden_dim: 256
|
24 |
-
critic_lr: 0.001
|
25 |
-
entropy_coef: 0.01
|
26 |
-
eps_clip: 0.2
|
27 |
-
gamma: 0.99
|
28 |
-
k_epochs: 4
|
29 |
-
kl_alpha: 2
|
30 |
-
kl_beta: 1.5
|
31 |
-
kl_lambda: 0.5
|
32 |
-
kl_target: 0.01
|
33 |
-
ppo_type: kl
|
34 |
-
sgd_batch_size: 64
|
35 |
-
train_batch_size: 100
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
CartPole-v1/Test_CartPole-v1_PPO-KL_20221217-204214/logs/log.txt
DELETED
@@ -1,51 +0,0 @@
|
|
1 |
-
2022-12-17 20:42:14 - r - INFO: - Hyperparameters:
|
2 |
-
2022-12-17 20:42:14 - r - INFO: - ================================================================================
|
3 |
-
2022-12-17 20:42:14 - r - INFO: - Name Value Type
|
4 |
-
2022-12-17 20:42:14 - r - INFO: - env_name CartPole-v1 <class 'str'>
|
5 |
-
2022-12-17 20:42:14 - r - INFO: - new_step_api 1 <class 'bool'>
|
6 |
-
2022-12-17 20:42:14 - r - INFO: - wrapper None <class 'str'>
|
7 |
-
2022-12-17 20:42:14 - r - INFO: - render 0 <class 'bool'>
|
8 |
-
2022-12-17 20:42:14 - r - INFO: - algo_name PPO <class 'str'>
|
9 |
-
2022-12-17 20:42:14 - r - INFO: - mode test <class 'str'>
|
10 |
-
2022-12-17 20:42:14 - r - INFO: - seed 1 <class 'int'>
|
11 |
-
2022-12-17 20:42:14 - r - INFO: - device cuda <class 'str'>
|
12 |
-
2022-12-17 20:42:14 - r - INFO: - train_eps 200 <class 'int'>
|
13 |
-
2022-12-17 20:42:14 - r - INFO: - test_eps 10 <class 'int'>
|
14 |
-
2022-12-17 20:42:14 - r - INFO: - eval_eps 10 <class 'int'>
|
15 |
-
2022-12-17 20:42:14 - r - INFO: - eval_per_episode 5 <class 'int'>
|
16 |
-
2022-12-17 20:42:14 - r - INFO: - max_steps 200 <class 'int'>
|
17 |
-
2022-12-17 20:42:14 - r - INFO: - load_checkpoint 1 <class 'bool'>
|
18 |
-
2022-12-17 20:42:14 - r - INFO: - load_path Train_CartPole-v1_PPO_20221217-204003 <class 'str'>
|
19 |
-
2022-12-17 20:42:14 - r - INFO: - show_fig 0 <class 'bool'>
|
20 |
-
2022-12-17 20:42:14 - r - INFO: - save_fig 1 <class 'bool'>
|
21 |
-
2022-12-17 20:42:14 - r - INFO: - ppo_type kl <class 'str'>
|
22 |
-
2022-12-17 20:42:14 - r - INFO: - continuous 0 <class 'bool'>
|
23 |
-
2022-12-17 20:42:14 - r - INFO: - gamma 0.99 <class 'float'>
|
24 |
-
2022-12-17 20:42:14 - r - INFO: - k_epochs 4 <class 'int'>
|
25 |
-
2022-12-17 20:42:14 - r - INFO: - actor_lr 0.0003 <class 'float'>
|
26 |
-
2022-12-17 20:42:14 - r - INFO: - critic_lr 0.001 <class 'float'>
|
27 |
-
2022-12-17 20:42:14 - r - INFO: - eps_clip 0.2 <class 'float'>
|
28 |
-
2022-12-17 20:42:14 - r - INFO: - entropy_coef 0.01 <class 'float'>
|
29 |
-
2022-12-17 20:42:14 - r - INFO: - train_batch_size 100 <class 'int'>
|
30 |
-
2022-12-17 20:42:14 - r - INFO: - sgd_batch_size 64 <class 'int'>
|
31 |
-
2022-12-17 20:42:14 - r - INFO: - actor_hidden_dim 256 <class 'int'>
|
32 |
-
2022-12-17 20:42:14 - r - INFO: - critic_hidden_dim 256 <class 'int'>
|
33 |
-
2022-12-17 20:42:14 - r - INFO: - kl_alpha 2 <class 'int'>
|
34 |
-
2022-12-17 20:42:14 - r - INFO: - kl_beta 1.5 <class 'float'>
|
35 |
-
2022-12-17 20:42:14 - r - INFO: - kl_lambda 0.5 <class 'float'>
|
36 |
-
2022-12-17 20:42:14 - r - INFO: - kl_target 0.01 <class 'float'>
|
37 |
-
2022-12-17 20:42:14 - r - INFO: - ================================================================================
|
38 |
-
2022-12-17 20:42:15 - r - INFO: - n_states: 4, n_actions: 2
|
39 |
-
2022-12-17 20:42:16 - r - INFO: - Start testing!
|
40 |
-
2022-12-17 20:42:16 - r - INFO: - Env: CartPole-v1, Algorithm: PPO, Device: cuda
|
41 |
-
2022-12-17 20:42:17 - r - INFO: - Episode: 1/10, Reward: 200.000, Step: 200
|
42 |
-
2022-12-17 20:42:17 - r - INFO: - Episode: 2/10, Reward: 200.000, Step: 200
|
43 |
-
2022-12-17 20:42:18 - r - INFO: - Episode: 3/10, Reward: 200.000, Step: 200
|
44 |
-
2022-12-17 20:42:18 - r - INFO: - Episode: 4/10, Reward: 200.000, Step: 200
|
45 |
-
2022-12-17 20:42:18 - r - INFO: - Episode: 5/10, Reward: 200.000, Step: 200
|
46 |
-
2022-12-17 20:42:19 - r - INFO: - Episode: 6/10, Reward: 200.000, Step: 200
|
47 |
-
2022-12-17 20:42:19 - r - INFO: - Episode: 7/10, Reward: 200.000, Step: 200
|
48 |
-
2022-12-17 20:42:19 - r - INFO: - Episode: 8/10, Reward: 200.000, Step: 200
|
49 |
-
2022-12-17 20:42:19 - r - INFO: - Episode: 9/10, Reward: 200.000, Step: 200
|
50 |
-
2022-12-17 20:42:20 - r - INFO: - Episode: 10/10, Reward: 200.000, Step: 200
|
51 |
-
2022-12-17 20:42:20 - r - INFO: - Finish testing!
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
CartPole-v1/Test_CartPole-v1_PPO-KL_20221217-204214/results/learning_curve.png
DELETED
Binary file (25.5 kB)
|
|
CartPole-v1/Test_CartPole-v1_PPO-KL_20221217-204214/results/res.csv
DELETED
@@ -1,11 +0,0 @@
|
|
1 |
-
episodes,rewards,steps
|
2 |
-
0,200.0,200
|
3 |
-
1,200.0,200
|
4 |
-
2,200.0,200
|
5 |
-
3,200.0,200
|
6 |
-
4,200.0,200
|
7 |
-
5,200.0,200
|
8 |
-
6,200.0,200
|
9 |
-
7,200.0,200
|
10 |
-
8,200.0,200
|
11 |
-
9,200.0,200
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
CartPole-v1/Test_CartPole-v1_PPO_20230220-213153/config.yaml
DELETED
@@ -1,31 +0,0 @@
|
|
1 |
-
general_cfg:
|
2 |
-
algo_name: PPO
|
3 |
-
device: cuda
|
4 |
-
env_name: CartPole-v1
|
5 |
-
eval_eps: 10
|
6 |
-
eval_per_episode: 5
|
7 |
-
load_checkpoint: true
|
8 |
-
load_path: Train_CartPole-v1_PPO_20230220-212959
|
9 |
-
max_steps: 200
|
10 |
-
mode: test
|
11 |
-
new_step_api: true
|
12 |
-
render: false
|
13 |
-
save_fig: true
|
14 |
-
seed: 1
|
15 |
-
show_fig: false
|
16 |
-
test_eps: 10
|
17 |
-
train_eps: 200
|
18 |
-
wrapper: null
|
19 |
-
algo_cfg:
|
20 |
-
actor_hidden_dim: 256
|
21 |
-
actor_lr: 0.0003
|
22 |
-
continuous: false
|
23 |
-
critic_hidden_dim: 256
|
24 |
-
critic_lr: 0.001
|
25 |
-
entropy_coef: 0.01
|
26 |
-
eps_clip: 0.2
|
27 |
-
gamma: 0.99
|
28 |
-
k_epochs: 4
|
29 |
-
ppo_type: clip
|
30 |
-
sgd_batch_size: 128
|
31 |
-
train_batch_size: 256
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
CartPole-v1/Test_CartPole-v1_PPO_20230220-213153/logs/log.txt
DELETED
@@ -1,52 +0,0 @@
|
|
1 |
-
2023-02-20 21:31:53 - r - INFO: - Hyperparameters:
|
2 |
-
2023-02-20 21:31:53 - r - INFO: - ================================================================================
|
3 |
-
2023-02-20 21:31:53 - r - INFO: - Name Value Type
|
4 |
-
2023-02-20 21:31:53 - r - INFO: - env_name CartPole-v1 <class 'str'>
|
5 |
-
2023-02-20 21:31:53 - r - INFO: - new_step_api 1 <class 'bool'>
|
6 |
-
2023-02-20 21:31:53 - r - INFO: - wrapper None <class 'str'>
|
7 |
-
2023-02-20 21:31:53 - r - INFO: - render 0 <class 'bool'>
|
8 |
-
2023-02-20 21:31:53 - r - INFO: - algo_name PPO <class 'str'>
|
9 |
-
2023-02-20 21:31:53 - r - INFO: - mode test <class 'str'>
|
10 |
-
2023-02-20 21:31:53 - r - INFO: - seed 1 <class 'int'>
|
11 |
-
2023-02-20 21:31:53 - r - INFO: - device cuda <class 'str'>
|
12 |
-
2023-02-20 21:31:53 - r - INFO: - train_eps 200 <class 'int'>
|
13 |
-
2023-02-20 21:31:53 - r - INFO: - test_eps 10 <class 'int'>
|
14 |
-
2023-02-20 21:31:53 - r - INFO: - eval_eps 10 <class 'int'>
|
15 |
-
2023-02-20 21:31:53 - r - INFO: - eval_per_episode 5 <class 'int'>
|
16 |
-
2023-02-20 21:31:53 - r - INFO: - max_steps 200 <class 'int'>
|
17 |
-
2023-02-20 21:31:53 - r - INFO: - load_checkpoint 1 <class 'bool'>
|
18 |
-
2023-02-20 21:31:53 - r - INFO: - load_path Train_CartPole-v1_PPO_20230220-212959 <class 'str'>
|
19 |
-
2023-02-20 21:31:53 - r - INFO: - show_fig 0 <class 'bool'>
|
20 |
-
2023-02-20 21:31:53 - r - INFO: - save_fig 1 <class 'bool'>
|
21 |
-
2023-02-20 21:31:53 - r - INFO: - ppo_type clip <class 'str'>
|
22 |
-
2023-02-20 21:31:53 - r - INFO: - continuous 0 <class 'bool'>
|
23 |
-
2023-02-20 21:31:53 - r - INFO: - gamma 0.99 <class 'float'>
|
24 |
-
2023-02-20 21:31:53 - r - INFO: - k_epochs 4 <class 'int'>
|
25 |
-
2023-02-20 21:31:53 - r - INFO: - actor_lr 0.0003 <class 'float'>
|
26 |
-
2023-02-20 21:31:53 - r - INFO: - critic_lr 0.001 <class 'float'>
|
27 |
-
2023-02-20 21:31:53 - r - INFO: - eps_clip 0.2 <class 'float'>
|
28 |
-
2023-02-20 21:31:53 - r - INFO: - entropy_coef 0.01 <class 'float'>
|
29 |
-
2023-02-20 21:31:53 - r - INFO: - train_batch_size 256 <class 'int'>
|
30 |
-
2023-02-20 21:31:53 - r - INFO: - sgd_batch_size 128 <class 'int'>
|
31 |
-
2023-02-20 21:31:53 - r - INFO: - actor_hidden_dim 256 <class 'int'>
|
32 |
-
2023-02-20 21:31:53 - r - INFO: - critic_hidden_dim 256 <class 'int'>
|
33 |
-
2023-02-20 21:31:53 - r - INFO: - task_dir C:\Users\24438\Desktop\rl-tutorials\joyrl/tasks/Test_CartPole-v1_PPO_20230220-213153 <class 'str'>
|
34 |
-
2023-02-20 21:31:53 - r - INFO: - model_dir C:\Users\24438\Desktop\rl-tutorials\joyrl/tasks/Test_CartPole-v1_PPO_20230220-213153/models <class 'str'>
|
35 |
-
2023-02-20 21:31:53 - r - INFO: - res_dir C:\Users\24438\Desktop\rl-tutorials\joyrl/tasks/Test_CartPole-v1_PPO_20230220-213153/results <class 'str'>
|
36 |
-
2023-02-20 21:31:53 - r - INFO: - log_dir C:\Users\24438\Desktop\rl-tutorials\joyrl/tasks/Test_CartPole-v1_PPO_20230220-213153/logs <class 'str'>
|
37 |
-
2023-02-20 21:31:53 - r - INFO: - traj_dir C:\Users\24438\Desktop\rl-tutorials\joyrl/tasks/Test_CartPole-v1_PPO_20230220-213153/traj <class 'str'>
|
38 |
-
2023-02-20 21:31:53 - r - INFO: - ================================================================================
|
39 |
-
2023-02-20 21:31:53 - r - INFO: - n_states: 4, n_actions: 2
|
40 |
-
2023-02-20 21:31:54 - r - INFO: - Start testing!
|
41 |
-
2023-02-20 21:31:54 - r - INFO: - Env: CartPole-v1, Algorithm: PPO, Device: cuda
|
42 |
-
2023-02-20 21:31:55 - r - INFO: - Episode: 1/10, Reward: 200.000, Step: 200
|
43 |
-
2023-02-20 21:31:55 - r - INFO: - Episode: 2/10, Reward: 200.000, Step: 200
|
44 |
-
2023-02-20 21:31:56 - r - INFO: - Episode: 3/10, Reward: 200.000, Step: 200
|
45 |
-
2023-02-20 21:31:56 - r - INFO: - Episode: 4/10, Reward: 200.000, Step: 200
|
46 |
-
2023-02-20 21:31:56 - r - INFO: - Episode: 5/10, Reward: 200.000, Step: 200
|
47 |
-
2023-02-20 21:31:56 - r - INFO: - Episode: 6/10, Reward: 200.000, Step: 200
|
48 |
-
2023-02-20 21:31:56 - r - INFO: - Episode: 7/10, Reward: 200.000, Step: 200
|
49 |
-
2023-02-20 21:31:57 - r - INFO: - Episode: 8/10, Reward: 200.000, Step: 200
|
50 |
-
2023-02-20 21:31:57 - r - INFO: - Episode: 9/10, Reward: 200.000, Step: 200
|
51 |
-
2023-02-20 21:31:57 - r - INFO: - Episode: 10/10, Reward: 189.000, Step: 189
|
52 |
-
2023-02-20 21:31:57 - r - INFO: - Finish testing!
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
CartPole-v1/Test_CartPole-v1_PPO_20230220-213153/models/actor.pth
DELETED
@@ -1,3 +0,0 @@
|
|
1 |
-
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:7d07e4388597f766e04099380da27cb55fd877e8d26cdd14eab48bc097525216
|
3 |
-
size 272215
|
|
|
|
|
|
|
|
CartPole-v1/Test_CartPole-v1_PPO_20230220-213153/models/critic.pth
DELETED
@@ -1,3 +0,0 @@
|
|
1 |
-
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:6743877f9534c272b5e6d8bae3cbc87b1fa32bb21af2935c28e503122e042c2d
|
3 |
-
size 271191
|
|
|
|
|
|
|
|
CartPole-v1/Test_CartPole-v1_PPO_20230220-213153/results/learning_curve.png
DELETED
Binary file (24.9 kB)
|
|
CartPole-v1/Test_CartPole-v1_PPO_20230220-213153/results/res.csv
DELETED
@@ -1,11 +0,0 @@
|
|
1 |
-
episodes,rewards,steps
|
2 |
-
0,200.0,200
|
3 |
-
1,200.0,200
|
4 |
-
2,200.0,200
|
5 |
-
3,200.0,200
|
6 |
-
4,200.0,200
|
7 |
-
5,200.0,200
|
8 |
-
6,200.0,200
|
9 |
-
7,200.0,200
|
10 |
-
8,200.0,200
|
11 |
-
9,189.0,189
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
CartPole-v1/Test_CartPole-v1_PPO_20230401-223412/config.yaml
DELETED
@@ -1,32 +0,0 @@
|
|
1 |
-
general_cfg:
|
2 |
-
algo_name: PPO
|
3 |
-
device: cpu
|
4 |
-
env_name: CartPole-v1
|
5 |
-
eval_eps: 10
|
6 |
-
eval_per_episode: 5
|
7 |
-
load_checkpoint: true
|
8 |
-
load_path: Train_CartPole-v1_mp_PPO_20230401-223204
|
9 |
-
max_steps: 200
|
10 |
-
mode: test
|
11 |
-
n_workers: 1
|
12 |
-
new_step_api: true
|
13 |
-
render: false
|
14 |
-
save_fig: true
|
15 |
-
seed: 1
|
16 |
-
show_fig: false
|
17 |
-
test_eps: 10
|
18 |
-
train_eps: 300
|
19 |
-
wrapper: null
|
20 |
-
algo_cfg:
|
21 |
-
actor_hidden_dim: 256
|
22 |
-
actor_lr: 0.0003
|
23 |
-
continuous: false
|
24 |
-
critic_hidden_dim: 256
|
25 |
-
critic_lr: 0.001
|
26 |
-
entropy_coef: 0.01
|
27 |
-
eps_clip: 0.2
|
28 |
-
gamma: 0.99
|
29 |
-
k_epochs: 4
|
30 |
-
ppo_type: clip
|
31 |
-
sgd_batch_size: 128
|
32 |
-
train_batch_size: 256
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
CartPole-v1/Test_CartPole-v1_PPO_20230401-223412/logs/log.txt
DELETED
@@ -1,53 +0,0 @@
|
|
1 |
-
2023-04-01 22:34:12 - r - INFO: - Hyperparameters:
|
2 |
-
2023-04-01 22:34:12 - r - INFO: - ================================================================================
|
3 |
-
2023-04-01 22:34:12 - r - INFO: - Name Value Type
|
4 |
-
2023-04-01 22:34:12 - r - INFO: - env_name CartPole-v1 <class 'str'>
|
5 |
-
2023-04-01 22:34:12 - r - INFO: - new_step_api 1 <class 'bool'>
|
6 |
-
2023-04-01 22:34:12 - r - INFO: - wrapper None <class 'str'>
|
7 |
-
2023-04-01 22:34:12 - r - INFO: - render 0 <class 'bool'>
|
8 |
-
2023-04-01 22:34:12 - r - INFO: - algo_name PPO <class 'str'>
|
9 |
-
2023-04-01 22:34:12 - r - INFO: - mode test <class 'str'>
|
10 |
-
2023-04-01 22:34:12 - r - INFO: - seed 1 <class 'int'>
|
11 |
-
2023-04-01 22:34:12 - r - INFO: - device cpu <class 'str'>
|
12 |
-
2023-04-01 22:34:12 - r - INFO: - train_eps 300 <class 'int'>
|
13 |
-
2023-04-01 22:34:12 - r - INFO: - test_eps 10 <class 'int'>
|
14 |
-
2023-04-01 22:34:12 - r - INFO: - eval_eps 10 <class 'int'>
|
15 |
-
2023-04-01 22:34:12 - r - INFO: - eval_per_episode 5 <class 'int'>
|
16 |
-
2023-04-01 22:34:12 - r - INFO: - max_steps 200 <class 'int'>
|
17 |
-
2023-04-01 22:34:12 - r - INFO: - load_checkpoint 1 <class 'bool'>
|
18 |
-
2023-04-01 22:34:12 - r - INFO: - load_path Train_CartPole-v1_mp_PPO_20230401-223204 <class 'str'>
|
19 |
-
2023-04-01 22:34:12 - r - INFO: - show_fig 0 <class 'bool'>
|
20 |
-
2023-04-01 22:34:12 - r - INFO: - save_fig 1 <class 'bool'>
|
21 |
-
2023-04-01 22:34:12 - r - INFO: - n_workers 1 <class 'int'>
|
22 |
-
2023-04-01 22:34:12 - r - INFO: - ppo_type clip <class 'str'>
|
23 |
-
2023-04-01 22:34:12 - r - INFO: - continuous 0 <class 'bool'>
|
24 |
-
2023-04-01 22:34:12 - r - INFO: - gamma 0.99 <class 'float'>
|
25 |
-
2023-04-01 22:34:12 - r - INFO: - k_epochs 4 <class 'int'>
|
26 |
-
2023-04-01 22:34:12 - r - INFO: - actor_lr 0.0003 <class 'float'>
|
27 |
-
2023-04-01 22:34:12 - r - INFO: - critic_lr 0.001 <class 'float'>
|
28 |
-
2023-04-01 22:34:12 - r - INFO: - eps_clip 0.2 <class 'float'>
|
29 |
-
2023-04-01 22:34:12 - r - INFO: - entropy_coef 0.01 <class 'float'>
|
30 |
-
2023-04-01 22:34:12 - r - INFO: - train_batch_size 256 <class 'int'>
|
31 |
-
2023-04-01 22:34:12 - r - INFO: - sgd_batch_size 128 <class 'int'>
|
32 |
-
2023-04-01 22:34:12 - r - INFO: - actor_hidden_dim 256 <class 'int'>
|
33 |
-
2023-04-01 22:34:12 - r - INFO: - critic_hidden_dim 256 <class 'int'>
|
34 |
-
2023-04-01 22:34:12 - r - INFO: - task_dir /home/dingli/joyrl_offline/tasks/Test_CartPole-v1_PPO_20230401-223412 <class 'str'>
|
35 |
-
2023-04-01 22:34:12 - r - INFO: - res_dir /home/dingli/joyrl_offline/tasks/Test_CartPole-v1_PPO_20230401-223412/results <class 'str'>
|
36 |
-
2023-04-01 22:34:12 - r - INFO: - log_dir /home/dingli/joyrl_offline/tasks/Test_CartPole-v1_PPO_20230401-223412/logs <class 'str'>
|
37 |
-
2023-04-01 22:34:12 - r - INFO: - traj_dir /home/dingli/joyrl_offline/tasks/Test_CartPole-v1_PPO_20230401-223412/traj <class 'str'>
|
38 |
-
2023-04-01 22:34:12 - r - INFO: - tb_dir /home/dingli/joyrl_offline/tasks/Test_CartPole-v1_PPO_20230401-223412/tb_logs <class 'str'>
|
39 |
-
2023-04-01 22:34:12 - r - INFO: - ================================================================================
|
40 |
-
2023-04-01 22:34:12 - r - INFO: - n_states: 4, n_actions: 2
|
41 |
-
2023-04-01 22:34:12 - r - INFO: - Start testing!
|
42 |
-
2023-04-01 22:34:12 - r - INFO: - Env: CartPole-v1, Algorithm: PPO, Device: cpu
|
43 |
-
2023-04-01 22:34:12 - r - INFO: - Episode: 1/10, Reward: 136.000, Step: 136
|
44 |
-
2023-04-01 22:34:12 - r - INFO: - Episode: 2/10, Reward: 136.000, Step: 136
|
45 |
-
2023-04-01 22:34:12 - r - INFO: - Episode: 3/10, Reward: 200.000, Step: 200
|
46 |
-
2023-04-01 22:34:12 - r - INFO: - Episode: 4/10, Reward: 200.000, Step: 200
|
47 |
-
2023-04-01 22:34:12 - r - INFO: - Episode: 5/10, Reward: 187.000, Step: 187
|
48 |
-
2023-04-01 22:34:13 - r - INFO: - Episode: 6/10, Reward: 192.000, Step: 192
|
49 |
-
2023-04-01 22:34:13 - r - INFO: - Episode: 7/10, Reward: 200.000, Step: 200
|
50 |
-
2023-04-01 22:34:13 - r - INFO: - Episode: 8/10, Reward: 200.000, Step: 200
|
51 |
-
2023-04-01 22:34:13 - r - INFO: - Episode: 9/10, Reward: 159.000, Step: 159
|
52 |
-
2023-04-01 22:34:13 - r - INFO: - Episode: 10/10, Reward: 124.000, Step: 124
|
53 |
-
2023-04-01 22:34:13 - r - INFO: - Finish testing!
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
CartPole-v1/Test_CartPole-v1_PPO_20230401-223412/models/actor.pth
DELETED
@@ -1,3 +0,0 @@
|
|
1 |
-
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:f6a3e48d551bcba327ff4c5d3cc464a6a94b83eda543a54d231016e021e8cbd3
|
3 |
-
size 272151
|
|
|
|
|
|
|
|
CartPole-v1/Test_CartPole-v1_PPO_20230401-223412/models/critic.pth
DELETED
@@ -1,3 +0,0 @@
|
|
1 |
-
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:a4db7aeb3805e1deb11428a34a600a40068a0f711986f38fdf9e0f9895f8a45c
|
3 |
-
size 271127
|
|
|
|
|
|
|
|
CartPole-v1/Test_CartPole-v1_PPO_20230401-223412/results/learning_curve.png
DELETED
Binary file (38.8 kB)
|
|
CartPole-v1/Test_CartPole-v1_PPO_20230401-223412/results/res.csv
DELETED
@@ -1,11 +0,0 @@
|
|
1 |
-
episodes,rewards,steps
|
2 |
-
0,136.0,136
|
3 |
-
1,136.0,136
|
4 |
-
2,200.0,200
|
5 |
-
3,200.0,200
|
6 |
-
4,187.0,187
|
7 |
-
5,192.0,192
|
8 |
-
6,200.0,200
|
9 |
-
7,200.0,200
|
10 |
-
8,159.0,159
|
11 |
-
9,124.0,124
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
CartPole-v1/Test_single_CartPole-v1_PPO_20230517-134853/config.yaml
ADDED
@@ -0,0 +1,65 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
general_cfg:
|
2 |
+
algo_name: PPO
|
3 |
+
collect_traj: false
|
4 |
+
device: cpu
|
5 |
+
env_name: gym
|
6 |
+
load_checkpoint: true
|
7 |
+
load_model_step: best
|
8 |
+
load_path: Train_single_CartPole-v1_PPO_20230517-134440
|
9 |
+
max_episode: 10
|
10 |
+
max_step: 200
|
11 |
+
mode: test
|
12 |
+
model_save_fre: 10
|
13 |
+
mp_backend: single
|
14 |
+
n_workers: 2
|
15 |
+
online_eval: true
|
16 |
+
online_eval_episode: 10
|
17 |
+
seed: 1
|
18 |
+
algo_cfg:
|
19 |
+
actor_hidden_dim: 256
|
20 |
+
actor_layers:
|
21 |
+
- activation: relu
|
22 |
+
layer_dim:
|
23 |
+
- 256
|
24 |
+
layer_type: linear
|
25 |
+
- activation: relu
|
26 |
+
layer_dim:
|
27 |
+
- 256
|
28 |
+
layer_type: linear
|
29 |
+
actor_lr: 0.0003
|
30 |
+
batch_size: 256
|
31 |
+
buffer_type: ONPOLICY_QUE
|
32 |
+
continuous: false
|
33 |
+
critic_hidden_dim: 256
|
34 |
+
critic_layers:
|
35 |
+
- activation: relu
|
36 |
+
layer_dim:
|
37 |
+
- 256
|
38 |
+
layer_type: linear
|
39 |
+
- activation: relu
|
40 |
+
layer_dim:
|
41 |
+
- 256
|
42 |
+
layer_type: linear
|
43 |
+
critic_loss_coef: 0.5
|
44 |
+
critic_lr: 0.001
|
45 |
+
entropy_coef: 0.01
|
46 |
+
eps_clip: 0.2
|
47 |
+
gamma: 0.99
|
48 |
+
independ_actor: true
|
49 |
+
k_epochs: 4
|
50 |
+
kl_alpha: 2
|
51 |
+
kl_beta: 1.5
|
52 |
+
kl_lambda: 0.5
|
53 |
+
kl_target: 0.1
|
54 |
+
lr: 0.0001
|
55 |
+
min_policy: 0
|
56 |
+
ppo_type: clip
|
57 |
+
sgd_batch_size: 128
|
58 |
+
share_optimizer: false
|
59 |
+
env_cfg:
|
60 |
+
id: CartPole-v1
|
61 |
+
ignore_params:
|
62 |
+
- wrapper
|
63 |
+
- ignore_params
|
64 |
+
render_mode: null
|
65 |
+
wrapper: null
|
CartPole-v1/Test_single_CartPole-v1_PPO_20230517-134853/logs/log.txt
ADDED
@@ -0,0 +1,69 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
2023-05-17 13:48:53 - SimpleLog - INFO: - General Configs:
|
2 |
+
2023-05-17 13:48:53 - SimpleLog - INFO: - ================================================================================
|
3 |
+
2023-05-17 13:48:53 - SimpleLog - INFO: - Name Value Type
|
4 |
+
2023-05-17 13:48:53 - SimpleLog - INFO: - env_name gym <class 'str'>
|
5 |
+
2023-05-17 13:48:53 - SimpleLog - INFO: - algo_name PPO <class 'str'>
|
6 |
+
2023-05-17 13:48:53 - SimpleLog - INFO: - mode test <class 'str'>
|
7 |
+
2023-05-17 13:48:53 - SimpleLog - INFO: - device cpu <class 'str'>
|
8 |
+
2023-05-17 13:48:53 - SimpleLog - INFO: - seed 1 <class 'int'>
|
9 |
+
2023-05-17 13:48:53 - SimpleLog - INFO: - max_episode 10 <class 'int'>
|
10 |
+
2023-05-17 13:48:53 - SimpleLog - INFO: - max_step 200 <class 'int'>
|
11 |
+
2023-05-17 13:48:53 - SimpleLog - INFO: - collect_traj 0 <class 'bool'>
|
12 |
+
2023-05-17 13:48:53 - SimpleLog - INFO: - mp_backend single <class 'str'>
|
13 |
+
2023-05-17 13:48:53 - SimpleLog - INFO: - n_workers 2 <class 'int'>
|
14 |
+
2023-05-17 13:48:53 - SimpleLog - INFO: - online_eval 1 <class 'bool'>
|
15 |
+
2023-05-17 13:48:53 - SimpleLog - INFO: - online_eval_episode 10 <class 'int'>
|
16 |
+
2023-05-17 13:48:53 - SimpleLog - INFO: - model_save_fre 10 <class 'int'>
|
17 |
+
2023-05-17 13:48:53 - SimpleLog - INFO: - load_checkpoint 1 <class 'bool'>
|
18 |
+
2023-05-17 13:48:53 - SimpleLog - INFO: - load_path Train_single_CartPole-v1_PPO_20230517-134440 <class 'str'>
|
19 |
+
2023-05-17 13:48:53 - SimpleLog - INFO: - load_model_step best <class 'str'>
|
20 |
+
2023-05-17 13:48:53 - SimpleLog - INFO: - ================================================================================
|
21 |
+
2023-05-17 13:48:53 - SimpleLog - INFO: - Algo Configs:
|
22 |
+
2023-05-17 13:48:53 - SimpleLog - INFO: - ================================================================================
|
23 |
+
2023-05-17 13:48:53 - SimpleLog - INFO: - Name Value Type
|
24 |
+
2023-05-17 13:48:53 - SimpleLog - INFO: - independ_actor 1 <class 'bool'>
|
25 |
+
2023-05-17 13:48:53 - SimpleLog - INFO: - share_optimizer 0 <class 'bool'>
|
26 |
+
2023-05-17 13:48:53 - SimpleLog - INFO: - ppo_type clip <class 'str'>
|
27 |
+
2023-05-17 13:48:53 - SimpleLog - INFO: - eps_clip 0.2 <class 'float'>
|
28 |
+
2023-05-17 13:48:53 - SimpleLog - INFO: - kl_target 0.1 <class 'float'>
|
29 |
+
2023-05-17 13:48:53 - SimpleLog - INFO: - kl_lambda 0.5 <class 'float'>
|
30 |
+
2023-05-17 13:48:53 - SimpleLog - INFO: - kl_beta 1.5 <class 'float'>
|
31 |
+
2023-05-17 13:48:53 - SimpleLog - INFO: - kl_alpha 2 <class 'int'>
|
32 |
+
2023-05-17 13:48:53 - SimpleLog - INFO: - continuous 0 <class 'bool'>
|
33 |
+
2023-05-17 13:48:53 - SimpleLog - INFO: - gamma 0.99 <class 'float'>
|
34 |
+
2023-05-17 13:48:53 - SimpleLog - INFO: - k_epochs 4 <class 'int'>
|
35 |
+
2023-05-17 13:48:53 - SimpleLog - INFO: - lr 0.0001 <class 'float'>
|
36 |
+
2023-05-17 13:48:53 - SimpleLog - INFO: - actor_lr 0.0003 <class 'float'>
|
37 |
+
2023-05-17 13:48:53 - SimpleLog - INFO: - critic_lr 0.001 <class 'float'>
|
38 |
+
2023-05-17 13:48:53 - SimpleLog - INFO: - critic_loss_coef 0.5 <class 'float'>
|
39 |
+
2023-05-17 13:48:53 - SimpleLog - INFO: - entropy_coef 0.01 <class 'float'>
|
40 |
+
2023-05-17 13:48:53 - SimpleLog - INFO: - batch_size 256 <class 'int'>
|
41 |
+
2023-05-17 13:48:53 - SimpleLog - INFO: - sgd_batch_size 128 <class 'int'>
|
42 |
+
2023-05-17 13:48:53 - SimpleLog - INFO: - actor_hidden_dim 256 <class 'int'>
|
43 |
+
2023-05-17 13:48:53 - SimpleLog - INFO: - critic_hidden_dim 256 <class 'int'>
|
44 |
+
2023-05-17 13:48:53 - SimpleLog - INFO: - min_policy 0 <class 'int'>
|
45 |
+
2023-05-17 13:48:53 - SimpleLog - INFO: - actor_layers [{'layer_type': 'linear', 'layer_dim': [256], 'activation': 'relu'}, {'layer_type': 'linear', 'layer_dim': [256], 'activation': 'relu'}] <class 'str'>
|
46 |
+
2023-05-17 13:48:53 - SimpleLog - INFO: - critic_layers [{'layer_type': 'linear', 'layer_dim': [256], 'activation': 'relu'}, {'layer_type': 'linear', 'layer_dim': [256], 'activation': 'relu'}] <class 'str'>
|
47 |
+
2023-05-17 13:48:53 - SimpleLog - INFO: - buffer_type ONPOLICY_QUE <class 'str'>
|
48 |
+
2023-05-17 13:48:53 - SimpleLog - INFO: - ================================================================================
|
49 |
+
2023-05-17 13:48:53 - SimpleLog - INFO: - Env Configs:
|
50 |
+
2023-05-17 13:48:53 - SimpleLog - INFO: - ================================================================================
|
51 |
+
2023-05-17 13:48:53 - SimpleLog - INFO: - Name Value Type
|
52 |
+
2023-05-17 13:48:53 - SimpleLog - INFO: - id CartPole-v1 <class 'str'>
|
53 |
+
2023-05-17 13:48:53 - SimpleLog - INFO: - render_mode None <class 'str'>
|
54 |
+
2023-05-17 13:48:53 - SimpleLog - INFO: - wrapper None <class 'str'>
|
55 |
+
2023-05-17 13:48:53 - SimpleLog - INFO: - ignore_params ['wrapper', 'ignore_params'] <class 'str'>
|
56 |
+
2023-05-17 13:48:53 - SimpleLog - INFO: - ================================================================================
|
57 |
+
2023-05-17 13:48:53 - SimpleLog - INFO: - obs_space: Box([-4.8000002e+00 -3.4028235e+38 -4.1887903e-01 -3.4028235e+38], [4.8000002e+00 3.4028235e+38 4.1887903e-01 3.4028235e+38], (4,), float32), n_actions: Discrete(2)
|
58 |
+
2023-05-17 13:48:53 - SimpleLog - INFO: - Start testing!
|
59 |
+
2023-05-17 13:48:54 - SimpleLog - INFO: - episode: 0, ep_reward: 200.0, ep_step: 200
|
60 |
+
2023-05-17 13:48:54 - SimpleLog - INFO: - episode: 1, ep_reward: 200.0, ep_step: 200
|
61 |
+
2023-05-17 13:48:54 - SimpleLog - INFO: - episode: 2, ep_reward: 200.0, ep_step: 200
|
62 |
+
2023-05-17 13:48:54 - SimpleLog - INFO: - episode: 3, ep_reward: 200.0, ep_step: 200
|
63 |
+
2023-05-17 13:48:54 - SimpleLog - INFO: - episode: 4, ep_reward: 200.0, ep_step: 200
|
64 |
+
2023-05-17 13:48:54 - SimpleLog - INFO: - episode: 5, ep_reward: 200.0, ep_step: 200
|
65 |
+
2023-05-17 13:48:54 - SimpleLog - INFO: - episode: 6, ep_reward: 200.0, ep_step: 200
|
66 |
+
2023-05-17 13:48:54 - SimpleLog - INFO: - episode: 7, ep_reward: 200.0, ep_step: 200
|
67 |
+
2023-05-17 13:48:54 - SimpleLog - INFO: - episode: 8, ep_reward: 200.0, ep_step: 200
|
68 |
+
2023-05-17 13:48:54 - SimpleLog - INFO: - episode: 9, ep_reward: 200.0, ep_step: 200
|
69 |
+
2023-05-17 13:48:54 - SimpleLog - INFO: - Finish testing! total time consumed: 0.50s
|
CartPole-v1/{Train_CartPole-v1_mp_PPO_20230401-223204/tb_logs/events.out.tfevents.1680359524.dell-Precision-5820-Tower.31414.0 → Test_single_CartPole-v1_PPO_20230517-134853/tb_logs/interact/events.out.tfevents.1684302533.JMac.local.61381.0}
RENAMED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
-
size
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:192f3f9444f62d66ccd1d47939a013e4b5a4c4a0aafcbd0862831a91c15db487
|
3 |
+
size 1056
|
CartPole-v1/{Test_CartPole-v1_PPO_20230401-223412/tb_logs/events.out.tfevents.1680359652.dell-Precision-5820-Tower.4337.0 → Test_single_CartPole-v1_PPO_20230517-134853/tb_logs/model/events.out.tfevents.1684302533.JMac.local.61381.1}
RENAMED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 40
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:1ba2d347be4135f91135b543380189593fd17038b8a32b037a3d7ab5938a2f7a
|
3 |
size 40
|
CartPole-v1/Train_CartPole-v1_PPO_20230220-212959/config.yaml
DELETED
@@ -1,31 +0,0 @@
|
|
1 |
-
general_cfg:
|
2 |
-
algo_name: PPO
|
3 |
-
device: cuda
|
4 |
-
env_name: CartPole-v1
|
5 |
-
eval_eps: 10
|
6 |
-
eval_per_episode: 5
|
7 |
-
load_checkpoint: false
|
8 |
-
load_path: Train_CartPole-v1_DQN_20221026-054757
|
9 |
-
max_steps: 200
|
10 |
-
mode: train
|
11 |
-
new_step_api: true
|
12 |
-
render: false
|
13 |
-
save_fig: true
|
14 |
-
seed: 1
|
15 |
-
show_fig: false
|
16 |
-
test_eps: 10
|
17 |
-
train_eps: 200
|
18 |
-
wrapper: null
|
19 |
-
algo_cfg:
|
20 |
-
actor_hidden_dim: 256
|
21 |
-
actor_lr: 0.0003
|
22 |
-
continuous: false
|
23 |
-
critic_hidden_dim: 256
|
24 |
-
critic_lr: 0.001
|
25 |
-
entropy_coef: 0.01
|
26 |
-
eps_clip: 0.2
|
27 |
-
gamma: 0.99
|
28 |
-
k_epochs: 4
|
29 |
-
ppo_type: clip
|
30 |
-
sgd_batch_size: 128
|
31 |
-
train_batch_size: 256
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
CartPole-v1/Train_CartPole-v1_PPO_20230220-212959/logs/log.txt
DELETED
@@ -1,252 +0,0 @@
|
|
1 |
-
2023-02-20 21:29:59 - r - INFO: - Hyperparameters:
|
2 |
-
2023-02-20 21:29:59 - r - INFO: - ================================================================================
|
3 |
-
2023-02-20 21:29:59 - r - INFO: - Name Value Type
|
4 |
-
2023-02-20 21:29:59 - r - INFO: - env_name CartPole-v1 <class 'str'>
|
5 |
-
2023-02-20 21:29:59 - r - INFO: - new_step_api 1 <class 'bool'>
|
6 |
-
2023-02-20 21:29:59 - r - INFO: - wrapper None <class 'str'>
|
7 |
-
2023-02-20 21:29:59 - r - INFO: - render 0 <class 'bool'>
|
8 |
-
2023-02-20 21:29:59 - r - INFO: - algo_name PPO <class 'str'>
|
9 |
-
2023-02-20 21:29:59 - r - INFO: - mode train <class 'str'>
|
10 |
-
2023-02-20 21:29:59 - r - INFO: - seed 1 <class 'int'>
|
11 |
-
2023-02-20 21:29:59 - r - INFO: - device cuda <class 'str'>
|
12 |
-
2023-02-20 21:29:59 - r - INFO: - train_eps 200 <class 'int'>
|
13 |
-
2023-02-20 21:29:59 - r - INFO: - test_eps 10 <class 'int'>
|
14 |
-
2023-02-20 21:29:59 - r - INFO: - eval_eps 10 <class 'int'>
|
15 |
-
2023-02-20 21:29:59 - r - INFO: - eval_per_episode 5 <class 'int'>
|
16 |
-
2023-02-20 21:29:59 - r - INFO: - max_steps 200 <class 'int'>
|
17 |
-
2023-02-20 21:29:59 - r - INFO: - load_checkpoint 0 <class 'bool'>
|
18 |
-
2023-02-20 21:29:59 - r - INFO: - load_path Train_CartPole-v1_DQN_20221026-054757 <class 'str'>
|
19 |
-
2023-02-20 21:29:59 - r - INFO: - show_fig 0 <class 'bool'>
|
20 |
-
2023-02-20 21:29:59 - r - INFO: - save_fig 1 <class 'bool'>
|
21 |
-
2023-02-20 21:29:59 - r - INFO: - ppo_type clip <class 'str'>
|
22 |
-
2023-02-20 21:29:59 - r - INFO: - continuous 0 <class 'bool'>
|
23 |
-
2023-02-20 21:29:59 - r - INFO: - gamma 0.99 <class 'float'>
|
24 |
-
2023-02-20 21:29:59 - r - INFO: - k_epochs 4 <class 'int'>
|
25 |
-
2023-02-20 21:29:59 - r - INFO: - actor_lr 0.0003 <class 'float'>
|
26 |
-
2023-02-20 21:29:59 - r - INFO: - critic_lr 0.001 <class 'float'>
|
27 |
-
2023-02-20 21:29:59 - r - INFO: - eps_clip 0.2 <class 'float'>
|
28 |
-
2023-02-20 21:29:59 - r - INFO: - entropy_coef 0.01 <class 'float'>
|
29 |
-
2023-02-20 21:29:59 - r - INFO: - train_batch_size 256 <class 'int'>
|
30 |
-
2023-02-20 21:29:59 - r - INFO: - sgd_batch_size 128 <class 'int'>
|
31 |
-
2023-02-20 21:29:59 - r - INFO: - actor_hidden_dim 256 <class 'int'>
|
32 |
-
2023-02-20 21:29:59 - r - INFO: - critic_hidden_dim 256 <class 'int'>
|
33 |
-
2023-02-20 21:29:59 - r - INFO: - task_dir C:\Users\24438\Desktop\rl-tutorials\joyrl/tasks/Train_CartPole-v1_PPO_20230220-212959 <class 'str'>
|
34 |
-
2023-02-20 21:29:59 - r - INFO: - model_dir C:\Users\24438\Desktop\rl-tutorials\joyrl/tasks/Train_CartPole-v1_PPO_20230220-212959/models <class 'str'>
|
35 |
-
2023-02-20 21:29:59 - r - INFO: - res_dir C:\Users\24438\Desktop\rl-tutorials\joyrl/tasks/Train_CartPole-v1_PPO_20230220-212959/results <class 'str'>
|
36 |
-
2023-02-20 21:29:59 - r - INFO: - log_dir C:\Users\24438\Desktop\rl-tutorials\joyrl/tasks/Train_CartPole-v1_PPO_20230220-212959/logs <class 'str'>
|
37 |
-
2023-02-20 21:29:59 - r - INFO: - traj_dir C:\Users\24438\Desktop\rl-tutorials\joyrl/tasks/Train_CartPole-v1_PPO_20230220-212959/traj <class 'str'>
|
38 |
-
2023-02-20 21:29:59 - r - INFO: - ================================================================================
|
39 |
-
2023-02-20 21:29:59 - r - INFO: - n_states: 4, n_actions: 2
|
40 |
-
2023-02-20 21:29:59 - r - INFO: - Start training!
|
41 |
-
2023-02-20 21:29:59 - r - INFO: - Env: CartPole-v1, Algorithm: PPO, Device: cuda
|
42 |
-
2023-02-20 21:30:01 - r - INFO: - Episode: 1/200, Reward: 25.000, Step: 25
|
43 |
-
2023-02-20 21:30:01 - r - INFO: - Episode: 2/200, Reward: 15.000, Step: 15
|
44 |
-
2023-02-20 21:30:01 - r - INFO: - Episode: 3/200, Reward: 13.000, Step: 13
|
45 |
-
2023-02-20 21:30:01 - r - INFO: - Episode: 4/200, Reward: 14.000, Step: 14
|
46 |
-
2023-02-20 21:30:01 - r - INFO: - Episode: 5/200, Reward: 25.000, Step: 25
|
47 |
-
2023-02-20 21:30:01 - r - INFO: - Current episode 5 has the best eval reward: 23.900
|
48 |
-
2023-02-20 21:30:01 - r - INFO: - Episode: 6/200, Reward: 41.000, Step: 41
|
49 |
-
2023-02-20 21:30:01 - r - INFO: - Episode: 7/200, Reward: 33.000, Step: 33
|
50 |
-
2023-02-20 21:30:01 - r - INFO: - Episode: 8/200, Reward: 12.000, Step: 12
|
51 |
-
2023-02-20 21:30:01 - r - INFO: - Episode: 9/200, Reward: 20.000, Step: 20
|
52 |
-
2023-02-20 21:30:01 - r - INFO: - Episode: 10/200, Reward: 33.000, Step: 33
|
53 |
-
2023-02-20 21:30:02 - r - INFO: - Current episode 10 has the best eval reward: 27.400
|
54 |
-
2023-02-20 21:30:02 - r - INFO: - Episode: 11/200, Reward: 19.000, Step: 19
|
55 |
-
2023-02-20 21:30:02 - r - INFO: - Episode: 12/200, Reward: 35.000, Step: 35
|
56 |
-
2023-02-20 21:30:02 - r - INFO: - Episode: 13/200, Reward: 35.000, Step: 35
|
57 |
-
2023-02-20 21:30:02 - r - INFO: - Episode: 14/200, Reward: 9.000, Step: 9
|
58 |
-
2023-02-20 21:30:02 - r - INFO: - Episode: 15/200, Reward: 32.000, Step: 32
|
59 |
-
2023-02-20 21:30:02 - r - INFO: - Current episode 15 has the best eval reward: 37.600
|
60 |
-
2023-02-20 21:30:02 - r - INFO: - Episode: 16/200, Reward: 29.000, Step: 29
|
61 |
-
2023-02-20 21:30:02 - r - INFO: - Episode: 17/200, Reward: 17.000, Step: 17
|
62 |
-
2023-02-20 21:30:02 - r - INFO: - Episode: 18/200, Reward: 17.000, Step: 17
|
63 |
-
2023-02-20 21:30:02 - r - INFO: - Episode: 19/200, Reward: 17.000, Step: 17
|
64 |
-
2023-02-20 21:30:02 - r - INFO: - Episode: 20/200, Reward: 20.000, Step: 20
|
65 |
-
2023-02-20 21:30:03 - r - INFO: - Episode: 21/200, Reward: 24.000, Step: 24
|
66 |
-
2023-02-20 21:30:03 - r - INFO: - Episode: 22/200, Reward: 44.000, Step: 44
|
67 |
-
2023-02-20 21:30:03 - r - INFO: - Episode: 23/200, Reward: 39.000, Step: 39
|
68 |
-
2023-02-20 21:30:03 - r - INFO: - Episode: 24/200, Reward: 48.000, Step: 48
|
69 |
-
2023-02-20 21:30:03 - r - INFO: - Episode: 25/200, Reward: 52.000, Step: 52
|
70 |
-
2023-02-20 21:30:03 - r - INFO: - Episode: 26/200, Reward: 32.000, Step: 32
|
71 |
-
2023-02-20 21:30:03 - r - INFO: - Episode: 27/200, Reward: 45.000, Step: 45
|
72 |
-
2023-02-20 21:30:04 - r - INFO: - Episode: 28/200, Reward: 68.000, Step: 68
|
73 |
-
2023-02-20 21:30:04 - r - INFO: - Episode: 29/200, Reward: 45.000, Step: 45
|
74 |
-
2023-02-20 21:30:04 - r - INFO: - Episode: 30/200, Reward: 16.000, Step: 16
|
75 |
-
2023-02-20 21:30:04 - r - INFO: - Current episode 30 has the best eval reward: 45.300
|
76 |
-
2023-02-20 21:30:04 - r - INFO: - Episode: 31/200, Reward: 38.000, Step: 38
|
77 |
-
2023-02-20 21:30:04 - r - INFO: - Episode: 32/200, Reward: 17.000, Step: 17
|
78 |
-
2023-02-20 21:30:04 - r - INFO: - Episode: 33/200, Reward: 35.000, Step: 35
|
79 |
-
2023-02-20 21:30:04 - r - INFO: - Episode: 34/200, Reward: 12.000, Step: 12
|
80 |
-
2023-02-20 21:30:04 - r - INFO: - Episode: 35/200, Reward: 56.000, Step: 56
|
81 |
-
2023-02-20 21:30:05 - r - INFO: - Episode: 36/200, Reward: 36.000, Step: 36
|
82 |
-
2023-02-20 21:30:05 - r - INFO: - Episode: 37/200, Reward: 15.000, Step: 15
|
83 |
-
2023-02-20 21:30:05 - r - INFO: - Episode: 38/200, Reward: 25.000, Step: 25
|
84 |
-
2023-02-20 21:30:05 - r - INFO: - Episode: 39/200, Reward: 28.000, Step: 28
|
85 |
-
2023-02-20 21:30:05 - r - INFO: - Episode: 40/200, Reward: 56.000, Step: 56
|
86 |
-
2023-02-20 21:30:05 - r - INFO: - Episode: 41/200, Reward: 18.000, Step: 18
|
87 |
-
2023-02-20 21:30:05 - r - INFO: - Episode: 42/200, Reward: 33.000, Step: 33
|
88 |
-
2023-02-20 21:30:05 - r - INFO: - Episode: 43/200, Reward: 30.000, Step: 30
|
89 |
-
2023-02-20 21:30:05 - r - INFO: - Episode: 44/200, Reward: 30.000, Step: 30
|
90 |
-
2023-02-20 21:30:06 - r - INFO: - Episode: 45/200, Reward: 28.000, Step: 28
|
91 |
-
2023-02-20 21:30:06 - r - INFO: - Episode: 46/200, Reward: 38.000, Step: 38
|
92 |
-
2023-02-20 21:30:06 - r - INFO: - Episode: 47/200, Reward: 70.000, Step: 70
|
93 |
-
2023-02-20 21:30:06 - r - INFO: - Episode: 48/200, Reward: 18.000, Step: 18
|
94 |
-
2023-02-20 21:30:06 - r - INFO: - Episode: 49/200, Reward: 16.000, Step: 16
|
95 |
-
2023-02-20 21:30:06 - r - INFO: - Episode: 50/200, Reward: 36.000, Step: 36
|
96 |
-
2023-02-20 21:30:07 - r - INFO: - Current episode 50 has the best eval reward: 48.700
|
97 |
-
2023-02-20 21:30:07 - r - INFO: - Episode: 51/200, Reward: 26.000, Step: 26
|
98 |
-
2023-02-20 21:30:07 - r - INFO: - Episode: 52/200, Reward: 34.000, Step: 34
|
99 |
-
2023-02-20 21:30:07 - r - INFO: - Episode: 53/200, Reward: 70.000, Step: 70
|
100 |
-
2023-02-20 21:30:07 - r - INFO: - Episode: 54/200, Reward: 39.000, Step: 39
|
101 |
-
2023-02-20 21:30:07 - r - INFO: - Episode: 55/200, Reward: 87.000, Step: 87
|
102 |
-
2023-02-20 21:30:08 - r - INFO: - Episode: 56/200, Reward: 75.000, Step: 75
|
103 |
-
2023-02-20 21:30:08 - r - INFO: - Episode: 57/200, Reward: 21.000, Step: 21
|
104 |
-
2023-02-20 21:30:08 - r - INFO: - Episode: 58/200, Reward: 72.000, Step: 72
|
105 |
-
2023-02-20 21:30:08 - r - INFO: - Episode: 59/200, Reward: 43.000, Step: 43
|
106 |
-
2023-02-20 21:30:08 - r - INFO: - Episode: 60/200, Reward: 48.000, Step: 48
|
107 |
-
2023-02-20 21:30:09 - r - INFO: - Episode: 61/200, Reward: 64.000, Step: 64
|
108 |
-
2023-02-20 21:30:09 - r - INFO: - Episode: 62/200, Reward: 135.000, Step: 135
|
109 |
-
2023-02-20 21:30:09 - r - INFO: - Episode: 63/200, Reward: 108.000, Step: 108
|
110 |
-
2023-02-20 21:30:09 - r - INFO: - Episode: 64/200, Reward: 38.000, Step: 38
|
111 |
-
2023-02-20 21:30:09 - r - INFO: - Episode: 65/200, Reward: 22.000, Step: 22
|
112 |
-
2023-02-20 21:30:10 - r - INFO: - Current episode 65 has the best eval reward: 68.000
|
113 |
-
2023-02-20 21:30:10 - r - INFO: - Episode: 66/200, Reward: 60.000, Step: 60
|
114 |
-
2023-02-20 21:30:10 - r - INFO: - Episode: 67/200, Reward: 74.000, Step: 74
|
115 |
-
2023-02-20 21:30:10 - r - INFO: - Episode: 68/200, Reward: 93.000, Step: 93
|
116 |
-
2023-02-20 21:30:10 - r - INFO: - Episode: 69/200, Reward: 55.000, Step: 55
|
117 |
-
2023-02-20 21:30:10 - r - INFO: - Episode: 70/200, Reward: 48.000, Step: 48
|
118 |
-
2023-02-20 21:30:11 - r - INFO: - Episode: 71/200, Reward: 29.000, Step: 29
|
119 |
-
2023-02-20 21:30:11 - r - INFO: - Episode: 72/200, Reward: 59.000, Step: 59
|
120 |
-
2023-02-20 21:30:11 - r - INFO: - Episode: 73/200, Reward: 35.000, Step: 35
|
121 |
-
2023-02-20 21:30:11 - r - INFO: - Episode: 74/200, Reward: 40.000, Step: 40
|
122 |
-
2023-02-20 21:30:11 - r - INFO: - Episode: 75/200, Reward: 113.000, Step: 113
|
123 |
-
2023-02-20 21:30:12 - r - INFO: - Episode: 76/200, Reward: 114.000, Step: 114
|
124 |
-
2023-02-20 21:30:12 - r - INFO: - Episode: 77/200, Reward: 52.000, Step: 52
|
125 |
-
2023-02-20 21:30:13 - r - INFO: - Episode: 78/200, Reward: 139.000, Step: 139
|
126 |
-
2023-02-20 21:30:13 - r - INFO: - Episode: 79/200, Reward: 138.000, Step: 138
|
127 |
-
2023-02-20 21:30:13 - r - INFO: - Episode: 80/200, Reward: 54.000, Step: 54
|
128 |
-
2023-02-20 21:30:14 - r - INFO: - Current episode 80 has the best eval reward: 110.900
|
129 |
-
2023-02-20 21:30:14 - r - INFO: - Episode: 81/200, Reward: 156.000, Step: 156
|
130 |
-
2023-02-20 21:30:15 - r - INFO: - Episode: 82/200, Reward: 140.000, Step: 140
|
131 |
-
2023-02-20 21:30:15 - r - INFO: - Episode: 83/200, Reward: 144.000, Step: 144
|
132 |
-
2023-02-20 21:30:15 - r - INFO: - Episode: 84/200, Reward: 118.000, Step: 118
|
133 |
-
2023-02-20 21:30:15 - r - INFO: - Episode: 85/200, Reward: 156.000, Step: 156
|
134 |
-
2023-02-20 21:30:16 - r - INFO: - Episode: 86/200, Reward: 135.000, Step: 135
|
135 |
-
2023-02-20 21:30:17 - r - INFO: - Episode: 87/200, Reward: 144.000, Step: 144
|
136 |
-
2023-02-20 21:30:17 - r - INFO: - Episode: 88/200, Reward: 160.000, Step: 160
|
137 |
-
2023-02-20 21:30:17 - r - INFO: - Episode: 89/200, Reward: 30.000, Step: 30
|
138 |
-
2023-02-20 21:30:17 - r - INFO: - Episode: 90/200, Reward: 194.000, Step: 194
|
139 |
-
2023-02-20 21:30:19 - r - INFO: - Current episode 90 has the best eval reward: 169.300
|
140 |
-
2023-02-20 21:30:20 - r - INFO: - Episode: 91/200, Reward: 200.000, Step: 200
|
141 |
-
2023-02-20 21:30:20 - r - INFO: - Episode: 92/200, Reward: 200.000, Step: 200
|
142 |
-
2023-02-20 21:30:20 - r - INFO: - Episode: 93/200, Reward: 160.000, Step: 160
|
143 |
-
2023-02-20 21:30:21 - r - INFO: - Episode: 94/200, Reward: 200.000, Step: 200
|
144 |
-
2023-02-20 21:30:21 - r - INFO: - Episode: 95/200, Reward: 59.000, Step: 59
|
145 |
-
2023-02-20 21:30:23 - r - INFO: - Episode: 96/200, Reward: 200.000, Step: 200
|
146 |
-
2023-02-20 21:30:23 - r - INFO: - Episode: 97/200, Reward: 182.000, Step: 182
|
147 |
-
2023-02-20 21:30:23 - r - INFO: - Episode: 98/200, Reward: 125.000, Step: 125
|
148 |
-
2023-02-20 21:30:23 - r - INFO: - Episode: 99/200, Reward: 140.000, Step: 140
|
149 |
-
2023-02-20 21:30:24 - r - INFO: - Episode: 100/200, Reward: 146.000, Step: 146
|
150 |
-
2023-02-20 21:30:25 - r - INFO: - Episode: 101/200, Reward: 130.000, Step: 130
|
151 |
-
2023-02-20 21:30:26 - r - INFO: - Episode: 102/200, Reward: 74.000, Step: 74
|
152 |
-
2023-02-20 21:30:26 - r - INFO: - Episode: 103/200, Reward: 167.000, Step: 167
|
153 |
-
2023-02-20 21:30:26 - r - INFO: - Episode: 104/200, Reward: 171.000, Step: 171
|
154 |
-
2023-02-20 21:30:26 - r - INFO: - Episode: 105/200, Reward: 150.000, Step: 150
|
155 |
-
2023-02-20 21:30:28 - r - INFO: - Episode: 106/200, Reward: 105.000, Step: 105
|
156 |
-
2023-02-20 21:30:28 - r - INFO: - Episode: 107/200, Reward: 65.000, Step: 65
|
157 |
-
2023-02-20 21:30:28 - r - INFO: - Episode: 108/200, Reward: 170.000, Step: 170
|
158 |
-
2023-02-20 21:30:29 - r - INFO: - Episode: 109/200, Reward: 172.000, Step: 172
|
159 |
-
2023-02-20 21:30:29 - r - INFO: - Episode: 110/200, Reward: 164.000, Step: 164
|
160 |
-
2023-02-20 21:30:30 - r - INFO: - Current episode 110 has the best eval reward: 180.000
|
161 |
-
2023-02-20 21:30:31 - r - INFO: - Episode: 111/200, Reward: 148.000, Step: 148
|
162 |
-
2023-02-20 21:30:31 - r - INFO: - Episode: 112/200, Reward: 116.000, Step: 116
|
163 |
-
2023-02-20 21:30:31 - r - INFO: - Episode: 113/200, Reward: 59.000, Step: 59
|
164 |
-
2023-02-20 21:30:31 - r - INFO: - Episode: 114/200, Reward: 200.000, Step: 200
|
165 |
-
2023-02-20 21:30:31 - r - INFO: - Episode: 115/200, Reward: 36.000, Step: 36
|
166 |
-
2023-02-20 21:30:33 - r - INFO: - Episode: 116/200, Reward: 200.000, Step: 200
|
167 |
-
2023-02-20 21:30:34 - r - INFO: - Episode: 117/200, Reward: 200.000, Step: 200
|
168 |
-
2023-02-20 21:30:34 - r - INFO: - Episode: 118/200, Reward: 158.000, Step: 158
|
169 |
-
2023-02-20 21:30:34 - r - INFO: - Episode: 119/200, Reward: 200.000, Step: 200
|
170 |
-
2023-02-20 21:30:34 - r - INFO: - Episode: 120/200, Reward: 200.000, Step: 200
|
171 |
-
2023-02-20 21:30:37 - r - INFO: - Current episode 120 has the best eval reward: 200.000
|
172 |
-
2023-02-20 21:30:37 - r - INFO: - Episode: 121/200, Reward: 200.000, Step: 200
|
173 |
-
2023-02-20 21:30:38 - r - INFO: - Episode: 122/200, Reward: 172.000, Step: 172
|
174 |
-
2023-02-20 21:30:38 - r - INFO: - Episode: 123/200, Reward: 137.000, Step: 137
|
175 |
-
2023-02-20 21:30:38 - r - INFO: - Episode: 124/200, Reward: 189.000, Step: 189
|
176 |
-
2023-02-20 21:30:38 - r - INFO: - Episode: 125/200, Reward: 200.000, Step: 200
|
177 |
-
2023-02-20 21:30:40 - r - INFO: - Episode: 126/200, Reward: 200.000, Step: 200
|
178 |
-
2023-02-20 21:30:41 - r - INFO: - Episode: 127/200, Reward: 197.000, Step: 197
|
179 |
-
2023-02-20 21:30:41 - r - INFO: - Episode: 128/200, Reward: 125.000, Step: 125
|
180 |
-
2023-02-20 21:30:41 - r - INFO: - Episode: 129/200, Reward: 194.000, Step: 194
|
181 |
-
2023-02-20 21:30:41 - r - INFO: - Episode: 130/200, Reward: 167.000, Step: 167
|
182 |
-
2023-02-20 21:30:43 - r - INFO: - Episode: 131/200, Reward: 135.000, Step: 135
|
183 |
-
2023-02-20 21:30:43 - r - INFO: - Episode: 132/200, Reward: 200.000, Step: 200
|
184 |
-
2023-02-20 21:30:44 - r - INFO: - Episode: 133/200, Reward: 200.000, Step: 200
|
185 |
-
2023-02-20 21:30:44 - r - INFO: - Episode: 134/200, Reward: 170.000, Step: 170
|
186 |
-
2023-02-20 21:30:44 - r - INFO: - Episode: 135/200, Reward: 195.000, Step: 195
|
187 |
-
2023-02-20 21:30:47 - r - INFO: - Episode: 136/200, Reward: 150.000, Step: 150
|
188 |
-
2023-02-20 21:30:47 - r - INFO: - Episode: 137/200, Reward: 187.000, Step: 187
|
189 |
-
2023-02-20 21:30:47 - r - INFO: - Episode: 138/200, Reward: 172.000, Step: 172
|
190 |
-
2023-02-20 21:30:47 - r - INFO: - Episode: 139/200, Reward: 124.000, Step: 124
|
191 |
-
2023-02-20 21:30:47 - r - INFO: - Episode: 140/200, Reward: 105.000, Step: 105
|
192 |
-
2023-02-20 21:30:49 - r - INFO: - Episode: 141/200, Reward: 49.000, Step: 49
|
193 |
-
2023-02-20 21:30:49 - r - INFO: - Episode: 142/200, Reward: 108.000, Step: 108
|
194 |
-
2023-02-20 21:30:49 - r - INFO: - Episode: 143/200, Reward: 117.000, Step: 117
|
195 |
-
2023-02-20 21:30:50 - r - INFO: - Episode: 144/200, Reward: 136.000, Step: 136
|
196 |
-
2023-02-20 21:30:50 - r - INFO: - Episode: 145/200, Reward: 120.000, Step: 120
|
197 |
-
2023-02-20 21:30:52 - r - INFO: - Episode: 146/200, Reward: 172.000, Step: 172
|
198 |
-
2023-02-20 21:30:52 - r - INFO: - Episode: 147/200, Reward: 134.000, Step: 134
|
199 |
-
2023-02-20 21:30:52 - r - INFO: - Episode: 148/200, Reward: 200.000, Step: 200
|
200 |
-
2023-02-20 21:30:53 - r - INFO: - Episode: 149/200, Reward: 200.000, Step: 200
|
201 |
-
2023-02-20 21:30:53 - r - INFO: - Episode: 150/200, Reward: 150.000, Step: 150
|
202 |
-
2023-02-20 21:30:55 - r - INFO: - Episode: 151/200, Reward: 190.000, Step: 190
|
203 |
-
2023-02-20 21:30:55 - r - INFO: - Episode: 152/200, Reward: 200.000, Step: 200
|
204 |
-
2023-02-20 21:30:56 - r - INFO: - Episode: 153/200, Reward: 200.000, Step: 200
|
205 |
-
2023-02-20 21:30:56 - r - INFO: - Episode: 154/200, Reward: 200.000, Step: 200
|
206 |
-
2023-02-20 21:30:56 - r - INFO: - Episode: 155/200, Reward: 179.000, Step: 179
|
207 |
-
2023-02-20 21:30:59 - r - INFO: - Episode: 156/200, Reward: 200.000, Step: 200
|
208 |
-
2023-02-20 21:30:59 - r - INFO: - Episode: 157/200, Reward: 200.000, Step: 200
|
209 |
-
2023-02-20 21:30:59 - r - INFO: - Episode: 158/200, Reward: 200.000, Step: 200
|
210 |
-
2023-02-20 21:31:00 - r - INFO: - Episode: 159/200, Reward: 200.000, Step: 200
|
211 |
-
2023-02-20 21:31:00 - r - INFO: - Episode: 160/200, Reward: 195.000, Step: 195
|
212 |
-
2023-02-20 21:31:02 - r - INFO: - Episode: 161/200, Reward: 195.000, Step: 195
|
213 |
-
2023-02-20 21:31:02 - r - INFO: - Episode: 162/200, Reward: 142.000, Step: 142
|
214 |
-
2023-02-20 21:31:03 - r - INFO: - Episode: 163/200, Reward: 200.000, Step: 200
|
215 |
-
2023-02-20 21:31:03 - r - INFO: - Episode: 164/200, Reward: 108.000, Step: 108
|
216 |
-
2023-02-20 21:31:03 - r - INFO: - Episode: 165/200, Reward: 200.000, Step: 200
|
217 |
-
2023-02-20 21:31:05 - r - INFO: - Episode: 166/200, Reward: 165.000, Step: 165
|
218 |
-
2023-02-20 21:31:05 - r - INFO: - Episode: 167/200, Reward: 153.000, Step: 153
|
219 |
-
2023-02-20 21:31:05 - r - INFO: - Episode: 168/200, Reward: 85.000, Step: 85
|
220 |
-
2023-02-20 21:31:05 - r - INFO: - Episode: 169/200, Reward: 139.000, Step: 139
|
221 |
-
2023-02-20 21:31:06 - r - INFO: - Episode: 170/200, Reward: 155.000, Step: 155
|
222 |
-
2023-02-20 21:31:08 - r - INFO: - Episode: 171/200, Reward: 166.000, Step: 166
|
223 |
-
2023-02-20 21:31:08 - r - INFO: - Episode: 172/200, Reward: 182.000, Step: 182
|
224 |
-
2023-02-20 21:31:08 - r - INFO: - Episode: 173/200, Reward: 190.000, Step: 190
|
225 |
-
2023-02-20 21:31:08 - r - INFO: - Episode: 174/200, Reward: 35.000, Step: 35
|
226 |
-
2023-02-20 21:31:09 - r - INFO: - Episode: 175/200, Reward: 124.000, Step: 124
|
227 |
-
2023-02-20 21:31:11 - r - INFO: - Episode: 176/200, Reward: 114.000, Step: 114
|
228 |
-
2023-02-20 21:31:11 - r - INFO: - Episode: 177/200, Reward: 200.000, Step: 200
|
229 |
-
2023-02-20 21:31:11 - r - INFO: - Episode: 178/200, Reward: 200.000, Step: 200
|
230 |
-
2023-02-20 21:31:12 - r - INFO: - Episode: 179/200, Reward: 200.000, Step: 200
|
231 |
-
2023-02-20 21:31:12 - r - INFO: - Episode: 180/200, Reward: 200.000, Step: 200
|
232 |
-
2023-02-20 21:31:14 - r - INFO: - Episode: 181/200, Reward: 110.000, Step: 110
|
233 |
-
2023-02-20 21:31:14 - r - INFO: - Episode: 182/200, Reward: 128.000, Step: 128
|
234 |
-
2023-02-20 21:31:14 - r - INFO: - Episode: 183/200, Reward: 107.000, Step: 107
|
235 |
-
2023-02-20 21:31:15 - r - INFO: - Episode: 184/200, Reward: 192.000, Step: 192
|
236 |
-
2023-02-20 21:31:15 - r - INFO: - Episode: 185/200, Reward: 106.000, Step: 106
|
237 |
-
2023-02-20 21:31:16 - r - INFO: - Episode: 186/200, Reward: 32.000, Step: 32
|
238 |
-
2023-02-20 21:31:16 - r - INFO: - Episode: 187/200, Reward: 107.000, Step: 107
|
239 |
-
2023-02-20 21:31:17 - r - INFO: - Episode: 188/200, Reward: 129.000, Step: 129
|
240 |
-
2023-02-20 21:31:17 - r - INFO: - Episode: 189/200, Reward: 122.000, Step: 122
|
241 |
-
2023-02-20 21:31:17 - r - INFO: - Episode: 190/200, Reward: 126.000, Step: 126
|
242 |
-
2023-02-20 21:31:18 - r - INFO: - Episode: 191/200, Reward: 120.000, Step: 120
|
243 |
-
2023-02-20 21:31:19 - r - INFO: - Episode: 192/200, Reward: 127.000, Step: 127
|
244 |
-
2023-02-20 21:31:19 - r - INFO: - Episode: 193/200, Reward: 132.000, Step: 132
|
245 |
-
2023-02-20 21:31:19 - r - INFO: - Episode: 194/200, Reward: 128.000, Step: 128
|
246 |
-
2023-02-20 21:31:19 - r - INFO: - Episode: 195/200, Reward: 142.000, Step: 142
|
247 |
-
2023-02-20 21:31:21 - r - INFO: - Episode: 196/200, Reward: 137.000, Step: 137
|
248 |
-
2023-02-20 21:31:21 - r - INFO: - Episode: 197/200, Reward: 125.000, Step: 125
|
249 |
-
2023-02-20 21:31:21 - r - INFO: - Episode: 198/200, Reward: 118.000, Step: 118
|
250 |
-
2023-02-20 21:31:22 - r - INFO: - Episode: 199/200, Reward: 158.000, Step: 158
|
251 |
-
2023-02-20 21:31:22 - r - INFO: - Episode: 200/200, Reward: 144.000, Step: 144
|
252 |
-
2023-02-20 21:31:23 - r - INFO: - Finish training!
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
CartPole-v1/Train_CartPole-v1_PPO_20230220-212959/models/actor.pth
DELETED
@@ -1,3 +0,0 @@
|
|
1 |
-
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:7d07e4388597f766e04099380da27cb55fd877e8d26cdd14eab48bc097525216
|
3 |
-
size 272215
|
|
|
|
|
|
|
|
CartPole-v1/Train_CartPole-v1_PPO_20230220-212959/models/critic.pth
DELETED
@@ -1,3 +0,0 @@
|
|
1 |
-
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:6743877f9534c272b5e6d8bae3cbc87b1fa32bb21af2935c28e503122e042c2d
|
3 |
-
size 271191
|
|
|
|
|
|
|
|
CartPole-v1/Train_CartPole-v1_PPO_20230220-212959/results/learning_curve.png
DELETED
Binary file (77.4 kB)
|
|
CartPole-v1/Train_CartPole-v1_PPO_20230220-212959/results/res.csv
DELETED
@@ -1,201 +0,0 @@
|
|
1 |
-
episodes,rewards,steps
|
2 |
-
0,25.0,25
|
3 |
-
1,15.0,15
|
4 |
-
2,13.0,13
|
5 |
-
3,14.0,14
|
6 |
-
4,25.0,25
|
7 |
-
5,41.0,41
|
8 |
-
6,33.0,33
|
9 |
-
7,12.0,12
|
10 |
-
8,20.0,20
|
11 |
-
9,33.0,33
|
12 |
-
10,19.0,19
|
13 |
-
11,35.0,35
|
14 |
-
12,35.0,35
|
15 |
-
13,9.0,9
|
16 |
-
14,32.0,32
|
17 |
-
15,29.0,29
|
18 |
-
16,17.0,17
|
19 |
-
17,17.0,17
|
20 |
-
18,17.0,17
|
21 |
-
19,20.0,20
|
22 |
-
20,24.0,24
|
23 |
-
21,44.0,44
|
24 |
-
22,39.0,39
|
25 |
-
23,48.0,48
|
26 |
-
24,52.0,52
|
27 |
-
25,32.0,32
|
28 |
-
26,45.0,45
|
29 |
-
27,68.0,68
|
30 |
-
28,45.0,45
|
31 |
-
29,16.0,16
|
32 |
-
30,38.0,38
|
33 |
-
31,17.0,17
|
34 |
-
32,35.0,35
|
35 |
-
33,12.0,12
|
36 |
-
34,56.0,56
|
37 |
-
35,36.0,36
|
38 |
-
36,15.0,15
|
39 |
-
37,25.0,25
|
40 |
-
38,28.0,28
|
41 |
-
39,56.0,56
|
42 |
-
40,18.0,18
|
43 |
-
41,33.0,33
|
44 |
-
42,30.0,30
|
45 |
-
43,30.0,30
|
46 |
-
44,28.0,28
|
47 |
-
45,38.0,38
|
48 |
-
46,70.0,70
|
49 |
-
47,18.0,18
|
50 |
-
48,16.0,16
|
51 |
-
49,36.0,36
|
52 |
-
50,26.0,26
|
53 |
-
51,34.0,34
|
54 |
-
52,70.0,70
|
55 |
-
53,39.0,39
|
56 |
-
54,87.0,87
|
57 |
-
55,75.0,75
|
58 |
-
56,21.0,21
|
59 |
-
57,72.0,72
|
60 |
-
58,43.0,43
|
61 |
-
59,48.0,48
|
62 |
-
60,64.0,64
|
63 |
-
61,135.0,135
|
64 |
-
62,108.0,108
|
65 |
-
63,38.0,38
|
66 |
-
64,22.0,22
|
67 |
-
65,60.0,60
|
68 |
-
66,74.0,74
|
69 |
-
67,93.0,93
|
70 |
-
68,55.0,55
|
71 |
-
69,48.0,48
|
72 |
-
70,29.0,29
|
73 |
-
71,59.0,59
|
74 |
-
72,35.0,35
|
75 |
-
73,40.0,40
|
76 |
-
74,113.0,113
|
77 |
-
75,114.0,114
|
78 |
-
76,52.0,52
|
79 |
-
77,139.0,139
|
80 |
-
78,138.0,138
|
81 |
-
79,54.0,54
|
82 |
-
80,156.0,156
|
83 |
-
81,140.0,140
|
84 |
-
82,144.0,144
|
85 |
-
83,118.0,118
|
86 |
-
84,156.0,156
|
87 |
-
85,135.0,135
|
88 |
-
86,144.0,144
|
89 |
-
87,160.0,160
|
90 |
-
88,30.0,30
|
91 |
-
89,194.0,194
|
92 |
-
90,200.0,200
|
93 |
-
91,200.0,200
|
94 |
-
92,160.0,160
|
95 |
-
93,200.0,200
|
96 |
-
94,59.0,59
|
97 |
-
95,200.0,200
|
98 |
-
96,182.0,182
|
99 |
-
97,125.0,125
|
100 |
-
98,140.0,140
|
101 |
-
99,146.0,146
|
102 |
-
100,130.0,130
|
103 |
-
101,74.0,74
|
104 |
-
102,167.0,167
|
105 |
-
103,171.0,171
|
106 |
-
104,150.0,150
|
107 |
-
105,105.0,105
|
108 |
-
106,65.0,65
|
109 |
-
107,170.0,170
|
110 |
-
108,172.0,172
|
111 |
-
109,164.0,164
|
112 |
-
110,148.0,148
|
113 |
-
111,116.0,116
|
114 |
-
112,59.0,59
|
115 |
-
113,200.0,200
|
116 |
-
114,36.0,36
|
117 |
-
115,200.0,200
|
118 |
-
116,200.0,200
|
119 |
-
117,158.0,158
|
120 |
-
118,200.0,200
|
121 |
-
119,200.0,200
|
122 |
-
120,200.0,200
|
123 |
-
121,172.0,172
|
124 |
-
122,137.0,137
|
125 |
-
123,189.0,189
|
126 |
-
124,200.0,200
|
127 |
-
125,200.0,200
|
128 |
-
126,197.0,197
|
129 |
-
127,125.0,125
|
130 |
-
128,194.0,194
|
131 |
-
129,167.0,167
|
132 |
-
130,135.0,135
|
133 |
-
131,200.0,200
|
134 |
-
132,200.0,200
|
135 |
-
133,170.0,170
|
136 |
-
134,195.0,195
|
137 |
-
135,150.0,150
|
138 |
-
136,187.0,187
|
139 |
-
137,172.0,172
|
140 |
-
138,124.0,124
|
141 |
-
139,105.0,105
|
142 |
-
140,49.0,49
|
143 |
-
141,108.0,108
|
144 |
-
142,117.0,117
|
145 |
-
143,136.0,136
|
146 |
-
144,120.0,120
|
147 |
-
145,172.0,172
|
148 |
-
146,134.0,134
|
149 |
-
147,200.0,200
|
150 |
-
148,200.0,200
|
151 |
-
149,150.0,150
|
152 |
-
150,190.0,190
|
153 |
-
151,200.0,200
|
154 |
-
152,200.0,200
|
155 |
-
153,200.0,200
|
156 |
-
154,179.0,179
|
157 |
-
155,200.0,200
|
158 |
-
156,200.0,200
|
159 |
-
157,200.0,200
|
160 |
-
158,200.0,200
|
161 |
-
159,195.0,195
|
162 |
-
160,195.0,195
|
163 |
-
161,142.0,142
|
164 |
-
162,200.0,200
|
165 |
-
163,108.0,108
|
166 |
-
164,200.0,200
|
167 |
-
165,165.0,165
|
168 |
-
166,153.0,153
|
169 |
-
167,85.0,85
|
170 |
-
168,139.0,139
|
171 |
-
169,155.0,155
|
172 |
-
170,166.0,166
|
173 |
-
171,182.0,182
|
174 |
-
172,190.0,190
|
175 |
-
173,35.0,35
|
176 |
-
174,124.0,124
|
177 |
-
175,114.0,114
|
178 |
-
176,200.0,200
|
179 |
-
177,200.0,200
|
180 |
-
178,200.0,200
|
181 |
-
179,200.0,200
|
182 |
-
180,110.0,110
|
183 |
-
181,128.0,128
|
184 |
-
182,107.0,107
|
185 |
-
183,192.0,192
|
186 |
-
184,106.0,106
|
187 |
-
185,32.0,32
|
188 |
-
186,107.0,107
|
189 |
-
187,129.0,129
|
190 |
-
188,122.0,122
|
191 |
-
189,126.0,126
|
192 |
-
190,120.0,120
|
193 |
-
191,127.0,127
|
194 |
-
192,132.0,132
|
195 |
-
193,128.0,128
|
196 |
-
194,142.0,142
|
197 |
-
195,137.0,137
|
198 |
-
196,125.0,125
|
199 |
-
197,118.0,118
|
200 |
-
198,158.0,158
|
201 |
-
199,144.0,144
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
CartPole-v1/Train_CartPole-v1_mp_PPO_20230401-223204/config.yaml
DELETED
@@ -1,32 +0,0 @@
|
|
1 |
-
general_cfg:
|
2 |
-
algo_name: PPO
|
3 |
-
device: cpu
|
4 |
-
env_name: CartPole-v1
|
5 |
-
eval_eps: 10
|
6 |
-
eval_per_episode: 5
|
7 |
-
load_checkpoint: false
|
8 |
-
load_path: Train_CartPole-v1_DQN_20221026-054757
|
9 |
-
max_steps: 200
|
10 |
-
mode: train
|
11 |
-
n_workers: 3
|
12 |
-
new_step_api: true
|
13 |
-
render: false
|
14 |
-
save_fig: true
|
15 |
-
seed: 1
|
16 |
-
show_fig: false
|
17 |
-
test_eps: 10
|
18 |
-
train_eps: 300
|
19 |
-
wrapper: null
|
20 |
-
algo_cfg:
|
21 |
-
actor_hidden_dim: 256
|
22 |
-
actor_lr: 0.0003
|
23 |
-
continuous: false
|
24 |
-
critic_hidden_dim: 256
|
25 |
-
critic_lr: 0.001
|
26 |
-
entropy_coef: 0.01
|
27 |
-
eps_clip: 0.2
|
28 |
-
gamma: 0.99
|
29 |
-
k_epochs: 4
|
30 |
-
ppo_type: clip
|
31 |
-
sgd_batch_size: 128
|
32 |
-
train_batch_size: 256
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
CartPole-v1/Train_CartPole-v1_mp_PPO_20230401-223204/logs/log.txt
DELETED
@@ -1,43 +0,0 @@
|
|
1 |
-
2023-04-01 22:32:04 - r - INFO: - Hyperparameters:
|
2 |
-
2023-04-01 22:32:04 - r - INFO: - ================================================================================
|
3 |
-
2023-04-01 22:32:04 - r - INFO: - Name Value Type
|
4 |
-
2023-04-01 22:32:04 - r - INFO: - env_name CartPole-v1 <class 'str'>
|
5 |
-
2023-04-01 22:32:04 - r - INFO: - new_step_api 1 <class 'bool'>
|
6 |
-
2023-04-01 22:32:04 - r - INFO: - wrapper None <class 'str'>
|
7 |
-
2023-04-01 22:32:04 - r - INFO: - render 0 <class 'bool'>
|
8 |
-
2023-04-01 22:32:04 - r - INFO: - algo_name PPO <class 'str'>
|
9 |
-
2023-04-01 22:32:04 - r - INFO: - mode train <class 'str'>
|
10 |
-
2023-04-01 22:32:04 - r - INFO: - seed 1 <class 'int'>
|
11 |
-
2023-04-01 22:32:04 - r - INFO: - device cpu <class 'str'>
|
12 |
-
2023-04-01 22:32:04 - r - INFO: - train_eps 300 <class 'int'>
|
13 |
-
2023-04-01 22:32:04 - r - INFO: - test_eps 10 <class 'int'>
|
14 |
-
2023-04-01 22:32:04 - r - INFO: - eval_eps 10 <class 'int'>
|
15 |
-
2023-04-01 22:32:04 - r - INFO: - eval_per_episode 5 <class 'int'>
|
16 |
-
2023-04-01 22:32:04 - r - INFO: - max_steps 200 <class 'int'>
|
17 |
-
2023-04-01 22:32:04 - r - INFO: - load_checkpoint 0 <class 'bool'>
|
18 |
-
2023-04-01 22:32:04 - r - INFO: - load_path Train_CartPole-v1_DQN_20221026-054757 <class 'str'>
|
19 |
-
2023-04-01 22:32:04 - r - INFO: - show_fig 0 <class 'bool'>
|
20 |
-
2023-04-01 22:32:04 - r - INFO: - save_fig 1 <class 'bool'>
|
21 |
-
2023-04-01 22:32:04 - r - INFO: - n_workers 3 <class 'int'>
|
22 |
-
2023-04-01 22:32:04 - r - INFO: - ppo_type clip <class 'str'>
|
23 |
-
2023-04-01 22:32:04 - r - INFO: - continuous 0 <class 'bool'>
|
24 |
-
2023-04-01 22:32:04 - r - INFO: - gamma 0.99 <class 'float'>
|
25 |
-
2023-04-01 22:32:04 - r - INFO: - k_epochs 4 <class 'int'>
|
26 |
-
2023-04-01 22:32:04 - r - INFO: - actor_lr 0.0003 <class 'float'>
|
27 |
-
2023-04-01 22:32:04 - r - INFO: - critic_lr 0.001 <class 'float'>
|
28 |
-
2023-04-01 22:32:04 - r - INFO: - eps_clip 0.2 <class 'float'>
|
29 |
-
2023-04-01 22:32:04 - r - INFO: - entropy_coef 0.01 <class 'float'>
|
30 |
-
2023-04-01 22:32:04 - r - INFO: - train_batch_size 256 <class 'int'>
|
31 |
-
2023-04-01 22:32:04 - r - INFO: - sgd_batch_size 128 <class 'int'>
|
32 |
-
2023-04-01 22:32:04 - r - INFO: - actor_hidden_dim 256 <class 'int'>
|
33 |
-
2023-04-01 22:32:04 - r - INFO: - critic_hidden_dim 256 <class 'int'>
|
34 |
-
2023-04-01 22:32:04 - r - INFO: - task_dir /home/dingli/joyrl_offline/tasks/Train_CartPole-v1_mp_PPO_20230401-223204 <class 'str'>
|
35 |
-
2023-04-01 22:32:04 - r - INFO: - res_dir /home/dingli/joyrl_offline/tasks/Train_CartPole-v1_mp_PPO_20230401-223204/results <class 'str'>
|
36 |
-
2023-04-01 22:32:04 - r - INFO: - log_dir /home/dingli/joyrl_offline/tasks/Train_CartPole-v1_mp_PPO_20230401-223204/logs <class 'str'>
|
37 |
-
2023-04-01 22:32:04 - r - INFO: - traj_dir /home/dingli/joyrl_offline/tasks/Train_CartPole-v1_mp_PPO_20230401-223204/traj <class 'str'>
|
38 |
-
2023-04-01 22:32:04 - r - INFO: - tb_dir /home/dingli/joyrl_offline/tasks/Train_CartPole-v1_mp_PPO_20230401-223204/tb_logs <class 'str'>
|
39 |
-
2023-04-01 22:32:04 - r - INFO: - ================================================================================
|
40 |
-
2023-04-01 22:32:04 - r - INFO: - n_states: 4, n_actions: 2
|
41 |
-
2023-04-01 22:32:04 - r - INFO: - Start training!
|
42 |
-
2023-04-01 22:32:04 - r - INFO: - Env: CartPole-v1, Algorithm: PPO, Device: cpu
|
43 |
-
2023-04-01 22:33:16 - r - INFO: - Finish training!
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
CartPole-v1/Train_CartPole-v1_mp_PPO_20230401-223204/models/actor.pth
DELETED
@@ -1,3 +0,0 @@
|
|
1 |
-
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:f6a3e48d551bcba327ff4c5d3cc464a6a94b83eda543a54d231016e021e8cbd3
|
3 |
-
size 272151
|
|
|
|
|
|
|
|
CartPole-v1/Train_CartPole-v1_mp_PPO_20230401-223204/models/critic.pth
DELETED
@@ -1,3 +0,0 @@
|
|
1 |
-
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:a4db7aeb3805e1deb11428a34a600a40068a0f711986f38fdf9e0f9895f8a45c
|
3 |
-
size 271127
|
|
|
|
|
|
|
|
CartPole-v1/Train_CartPole-v1_mp_PPO_20230401-223204/results/learning_curve.png
DELETED
Binary file (86.7 kB)
|
|
CartPole-v1/Train_CartPole-v1_mp_PPO_20230401-223204/results/res.csv
DELETED
@@ -1,302 +0,0 @@
|
|
1 |
-
episodes,rewards
|
2 |
-
0,39.0
|
3 |
-
1,10.0
|
4 |
-
2,11.0
|
5 |
-
3,10.0
|
6 |
-
4,30.0
|
7 |
-
5,28.0
|
8 |
-
6,9.0
|
9 |
-
7,12.0
|
10 |
-
8,13.0
|
11 |
-
9,16.0
|
12 |
-
10,15.0
|
13 |
-
11,16.0
|
14 |
-
12,38.0
|
15 |
-
13,33.0
|
16 |
-
14,11.0
|
17 |
-
15,20.0
|
18 |
-
16,14.0
|
19 |
-
17,43.0
|
20 |
-
18,22.0
|
21 |
-
19,11.0
|
22 |
-
20,34.0
|
23 |
-
21,12.0
|
24 |
-
22,29.0
|
25 |
-
23,12.0
|
26 |
-
24,39.0
|
27 |
-
25,11.0
|
28 |
-
26,13.0
|
29 |
-
27,25.0
|
30 |
-
28,26.0
|
31 |
-
29,36.0
|
32 |
-
30,24.0
|
33 |
-
31,18.0
|
34 |
-
32,40.0
|
35 |
-
33,13.0
|
36 |
-
34,30.0
|
37 |
-
35,24.0
|
38 |
-
36,22.0
|
39 |
-
37,10.0
|
40 |
-
38,32.0
|
41 |
-
39,40.0
|
42 |
-
40,15.0
|
43 |
-
41,57.0
|
44 |
-
42,45.0
|
45 |
-
43,47.0
|
46 |
-
44,38.0
|
47 |
-
45,53.0
|
48 |
-
46,13.0
|
49 |
-
47,26.0
|
50 |
-
48,19.0
|
51 |
-
49,16.0
|
52 |
-
50,30.0
|
53 |
-
51,12.0
|
54 |
-
52,18.0
|
55 |
-
53,62.0
|
56 |
-
54,16.0
|
57 |
-
55,46.0
|
58 |
-
56,40.0
|
59 |
-
57,25.0
|
60 |
-
58,41.0
|
61 |
-
59,82.0
|
62 |
-
60,18.0
|
63 |
-
61,29.0
|
64 |
-
62,43.0
|
65 |
-
63,26.0
|
66 |
-
64,28.0
|
67 |
-
65,17.0
|
68 |
-
66,48.0
|
69 |
-
67,31.0
|
70 |
-
68,35.0
|
71 |
-
69,31.0
|
72 |
-
70,43.0
|
73 |
-
71,49.0
|
74 |
-
72,25.0
|
75 |
-
73,65.0
|
76 |
-
74,16.0
|
77 |
-
75,37.0
|
78 |
-
76,44.0
|
79 |
-
77,37.0
|
80 |
-
78,16.0
|
81 |
-
79,73.0
|
82 |
-
80,23.0
|
83 |
-
81,34.0
|
84 |
-
82,20.0
|
85 |
-
83,55.0
|
86 |
-
84,18.0
|
87 |
-
85,16.0
|
88 |
-
86,56.0
|
89 |
-
87,22.0
|
90 |
-
88,40.0
|
91 |
-
89,40.0
|
92 |
-
90,63.0
|
93 |
-
91,52.0
|
94 |
-
92,43.0
|
95 |
-
93,38.0
|
96 |
-
94,34.0
|
97 |
-
95,61.0
|
98 |
-
96,34.0
|
99 |
-
97,32.0
|
100 |
-
98,47.0
|
101 |
-
99,21.0
|
102 |
-
100,24.0
|
103 |
-
101,54.0
|
104 |
-
102,52.0
|
105 |
-
103,13.0
|
106 |
-
104,26.0
|
107 |
-
105,64.0
|
108 |
-
106,63.0
|
109 |
-
107,88.0
|
110 |
-
108,65.0
|
111 |
-
109,114.0
|
112 |
-
110,84.0
|
113 |
-
111,58.0
|
114 |
-
112,148.0
|
115 |
-
113,32.0
|
116 |
-
114,85.0
|
117 |
-
115,67.0
|
118 |
-
116,45.0
|
119 |
-
117,170.0
|
120 |
-
118,137.0
|
121 |
-
119,159.0
|
122 |
-
120,60.0
|
123 |
-
121,200.0
|
124 |
-
122,78.0
|
125 |
-
123,58.0
|
126 |
-
124,60.0
|
127 |
-
125,191.0
|
128 |
-
126,61.0
|
129 |
-
127,99.0
|
130 |
-
128,26.0
|
131 |
-
129,79.0
|
132 |
-
130,109.0
|
133 |
-
131,173.0
|
134 |
-
132,61.0
|
135 |
-
133,92.0
|
136 |
-
134,175.0
|
137 |
-
135,106.0
|
138 |
-
136,32.0
|
139 |
-
137,84.0
|
140 |
-
138,105.0
|
141 |
-
139,78.0
|
142 |
-
140,78.0
|
143 |
-
141,60.0
|
144 |
-
142,93.0
|
145 |
-
143,69.0
|
146 |
-
144,85.0
|
147 |
-
145,106.0
|
148 |
-
146,148.0
|
149 |
-
147,93.0
|
150 |
-
148,119.0
|
151 |
-
149,139.0
|
152 |
-
150,124.0
|
153 |
-
151,173.0
|
154 |
-
152,200.0
|
155 |
-
153,26.0
|
156 |
-
154,151.0
|
157 |
-
155,122.0
|
158 |
-
156,182.0
|
159 |
-
157,62.0
|
160 |
-
158,38.0
|
161 |
-
159,145.0
|
162 |
-
160,75.0
|
163 |
-
161,78.0
|
164 |
-
162,200.0
|
165 |
-
163,189.0
|
166 |
-
164,94.0
|
167 |
-
165,122.0
|
168 |
-
166,95.0
|
169 |
-
167,200.0
|
170 |
-
168,154.0
|
171 |
-
169,200.0
|
172 |
-
170,200.0
|
173 |
-
171,125.0
|
174 |
-
172,200.0
|
175 |
-
173,187.0
|
176 |
-
174,165.0
|
177 |
-
175,154.0
|
178 |
-
176,200.0
|
179 |
-
177,200.0
|
180 |
-
178,180.0
|
181 |
-
179,200.0
|
182 |
-
180,191.0
|
183 |
-
181,168.0
|
184 |
-
182,200.0
|
185 |
-
183,78.0
|
186 |
-
184,124.0
|
187 |
-
185,137.0
|
188 |
-
186,165.0
|
189 |
-
187,77.0
|
190 |
-
188,144.0
|
191 |
-
189,200.0
|
192 |
-
190,74.0
|
193 |
-
191,200.0
|
194 |
-
192,99.0
|
195 |
-
193,183.0
|
196 |
-
194,200.0
|
197 |
-
195,49.0
|
198 |
-
196,127.0
|
199 |
-
197,148.0
|
200 |
-
198,188.0
|
201 |
-
199,200.0
|
202 |
-
200,113.0
|
203 |
-
201,200.0
|
204 |
-
202,146.0
|
205 |
-
203,130.0
|
206 |
-
204,128.0
|
207 |
-
205,200.0
|
208 |
-
206,35.0
|
209 |
-
207,58.0
|
210 |
-
208,130.0
|
211 |
-
209,66.0
|
212 |
-
210,62.0
|
213 |
-
211,163.0
|
214 |
-
212,147.0
|
215 |
-
213,106.0
|
216 |
-
214,200.0
|
217 |
-
215,114.0
|
218 |
-
216,147.0
|
219 |
-
217,101.0
|
220 |
-
218,175.0
|
221 |
-
219,88.0
|
222 |
-
220,144.0
|
223 |
-
221,200.0
|
224 |
-
222,200.0
|
225 |
-
223,199.0
|
226 |
-
224,200.0
|
227 |
-
225,199.0
|
228 |
-
226,142.0
|
229 |
-
227,200.0
|
230 |
-
228,200.0
|
231 |
-
229,164.0
|
232 |
-
230,200.0
|
233 |
-
231,181.0
|
234 |
-
232,200.0
|
235 |
-
233,200.0
|
236 |
-
234,200.0
|
237 |
-
235,105.0
|
238 |
-
236,200.0
|
239 |
-
237,200.0
|
240 |
-
238,200.0
|
241 |
-
239,169.0
|
242 |
-
240,200.0
|
243 |
-
241,179.0
|
244 |
-
242,108.0
|
245 |
-
243,177.0
|
246 |
-
244,132.0
|
247 |
-
245,116.0
|
248 |
-
246,75.0
|
249 |
-
247,161.0
|
250 |
-
248,192.0
|
251 |
-
249,184.0
|
252 |
-
250,200.0
|
253 |
-
251,115.0
|
254 |
-
252,200.0
|
255 |
-
253,200.0
|
256 |
-
254,129.0
|
257 |
-
255,40.0
|
258 |
-
256,80.0
|
259 |
-
257,190.0
|
260 |
-
258,200.0
|
261 |
-
259,200.0
|
262 |
-
260,151.0
|
263 |
-
261,138.0
|
264 |
-
262,187.0
|
265 |
-
263,163.0
|
266 |
-
264,101.0
|
267 |
-
265,116.0
|
268 |
-
266,147.0
|
269 |
-
267,80.0
|
270 |
-
268,200.0
|
271 |
-
269,104.0
|
272 |
-
270,200.0
|
273 |
-
271,176.0
|
274 |
-
272,157.0
|
275 |
-
273,200.0
|
276 |
-
274,200.0
|
277 |
-
275,125.0
|
278 |
-
276,77.0
|
279 |
-
277,200.0
|
280 |
-
278,200.0
|
281 |
-
279,200.0
|
282 |
-
280,200.0
|
283 |
-
281,200.0
|
284 |
-
282,174.0
|
285 |
-
283,200.0
|
286 |
-
284,200.0
|
287 |
-
285,25.0
|
288 |
-
286,150.0
|
289 |
-
287,200.0
|
290 |
-
288,200.0
|
291 |
-
289,200.0
|
292 |
-
290,163.0
|
293 |
-
291,200.0
|
294 |
-
292,200.0
|
295 |
-
293,200.0
|
296 |
-
294,200.0
|
297 |
-
295,200.0
|
298 |
-
296,200.0
|
299 |
-
297,123.0
|
300 |
-
298,154.0
|
301 |
-
299,200.0
|
302 |
-
300,200.0
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
CartPole-v1/Train_single_CartPole-v1_PPO_20230517-134440/config.yaml
ADDED
@@ -0,0 +1,65 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
general_cfg:
|
2 |
+
algo_name: PPO
|
3 |
+
collect_traj: false
|
4 |
+
device: cpu
|
5 |
+
env_name: gym
|
6 |
+
load_checkpoint: false
|
7 |
+
load_model_step: best
|
8 |
+
load_path: Train_single_CartPole-v1_DQN_20230515-211721
|
9 |
+
max_episode: 200
|
10 |
+
max_step: 200
|
11 |
+
mode: train
|
12 |
+
model_save_fre: 10
|
13 |
+
mp_backend: single
|
14 |
+
n_workers: 2
|
15 |
+
online_eval: true
|
16 |
+
online_eval_episode: 10
|
17 |
+
seed: 1
|
18 |
+
algo_cfg:
|
19 |
+
actor_hidden_dim: 256
|
20 |
+
actor_layers:
|
21 |
+
- activation: relu
|
22 |
+
layer_dim:
|
23 |
+
- 256
|
24 |
+
layer_type: linear
|
25 |
+
- activation: relu
|
26 |
+
layer_dim:
|
27 |
+
- 256
|
28 |
+
layer_type: linear
|
29 |
+
actor_lr: 0.0003
|
30 |
+
batch_size: 256
|
31 |
+
buffer_type: ONPOLICY_QUE
|
32 |
+
continuous: false
|
33 |
+
critic_hidden_dim: 256
|
34 |
+
critic_layers:
|
35 |
+
- activation: relu
|
36 |
+
layer_dim:
|
37 |
+
- 256
|
38 |
+
layer_type: linear
|
39 |
+
- activation: relu
|
40 |
+
layer_dim:
|
41 |
+
- 256
|
42 |
+
layer_type: linear
|
43 |
+
critic_loss_coef: 0.5
|
44 |
+
critic_lr: 0.001
|
45 |
+
entropy_coef: 0.01
|
46 |
+
eps_clip: 0.2
|
47 |
+
gamma: 0.99
|
48 |
+
independ_actor: true
|
49 |
+
k_epochs: 4
|
50 |
+
kl_alpha: 2
|
51 |
+
kl_beta: 1.5
|
52 |
+
kl_lambda: 0.5
|
53 |
+
kl_target: 0.1
|
54 |
+
lr: 0.0001
|
55 |
+
min_policy: 0
|
56 |
+
ppo_type: clip
|
57 |
+
sgd_batch_size: 128
|
58 |
+
share_optimizer: false
|
59 |
+
env_cfg:
|
60 |
+
id: CartPole-v1
|
61 |
+
ignore_params:
|
62 |
+
- wrapper
|
63 |
+
- ignore_params
|
64 |
+
render_mode: null
|
65 |
+
wrapper: null
|
CartPole-v1/Train_single_CartPole-v1_PPO_20230517-134440/logs/log.txt
ADDED
@@ -0,0 +1,270 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
2023-05-17 13:44:40 - SimpleLog - INFO: - General Configs:
|
2 |
+
2023-05-17 13:44:40 - SimpleLog - INFO: - ================================================================================
|
3 |
+
2023-05-17 13:44:40 - SimpleLog - INFO: - Name Value Type
|
4 |
+
2023-05-17 13:44:40 - SimpleLog - INFO: - env_name gym <class 'str'>
|
5 |
+
2023-05-17 13:44:40 - SimpleLog - INFO: - algo_name PPO <class 'str'>
|
6 |
+
2023-05-17 13:44:40 - SimpleLog - INFO: - mode train <class 'str'>
|
7 |
+
2023-05-17 13:44:40 - SimpleLog - INFO: - device cpu <class 'str'>
|
8 |
+
2023-05-17 13:44:40 - SimpleLog - INFO: - seed 1 <class 'int'>
|
9 |
+
2023-05-17 13:44:40 - SimpleLog - INFO: - max_episode 200 <class 'int'>
|
10 |
+
2023-05-17 13:44:40 - SimpleLog - INFO: - max_step 200 <class 'int'>
|
11 |
+
2023-05-17 13:44:40 - SimpleLog - INFO: - collect_traj 0 <class 'bool'>
|
12 |
+
2023-05-17 13:44:40 - SimpleLog - INFO: - mp_backend single <class 'str'>
|
13 |
+
2023-05-17 13:44:40 - SimpleLog - INFO: - n_workers 2 <class 'int'>
|
14 |
+
2023-05-17 13:44:40 - SimpleLog - INFO: - online_eval 1 <class 'bool'>
|
15 |
+
2023-05-17 13:44:40 - SimpleLog - INFO: - online_eval_episode 10 <class 'int'>
|
16 |
+
2023-05-17 13:44:40 - SimpleLog - INFO: - model_save_fre 10 <class 'int'>
|
17 |
+
2023-05-17 13:44:40 - SimpleLog - INFO: - load_checkpoint 0 <class 'bool'>
|
18 |
+
2023-05-17 13:44:40 - SimpleLog - INFO: - load_path Train_single_CartPole-v1_DQN_20230515-211721 <class 'str'>
|
19 |
+
2023-05-17 13:44:40 - SimpleLog - INFO: - load_model_step best <class 'str'>
|
20 |
+
2023-05-17 13:44:40 - SimpleLog - INFO: - ================================================================================
|
21 |
+
2023-05-17 13:44:40 - SimpleLog - INFO: - Algo Configs:
|
22 |
+
2023-05-17 13:44:40 - SimpleLog - INFO: - ================================================================================
|
23 |
+
2023-05-17 13:44:40 - SimpleLog - INFO: - Name Value Type
|
24 |
+
2023-05-17 13:44:40 - SimpleLog - INFO: - independ_actor 1 <class 'bool'>
|
25 |
+
2023-05-17 13:44:40 - SimpleLog - INFO: - share_optimizer 0 <class 'bool'>
|
26 |
+
2023-05-17 13:44:40 - SimpleLog - INFO: - ppo_type clip <class 'str'>
|
27 |
+
2023-05-17 13:44:40 - SimpleLog - INFO: - eps_clip 0.2 <class 'float'>
|
28 |
+
2023-05-17 13:44:40 - SimpleLog - INFO: - kl_target 0.1 <class 'float'>
|
29 |
+
2023-05-17 13:44:40 - SimpleLog - INFO: - kl_lambda 0.5 <class 'float'>
|
30 |
+
2023-05-17 13:44:40 - SimpleLog - INFO: - kl_beta 1.5 <class 'float'>
|
31 |
+
2023-05-17 13:44:40 - SimpleLog - INFO: - kl_alpha 2 <class 'int'>
|
32 |
+
2023-05-17 13:44:40 - SimpleLog - INFO: - continuous 0 <class 'bool'>
|
33 |
+
2023-05-17 13:44:40 - SimpleLog - INFO: - gamma 0.99 <class 'float'>
|
34 |
+
2023-05-17 13:44:40 - SimpleLog - INFO: - k_epochs 4 <class 'int'>
|
35 |
+
2023-05-17 13:44:40 - SimpleLog - INFO: - lr 0.0001 <class 'float'>
|
36 |
+
2023-05-17 13:44:40 - SimpleLog - INFO: - actor_lr 0.0003 <class 'float'>
|
37 |
+
2023-05-17 13:44:40 - SimpleLog - INFO: - critic_lr 0.001 <class 'float'>
|
38 |
+
2023-05-17 13:44:40 - SimpleLog - INFO: - critic_loss_coef 0.5 <class 'float'>
|
39 |
+
2023-05-17 13:44:40 - SimpleLog - INFO: - entropy_coef 0.01 <class 'float'>
|
40 |
+
2023-05-17 13:44:40 - SimpleLog - INFO: - batch_size 256 <class 'int'>
|
41 |
+
2023-05-17 13:44:40 - SimpleLog - INFO: - sgd_batch_size 128 <class 'int'>
|
42 |
+
2023-05-17 13:44:40 - SimpleLog - INFO: - actor_hidden_dim 256 <class 'int'>
|
43 |
+
2023-05-17 13:44:40 - SimpleLog - INFO: - critic_hidden_dim 256 <class 'int'>
|
44 |
+
2023-05-17 13:44:40 - SimpleLog - INFO: - min_policy 0 <class 'int'>
|
45 |
+
2023-05-17 13:44:40 - SimpleLog - INFO: - actor_layers [{'layer_type': 'linear', 'layer_dim': [256], 'activation': 'relu'}, {'layer_type': 'linear', 'layer_dim': [256], 'activation': 'relu'}] <class 'str'>
|
46 |
+
2023-05-17 13:44:40 - SimpleLog - INFO: - critic_layers [{'layer_type': 'linear', 'layer_dim': [256], 'activation': 'relu'}, {'layer_type': 'linear', 'layer_dim': [256], 'activation': 'relu'}] <class 'str'>
|
47 |
+
2023-05-17 13:44:40 - SimpleLog - INFO: - buffer_type ONPOLICY_QUE <class 'str'>
|
48 |
+
2023-05-17 13:44:40 - SimpleLog - INFO: - ================================================================================
|
49 |
+
2023-05-17 13:44:40 - SimpleLog - INFO: - Env Configs:
|
50 |
+
2023-05-17 13:44:40 - SimpleLog - INFO: - ================================================================================
|
51 |
+
2023-05-17 13:44:40 - SimpleLog - INFO: - Name Value Type
|
52 |
+
2023-05-17 13:44:40 - SimpleLog - INFO: - id CartPole-v1 <class 'str'>
|
53 |
+
2023-05-17 13:44:40 - SimpleLog - INFO: - render_mode None <class 'str'>
|
54 |
+
2023-05-17 13:44:40 - SimpleLog - INFO: - wrapper None <class 'str'>
|
55 |
+
2023-05-17 13:44:40 - SimpleLog - INFO: - ignore_params ['wrapper', 'ignore_params'] <class 'str'>
|
56 |
+
2023-05-17 13:44:40 - SimpleLog - INFO: - ================================================================================
|
57 |
+
2023-05-17 13:44:40 - SimpleLog - INFO: - obs_space: Box([-4.8000002e+00 -3.4028235e+38 -4.1887903e-01 -3.4028235e+38], [4.8000002e+00 3.4028235e+38 4.1887903e-01 3.4028235e+38], (4,), float32), n_actions: Discrete(2)
|
58 |
+
2023-05-17 13:44:40 - SimpleLog - INFO: - Start training!
|
59 |
+
2023-05-17 13:44:40 - SimpleLog - INFO: - episode: 0, ep_reward: 13.0, ep_step: 13
|
60 |
+
2023-05-17 13:44:40 - SimpleLog - INFO: - episode: 1, ep_reward: 30.0, ep_step: 30
|
61 |
+
2023-05-17 13:44:40 - SimpleLog - INFO: - episode: 2, ep_reward: 15.0, ep_step: 15
|
62 |
+
2023-05-17 13:44:40 - SimpleLog - INFO: - episode: 3, ep_reward: 10.0, ep_step: 10
|
63 |
+
2023-05-17 13:44:40 - SimpleLog - INFO: - episode: 4, ep_reward: 38.0, ep_step: 38
|
64 |
+
2023-05-17 13:44:40 - SimpleLog - INFO: - episode: 5, ep_reward: 18.0, ep_step: 18
|
65 |
+
2023-05-17 13:44:41 - SimpleLog - INFO: - episode: 6, ep_reward: 20.0, ep_step: 20
|
66 |
+
2023-05-17 13:44:41 - SimpleLog - INFO: - episode: 7, ep_reward: 26.0, ep_step: 26
|
67 |
+
2023-05-17 13:44:41 - SimpleLog - INFO: - episode: 8, ep_reward: 9.0, ep_step: 9
|
68 |
+
2023-05-17 13:44:41 - SimpleLog - INFO: - episode: 9, ep_reward: 12.0, ep_step: 12
|
69 |
+
2023-05-17 13:44:41 - SimpleLog - INFO: - episode: 10, ep_reward: 47.0, ep_step: 47
|
70 |
+
2023-05-17 13:44:41 - SimpleLog - INFO: - episode: 11, ep_reward: 21.0, ep_step: 21
|
71 |
+
2023-05-17 13:44:41 - SimpleLog - INFO: - episode: 12, ep_reward: 27.0, ep_step: 27
|
72 |
+
2023-05-17 13:44:41 - SimpleLog - INFO: - episode: 13, ep_reward: 12.0, ep_step: 12
|
73 |
+
2023-05-17 13:44:41 - SimpleLog - INFO: - episode: 14, ep_reward: 37.0, ep_step: 37
|
74 |
+
2023-05-17 13:44:41 - SimpleLog - INFO: - episode: 15, ep_reward: 20.0, ep_step: 20
|
75 |
+
2023-05-17 13:44:41 - SimpleLog - INFO: - episode: 16, ep_reward: 36.0, ep_step: 36
|
76 |
+
2023-05-17 13:44:41 - SimpleLog - INFO: - episode: 17, ep_reward: 21.0, ep_step: 21
|
77 |
+
2023-05-17 13:44:41 - SimpleLog - INFO: - episode: 18, ep_reward: 40.0, ep_step: 40
|
78 |
+
2023-05-17 13:44:41 - SimpleLog - INFO: - episode: 19, ep_reward: 15.0, ep_step: 15
|
79 |
+
2023-05-17 13:44:41 - SimpleLog - INFO: - episode: 20, ep_reward: 14.0, ep_step: 14
|
80 |
+
2023-05-17 13:44:41 - SimpleLog - INFO: - episode: 21, ep_reward: 89.0, ep_step: 89
|
81 |
+
2023-05-17 13:44:41 - SimpleLog - INFO: - episode: 22, ep_reward: 27.0, ep_step: 27
|
82 |
+
2023-05-17 13:44:41 - SimpleLog - INFO: - episode: 23, ep_reward: 38.0, ep_step: 38
|
83 |
+
2023-05-17 13:44:41 - SimpleLog - INFO: - episode: 24, ep_reward: 29.0, ep_step: 29
|
84 |
+
2023-05-17 13:44:41 - SimpleLog - INFO: - episode: 25, ep_reward: 20.0, ep_step: 20
|
85 |
+
2023-05-17 13:44:41 - SimpleLog - INFO: - episode: 26, ep_reward: 50.0, ep_step: 50
|
86 |
+
2023-05-17 13:44:41 - SimpleLog - INFO: - episode: 27, ep_reward: 44.0, ep_step: 44
|
87 |
+
2023-05-17 13:44:41 - SimpleLog - INFO: - episode: 28, ep_reward: 30.0, ep_step: 30
|
88 |
+
2023-05-17 13:44:41 - SimpleLog - INFO: - episode: 29, ep_reward: 24.0, ep_step: 24
|
89 |
+
2023-05-17 13:44:41 - SimpleLog - INFO: - episode: 30, ep_reward: 19.0, ep_step: 19
|
90 |
+
2023-05-17 13:44:41 - SimpleLog - INFO: - episode: 31, ep_reward: 19.0, ep_step: 19
|
91 |
+
2023-05-17 13:44:41 - SimpleLog - INFO: - episode: 32, ep_reward: 28.0, ep_step: 28
|
92 |
+
2023-05-17 13:44:41 - SimpleLog - INFO: - episode: 33, ep_reward: 59.0, ep_step: 59
|
93 |
+
2023-05-17 13:44:41 - SimpleLog - INFO: - episode: 34, ep_reward: 22.0, ep_step: 22
|
94 |
+
2023-05-17 13:44:41 - SimpleLog - INFO: - episode: 35, ep_reward: 33.0, ep_step: 33
|
95 |
+
2023-05-17 13:44:41 - SimpleLog - INFO: - episode: 36, ep_reward: 18.0, ep_step: 18
|
96 |
+
2023-05-17 13:44:41 - SimpleLog - INFO: - episode: 37, ep_reward: 13.0, ep_step: 13
|
97 |
+
2023-05-17 13:44:41 - SimpleLog - INFO: - episode: 38, ep_reward: 68.0, ep_step: 68
|
98 |
+
2023-05-17 13:44:41 - SimpleLog - INFO: - episode: 39, ep_reward: 20.0, ep_step: 20
|
99 |
+
2023-05-17 13:44:41 - SimpleLog - INFO: - episode: 40, ep_reward: 17.0, ep_step: 17
|
100 |
+
2023-05-17 13:44:41 - SimpleLog - INFO: - episode: 41, ep_reward: 32.0, ep_step: 32
|
101 |
+
2023-05-17 13:44:41 - SimpleLog - INFO: - episode: 42, ep_reward: 17.0, ep_step: 17
|
102 |
+
2023-05-17 13:44:41 - SimpleLog - INFO: - episode: 43, ep_reward: 35.0, ep_step: 35
|
103 |
+
2023-05-17 13:44:41 - SimpleLog - INFO: - episode: 44, ep_reward: 18.0, ep_step: 18
|
104 |
+
2023-05-17 13:44:41 - SimpleLog - INFO: - episode: 45, ep_reward: 23.0, ep_step: 23
|
105 |
+
2023-05-17 13:44:41 - SimpleLog - INFO: - episode: 46, ep_reward: 15.0, ep_step: 15
|
106 |
+
2023-05-17 13:44:41 - SimpleLog - INFO: - episode: 47, ep_reward: 19.0, ep_step: 19
|
107 |
+
2023-05-17 13:44:41 - SimpleLog - INFO: - episode: 48, ep_reward: 31.0, ep_step: 31
|
108 |
+
2023-05-17 13:44:41 - SimpleLog - INFO: - episode: 49, ep_reward: 39.0, ep_step: 39
|
109 |
+
2023-05-17 13:44:41 - SimpleLog - INFO: - episode: 50, ep_reward: 26.0, ep_step: 26
|
110 |
+
2023-05-17 13:44:41 - SimpleLog - INFO: - episode: 51, ep_reward: 82.0, ep_step: 82
|
111 |
+
2023-05-17 13:44:42 - SimpleLog - INFO: - episode: 52, ep_reward: 80.0, ep_step: 80
|
112 |
+
2023-05-17 13:44:42 - SimpleLog - INFO: - episode: 53, ep_reward: 20.0, ep_step: 20
|
113 |
+
2023-05-17 13:44:42 - SimpleLog - INFO: - episode: 54, ep_reward: 39.0, ep_step: 39
|
114 |
+
2023-05-17 13:44:42 - SimpleLog - INFO: - episode: 55, ep_reward: 74.0, ep_step: 74
|
115 |
+
2023-05-17 13:44:42 - SimpleLog - INFO: - episode: 56, ep_reward: 72.0, ep_step: 72
|
116 |
+
2023-05-17 13:44:42 - SimpleLog - INFO: - episode: 57, ep_reward: 28.0, ep_step: 28
|
117 |
+
2023-05-17 13:44:42 - SimpleLog - INFO: - episode: 58, ep_reward: 65.0, ep_step: 65
|
118 |
+
2023-05-17 13:44:42 - SimpleLog - INFO: - episode: 59, ep_reward: 54.0, ep_step: 54
|
119 |
+
2023-05-17 13:44:42 - SimpleLog - INFO: - episode: 60, ep_reward: 79.0, ep_step: 79
|
120 |
+
2023-05-17 13:44:42 - SimpleLog - INFO: - episode: 61, ep_reward: 55.0, ep_step: 55
|
121 |
+
2023-05-17 13:44:42 - SimpleLog - INFO: - episode: 62, ep_reward: 43.0, ep_step: 43
|
122 |
+
2023-05-17 13:44:42 - SimpleLog - INFO: - episode: 63, ep_reward: 84.0, ep_step: 84
|
123 |
+
2023-05-17 13:44:42 - SimpleLog - INFO: - episode: 64, ep_reward: 39.0, ep_step: 39
|
124 |
+
2023-05-17 13:44:42 - SimpleLog - INFO: - episode: 65, ep_reward: 115.0, ep_step: 115
|
125 |
+
2023-05-17 13:44:42 - SimpleLog - INFO: - episode: 66, ep_reward: 87.0, ep_step: 87
|
126 |
+
2023-05-17 13:44:42 - SimpleLog - INFO: - episode: 67, ep_reward: 71.0, ep_step: 71
|
127 |
+
2023-05-17 13:44:42 - SimpleLog - INFO: - episode: 68, ep_reward: 52.0, ep_step: 52
|
128 |
+
2023-05-17 13:44:42 - SimpleLog - INFO: - update_step: 10, online_eval_reward: 147.000
|
129 |
+
2023-05-17 13:44:42 - SimpleLog - INFO: - current update step obtain a better online_eval_reward: 147.000, save the best model!
|
130 |
+
2023-05-17 13:44:42 - SimpleLog - INFO: - episode: 69, ep_reward: 71.0, ep_step: 71
|
131 |
+
2023-05-17 13:44:43 - SimpleLog - INFO: - episode: 70, ep_reward: 48.0, ep_step: 48
|
132 |
+
2023-05-17 13:44:43 - SimpleLog - INFO: - episode: 71, ep_reward: 104.0, ep_step: 104
|
133 |
+
2023-05-17 13:44:43 - SimpleLog - INFO: - episode: 72, ep_reward: 30.0, ep_step: 30
|
134 |
+
2023-05-17 13:44:43 - SimpleLog - INFO: - episode: 73, ep_reward: 152.0, ep_step: 152
|
135 |
+
2023-05-17 13:44:43 - SimpleLog - INFO: - episode: 74, ep_reward: 51.0, ep_step: 51
|
136 |
+
2023-05-17 13:44:43 - SimpleLog - INFO: - episode: 75, ep_reward: 19.0, ep_step: 19
|
137 |
+
2023-05-17 13:44:43 - SimpleLog - INFO: - episode: 76, ep_reward: 44.0, ep_step: 44
|
138 |
+
2023-05-17 13:44:43 - SimpleLog - INFO: - episode: 77, ep_reward: 138.0, ep_step: 138
|
139 |
+
2023-05-17 13:44:43 - SimpleLog - INFO: - episode: 78, ep_reward: 26.0, ep_step: 26
|
140 |
+
2023-05-17 13:44:43 - SimpleLog - INFO: - episode: 79, ep_reward: 53.0, ep_step: 53
|
141 |
+
2023-05-17 13:44:43 - SimpleLog - INFO: - episode: 80, ep_reward: 108.0, ep_step: 108
|
142 |
+
2023-05-17 13:44:43 - SimpleLog - INFO: - episode: 81, ep_reward: 97.0, ep_step: 97
|
143 |
+
2023-05-17 13:44:43 - SimpleLog - INFO: - episode: 82, ep_reward: 87.0, ep_step: 87
|
144 |
+
2023-05-17 13:44:43 - SimpleLog - INFO: - episode: 83, ep_reward: 51.0, ep_step: 51
|
145 |
+
2023-05-17 13:44:43 - SimpleLog - INFO: - episode: 84, ep_reward: 101.0, ep_step: 101
|
146 |
+
2023-05-17 13:44:43 - SimpleLog - INFO: - episode: 85, ep_reward: 32.0, ep_step: 32
|
147 |
+
2023-05-17 13:44:43 - SimpleLog - INFO: - episode: 86, ep_reward: 76.0, ep_step: 76
|
148 |
+
2023-05-17 13:44:43 - SimpleLog - INFO: - episode: 87, ep_reward: 75.0, ep_step: 75
|
149 |
+
2023-05-17 13:44:43 - SimpleLog - INFO: - episode: 88, ep_reward: 44.0, ep_step: 44
|
150 |
+
2023-05-17 13:44:43 - SimpleLog - INFO: - episode: 89, ep_reward: 200.0, ep_step: 200
|
151 |
+
2023-05-17 13:44:44 - SimpleLog - INFO: - episode: 90, ep_reward: 126.0, ep_step: 126
|
152 |
+
2023-05-17 13:44:44 - SimpleLog - INFO: - episode: 91, ep_reward: 115.0, ep_step: 115
|
153 |
+
2023-05-17 13:44:44 - SimpleLog - INFO: - episode: 92, ep_reward: 29.0, ep_step: 29
|
154 |
+
2023-05-17 13:44:44 - SimpleLog - INFO: - episode: 93, ep_reward: 152.0, ep_step: 152
|
155 |
+
2023-05-17 13:44:44 - SimpleLog - INFO: - episode: 94, ep_reward: 78.0, ep_step: 78
|
156 |
+
2023-05-17 13:44:44 - SimpleLog - INFO: - episode: 95, ep_reward: 120.0, ep_step: 120
|
157 |
+
2023-05-17 13:44:44 - SimpleLog - INFO: - episode: 96, ep_reward: 140.0, ep_step: 140
|
158 |
+
2023-05-17 13:44:44 - SimpleLog - INFO: - episode: 97, ep_reward: 128.0, ep_step: 128
|
159 |
+
2023-05-17 13:44:44 - SimpleLog - INFO: - update_step: 20, online_eval_reward: 129.000
|
160 |
+
2023-05-17 13:44:44 - SimpleLog - INFO: - episode: 98, ep_reward: 102.0, ep_step: 102
|
161 |
+
2023-05-17 13:44:44 - SimpleLog - INFO: - episode: 99, ep_reward: 87.0, ep_step: 87
|
162 |
+
2023-05-17 13:44:45 - SimpleLog - INFO: - episode: 100, ep_reward: 107.0, ep_step: 107
|
163 |
+
2023-05-17 13:44:45 - SimpleLog - INFO: - episode: 101, ep_reward: 66.0, ep_step: 66
|
164 |
+
2023-05-17 13:44:45 - SimpleLog - INFO: - episode: 102, ep_reward: 192.0, ep_step: 192
|
165 |
+
2023-05-17 13:44:45 - SimpleLog - INFO: - episode: 103, ep_reward: 125.0, ep_step: 125
|
166 |
+
2023-05-17 13:44:45 - SimpleLog - INFO: - episode: 104, ep_reward: 113.0, ep_step: 113
|
167 |
+
2023-05-17 13:44:45 - SimpleLog - INFO: - episode: 105, ep_reward: 138.0, ep_step: 138
|
168 |
+
2023-05-17 13:44:45 - SimpleLog - INFO: - episode: 106, ep_reward: 112.0, ep_step: 112
|
169 |
+
2023-05-17 13:44:45 - SimpleLog - INFO: - episode: 107, ep_reward: 164.0, ep_step: 164
|
170 |
+
2023-05-17 13:44:45 - SimpleLog - INFO: - episode: 108, ep_reward: 108.0, ep_step: 108
|
171 |
+
2023-05-17 13:44:45 - SimpleLog - INFO: - episode: 109, ep_reward: 28.0, ep_step: 28
|
172 |
+
2023-05-17 13:44:45 - SimpleLog - INFO: - episode: 110, ep_reward: 200.0, ep_step: 200
|
173 |
+
2023-05-17 13:44:45 - SimpleLog - INFO: - episode: 111, ep_reward: 200.0, ep_step: 200
|
174 |
+
2023-05-17 13:44:46 - SimpleLog - INFO: - episode: 112, ep_reward: 200.0, ep_step: 200
|
175 |
+
2023-05-17 13:44:46 - SimpleLog - INFO: - episode: 113, ep_reward: 200.0, ep_step: 200
|
176 |
+
2023-05-17 13:44:46 - SimpleLog - INFO: - episode: 114, ep_reward: 151.0, ep_step: 151
|
177 |
+
2023-05-17 13:44:46 - SimpleLog - INFO: - episode: 115, ep_reward: 107.0, ep_step: 107
|
178 |
+
2023-05-17 13:44:46 - SimpleLog - INFO: - episode: 116, ep_reward: 66.0, ep_step: 66
|
179 |
+
2023-05-17 13:44:46 - SimpleLog - INFO: - episode: 117, ep_reward: 94.0, ep_step: 94
|
180 |
+
2023-05-17 13:44:46 - SimpleLog - INFO: - episode: 118, ep_reward: 25.0, ep_step: 25
|
181 |
+
2023-05-17 13:44:46 - SimpleLog - INFO: - episode: 119, ep_reward: 47.0, ep_step: 47
|
182 |
+
2023-05-17 13:44:46 - SimpleLog - INFO: - update_step: 30, online_eval_reward: 155.000
|
183 |
+
2023-05-17 13:44:46 - SimpleLog - INFO: - current update step obtain a better online_eval_reward: 155.000, save the best model!
|
184 |
+
2023-05-17 13:44:46 - SimpleLog - INFO: - episode: 120, ep_reward: 136.0, ep_step: 136
|
185 |
+
2023-05-17 13:44:46 - SimpleLog - INFO: - episode: 121, ep_reward: 119.0, ep_step: 119
|
186 |
+
2023-05-17 13:44:47 - SimpleLog - INFO: - episode: 122, ep_reward: 136.0, ep_step: 136
|
187 |
+
2023-05-17 13:44:47 - SimpleLog - INFO: - episode: 123, ep_reward: 182.0, ep_step: 182
|
188 |
+
2023-05-17 13:44:47 - SimpleLog - INFO: - episode: 124, ep_reward: 99.0, ep_step: 99
|
189 |
+
2023-05-17 13:44:47 - SimpleLog - INFO: - episode: 125, ep_reward: 33.0, ep_step: 33
|
190 |
+
2023-05-17 13:44:47 - SimpleLog - INFO: - episode: 126, ep_reward: 161.0, ep_step: 161
|
191 |
+
2023-05-17 13:44:47 - SimpleLog - INFO: - episode: 127, ep_reward: 171.0, ep_step: 171
|
192 |
+
2023-05-17 13:44:47 - SimpleLog - INFO: - episode: 128, ep_reward: 172.0, ep_step: 172
|
193 |
+
2023-05-17 13:44:47 - SimpleLog - INFO: - episode: 129, ep_reward: 178.0, ep_step: 178
|
194 |
+
2023-05-17 13:44:47 - SimpleLog - INFO: - episode: 130, ep_reward: 167.0, ep_step: 167
|
195 |
+
2023-05-17 13:44:47 - SimpleLog - INFO: - episode: 131, ep_reward: 149.0, ep_step: 149
|
196 |
+
2023-05-17 13:44:47 - SimpleLog - INFO: - episode: 132, ep_reward: 127.0, ep_step: 127
|
197 |
+
2023-05-17 13:44:47 - SimpleLog - INFO: - episode: 133, ep_reward: 112.0, ep_step: 112
|
198 |
+
2023-05-17 13:44:48 - SimpleLog - INFO: - episode: 134, ep_reward: 52.0, ep_step: 52
|
199 |
+
2023-05-17 13:44:48 - SimpleLog - INFO: - episode: 135, ep_reward: 48.0, ep_step: 48
|
200 |
+
2023-05-17 13:44:48 - SimpleLog - INFO: - episode: 136, ep_reward: 110.0, ep_step: 110
|
201 |
+
2023-05-17 13:44:48 - SimpleLog - INFO: - episode: 137, ep_reward: 170.0, ep_step: 170
|
202 |
+
2023-05-17 13:44:48 - SimpleLog - INFO: - episode: 138, ep_reward: 147.0, ep_step: 147
|
203 |
+
2023-05-17 13:44:48 - SimpleLog - INFO: - update_step: 40, online_eval_reward: 200.000
|
204 |
+
2023-05-17 13:44:48 - SimpleLog - INFO: - current update step obtain a better online_eval_reward: 200.000, save the best model!
|
205 |
+
2023-05-17 13:44:48 - SimpleLog - INFO: - episode: 139, ep_reward: 127.0, ep_step: 127
|
206 |
+
2023-05-17 13:44:48 - SimpleLog - INFO: - episode: 140, ep_reward: 164.0, ep_step: 164
|
207 |
+
2023-05-17 13:44:49 - SimpleLog - INFO: - episode: 141, ep_reward: 200.0, ep_step: 200
|
208 |
+
2023-05-17 13:44:49 - SimpleLog - INFO: - episode: 142, ep_reward: 103.0, ep_step: 103
|
209 |
+
2023-05-17 13:44:49 - SimpleLog - INFO: - episode: 143, ep_reward: 200.0, ep_step: 200
|
210 |
+
2023-05-17 13:44:49 - SimpleLog - INFO: - episode: 144, ep_reward: 200.0, ep_step: 200
|
211 |
+
2023-05-17 13:44:49 - SimpleLog - INFO: - episode: 145, ep_reward: 147.0, ep_step: 147
|
212 |
+
2023-05-17 13:44:49 - SimpleLog - INFO: - episode: 146, ep_reward: 200.0, ep_step: 200
|
213 |
+
2023-05-17 13:44:49 - SimpleLog - INFO: - episode: 147, ep_reward: 157.0, ep_step: 157
|
214 |
+
2023-05-17 13:44:49 - SimpleLog - INFO: - episode: 148, ep_reward: 200.0, ep_step: 200
|
215 |
+
2023-05-17 13:44:49 - SimpleLog - INFO: - episode: 149, ep_reward: 163.0, ep_step: 163
|
216 |
+
2023-05-17 13:44:50 - SimpleLog - INFO: - episode: 150, ep_reward: 200.0, ep_step: 200
|
217 |
+
2023-05-17 13:44:50 - SimpleLog - INFO: - episode: 151, ep_reward: 200.0, ep_step: 200
|
218 |
+
2023-05-17 13:44:50 - SimpleLog - INFO: - episode: 152, ep_reward: 146.0, ep_step: 146
|
219 |
+
2023-05-17 13:44:50 - SimpleLog - INFO: - episode: 153, ep_reward: 118.0, ep_step: 118
|
220 |
+
2023-05-17 13:44:50 - SimpleLog - INFO: - update_step: 50, online_eval_reward: 200.000
|
221 |
+
2023-05-17 13:44:50 - SimpleLog - INFO: - episode: 154, ep_reward: 191.0, ep_step: 191
|
222 |
+
2023-05-17 13:44:50 - SimpleLog - INFO: - episode: 155, ep_reward: 200.0, ep_step: 200
|
223 |
+
2023-05-17 13:44:51 - SimpleLog - INFO: - episode: 156, ep_reward: 153.0, ep_step: 153
|
224 |
+
2023-05-17 13:44:51 - SimpleLog - INFO: - episode: 157, ep_reward: 200.0, ep_step: 200
|
225 |
+
2023-05-17 13:44:51 - SimpleLog - INFO: - episode: 158, ep_reward: 160.0, ep_step: 160
|
226 |
+
2023-05-17 13:44:51 - SimpleLog - INFO: - episode: 159, ep_reward: 137.0, ep_step: 137
|
227 |
+
2023-05-17 13:44:51 - SimpleLog - INFO: - episode: 160, ep_reward: 132.0, ep_step: 132
|
228 |
+
2023-05-17 13:44:51 - SimpleLog - INFO: - episode: 161, ep_reward: 147.0, ep_step: 147
|
229 |
+
2023-05-17 13:44:51 - SimpleLog - INFO: - episode: 162, ep_reward: 142.0, ep_step: 142
|
230 |
+
2023-05-17 13:44:51 - SimpleLog - INFO: - episode: 163, ep_reward: 133.0, ep_step: 133
|
231 |
+
2023-05-17 13:44:51 - SimpleLog - INFO: - episode: 164, ep_reward: 183.0, ep_step: 183
|
232 |
+
2023-05-17 13:44:51 - SimpleLog - INFO: - episode: 165, ep_reward: 161.0, ep_step: 161
|
233 |
+
2023-05-17 13:44:51 - SimpleLog - INFO: - episode: 166, ep_reward: 186.0, ep_step: 186
|
234 |
+
2023-05-17 13:44:52 - SimpleLog - INFO: - episode: 167, ep_reward: 200.0, ep_step: 200
|
235 |
+
2023-05-17 13:44:52 - SimpleLog - INFO: - episode: 168, ep_reward: 195.0, ep_step: 195
|
236 |
+
2023-05-17 13:44:52 - SimpleLog - INFO: - update_step: 60, online_eval_reward: 200.000
|
237 |
+
2023-05-17 13:44:52 - SimpleLog - INFO: - episode: 169, ep_reward: 200.0, ep_step: 200
|
238 |
+
2023-05-17 13:44:52 - SimpleLog - INFO: - episode: 170, ep_reward: 200.0, ep_step: 200
|
239 |
+
2023-05-17 13:44:52 - SimpleLog - INFO: - episode: 171, ep_reward: 200.0, ep_step: 200
|
240 |
+
2023-05-17 13:44:53 - SimpleLog - INFO: - episode: 172, ep_reward: 200.0, ep_step: 200
|
241 |
+
2023-05-17 13:44:53 - SimpleLog - INFO: - episode: 173, ep_reward: 200.0, ep_step: 200
|
242 |
+
2023-05-17 13:44:53 - SimpleLog - INFO: - episode: 174, ep_reward: 200.0, ep_step: 200
|
243 |
+
2023-05-17 13:44:53 - SimpleLog - INFO: - episode: 175, ep_reward: 153.0, ep_step: 153
|
244 |
+
2023-05-17 13:44:53 - SimpleLog - INFO: - episode: 176, ep_reward: 158.0, ep_step: 158
|
245 |
+
2023-05-17 13:44:53 - SimpleLog - INFO: - episode: 177, ep_reward: 53.0, ep_step: 53
|
246 |
+
2023-05-17 13:44:53 - SimpleLog - INFO: - episode: 178, ep_reward: 157.0, ep_step: 157
|
247 |
+
2023-05-17 13:44:53 - SimpleLog - INFO: - episode: 179, ep_reward: 133.0, ep_step: 133
|
248 |
+
2023-05-17 13:44:53 - SimpleLog - INFO: - episode: 180, ep_reward: 126.0, ep_step: 126
|
249 |
+
2023-05-17 13:44:53 - SimpleLog - INFO: - episode: 181, ep_reward: 147.0, ep_step: 147
|
250 |
+
2023-05-17 13:44:53 - SimpleLog - INFO: - episode: 182, ep_reward: 145.0, ep_step: 145
|
251 |
+
2023-05-17 13:44:53 - SimpleLog - INFO: - episode: 183, ep_reward: 32.0, ep_step: 32
|
252 |
+
2023-05-17 13:44:54 - SimpleLog - INFO: - episode: 184, ep_reward: 136.0, ep_step: 136
|
253 |
+
2023-05-17 13:44:54 - SimpleLog - INFO: - episode: 185, ep_reward: 153.0, ep_step: 153
|
254 |
+
2023-05-17 13:44:54 - SimpleLog - INFO: - update_step: 70, online_eval_reward: 200.000
|
255 |
+
2023-05-17 13:44:54 - SimpleLog - INFO: - episode: 186, ep_reward: 199.0, ep_step: 199
|
256 |
+
2023-05-17 13:44:54 - SimpleLog - INFO: - episode: 187, ep_reward: 200.0, ep_step: 200
|
257 |
+
2023-05-17 13:44:54 - SimpleLog - INFO: - episode: 188, ep_reward: 200.0, ep_step: 200
|
258 |
+
2023-05-17 13:44:54 - SimpleLog - INFO: - episode: 189, ep_reward: 193.0, ep_step: 193
|
259 |
+
2023-05-17 13:44:55 - SimpleLog - INFO: - episode: 190, ep_reward: 167.0, ep_step: 167
|
260 |
+
2023-05-17 13:44:55 - SimpleLog - INFO: - episode: 191, ep_reward: 200.0, ep_step: 200
|
261 |
+
2023-05-17 13:44:55 - SimpleLog - INFO: - episode: 192, ep_reward: 200.0, ep_step: 200
|
262 |
+
2023-05-17 13:44:55 - SimpleLog - INFO: - episode: 193, ep_reward: 200.0, ep_step: 200
|
263 |
+
2023-05-17 13:44:55 - SimpleLog - INFO: - episode: 194, ep_reward: 200.0, ep_step: 200
|
264 |
+
2023-05-17 13:44:55 - SimpleLog - INFO: - episode: 195, ep_reward: 200.0, ep_step: 200
|
265 |
+
2023-05-17 13:44:55 - SimpleLog - INFO: - episode: 196, ep_reward: 200.0, ep_step: 200
|
266 |
+
2023-05-17 13:44:55 - SimpleLog - INFO: - episode: 197, ep_reward: 200.0, ep_step: 200
|
267 |
+
2023-05-17 13:44:56 - SimpleLog - INFO: - episode: 198, ep_reward: 200.0, ep_step: 200
|
268 |
+
2023-05-17 13:44:56 - SimpleLog - INFO: - update_step: 80, online_eval_reward: 200.000
|
269 |
+
2023-05-17 13:44:56 - SimpleLog - INFO: - episode: 199, ep_reward: 187.0, ep_step: 187
|
270 |
+
2023-05-17 13:44:56 - SimpleLog - INFO: - Finish training! total time consumed: 15.62s
|
CartPole-v1/Train_single_CartPole-v1_PPO_20230517-134440/models/10
ADDED
Binary file (544 kB). View file
|
|
CartPole-v1/Train_single_CartPole-v1_PPO_20230517-134440/models/20
ADDED
Binary file (544 kB). View file
|
|
CartPole-v1/Train_single_CartPole-v1_PPO_20230517-134440/models/30
ADDED
Binary file (544 kB). View file
|
|
CartPole-v1/Train_single_CartPole-v1_PPO_20230517-134440/models/40
ADDED
Binary file (544 kB). View file
|
|
CartPole-v1/Train_single_CartPole-v1_PPO_20230517-134440/models/50
ADDED
Binary file (544 kB). View file
|
|
CartPole-v1/Train_single_CartPole-v1_PPO_20230517-134440/models/60
ADDED
Binary file (544 kB). View file
|
|
CartPole-v1/Train_single_CartPole-v1_PPO_20230517-134440/models/70
ADDED
Binary file (544 kB). View file
|
|
CartPole-v1/Train_single_CartPole-v1_PPO_20230517-134440/models/80
ADDED
Binary file (544 kB). View file
|
|
CartPole-v1/Train_single_CartPole-v1_PPO_20230517-134440/models/best
ADDED
Binary file (544 kB). View file
|
|
CartPole-v1/{Test_CartPole-v1_PPO-KL_20221217-204214/models/actor.pth → Train_single_CartPole-v1_PPO_20230517-134440/tb_logs/interact/events.out.tfevents.1684302280.JMac.local.60840.0}
RENAMED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
-
size
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:bb3386d3c6b92bb605ad7e8d7b3df8091705eb6f30172bb0663894b85308602d
|
3 |
+
size 20980
|
CartPole-v1/{Test_CartPole-v1_PPO-KL_20221217-204214/models/critic.pth → Train_single_CartPole-v1_PPO_20230517-134440/tb_logs/model/events.out.tfevents.1684302280.JMac.local.60840.1}
RENAMED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
-
size
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:41d25a9a4855f35bdfc28b0a6ac033431ec9e3076deb21795f21d721a1583fa9
|
3 |
+
size 12920
|