johnjim0816
committed on
Commit
•
c93cde9
1
Parent(s):
7e0d2ec
update Cartpole-v1 DuelingDQN
Browse files
This view is limited to 50 files because it contains too many changes.
See raw diff
- CartPole-v1/Test_CartPole-v1_DuelingDQN_20221122-125455/config.yaml +0 -41
- CartPole-v1/Test_CartPole-v1_DuelingDQN_20221122-125455/logs/log.txt +0 -14
- CartPole-v1/Test_CartPole-v1_DuelingDQN_20221122-125455/results/learning_curve.png +0 -0
- CartPole-v1/Test_CartPole-v1_DuelingDQN_20221122-125455/results/res.csv +0 -11
- CartPole-v1/Test_CartPole-v1_DuelingDQN_mp_20230407-171120/config.yaml +0 -47
- CartPole-v1/Test_CartPole-v1_DuelingDQN_mp_20230407-171120/logs/log.txt +0 -53
- CartPole-v1/Test_CartPole-v1_DuelingDQN_mp_20230407-171120/results/learning_curve.png +0 -0
- CartPole-v1/Test_CartPole-v1_DuelingDQN_mp_20230407-171120/results/res.csv +0 -11
- CartPole-v1/Test_CartPole-v1_DuelingDQN_ray_20230407-165208/config.yaml +0 -47
- CartPole-v1/Test_CartPole-v1_DuelingDQN_ray_20230407-165208/logs/log.txt +0 -53
- CartPole-v1/Test_CartPole-v1_DuelingDQN_ray_20230407-165208/results/learning_curve.png +0 -0
- CartPole-v1/Test_CartPole-v1_DuelingDQN_ray_20230407-165208/results/res.csv +0 -11
- CartPole-v1/Train_CartPole-v1_DuelingDQN_20221122-125403/config.yaml +0 -41
- CartPole-v1/Train_CartPole-v1_DuelingDQN_20221122-125403/logs/log.txt +0 -119
- CartPole-v1/Train_CartPole-v1_DuelingDQN_20221122-125403/results/learning_curve.png +0 -0
- CartPole-v1/Train_CartPole-v1_DuelingDQN_20221122-125403/results/res.csv +0 -101
- CartPole-v1/Train_CartPole-v1_DuelingDQN_mp_20230407-170853/logs/log.txt +0 -43
- CartPole-v1/Train_CartPole-v1_DuelingDQN_mp_20230407-170853/models/checkpoint.pt +0 -3
- CartPole-v1/Train_CartPole-v1_DuelingDQN_mp_20230407-170853/results/learning_curve.png +0 -0
- CartPole-v1/Train_CartPole-v1_DuelingDQN_mp_20230407-170853/results/res.csv +0 -202
- CartPole-v1/Train_CartPole-v1_DuelingDQN_ray_20230407-153236/logs/log.txt +0 -43
- CartPole-v1/Train_CartPole-v1_DuelingDQN_ray_20230407-153236/models/checkpoint.pt +0 -3
- CartPole-v1/Train_CartPole-v1_DuelingDQN_ray_20230407-153236/results/learning_curve.png +0 -0
- CartPole-v1/Train_CartPole-v1_DuelingDQN_ray_20230407-153236/results/res.csv +0 -401
- CartPole-v1/{Train_CartPole-v1_DuelingDQN_ray_20230407-153236 → Train_ray_CartPole-v1_DuelingDQN_20230517-224129}/config.yaml +19 -22
- CartPole-v1/Train_ray_CartPole-v1_DuelingDQN_20230517-224129/logs/log.txt +169 -0
- CartPole-v1/Train_ray_CartPole-v1_DuelingDQN_20230517-224129/models/1000 +0 -0
- CartPole-v1/Train_ray_CartPole-v1_DuelingDQN_20230517-224129/models/10000 +0 -0
- CartPole-v1/Train_ray_CartPole-v1_DuelingDQN_20230517-224129/models/10500 +0 -0
- CartPole-v1/Train_ray_CartPole-v1_DuelingDQN_20230517-224129/models/1500 +0 -0
- CartPole-v1/Train_ray_CartPole-v1_DuelingDQN_20230517-224129/models/2000 +0 -0
- CartPole-v1/Train_ray_CartPole-v1_DuelingDQN_20230517-224129/models/2500 +0 -0
- CartPole-v1/Train_ray_CartPole-v1_DuelingDQN_20230517-224129/models/3000 +0 -0
- CartPole-v1/Train_ray_CartPole-v1_DuelingDQN_20230517-224129/models/3500 +0 -0
- CartPole-v1/Train_ray_CartPole-v1_DuelingDQN_20230517-224129/models/4000 +0 -0
- CartPole-v1/Train_ray_CartPole-v1_DuelingDQN_20230517-224129/models/4500 +0 -0
- CartPole-v1/Train_ray_CartPole-v1_DuelingDQN_20230517-224129/models/500 +0 -0
- CartPole-v1/Train_ray_CartPole-v1_DuelingDQN_20230517-224129/models/5000 +0 -0
- CartPole-v1/Train_ray_CartPole-v1_DuelingDQN_20230517-224129/models/5500 +0 -0
- CartPole-v1/Train_ray_CartPole-v1_DuelingDQN_20230517-224129/models/6000 +0 -0
- CartPole-v1/Train_ray_CartPole-v1_DuelingDQN_20230517-224129/models/6500 +0 -0
- CartPole-v1/Train_ray_CartPole-v1_DuelingDQN_20230517-224129/models/7000 +0 -0
- CartPole-v1/Train_ray_CartPole-v1_DuelingDQN_20230517-224129/models/7500 +0 -0
- CartPole-v1/Train_ray_CartPole-v1_DuelingDQN_20230517-224129/models/8000 +0 -0
- CartPole-v1/Train_ray_CartPole-v1_DuelingDQN_20230517-224129/models/8500 +0 -0
- CartPole-v1/Train_ray_CartPole-v1_DuelingDQN_20230517-224129/models/9000 +0 -0
- CartPole-v1/Train_ray_CartPole-v1_DuelingDQN_20230517-224129/models/9500 +0 -0
- CartPole-v1/Train_ray_CartPole-v1_DuelingDQN_20230517-224129/models/best +0 -0
- CartPole-v1/{Test_CartPole-v1_DuelingDQN_mp_20230407-171120/models/checkpoint.pt → Train_ray_CartPole-v1_DuelingDQN_20230517-224129/tb_logs/interact/events.out.tfevents.1684334489.DESKTOP-H34HQIQ.80856.0} +2 -2
- CartPole-v1/{Test_CartPole-v1_DuelingDQN_ray_20230407-165208/models/checkpoint.pt → Train_ray_CartPole-v1_DuelingDQN_20230517-224129/tb_logs/interact/events.out.tfevents.1684334497.DESKTOP-H34HQIQ.84100.0} +2 -2
CartPole-v1/Test_CartPole-v1_DuelingDQN_20221122-125455/config.yaml
DELETED
@@ -1,41 +0,0 @@
|
|
1 |
-
general_cfg:
|
2 |
-
algo_name: DuelingDQN
|
3 |
-
device: cuda
|
4 |
-
env_name: CartPole-v1
|
5 |
-
eval_eps: 10
|
6 |
-
eval_per_episode: 5
|
7 |
-
load_checkpoint: true
|
8 |
-
load_path: Train_CartPole-v1_DuelingDQN_20221122-125403
|
9 |
-
max_steps: 200
|
10 |
-
mode: test
|
11 |
-
save_fig: true
|
12 |
-
seed: 1
|
13 |
-
show_fig: false
|
14 |
-
test_eps: 10
|
15 |
-
train_eps: 100
|
16 |
-
algo_cfg:
|
17 |
-
batch_size: 64
|
18 |
-
buffer_size: 100000
|
19 |
-
epsilon_decay: 500
|
20 |
-
epsilon_end: 0.01
|
21 |
-
epsilon_start: 0.95
|
22 |
-
gamma: 0.99
|
23 |
-
hidden_dim: 256
|
24 |
-
lr: 0.0001
|
25 |
-
target_update: 4
|
26 |
-
value_layers:
|
27 |
-
- activation: relu
|
28 |
-
layer_dim:
|
29 |
-
- n_states
|
30 |
-
- 256
|
31 |
-
layer_type: linear
|
32 |
-
- activation: relu
|
33 |
-
layer_dim:
|
34 |
-
- 256
|
35 |
-
- 256
|
36 |
-
layer_type: linear
|
37 |
-
- activation: none
|
38 |
-
layer_dim:
|
39 |
-
- 256
|
40 |
-
- n_actions
|
41 |
-
layer_type: linear
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
CartPole-v1/Test_CartPole-v1_DuelingDQN_20221122-125455/logs/log.txt
DELETED
@@ -1,14 +0,0 @@
|
|
1 |
-
2022-11-22 12:54:55 - r - INFO: - n_states: 4, n_actions: 2
|
2 |
-
2022-11-22 12:54:58 - r - INFO: - Start testing!
|
3 |
-
2022-11-22 12:54:58 - r - INFO: - Env: CartPole-v1, Algorithm: DuelingDQN, Device: cuda
|
4 |
-
2022-11-22 12:54:58 - r - INFO: - Episode: 1/10, Reward: 200.000, Step: 200
|
5 |
-
2022-11-22 12:54:58 - r - INFO: - Episode: 2/10, Reward: 200.000, Step: 200
|
6 |
-
2022-11-22 12:54:58 - r - INFO: - Episode: 3/10, Reward: 200.000, Step: 200
|
7 |
-
2022-11-22 12:54:58 - r - INFO: - Episode: 4/10, Reward: 200.000, Step: 200
|
8 |
-
2022-11-22 12:54:58 - r - INFO: - Episode: 5/10, Reward: 200.000, Step: 200
|
9 |
-
2022-11-22 12:54:58 - r - INFO: - Episode: 6/10, Reward: 200.000, Step: 200
|
10 |
-
2022-11-22 12:54:58 - r - INFO: - Episode: 7/10, Reward: 200.000, Step: 200
|
11 |
-
2022-11-22 12:54:58 - r - INFO: - Episode: 8/10, Reward: 200.000, Step: 200
|
12 |
-
2022-11-22 12:54:58 - r - INFO: - Episode: 9/10, Reward: 200.000, Step: 200
|
13 |
-
2022-11-22 12:54:58 - r - INFO: - Episode: 10/10, Reward: 200.000, Step: 200
|
14 |
-
2022-11-22 12:54:58 - r - INFO: - Finish testing!
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
CartPole-v1/Test_CartPole-v1_DuelingDQN_20221122-125455/results/learning_curve.png
DELETED
Binary file (27.6 kB)
|
|
CartPole-v1/Test_CartPole-v1_DuelingDQN_20221122-125455/results/res.csv
DELETED
@@ -1,11 +0,0 @@
|
|
1 |
-
episodes,rewards,steps
|
2 |
-
0,200.0,200
|
3 |
-
1,200.0,200
|
4 |
-
2,200.0,200
|
5 |
-
3,200.0,200
|
6 |
-
4,200.0,200
|
7 |
-
5,200.0,200
|
8 |
-
6,200.0,200
|
9 |
-
7,200.0,200
|
10 |
-
8,200.0,200
|
11 |
-
9,200.0,200
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
CartPole-v1/Test_CartPole-v1_DuelingDQN_mp_20230407-171120/config.yaml
DELETED
@@ -1,47 +0,0 @@
|
|
1 |
-
general_cfg:
|
2 |
-
algo_name: DuelingDQN
|
3 |
-
device: cpu
|
4 |
-
env_name: CartPole-v1
|
5 |
-
eval_eps: 10
|
6 |
-
eval_per_episode: 5
|
7 |
-
load_checkpoint: true
|
8 |
-
load_path: Train_CartPole-v1_DuelingDQN_mp_20230407-170853
|
9 |
-
max_steps: 200
|
10 |
-
mode: test
|
11 |
-
mp_backend: mp
|
12 |
-
n_workers: 1
|
13 |
-
new_step_api: true
|
14 |
-
render: false
|
15 |
-
render_mode: human
|
16 |
-
save_fig: true
|
17 |
-
seed: 1
|
18 |
-
show_fig: false
|
19 |
-
test_eps: 10
|
20 |
-
train_eps: 200
|
21 |
-
wrapper: null
|
22 |
-
algo_cfg:
|
23 |
-
batch_size: 64
|
24 |
-
buffer_size: 100000
|
25 |
-
epsilon_decay: 500
|
26 |
-
epsilon_end: 0.01
|
27 |
-
epsilon_start: 0.95
|
28 |
-
gamma: 0.99
|
29 |
-
hidden_dim: 256
|
30 |
-
lr: 0.0001
|
31 |
-
target_update: 4
|
32 |
-
value_layers:
|
33 |
-
- activation: relu
|
34 |
-
layer_dim:
|
35 |
-
- n_states
|
36 |
-
- 256
|
37 |
-
layer_type: linear
|
38 |
-
- activation: relu
|
39 |
-
layer_dim:
|
40 |
-
- 256
|
41 |
-
- 256
|
42 |
-
layer_type: linear
|
43 |
-
- activation: none
|
44 |
-
layer_dim:
|
45 |
-
- 256
|
46 |
-
- n_actions
|
47 |
-
layer_type: linear
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
CartPole-v1/Test_CartPole-v1_DuelingDQN_mp_20230407-171120/logs/log.txt
DELETED
@@ -1,53 +0,0 @@
|
|
1 |
-
2023-04-07 17:11:20 - r - INFO: - Hyperparameters:
|
2 |
-
2023-04-07 17:11:20 - r - INFO: - ================================================================================
|
3 |
-
2023-04-07 17:11:20 - r - INFO: - Name Value Type
|
4 |
-
2023-04-07 17:11:20 - r - INFO: - env_name CartPole-v1 <class 'str'>
|
5 |
-
2023-04-07 17:11:20 - r - INFO: - new_step_api 1 <class 'bool'>
|
6 |
-
2023-04-07 17:11:20 - r - INFO: - wrapper None <class 'str'>
|
7 |
-
2023-04-07 17:11:20 - r - INFO: - render 0 <class 'bool'>
|
8 |
-
2023-04-07 17:11:20 - r - INFO: - render_mode human <class 'str'>
|
9 |
-
2023-04-07 17:11:20 - r - INFO: - algo_name DuelingDQN <class 'str'>
|
10 |
-
2023-04-07 17:11:20 - r - INFO: - mode test <class 'str'>
|
11 |
-
2023-04-07 17:11:20 - r - INFO: - mp_backend mp <class 'str'>
|
12 |
-
2023-04-07 17:11:20 - r - INFO: - seed 1 <class 'int'>
|
13 |
-
2023-04-07 17:11:20 - r - INFO: - device cpu <class 'str'>
|
14 |
-
2023-04-07 17:11:20 - r - INFO: - train_eps 200 <class 'int'>
|
15 |
-
2023-04-07 17:11:20 - r - INFO: - test_eps 10 <class 'int'>
|
16 |
-
2023-04-07 17:11:20 - r - INFO: - eval_eps 10 <class 'int'>
|
17 |
-
2023-04-07 17:11:20 - r - INFO: - eval_per_episode 5 <class 'int'>
|
18 |
-
2023-04-07 17:11:20 - r - INFO: - max_steps 200 <class 'int'>
|
19 |
-
2023-04-07 17:11:20 - r - INFO: - load_checkpoint 1 <class 'bool'>
|
20 |
-
2023-04-07 17:11:20 - r - INFO: - load_path Train_CartPole-v1_DuelingDQN_mp_20230407-170853 <class 'str'>
|
21 |
-
2023-04-07 17:11:20 - r - INFO: - show_fig 0 <class 'bool'>
|
22 |
-
2023-04-07 17:11:20 - r - INFO: - save_fig 1 <class 'bool'>
|
23 |
-
2023-04-07 17:11:20 - r - INFO: - n_workers 1 <class 'int'>
|
24 |
-
2023-04-07 17:11:20 - r - INFO: - epsilon_start 0.95 <class 'float'>
|
25 |
-
2023-04-07 17:11:20 - r - INFO: - epsilon_end 0.01 <class 'float'>
|
26 |
-
2023-04-07 17:11:20 - r - INFO: - epsilon_decay 500 <class 'int'>
|
27 |
-
2023-04-07 17:11:20 - r - INFO: - gamma 0.99 <class 'float'>
|
28 |
-
2023-04-07 17:11:20 - r - INFO: - lr 0.0001 <class 'float'>
|
29 |
-
2023-04-07 17:11:20 - r - INFO: - buffer_size 100000 <class 'int'>
|
30 |
-
2023-04-07 17:11:20 - r - INFO: - batch_size 64 <class 'int'>
|
31 |
-
2023-04-07 17:11:20 - r - INFO: - target_update 4 <class 'int'>
|
32 |
-
2023-04-07 17:11:20 - r - INFO: - value_layers [{'layer_type': 'linear', 'layer_dim': ['n_states', 256], 'activation': 'relu'}, {'layer_type': 'linear', 'layer_dim': [256, 256], 'activation': 'relu'}, {'layer_type': 'linear', 'layer_dim': [256, 'n_actions'], 'activation': 'none'}] <class 'str'>
|
33 |
-
2023-04-07 17:11:20 - r - INFO: - hidden_dim 256 <class 'int'>
|
34 |
-
2023-04-07 17:11:20 - r - INFO: - task_dir /media/disk/gsc/joyrl-offline/tasks/Test_CartPole-v1_DuelingDQN_20230407-171120 <class 'str'>
|
35 |
-
2023-04-07 17:11:20 - r - INFO: - res_dir /media/disk/gsc/joyrl-offline/tasks/Test_CartPole-v1_DuelingDQN_20230407-171120/results <class 'str'>
|
36 |
-
2023-04-07 17:11:20 - r - INFO: - log_dir /media/disk/gsc/joyrl-offline/tasks/Test_CartPole-v1_DuelingDQN_20230407-171120/logs <class 'str'>
|
37 |
-
2023-04-07 17:11:20 - r - INFO: - traj_dir /media/disk/gsc/joyrl-offline/tasks/Test_CartPole-v1_DuelingDQN_20230407-171120/traj <class 'str'>
|
38 |
-
2023-04-07 17:11:20 - r - INFO: - video_dir /media/disk/gsc/joyrl-offline/tasks/Test_CartPole-v1_DuelingDQN_20230407-171120/videos <class 'str'>
|
39 |
-
2023-04-07 17:11:20 - r - INFO: - ================================================================================
|
40 |
-
2023-04-07 17:11:20 - r - INFO: - n_states: 4, n_actions: 2
|
41 |
-
2023-04-07 17:11:20 - r - INFO: - Start testing!
|
42 |
-
2023-04-07 17:11:20 - r - INFO: - Env: CartPole-v1, Algorithm: DuelingDQN, Device: cpu
|
43 |
-
2023-04-07 17:11:20 - r - INFO: - Episode: 1/10, Reward: 200.000, Step: 200
|
44 |
-
2023-04-07 17:11:21 - r - INFO: - Episode: 2/10, Reward: 200.000, Step: 200
|
45 |
-
2023-04-07 17:11:21 - r - INFO: - Episode: 3/10, Reward: 190.000, Step: 190
|
46 |
-
2023-04-07 17:11:21 - r - INFO: - Episode: 4/10, Reward: 200.000, Step: 200
|
47 |
-
2023-04-07 17:11:21 - r - INFO: - Episode: 5/10, Reward: 187.000, Step: 187
|
48 |
-
2023-04-07 17:11:21 - r - INFO: - Episode: 6/10, Reward: 182.000, Step: 182
|
49 |
-
2023-04-07 17:11:21 - r - INFO: - Episode: 7/10, Reward: 200.000, Step: 200
|
50 |
-
2023-04-07 17:11:21 - r - INFO: - Episode: 8/10, Reward: 200.000, Step: 200
|
51 |
-
2023-04-07 17:11:21 - r - INFO: - Episode: 9/10, Reward: 196.000, Step: 196
|
52 |
-
2023-04-07 17:11:21 - r - INFO: - Episode: 10/10, Reward: 200.000, Step: 200
|
53 |
-
2023-04-07 17:11:21 - r - INFO: - Finish testing!
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
CartPole-v1/Test_CartPole-v1_DuelingDQN_mp_20230407-171120/results/learning_curve.png
DELETED
Binary file (45.2 kB)
|
|
CartPole-v1/Test_CartPole-v1_DuelingDQN_mp_20230407-171120/results/res.csv
DELETED
@@ -1,11 +0,0 @@
|
|
1 |
-
episodes,rewards,steps
|
2 |
-
0,200.0,200
|
3 |
-
1,200.0,200
|
4 |
-
2,190.0,190
|
5 |
-
3,200.0,200
|
6 |
-
4,187.0,187
|
7 |
-
5,182.0,182
|
8 |
-
6,200.0,200
|
9 |
-
7,200.0,200
|
10 |
-
8,196.0,196
|
11 |
-
9,200.0,200
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
CartPole-v1/Test_CartPole-v1_DuelingDQN_ray_20230407-165208/config.yaml
DELETED
@@ -1,47 +0,0 @@
|
|
1 |
-
general_cfg:
|
2 |
-
algo_name: DuelingDQN
|
3 |
-
device: cpu
|
4 |
-
env_name: CartPole-v1
|
5 |
-
eval_eps: 10
|
6 |
-
eval_per_episode: 5
|
7 |
-
load_checkpoint: true
|
8 |
-
load_path: Train_CartPole-v1_DuelingDQN_20230407-153236
|
9 |
-
max_steps: 200
|
10 |
-
mode: test
|
11 |
-
mp_backend: ray
|
12 |
-
n_workers: 1
|
13 |
-
new_step_api: true
|
14 |
-
render: false
|
15 |
-
render_mode: human
|
16 |
-
save_fig: true
|
17 |
-
seed: 1
|
18 |
-
show_fig: false
|
19 |
-
test_eps: 10
|
20 |
-
train_eps: 400
|
21 |
-
wrapper: null
|
22 |
-
algo_cfg:
|
23 |
-
batch_size: 64
|
24 |
-
buffer_size: 100000
|
25 |
-
epsilon_decay: 500
|
26 |
-
epsilon_end: 0.01
|
27 |
-
epsilon_start: 0.95
|
28 |
-
gamma: 0.99
|
29 |
-
hidden_dim: 256
|
30 |
-
lr: 0.0001
|
31 |
-
target_update: 4
|
32 |
-
value_layers:
|
33 |
-
- activation: relu
|
34 |
-
layer_dim:
|
35 |
-
- n_states
|
36 |
-
- 256
|
37 |
-
layer_type: linear
|
38 |
-
- activation: relu
|
39 |
-
layer_dim:
|
40 |
-
- 256
|
41 |
-
- 256
|
42 |
-
layer_type: linear
|
43 |
-
- activation: none
|
44 |
-
layer_dim:
|
45 |
-
- 256
|
46 |
-
- n_actions
|
47 |
-
layer_type: linear
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
CartPole-v1/Test_CartPole-v1_DuelingDQN_ray_20230407-165208/logs/log.txt
DELETED
@@ -1,53 +0,0 @@
|
|
1 |
-
2023-04-07 16:52:08 - r - INFO: - Hyperparameters:
|
2 |
-
2023-04-07 16:52:08 - r - INFO: - ================================================================================
|
3 |
-
2023-04-07 16:52:08 - r - INFO: - Name Value Type
|
4 |
-
2023-04-07 16:52:08 - r - INFO: - env_name CartPole-v1 <class 'str'>
|
5 |
-
2023-04-07 16:52:08 - r - INFO: - new_step_api 1 <class 'bool'>
|
6 |
-
2023-04-07 16:52:08 - r - INFO: - wrapper None <class 'str'>
|
7 |
-
2023-04-07 16:52:08 - r - INFO: - render 0 <class 'bool'>
|
8 |
-
2023-04-07 16:52:08 - r - INFO: - render_mode human <class 'str'>
|
9 |
-
2023-04-07 16:52:08 - r - INFO: - algo_name DuelingDQN <class 'str'>
|
10 |
-
2023-04-07 16:52:08 - r - INFO: - mode test <class 'str'>
|
11 |
-
2023-04-07 16:52:08 - r - INFO: - mp_backend ray <class 'str'>
|
12 |
-
2023-04-07 16:52:08 - r - INFO: - seed 1 <class 'int'>
|
13 |
-
2023-04-07 16:52:08 - r - INFO: - device cpu <class 'str'>
|
14 |
-
2023-04-07 16:52:08 - r - INFO: - train_eps 400 <class 'int'>
|
15 |
-
2023-04-07 16:52:08 - r - INFO: - test_eps 10 <class 'int'>
|
16 |
-
2023-04-07 16:52:08 - r - INFO: - eval_eps 10 <class 'int'>
|
17 |
-
2023-04-07 16:52:08 - r - INFO: - eval_per_episode 5 <class 'int'>
|
18 |
-
2023-04-07 16:52:08 - r - INFO: - max_steps 200 <class 'int'>
|
19 |
-
2023-04-07 16:52:08 - r - INFO: - load_checkpoint 1 <class 'bool'>
|
20 |
-
2023-04-07 16:52:08 - r - INFO: - load_path Train_CartPole-v1_DuelingDQN_20230407-153236 <class 'str'>
|
21 |
-
2023-04-07 16:52:08 - r - INFO: - show_fig 0 <class 'bool'>
|
22 |
-
2023-04-07 16:52:08 - r - INFO: - save_fig 1 <class 'bool'>
|
23 |
-
2023-04-07 16:52:08 - r - INFO: - n_workers 1 <class 'int'>
|
24 |
-
2023-04-07 16:52:08 - r - INFO: - epsilon_start 0.95 <class 'float'>
|
25 |
-
2023-04-07 16:52:08 - r - INFO: - epsilon_end 0.01 <class 'float'>
|
26 |
-
2023-04-07 16:52:08 - r - INFO: - epsilon_decay 500 <class 'int'>
|
27 |
-
2023-04-07 16:52:08 - r - INFO: - gamma 0.99 <class 'float'>
|
28 |
-
2023-04-07 16:52:08 - r - INFO: - lr 0.0001 <class 'float'>
|
29 |
-
2023-04-07 16:52:08 - r - INFO: - buffer_size 100000 <class 'int'>
|
30 |
-
2023-04-07 16:52:08 - r - INFO: - batch_size 64 <class 'int'>
|
31 |
-
2023-04-07 16:52:08 - r - INFO: - target_update 4 <class 'int'>
|
32 |
-
2023-04-07 16:52:08 - r - INFO: - value_layers [{'layer_type': 'linear', 'layer_dim': ['n_states', 256], 'activation': 'relu'}, {'layer_type': 'linear', 'layer_dim': [256, 256], 'activation': 'relu'}, {'layer_type': 'linear', 'layer_dim': [256, 'n_actions'], 'activation': 'none'}] <class 'str'>
|
33 |
-
2023-04-07 16:52:08 - r - INFO: - hidden_dim 256 <class 'int'>
|
34 |
-
2023-04-07 16:52:08 - r - INFO: - task_dir /media/disk/gsc/joyrl-offline/tasks/Test_CartPole-v1_DuelingDQN_20230407-165208 <class 'str'>
|
35 |
-
2023-04-07 16:52:08 - r - INFO: - res_dir /media/disk/gsc/joyrl-offline/tasks/Test_CartPole-v1_DuelingDQN_20230407-165208/results <class 'str'>
|
36 |
-
2023-04-07 16:52:08 - r - INFO: - log_dir /media/disk/gsc/joyrl-offline/tasks/Test_CartPole-v1_DuelingDQN_20230407-165208/logs <class 'str'>
|
37 |
-
2023-04-07 16:52:08 - r - INFO: - traj_dir /media/disk/gsc/joyrl-offline/tasks/Test_CartPole-v1_DuelingDQN_20230407-165208/traj <class 'str'>
|
38 |
-
2023-04-07 16:52:08 - r - INFO: - video_dir /media/disk/gsc/joyrl-offline/tasks/Test_CartPole-v1_DuelingDQN_20230407-165208/videos <class 'str'>
|
39 |
-
2023-04-07 16:52:08 - r - INFO: - ================================================================================
|
40 |
-
2023-04-07 16:52:08 - r - INFO: - n_states: 4, n_actions: 2
|
41 |
-
2023-04-07 16:52:08 - r - INFO: - Start testing!
|
42 |
-
2023-04-07 16:52:08 - r - INFO: - Env: CartPole-v1, Algorithm: DuelingDQN, Device: cpu
|
43 |
-
2023-04-07 16:52:08 - r - INFO: - Episode: 1/10, Reward: 171.000, Step: 171
|
44 |
-
2023-04-07 16:52:08 - r - INFO: - Episode: 2/10, Reward: 185.000, Step: 185
|
45 |
-
2023-04-07 16:52:08 - r - INFO: - Episode: 3/10, Reward: 159.000, Step: 159
|
46 |
-
2023-04-07 16:52:08 - r - INFO: - Episode: 4/10, Reward: 155.000, Step: 155
|
47 |
-
2023-04-07 16:52:08 - r - INFO: - Episode: 5/10, Reward: 200.000, Step: 200
|
48 |
-
2023-04-07 16:52:08 - r - INFO: - Episode: 6/10, Reward: 120.000, Step: 120
|
49 |
-
2023-04-07 16:52:08 - r - INFO: - Episode: 7/10, Reward: 200.000, Step: 200
|
50 |
-
2023-04-07 16:52:08 - r - INFO: - Episode: 8/10, Reward: 187.000, Step: 187
|
51 |
-
2023-04-07 16:52:08 - r - INFO: - Episode: 9/10, Reward: 154.000, Step: 154
|
52 |
-
2023-04-07 16:52:08 - r - INFO: - Episode: 10/10, Reward: 200.000, Step: 200
|
53 |
-
2023-04-07 16:52:08 - r - INFO: - Finish testing!
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
CartPole-v1/Test_CartPole-v1_DuelingDQN_ray_20230407-165208/results/learning_curve.png
DELETED
Binary file (45.9 kB)
|
|
CartPole-v1/Test_CartPole-v1_DuelingDQN_ray_20230407-165208/results/res.csv
DELETED
@@ -1,11 +0,0 @@
|
|
1 |
-
episodes,rewards,steps
|
2 |
-
0,171.0,171
|
3 |
-
1,185.0,185
|
4 |
-
2,159.0,159
|
5 |
-
3,155.0,155
|
6 |
-
4,200.0,200
|
7 |
-
5,120.0,120
|
8 |
-
6,200.0,200
|
9 |
-
7,187.0,187
|
10 |
-
8,154.0,154
|
11 |
-
9,200.0,200
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
CartPole-v1/Train_CartPole-v1_DuelingDQN_20221122-125403/config.yaml
DELETED
@@ -1,41 +0,0 @@
|
|
1 |
-
general_cfg:
|
2 |
-
algo_name: DuelingDQN
|
3 |
-
device: cuda
|
4 |
-
env_name: CartPole-v1
|
5 |
-
eval_eps: 10
|
6 |
-
eval_per_episode: 5
|
7 |
-
load_checkpoint: false
|
8 |
-
load_path: Train_CartPole-v1_DQN_20221026-054757
|
9 |
-
max_steps: 200
|
10 |
-
mode: train
|
11 |
-
save_fig: true
|
12 |
-
seed: 1
|
13 |
-
show_fig: false
|
14 |
-
test_eps: 10
|
15 |
-
train_eps: 100
|
16 |
-
algo_cfg:
|
17 |
-
batch_size: 64
|
18 |
-
buffer_size: 100000
|
19 |
-
epsilon_decay: 500
|
20 |
-
epsilon_end: 0.01
|
21 |
-
epsilon_start: 0.95
|
22 |
-
gamma: 0.99
|
23 |
-
hidden_dim: 256
|
24 |
-
lr: 0.0001
|
25 |
-
target_update: 4
|
26 |
-
value_layers:
|
27 |
-
- activation: relu
|
28 |
-
layer_dim:
|
29 |
-
- n_states
|
30 |
-
- 256
|
31 |
-
layer_type: linear
|
32 |
-
- activation: relu
|
33 |
-
layer_dim:
|
34 |
-
- 256
|
35 |
-
- 256
|
36 |
-
layer_type: linear
|
37 |
-
- activation: none
|
38 |
-
layer_dim:
|
39 |
-
- 256
|
40 |
-
- n_actions
|
41 |
-
layer_type: linear
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
CartPole-v1/Train_CartPole-v1_DuelingDQN_20221122-125403/logs/log.txt
DELETED
@@ -1,119 +0,0 @@
|
|
1 |
-
2022-11-22 12:54:03 - r - INFO: - n_states: 4, n_actions: 2
|
2 |
-
2022-11-22 12:54:06 - r - INFO: - Start training!
|
3 |
-
2022-11-22 12:54:06 - r - INFO: - Env: CartPole-v1, Algorithm: DuelingDQN, Device: cuda
|
4 |
-
2022-11-22 12:54:06 - r - INFO: - Episode: 1/100, Reward: 18.000, Step: 18
|
5 |
-
2022-11-22 12:54:06 - r - INFO: - Episode: 2/100, Reward: 35.000, Step: 35
|
6 |
-
2022-11-22 12:54:06 - r - INFO: - Episode: 3/100, Reward: 13.000, Step: 13
|
7 |
-
2022-11-22 12:54:06 - r - INFO: - Episode: 4/100, Reward: 32.000, Step: 32
|
8 |
-
2022-11-22 12:54:06 - r - INFO: - Episode: 5/100, Reward: 16.000, Step: 16
|
9 |
-
2022-11-22 12:54:06 - r - INFO: - Current episode 5 has the best eval reward: 9.100
|
10 |
-
2022-11-22 12:54:06 - r - INFO: - Episode: 6/100, Reward: 9.000, Step: 9
|
11 |
-
2022-11-22 12:54:06 - r - INFO: - Episode: 7/100, Reward: 12.000, Step: 12
|
12 |
-
2022-11-22 12:54:06 - r - INFO: - Episode: 8/100, Reward: 16.000, Step: 16
|
13 |
-
2022-11-22 12:54:06 - r - INFO: - Episode: 9/100, Reward: 14.000, Step: 14
|
14 |
-
2022-11-22 12:54:06 - r - INFO: - Episode: 10/100, Reward: 12.000, Step: 12
|
15 |
-
2022-11-22 12:54:06 - r - INFO: - Current episode 10 has the best eval reward: 9.200
|
16 |
-
2022-11-22 12:54:06 - r - INFO: - Episode: 11/100, Reward: 13.000, Step: 13
|
17 |
-
2022-11-22 12:54:06 - r - INFO: - Episode: 12/100, Reward: 14.000, Step: 14
|
18 |
-
2022-11-22 12:54:06 - r - INFO: - Episode: 13/100, Reward: 19.000, Step: 19
|
19 |
-
2022-11-22 12:54:06 - r - INFO: - Episode: 14/100, Reward: 9.000, Step: 9
|
20 |
-
2022-11-22 12:54:06 - r - INFO: - Episode: 15/100, Reward: 15.000, Step: 15
|
21 |
-
2022-11-22 12:54:06 - r - INFO: - Current episode 15 has the best eval reward: 9.300
|
22 |
-
2022-11-22 12:54:06 - r - INFO: - Episode: 16/100, Reward: 12.000, Step: 12
|
23 |
-
2022-11-22 12:54:06 - r - INFO: - Episode: 17/100, Reward: 11.000, Step: 11
|
24 |
-
2022-11-22 12:54:06 - r - INFO: - Episode: 18/100, Reward: 9.000, Step: 9
|
25 |
-
2022-11-22 12:54:07 - r - INFO: - Episode: 19/100, Reward: 13.000, Step: 13
|
26 |
-
2022-11-22 12:54:07 - r - INFO: - Episode: 20/100, Reward: 17.000, Step: 17
|
27 |
-
2022-11-22 12:54:07 - r - INFO: - Current episode 20 has the best eval reward: 9.900
|
28 |
-
2022-11-22 12:54:07 - r - INFO: - Episode: 21/100, Reward: 14.000, Step: 14
|
29 |
-
2022-11-22 12:54:07 - r - INFO: - Episode: 22/100, Reward: 20.000, Step: 20
|
30 |
-
2022-11-22 12:54:07 - r - INFO: - Episode: 23/100, Reward: 11.000, Step: 11
|
31 |
-
2022-11-22 12:54:07 - r - INFO: - Episode: 24/100, Reward: 24.000, Step: 24
|
32 |
-
2022-11-22 12:54:07 - r - INFO: - Episode: 25/100, Reward: 11.000, Step: 11
|
33 |
-
2022-11-22 12:54:07 - r - INFO: - Episode: 26/100, Reward: 11.000, Step: 11
|
34 |
-
2022-11-22 12:54:07 - r - INFO: - Episode: 27/100, Reward: 11.000, Step: 11
|
35 |
-
2022-11-22 12:54:07 - r - INFO: - Episode: 28/100, Reward: 13.000, Step: 13
|
36 |
-
2022-11-22 12:54:07 - r - INFO: - Episode: 29/100, Reward: 11.000, Step: 11
|
37 |
-
2022-11-22 12:54:07 - r - INFO: - Episode: 30/100, Reward: 8.000, Step: 8
|
38 |
-
2022-11-22 12:54:07 - r - INFO: - Episode: 31/100, Reward: 13.000, Step: 13
|
39 |
-
2022-11-22 12:54:07 - r - INFO: - Episode: 32/100, Reward: 9.000, Step: 9
|
40 |
-
2022-11-22 12:54:07 - r - INFO: - Episode: 33/100, Reward: 34.000, Step: 34
|
41 |
-
2022-11-22 12:54:07 - r - INFO: - Episode: 34/100, Reward: 10.000, Step: 10
|
42 |
-
2022-11-22 12:54:07 - r - INFO: - Episode: 35/100, Reward: 10.000, Step: 10
|
43 |
-
2022-11-22 12:54:07 - r - INFO: - Episode: 36/100, Reward: 10.000, Step: 10
|
44 |
-
2022-11-22 12:54:07 - r - INFO: - Episode: 37/100, Reward: 34.000, Step: 34
|
45 |
-
2022-11-22 12:54:07 - r - INFO: - Episode: 38/100, Reward: 35.000, Step: 35
|
46 |
-
2022-11-22 12:54:07 - r - INFO: - Episode: 39/100, Reward: 32.000, Step: 32
|
47 |
-
2022-11-22 12:54:08 - r - INFO: - Episode: 40/100, Reward: 37.000, Step: 37
|
48 |
-
2022-11-22 12:54:08 - r - INFO: - Current episode 40 has the best eval reward: 27.500
|
49 |
-
2022-11-22 12:54:08 - r - INFO: - Episode: 41/100, Reward: 29.000, Step: 29
|
50 |
-
2022-11-22 12:54:08 - r - INFO: - Episode: 42/100, Reward: 52.000, Step: 52
|
51 |
-
2022-11-22 12:54:08 - r - INFO: - Episode: 43/100, Reward: 54.000, Step: 54
|
52 |
-
2022-11-22 12:54:08 - r - INFO: - Episode: 44/100, Reward: 90.000, Step: 90
|
53 |
-
2022-11-22 12:54:08 - r - INFO: - Episode: 45/100, Reward: 91.000, Step: 91
|
54 |
-
2022-11-22 12:54:09 - r - INFO: - Current episode 45 has the best eval reward: 87.500
|
55 |
-
2022-11-22 12:54:09 - r - INFO: - Episode: 46/100, Reward: 51.000, Step: 51
|
56 |
-
2022-11-22 12:54:09 - r - INFO: - Episode: 47/100, Reward: 101.000, Step: 101
|
57 |
-
2022-11-22 12:54:09 - r - INFO: - Episode: 48/100, Reward: 67.000, Step: 67
|
58 |
-
2022-11-22 12:54:09 - r - INFO: - Episode: 49/100, Reward: 103.000, Step: 103
|
59 |
-
2022-11-22 12:54:10 - r - INFO: - Episode: 50/100, Reward: 45.000, Step: 45
|
60 |
-
2022-11-22 12:54:10 - r - INFO: - Episode: 51/100, Reward: 137.000, Step: 137
|
61 |
-
2022-11-22 12:54:10 - r - INFO: - Episode: 52/100, Reward: 47.000, Step: 47
|
62 |
-
2022-11-22 12:54:10 - r - INFO: - Episode: 53/100, Reward: 89.000, Step: 89
|
63 |
-
2022-11-22 12:54:11 - r - INFO: - Episode: 54/100, Reward: 95.000, Step: 95
|
64 |
-
2022-11-22 12:54:11 - r - INFO: - Episode: 55/100, Reward: 55.000, Step: 55
|
65 |
-
2022-11-22 12:54:11 - r - INFO: - Episode: 56/100, Reward: 92.000, Step: 92
|
66 |
-
2022-11-22 12:54:12 - r - INFO: - Episode: 57/100, Reward: 155.000, Step: 155
|
67 |
-
2022-11-22 12:54:12 - r - INFO: - Episode: 58/100, Reward: 125.000, Step: 125
|
68 |
-
2022-11-22 12:54:12 - r - INFO: - Episode: 59/100, Reward: 152.000, Step: 152
|
69 |
-
2022-11-22 12:54:13 - r - INFO: - Episode: 60/100, Reward: 199.000, Step: 199
|
70 |
-
2022-11-22 12:54:13 - r - INFO: - Current episode 60 has the best eval reward: 179.100
|
71 |
-
2022-11-22 12:54:14 - r - INFO: - Episode: 61/100, Reward: 88.000, Step: 88
|
72 |
-
2022-11-22 12:54:14 - r - INFO: - Episode: 62/100, Reward: 200.000, Step: 200
|
73 |
-
2022-11-22 12:54:14 - r - INFO: - Episode: 63/100, Reward: 176.000, Step: 176
|
74 |
-
2022-11-22 12:54:15 - r - INFO: - Episode: 64/100, Reward: 200.000, Step: 200
|
75 |
-
2022-11-22 12:54:15 - r - INFO: - Episode: 65/100, Reward: 200.000, Step: 200
|
76 |
-
2022-11-22 12:54:16 - r - INFO: - Current episode 65 has the best eval reward: 198.700
|
77 |
-
2022-11-22 12:54:16 - r - INFO: - Episode: 66/100, Reward: 193.000, Step: 193
|
78 |
-
2022-11-22 12:54:17 - r - INFO: - Episode: 67/100, Reward: 200.000, Step: 200
|
79 |
-
2022-11-22 12:54:17 - r - INFO: - Episode: 68/100, Reward: 200.000, Step: 200
|
80 |
-
2022-11-22 12:54:18 - r - INFO: - Episode: 69/100, Reward: 200.000, Step: 200
|
81 |
-
2022-11-22 12:54:18 - r - INFO: - Episode: 70/100, Reward: 200.000, Step: 200
|
82 |
-
2022-11-22 12:54:19 - r - INFO: - Current episode 70 has the best eval reward: 200.000
|
83 |
-
2022-11-22 12:54:20 - r - INFO: - Episode: 71/100, Reward: 200.000, Step: 200
|
84 |
-
2022-11-22 12:54:20 - r - INFO: - Episode: 72/100, Reward: 200.000, Step: 200
|
85 |
-
2022-11-22 12:54:20 - r - INFO: - Episode: 73/100, Reward: 200.000, Step: 200
|
86 |
-
2022-11-22 12:54:21 - r - INFO: - Episode: 74/100, Reward: 200.000, Step: 200
|
87 |
-
2022-11-22 12:54:21 - r - INFO: - Episode: 75/100, Reward: 200.000, Step: 200
|
88 |
-
2022-11-22 12:54:22 - r - INFO: - Current episode 75 has the best eval reward: 200.000
|
89 |
-
2022-11-22 12:54:23 - r - INFO: - Episode: 76/100, Reward: 200.000, Step: 200
|
90 |
-
2022-11-22 12:54:23 - r - INFO: - Episode: 77/100, Reward: 200.000, Step: 200
|
91 |
-
2022-11-22 12:54:24 - r - INFO: - Episode: 78/100, Reward: 200.000, Step: 200
|
92 |
-
2022-11-22 12:54:24 - r - INFO: - Episode: 79/100, Reward: 200.000, Step: 200
|
93 |
-
2022-11-22 12:54:24 - r - INFO: - Episode: 80/100, Reward: 200.000, Step: 200
|
94 |
-
2022-11-22 12:54:25 - r - INFO: - Current episode 80 has the best eval reward: 200.000
|
95 |
-
2022-11-22 12:54:26 - r - INFO: - Episode: 81/100, Reward: 200.000, Step: 200
|
96 |
-
2022-11-22 12:54:26 - r - INFO: - Episode: 82/100, Reward: 200.000, Step: 200
|
97 |
-
2022-11-22 12:54:27 - r - INFO: - Episode: 83/100, Reward: 200.000, Step: 200
|
98 |
-
2022-11-22 12:54:27 - r - INFO: - Episode: 84/100, Reward: 200.000, Step: 200
|
99 |
-
2022-11-22 12:54:27 - r - INFO: - Episode: 85/100, Reward: 200.000, Step: 200
|
100 |
-
2022-11-22 12:54:28 - r - INFO: - Current episode 85 has the best eval reward: 200.000
|
101 |
-
2022-11-22 12:54:29 - r - INFO: - Episode: 86/100, Reward: 200.000, Step: 200
|
102 |
-
2022-11-22 12:54:29 - r - INFO: - Episode: 87/100, Reward: 200.000, Step: 200
|
103 |
-
2022-11-22 12:54:30 - r - INFO: - Episode: 88/100, Reward: 200.000, Step: 200
|
104 |
-
2022-11-22 12:54:30 - r - INFO: - Episode: 89/100, Reward: 200.000, Step: 200
|
105 |
-
2022-11-22 12:54:30 - r - INFO: - Episode: 90/100, Reward: 200.000, Step: 200
|
106 |
-
2022-11-22 12:54:31 - r - INFO: - Current episode 90 has the best eval reward: 200.000
|
107 |
-
2022-11-22 12:54:32 - r - INFO: - Episode: 91/100, Reward: 200.000, Step: 200
|
108 |
-
2022-11-22 12:54:32 - r - INFO: - Episode: 92/100, Reward: 200.000, Step: 200
|
109 |
-
2022-11-22 12:54:33 - r - INFO: - Episode: 93/100, Reward: 200.000, Step: 200
|
110 |
-
2022-11-22 12:54:33 - r - INFO: - Episode: 94/100, Reward: 200.000, Step: 200
|
111 |
-
2022-11-22 12:54:34 - r - INFO: - Episode: 95/100, Reward: 200.000, Step: 200
|
112 |
-
2022-11-22 12:54:34 - r - INFO: - Current episode 95 has the best eval reward: 200.000
|
113 |
-
2022-11-22 12:54:35 - r - INFO: - Episode: 96/100, Reward: 200.000, Step: 200
|
114 |
-
2022-11-22 12:54:35 - r - INFO: - Episode: 97/100, Reward: 200.000, Step: 200
|
115 |
-
2022-11-22 12:54:36 - r - INFO: - Episode: 98/100, Reward: 200.000, Step: 200
|
116 |
-
2022-11-22 12:54:36 - r - INFO: - Episode: 99/100, Reward: 200.000, Step: 200
|
117 |
-
2022-11-22 12:54:37 - r - INFO: - Episode: 100/100, Reward: 200.000, Step: 200
|
118 |
-
2022-11-22 12:54:37 - r - INFO: - Current episode 100 has the best eval reward: 200.000
|
119 |
-
2022-11-22 12:54:37 - r - INFO: - Finish training!
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
CartPole-v1/Train_CartPole-v1_DuelingDQN_20221122-125403/results/learning_curve.png
DELETED
Binary file (47.5 kB)
|
|
CartPole-v1/Train_CartPole-v1_DuelingDQN_20221122-125403/results/res.csv
DELETED
@@ -1,101 +0,0 @@
|
|
1 |
-
episodes,rewards,steps
|
2 |
-
0,18.0,18
|
3 |
-
1,35.0,35
|
4 |
-
2,13.0,13
|
5 |
-
3,32.0,32
|
6 |
-
4,16.0,16
|
7 |
-
5,9.0,9
|
8 |
-
6,12.0,12
|
9 |
-
7,16.0,16
|
10 |
-
8,14.0,14
|
11 |
-
9,12.0,12
|
12 |
-
10,13.0,13
|
13 |
-
11,14.0,14
|
14 |
-
12,19.0,19
|
15 |
-
13,9.0,9
|
16 |
-
14,15.0,15
|
17 |
-
15,12.0,12
|
18 |
-
16,11.0,11
|
19 |
-
17,9.0,9
|
20 |
-
18,13.0,13
|
21 |
-
19,17.0,17
|
22 |
-
20,14.0,14
|
23 |
-
21,20.0,20
|
24 |
-
22,11.0,11
|
25 |
-
23,24.0,24
|
26 |
-
24,11.0,11
|
27 |
-
25,11.0,11
|
28 |
-
26,11.0,11
|
29 |
-
27,13.0,13
|
30 |
-
28,11.0,11
|
31 |
-
29,8.0,8
|
32 |
-
30,13.0,13
|
33 |
-
31,9.0,9
|
34 |
-
32,34.0,34
|
35 |
-
33,10.0,10
|
36 |
-
34,10.0,10
|
37 |
-
35,10.0,10
|
38 |
-
36,34.0,34
|
39 |
-
37,35.0,35
|
40 |
-
38,32.0,32
|
41 |
-
39,37.0,37
|
42 |
-
40,29.0,29
|
43 |
-
41,52.0,52
|
44 |
-
42,54.0,54
|
45 |
-
43,90.0,90
|
46 |
-
44,91.0,91
|
47 |
-
45,51.0,51
|
48 |
-
46,101.0,101
|
49 |
-
47,67.0,67
|
50 |
-
48,103.0,103
|
51 |
-
49,45.0,45
|
52 |
-
50,137.0,137
|
53 |
-
51,47.0,47
|
54 |
-
52,89.0,89
|
55 |
-
53,95.0,95
|
56 |
-
54,55.0,55
|
57 |
-
55,92.0,92
|
58 |
-
56,155.0,155
|
59 |
-
57,125.0,125
|
60 |
-
58,152.0,152
|
61 |
-
59,199.0,199
|
62 |
-
60,88.0,88
|
63 |
-
61,200.0,200
|
64 |
-
62,176.0,176
|
65 |
-
63,200.0,200
|
66 |
-
64,200.0,200
|
67 |
-
65,193.0,193
|
68 |
-
66,200.0,200
|
69 |
-
67,200.0,200
|
70 |
-
68,200.0,200
|
71 |
-
69,200.0,200
|
72 |
-
70,200.0,200
|
73 |
-
71,200.0,200
|
74 |
-
72,200.0,200
|
75 |
-
73,200.0,200
|
76 |
-
74,200.0,200
|
77 |
-
75,200.0,200
|
78 |
-
76,200.0,200
|
79 |
-
77,200.0,200
|
80 |
-
78,200.0,200
|
81 |
-
79,200.0,200
|
82 |
-
80,200.0,200
|
83 |
-
81,200.0,200
|
84 |
-
82,200.0,200
|
85 |
-
83,200.0,200
|
86 |
-
84,200.0,200
|
87 |
-
85,200.0,200
|
88 |
-
86,200.0,200
|
89 |
-
87,200.0,200
|
90 |
-
88,200.0,200
|
91 |
-
89,200.0,200
|
92 |
-
90,200.0,200
|
93 |
-
91,200.0,200
|
94 |
-
92,200.0,200
|
95 |
-
93,200.0,200
|
96 |
-
94,200.0,200
|
97 |
-
95,200.0,200
|
98 |
-
96,200.0,200
|
99 |
-
97,200.0,200
|
100 |
-
98,200.0,200
|
101 |
-
99,200.0,200
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
CartPole-v1/Train_CartPole-v1_DuelingDQN_mp_20230407-170853/logs/log.txt
DELETED
@@ -1,43 +0,0 @@
|
|
1 |
-
2023-04-07 17:08:53 - r - INFO: - Hyperparameters:
|
2 |
-
2023-04-07 17:08:53 - r - INFO: - ================================================================================
|
3 |
-
2023-04-07 17:08:53 - r - INFO: - Name Value Type
|
4 |
-
2023-04-07 17:08:53 - r - INFO: - env_name CartPole-v1 <class 'str'>
|
5 |
-
2023-04-07 17:08:53 - r - INFO: - new_step_api 1 <class 'bool'>
|
6 |
-
2023-04-07 17:08:53 - r - INFO: - wrapper None <class 'str'>
|
7 |
-
2023-04-07 17:08:53 - r - INFO: - render 0 <class 'bool'>
|
8 |
-
2023-04-07 17:08:53 - r - INFO: - render_mode human <class 'str'>
|
9 |
-
2023-04-07 17:08:53 - r - INFO: - algo_name DuelingDQN <class 'str'>
|
10 |
-
2023-04-07 17:08:53 - r - INFO: - mode train <class 'str'>
|
11 |
-
2023-04-07 17:08:53 - r - INFO: - mp_backend mp <class 'str'>
|
12 |
-
2023-04-07 17:08:53 - r - INFO: - seed 1 <class 'int'>
|
13 |
-
2023-04-07 17:08:53 - r - INFO: - device cpu <class 'str'>
|
14 |
-
2023-04-07 17:08:53 - r - INFO: - train_eps 200 <class 'int'>
|
15 |
-
2023-04-07 17:08:53 - r - INFO: - test_eps 10 <class 'int'>
|
16 |
-
2023-04-07 17:08:53 - r - INFO: - eval_eps 10 <class 'int'>
|
17 |
-
2023-04-07 17:08:53 - r - INFO: - eval_per_episode 5 <class 'int'>
|
18 |
-
2023-04-07 17:08:53 - r - INFO: - max_steps 200 <class 'int'>
|
19 |
-
2023-04-07 17:08:53 - r - INFO: - load_checkpoint 0 <class 'bool'>
|
20 |
-
2023-04-07 17:08:53 - r - INFO: - load_path Train_CartPole-v1_DQN_20221026-054757 <class 'str'>
|
21 |
-
2023-04-07 17:08:53 - r - INFO: - show_fig 0 <class 'bool'>
|
22 |
-
2023-04-07 17:08:53 - r - INFO: - save_fig 1 <class 'bool'>
|
23 |
-
2023-04-07 17:08:53 - r - INFO: - n_workers 4 <class 'int'>
|
24 |
-
2023-04-07 17:08:53 - r - INFO: - epsilon_start 0.95 <class 'float'>
|
25 |
-
2023-04-07 17:08:53 - r - INFO: - epsilon_end 0.01 <class 'float'>
|
26 |
-
2023-04-07 17:08:53 - r - INFO: - epsilon_decay 500 <class 'int'>
|
27 |
-
2023-04-07 17:08:53 - r - INFO: - gamma 0.99 <class 'float'>
|
28 |
-
2023-04-07 17:08:53 - r - INFO: - lr 0.0001 <class 'float'>
|
29 |
-
2023-04-07 17:08:53 - r - INFO: - buffer_size 100000 <class 'int'>
|
30 |
-
2023-04-07 17:08:53 - r - INFO: - batch_size 64 <class 'int'>
|
31 |
-
2023-04-07 17:08:53 - r - INFO: - target_update 4 <class 'int'>
|
32 |
-
2023-04-07 17:08:53 - r - INFO: - value_layers [{'layer_type': 'linear', 'layer_dim': ['n_states', 256], 'activation': 'relu'}, {'layer_type': 'linear', 'layer_dim': [256, 256], 'activation': 'relu'}, {'layer_type': 'linear', 'layer_dim': [256, 'n_actions'], 'activation': 'none'}] <class 'str'>
|
33 |
-
2023-04-07 17:08:53 - r - INFO: - hidden_dim 256 <class 'int'>
|
34 |
-
2023-04-07 17:08:53 - r - INFO: - task_dir /media/disk/gsc/joyrl-offline/tasks/Train_CartPole-v1_DuelingDQN_20230407-170853 <class 'str'>
|
35 |
-
2023-04-07 17:08:53 - r - INFO: - res_dir /media/disk/gsc/joyrl-offline/tasks/Train_CartPole-v1_DuelingDQN_20230407-170853/results <class 'str'>
|
36 |
-
2023-04-07 17:08:53 - r - INFO: - log_dir /media/disk/gsc/joyrl-offline/tasks/Train_CartPole-v1_DuelingDQN_20230407-170853/logs <class 'str'>
|
37 |
-
2023-04-07 17:08:53 - r - INFO: - traj_dir /media/disk/gsc/joyrl-offline/tasks/Train_CartPole-v1_DuelingDQN_20230407-170853/traj <class 'str'>
|
38 |
-
2023-04-07 17:08:53 - r - INFO: - video_dir /media/disk/gsc/joyrl-offline/tasks/Train_CartPole-v1_DuelingDQN_20230407-170853/videos <class 'str'>
|
39 |
-
2023-04-07 17:08:53 - r - INFO: - ================================================================================
|
40 |
-
2023-04-07 17:08:53 - r - INFO: - n_states: 4, n_actions: 2
|
41 |
-
2023-04-07 17:08:53 - r - INFO: - Start training!
|
42 |
-
2023-04-07 17:08:53 - r - INFO: - Env: CartPole-v1, Algorithm: DuelingDQN, Device: cpu
|
43 |
-
2023-04-07 17:10:11 - r - INFO: - Finish training!
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
CartPole-v1/Train_CartPole-v1_DuelingDQN_mp_20230407-170853/models/checkpoint.pt
DELETED
@@ -1,3 +0,0 @@
|
|
1 |
-
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:87aab291b33d6423c3c54eced436183398700a290427c1913be9d65f5503b5ae
|
3 |
-
size 537607
|
|
|
|
|
|
|
|
CartPole-v1/Train_CartPole-v1_DuelingDQN_mp_20230407-170853/results/learning_curve.png
DELETED
Binary file (55.3 kB)
|
|
CartPole-v1/Train_CartPole-v1_DuelingDQN_mp_20230407-170853/results/res.csv
DELETED
@@ -1,202 +0,0 @@
|
|
1 |
-
episodes,rewards
|
2 |
-
0,17.0
|
3 |
-
1,16.0
|
4 |
-
2,24.0
|
5 |
-
3,38.0
|
6 |
-
4,18.0
|
7 |
-
5,18.0
|
8 |
-
6,19.0
|
9 |
-
7,17.0
|
10 |
-
8,27.0
|
11 |
-
9,16.0
|
12 |
-
10,15.0
|
13 |
-
11,31.0
|
14 |
-
12,33.0
|
15 |
-
13,13.0
|
16 |
-
14,9.0
|
17 |
-
15,34.0
|
18 |
-
16,32.0
|
19 |
-
17,34.0
|
20 |
-
18,12.0
|
21 |
-
19,13.0
|
22 |
-
20,11.0
|
23 |
-
21,9.0
|
24 |
-
22,9.0
|
25 |
-
23,11.0
|
26 |
-
24,38.0
|
27 |
-
25,13.0
|
28 |
-
26,20.0
|
29 |
-
27,10.0
|
30 |
-
28,33.0
|
31 |
-
29,15.0
|
32 |
-
30,32.0
|
33 |
-
31,11.0
|
34 |
-
32,13.0
|
35 |
-
33,12.0
|
36 |
-
34,9.0
|
37 |
-
35,13.0
|
38 |
-
36,14.0
|
39 |
-
37,21.0
|
40 |
-
38,48.0
|
41 |
-
39,19.0
|
42 |
-
40,65.0
|
43 |
-
41,28.0
|
44 |
-
42,15.0
|
45 |
-
43,11.0
|
46 |
-
44,54.0
|
47 |
-
45,26.0
|
48 |
-
46,37.0
|
49 |
-
47,40.0
|
50 |
-
48,54.0
|
51 |
-
49,54.0
|
52 |
-
50,50.0
|
53 |
-
51,84.0
|
54 |
-
52,55.0
|
55 |
-
53,43.0
|
56 |
-
54,45.0
|
57 |
-
55,48.0
|
58 |
-
56,88.0
|
59 |
-
57,41.0
|
60 |
-
58,46.0
|
61 |
-
59,61.0
|
62 |
-
60,32.0
|
63 |
-
61,53.0
|
64 |
-
62,59.0
|
65 |
-
63,49.0
|
66 |
-
64,60.0
|
67 |
-
65,35.0
|
68 |
-
66,82.0
|
69 |
-
67,50.0
|
70 |
-
68,108.0
|
71 |
-
69,121.0
|
72 |
-
70,113.0
|
73 |
-
71,67.0
|
74 |
-
72,87.0
|
75 |
-
73,96.0
|
76 |
-
74,181.0
|
77 |
-
75,62.0
|
78 |
-
76,137.0
|
79 |
-
77,175.0
|
80 |
-
78,123.0
|
81 |
-
79,149.0
|
82 |
-
80,172.0
|
83 |
-
81,200.0
|
84 |
-
82,156.0
|
85 |
-
83,146.0
|
86 |
-
84,200.0
|
87 |
-
85,200.0
|
88 |
-
86,200.0
|
89 |
-
87,128.0
|
90 |
-
88,188.0
|
91 |
-
89,200.0
|
92 |
-
90,200.0
|
93 |
-
91,200.0
|
94 |
-
92,200.0
|
95 |
-
93,200.0
|
96 |
-
94,200.0
|
97 |
-
95,200.0
|
98 |
-
96,200.0
|
99 |
-
97,200.0
|
100 |
-
98,195.0
|
101 |
-
99,200.0
|
102 |
-
100,200.0
|
103 |
-
101,196.0
|
104 |
-
102,200.0
|
105 |
-
103,200.0
|
106 |
-
104,192.0
|
107 |
-
105,200.0
|
108 |
-
106,190.0
|
109 |
-
107,200.0
|
110 |
-
108,200.0
|
111 |
-
109,200.0
|
112 |
-
110,197.0
|
113 |
-
111,200.0
|
114 |
-
112,200.0
|
115 |
-
113,200.0
|
116 |
-
114,200.0
|
117 |
-
115,200.0
|
118 |
-
116,200.0
|
119 |
-
117,200.0
|
120 |
-
118,200.0
|
121 |
-
119,200.0
|
122 |
-
120,200.0
|
123 |
-
121,200.0
|
124 |
-
122,200.0
|
125 |
-
123,200.0
|
126 |
-
124,200.0
|
127 |
-
125,200.0
|
128 |
-
126,200.0
|
129 |
-
127,200.0
|
130 |
-
128,200.0
|
131 |
-
129,200.0
|
132 |
-
130,200.0
|
133 |
-
131,200.0
|
134 |
-
132,200.0
|
135 |
-
133,200.0
|
136 |
-
134,200.0
|
137 |
-
135,200.0
|
138 |
-
136,200.0
|
139 |
-
137,197.0
|
140 |
-
138,200.0
|
141 |
-
139,200.0
|
142 |
-
140,200.0
|
143 |
-
141,200.0
|
144 |
-
142,200.0
|
145 |
-
143,200.0
|
146 |
-
144,21.0
|
147 |
-
145,193.0
|
148 |
-
146,123.0
|
149 |
-
147,194.0
|
150 |
-
148,9.0
|
151 |
-
149,9.0
|
152 |
-
150,48.0
|
153 |
-
151,200.0
|
154 |
-
152,200.0
|
155 |
-
153,200.0
|
156 |
-
154,200.0
|
157 |
-
155,200.0
|
158 |
-
156,200.0
|
159 |
-
157,200.0
|
160 |
-
158,200.0
|
161 |
-
159,200.0
|
162 |
-
160,200.0
|
163 |
-
161,200.0
|
164 |
-
162,200.0
|
165 |
-
163,200.0
|
166 |
-
164,200.0
|
167 |
-
165,200.0
|
168 |
-
166,200.0
|
169 |
-
167,200.0
|
170 |
-
168,200.0
|
171 |
-
169,200.0
|
172 |
-
170,200.0
|
173 |
-
171,200.0
|
174 |
-
172,200.0
|
175 |
-
173,200.0
|
176 |
-
174,200.0
|
177 |
-
175,200.0
|
178 |
-
176,200.0
|
179 |
-
177,200.0
|
180 |
-
178,200.0
|
181 |
-
179,200.0
|
182 |
-
180,200.0
|
183 |
-
181,200.0
|
184 |
-
182,200.0
|
185 |
-
183,200.0
|
186 |
-
184,200.0
|
187 |
-
185,200.0
|
188 |
-
186,200.0
|
189 |
-
187,200.0
|
190 |
-
188,200.0
|
191 |
-
189,200.0
|
192 |
-
190,200.0
|
193 |
-
191,200.0
|
194 |
-
192,200.0
|
195 |
-
193,200.0
|
196 |
-
194,200.0
|
197 |
-
195,200.0
|
198 |
-
196,200.0
|
199 |
-
197,200.0
|
200 |
-
198,200.0
|
201 |
-
199,200.0
|
202 |
-
200,200.0
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
CartPole-v1/Train_CartPole-v1_DuelingDQN_ray_20230407-153236/logs/log.txt
DELETED
@@ -1,43 +0,0 @@
|
|
1 |
-
2023-04-07 15:32:36 - r - INFO: - Hyperparameters:
|
2 |
-
2023-04-07 15:32:36 - r - INFO: - ================================================================================
|
3 |
-
2023-04-07 15:32:36 - r - INFO: - Name Value Type
|
4 |
-
2023-04-07 15:32:36 - r - INFO: - env_name CartPole-v1 <class 'str'>
|
5 |
-
2023-04-07 15:32:36 - r - INFO: - new_step_api 1 <class 'bool'>
|
6 |
-
2023-04-07 15:32:36 - r - INFO: - wrapper None <class 'str'>
|
7 |
-
2023-04-07 15:32:36 - r - INFO: - render 0 <class 'bool'>
|
8 |
-
2023-04-07 15:32:36 - r - INFO: - render_mode human <class 'str'>
|
9 |
-
2023-04-07 15:32:36 - r - INFO: - algo_name DuelingDQN <class 'str'>
|
10 |
-
2023-04-07 15:32:36 - r - INFO: - mode train <class 'str'>
|
11 |
-
2023-04-07 15:32:36 - r - INFO: - mp_backend ray <class 'str'>
|
12 |
-
2023-04-07 15:32:36 - r - INFO: - seed 1 <class 'int'>
|
13 |
-
2023-04-07 15:32:36 - r - INFO: - device cpu <class 'str'>
|
14 |
-
2023-04-07 15:32:36 - r - INFO: - train_eps 400 <class 'int'>
|
15 |
-
2023-04-07 15:32:36 - r - INFO: - test_eps 10 <class 'int'>
|
16 |
-
2023-04-07 15:32:36 - r - INFO: - eval_eps 10 <class 'int'>
|
17 |
-
2023-04-07 15:32:36 - r - INFO: - eval_per_episode 5 <class 'int'>
|
18 |
-
2023-04-07 15:32:36 - r - INFO: - max_steps 200 <class 'int'>
|
19 |
-
2023-04-07 15:32:36 - r - INFO: - load_checkpoint 0 <class 'bool'>
|
20 |
-
2023-04-07 15:32:36 - r - INFO: - load_path Train_CartPole-v1_DQN_20221026-054757 <class 'str'>
|
21 |
-
2023-04-07 15:32:36 - r - INFO: - show_fig 0 <class 'bool'>
|
22 |
-
2023-04-07 15:32:36 - r - INFO: - save_fig 1 <class 'bool'>
|
23 |
-
2023-04-07 15:32:36 - r - INFO: - n_workers 2 <class 'int'>
|
24 |
-
2023-04-07 15:32:36 - r - INFO: - epsilon_start 0.95 <class 'float'>
|
25 |
-
2023-04-07 15:32:36 - r - INFO: - epsilon_end 0.01 <class 'float'>
|
26 |
-
2023-04-07 15:32:36 - r - INFO: - epsilon_decay 500 <class 'int'>
|
27 |
-
2023-04-07 15:32:36 - r - INFO: - gamma 0.99 <class 'float'>
|
28 |
-
2023-04-07 15:32:36 - r - INFO: - lr 0.0001 <class 'float'>
|
29 |
-
2023-04-07 15:32:36 - r - INFO: - buffer_size 100000 <class 'int'>
|
30 |
-
2023-04-07 15:32:36 - r - INFO: - batch_size 64 <class 'int'>
|
31 |
-
2023-04-07 15:32:36 - r - INFO: - target_update 4 <class 'int'>
|
32 |
-
2023-04-07 15:32:36 - r - INFO: - value_layers [{'layer_type': 'linear', 'layer_dim': ['n_states', 256], 'activation': 'relu'}, {'layer_type': 'linear', 'layer_dim': [256, 256], 'activation': 'relu'}, {'layer_type': 'linear', 'layer_dim': [256, 'n_actions'], 'activation': 'none'}] <class 'str'>
|
33 |
-
2023-04-07 15:32:36 - r - INFO: - hidden_dim 256 <class 'int'>
|
34 |
-
2023-04-07 15:32:36 - r - INFO: - task_dir /media/disk/gsc/joyrl-offline/tasks/Train_CartPole-v1_DuelingDQN_20230407-153236 <class 'str'>
|
35 |
-
2023-04-07 15:32:36 - r - INFO: - res_dir /media/disk/gsc/joyrl-offline/tasks/Train_CartPole-v1_DuelingDQN_20230407-153236/results <class 'str'>
|
36 |
-
2023-04-07 15:32:36 - r - INFO: - log_dir /media/disk/gsc/joyrl-offline/tasks/Train_CartPole-v1_DuelingDQN_20230407-153236/logs <class 'str'>
|
37 |
-
2023-04-07 15:32:36 - r - INFO: - traj_dir /media/disk/gsc/joyrl-offline/tasks/Train_CartPole-v1_DuelingDQN_20230407-153236/traj <class 'str'>
|
38 |
-
2023-04-07 15:32:36 - r - INFO: - video_dir /media/disk/gsc/joyrl-offline/tasks/Train_CartPole-v1_DuelingDQN_20230407-153236/videos <class 'str'>
|
39 |
-
2023-04-07 15:32:36 - r - INFO: - ================================================================================
|
40 |
-
2023-04-07 15:32:39 - r - INFO: - n_states: 4, n_actions: 2
|
41 |
-
2023-04-07 15:32:39 - r - INFO: - Start training!
|
42 |
-
2023-04-07 15:32:39 - r - INFO: - Env: CartPole-v1, Algorithm: DuelingDQN, Device: cpu
|
43 |
-
2023-04-07 15:40:31 - r - INFO: - Finish training!
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
CartPole-v1/Train_CartPole-v1_DuelingDQN_ray_20230407-153236/models/checkpoint.pt
DELETED
@@ -1,3 +0,0 @@
|
|
1 |
-
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:03f1262598e3d636dd22e3b2fc0dfe52bf7a55348d54f51f02a8410682ec5a18
|
3 |
-
size 537607
|
|
|
|
|
|
|
|
CartPole-v1/Train_CartPole-v1_DuelingDQN_ray_20230407-153236/results/learning_curve.png
DELETED
Binary file (62.6 kB)
|
|
CartPole-v1/Train_CartPole-v1_DuelingDQN_ray_20230407-153236/results/res.csv
DELETED
@@ -1,401 +0,0 @@
|
|
1 |
-
episodes,rewards
|
2 |
-
0,18.0
|
3 |
-
1,18.0
|
4 |
-
2,19.0
|
5 |
-
3,28.0
|
6 |
-
4,17.0
|
7 |
-
5,15.0
|
8 |
-
6,13.0
|
9 |
-
7,15.0
|
10 |
-
8,38.0
|
11 |
-
9,31.0
|
12 |
-
10,11.0
|
13 |
-
11,31.0
|
14 |
-
12,10.0
|
15 |
-
13,17.0
|
16 |
-
14,14.0
|
17 |
-
15,13.0
|
18 |
-
16,21.0
|
19 |
-
17,15.0
|
20 |
-
18,9.0
|
21 |
-
19,10.0
|
22 |
-
20,22.0
|
23 |
-
21,19.0
|
24 |
-
22,11.0
|
25 |
-
23,13.0
|
26 |
-
24,20.0
|
27 |
-
25,15.0
|
28 |
-
26,14.0
|
29 |
-
27,12.0
|
30 |
-
28,10.0
|
31 |
-
29,11.0
|
32 |
-
30,12.0
|
33 |
-
31,14.0
|
34 |
-
32,9.0
|
35 |
-
33,10.0
|
36 |
-
34,16.0
|
37 |
-
35,13.0
|
38 |
-
36,15.0
|
39 |
-
37,12.0
|
40 |
-
38,14.0
|
41 |
-
39,10.0
|
42 |
-
40,14.0
|
43 |
-
41,10.0
|
44 |
-
42,11.0
|
45 |
-
43,16.0
|
46 |
-
44,16.0
|
47 |
-
45,12.0
|
48 |
-
46,15.0
|
49 |
-
47,19.0
|
50 |
-
48,15.0
|
51 |
-
49,20.0
|
52 |
-
50,15.0
|
53 |
-
51,11.0
|
54 |
-
52,13.0
|
55 |
-
53,12.0
|
56 |
-
54,12.0
|
57 |
-
55,12.0
|
58 |
-
56,12.0
|
59 |
-
57,12.0
|
60 |
-
58,11.0
|
61 |
-
59,10.0
|
62 |
-
60,13.0
|
63 |
-
61,11.0
|
64 |
-
62,12.0
|
65 |
-
63,9.0
|
66 |
-
64,11.0
|
67 |
-
65,11.0
|
68 |
-
66,10.0
|
69 |
-
67,9.0
|
70 |
-
68,11.0
|
71 |
-
69,11.0
|
72 |
-
70,11.0
|
73 |
-
71,12.0
|
74 |
-
72,10.0
|
75 |
-
73,12.0
|
76 |
-
74,9.0
|
77 |
-
75,10.0
|
78 |
-
76,9.0
|
79 |
-
77,10.0
|
80 |
-
78,9.0
|
81 |
-
79,10.0
|
82 |
-
80,11.0
|
83 |
-
81,9.0
|
84 |
-
82,12.0
|
85 |
-
83,11.0
|
86 |
-
84,12.0
|
87 |
-
85,10.0
|
88 |
-
86,9.0
|
89 |
-
87,11.0
|
90 |
-
88,9.0
|
91 |
-
89,9.0
|
92 |
-
90,10.0
|
93 |
-
91,15.0
|
94 |
-
92,11.0
|
95 |
-
93,9.0
|
96 |
-
94,10.0
|
97 |
-
95,16.0
|
98 |
-
96,13.0
|
99 |
-
97,9.0
|
100 |
-
98,10.0
|
101 |
-
99,10.0
|
102 |
-
100,13.0
|
103 |
-
101,11.0
|
104 |
-
102,10.0
|
105 |
-
103,9.0
|
106 |
-
104,13.0
|
107 |
-
105,16.0
|
108 |
-
106,12.0
|
109 |
-
107,9.0
|
110 |
-
108,11.0
|
111 |
-
109,9.0
|
112 |
-
110,13.0
|
113 |
-
111,11.0
|
114 |
-
112,18.0
|
115 |
-
113,13.0
|
116 |
-
114,9.0
|
117 |
-
115,12.0
|
118 |
-
116,10.0
|
119 |
-
117,10.0
|
120 |
-
118,10.0
|
121 |
-
119,13.0
|
122 |
-
120,10.0
|
123 |
-
121,11.0
|
124 |
-
122,10.0
|
125 |
-
123,10.0
|
126 |
-
124,9.0
|
127 |
-
125,10.0
|
128 |
-
126,11.0
|
129 |
-
127,14.0
|
130 |
-
128,12.0
|
131 |
-
129,9.0
|
132 |
-
130,11.0
|
133 |
-
131,14.0
|
134 |
-
132,11.0
|
135 |
-
133,10.0
|
136 |
-
134,13.0
|
137 |
-
135,9.0
|
138 |
-
136,11.0
|
139 |
-
137,11.0
|
140 |
-
138,11.0
|
141 |
-
139,9.0
|
142 |
-
140,10.0
|
143 |
-
141,9.0
|
144 |
-
142,9.0
|
145 |
-
143,12.0
|
146 |
-
144,9.0
|
147 |
-
145,10.0
|
148 |
-
146,9.0
|
149 |
-
147,10.0
|
150 |
-
148,9.0
|
151 |
-
149,10.0
|
152 |
-
150,9.0
|
153 |
-
151,12.0
|
154 |
-
152,9.0
|
155 |
-
153,9.0
|
156 |
-
154,10.0
|
157 |
-
155,9.0
|
158 |
-
156,10.0
|
159 |
-
157,13.0
|
160 |
-
158,14.0
|
161 |
-
159,10.0
|
162 |
-
160,12.0
|
163 |
-
161,11.0
|
164 |
-
162,10.0
|
165 |
-
163,11.0
|
166 |
-
164,11.0
|
167 |
-
165,9.0
|
168 |
-
166,31.0
|
169 |
-
167,39.0
|
170 |
-
168,18.0
|
171 |
-
169,24.0
|
172 |
-
170,18.0
|
173 |
-
171,18.0
|
174 |
-
172,24.0
|
175 |
-
173,16.0
|
176 |
-
174,25.0
|
177 |
-
175,23.0
|
178 |
-
176,26.0
|
179 |
-
177,23.0
|
180 |
-
178,26.0
|
181 |
-
179,21.0
|
182 |
-
180,28.0
|
183 |
-
181,20.0
|
184 |
-
182,22.0
|
185 |
-
183,30.0
|
186 |
-
184,27.0
|
187 |
-
185,34.0
|
188 |
-
186,31.0
|
189 |
-
187,39.0
|
190 |
-
188,29.0
|
191 |
-
189,29.0
|
192 |
-
190,37.0
|
193 |
-
191,27.0
|
194 |
-
192,36.0
|
195 |
-
193,34.0
|
196 |
-
194,46.0
|
197 |
-
195,35.0
|
198 |
-
196,52.0
|
199 |
-
197,32.0
|
200 |
-
198,30.0
|
201 |
-
199,69.0
|
202 |
-
200,38.0
|
203 |
-
201,39.0
|
204 |
-
202,57.0
|
205 |
-
203,38.0
|
206 |
-
204,68.0
|
207 |
-
205,47.0
|
208 |
-
206,45.0
|
209 |
-
207,63.0
|
210 |
-
208,47.0
|
211 |
-
209,86.0
|
212 |
-
210,67.0
|
213 |
-
211,60.0
|
214 |
-
212,48.0
|
215 |
-
213,55.0
|
216 |
-
214,95.0
|
217 |
-
215,58.0
|
218 |
-
216,70.0
|
219 |
-
217,58.0
|
220 |
-
218,42.0
|
221 |
-
219,69.0
|
222 |
-
220,47.0
|
223 |
-
221,109.0
|
224 |
-
222,70.0
|
225 |
-
223,80.0
|
226 |
-
224,77.0
|
227 |
-
225,61.0
|
228 |
-
226,72.0
|
229 |
-
227,55.0
|
230 |
-
228,77.0
|
231 |
-
229,61.0
|
232 |
-
230,79.0
|
233 |
-
231,66.0
|
234 |
-
232,68.0
|
235 |
-
233,99.0
|
236 |
-
234,143.0
|
237 |
-
235,82.0
|
238 |
-
236,85.0
|
239 |
-
237,103.0
|
240 |
-
238,99.0
|
241 |
-
239,93.0
|
242 |
-
240,100.0
|
243 |
-
241,101.0
|
244 |
-
242,151.0
|
245 |
-
243,195.0
|
246 |
-
244,100.0
|
247 |
-
245,99.0
|
248 |
-
246,127.0
|
249 |
-
247,105.0
|
250 |
-
248,127.0
|
251 |
-
249,142.0
|
252 |
-
250,169.0
|
253 |
-
251,108.0
|
254 |
-
252,128.0
|
255 |
-
253,123.0
|
256 |
-
254,134.0
|
257 |
-
255,126.0
|
258 |
-
256,114.0
|
259 |
-
257,200.0
|
260 |
-
258,123.0
|
261 |
-
259,159.0
|
262 |
-
260,125.0
|
263 |
-
261,142.0
|
264 |
-
262,178.0
|
265 |
-
263,96.0
|
266 |
-
264,200.0
|
267 |
-
265,200.0
|
268 |
-
266,113.0
|
269 |
-
267,90.0
|
270 |
-
268,200.0
|
271 |
-
269,122.0
|
272 |
-
270,140.0
|
273 |
-
271,116.0
|
274 |
-
272,128.0
|
275 |
-
273,190.0
|
276 |
-
274,170.0
|
277 |
-
275,96.0
|
278 |
-
276,126.0
|
279 |
-
277,200.0
|
280 |
-
278,88.0
|
281 |
-
279,76.0
|
282 |
-
280,74.0
|
283 |
-
281,84.0
|
284 |
-
282,130.0
|
285 |
-
283,200.0
|
286 |
-
284,86.0
|
287 |
-
285,153.0
|
288 |
-
286,200.0
|
289 |
-
287,59.0
|
290 |
-
288,135.0
|
291 |
-
289,62.0
|
292 |
-
290,200.0
|
293 |
-
291,182.0
|
294 |
-
292,138.0
|
295 |
-
293,200.0
|
296 |
-
294,118.0
|
297 |
-
295,50.0
|
298 |
-
296,74.0
|
299 |
-
297,62.0
|
300 |
-
298,200.0
|
301 |
-
299,124.0
|
302 |
-
300,111.0
|
303 |
-
301,61.0
|
304 |
-
302,132.0
|
305 |
-
303,200.0
|
306 |
-
304,80.0
|
307 |
-
305,60.0
|
308 |
-
306,77.0
|
309 |
-
307,47.0
|
310 |
-
308,80.0
|
311 |
-
309,64.0
|
312 |
-
310,96.0
|
313 |
-
311,200.0
|
314 |
-
312,200.0
|
315 |
-
313,133.0
|
316 |
-
314,200.0
|
317 |
-
315,188.0
|
318 |
-
316,132.0
|
319 |
-
317,150.0
|
320 |
-
318,135.0
|
321 |
-
319,184.0
|
322 |
-
320,138.0
|
323 |
-
321,176.0
|
324 |
-
322,200.0
|
325 |
-
323,161.0
|
326 |
-
324,158.0
|
327 |
-
325,142.0
|
328 |
-
326,133.0
|
329 |
-
327,151.0
|
330 |
-
328,143.0
|
331 |
-
329,160.0
|
332 |
-
330,150.0
|
333 |
-
331,134.0
|
334 |
-
332,147.0
|
335 |
-
333,132.0
|
336 |
-
334,143.0
|
337 |
-
335,137.0
|
338 |
-
336,155.0
|
339 |
-
337,138.0
|
340 |
-
338,138.0
|
341 |
-
339,130.0
|
342 |
-
340,148.0
|
343 |
-
341,146.0
|
344 |
-
342,152.0
|
345 |
-
343,135.0
|
346 |
-
344,175.0
|
347 |
-
345,153.0
|
348 |
-
346,155.0
|
349 |
-
347,131.0
|
350 |
-
348,156.0
|
351 |
-
349,138.0
|
352 |
-
350,151.0
|
353 |
-
351,162.0
|
354 |
-
352,200.0
|
355 |
-
353,175.0
|
356 |
-
354,156.0
|
357 |
-
355,145.0
|
358 |
-
356,168.0
|
359 |
-
357,200.0
|
360 |
-
358,181.0
|
361 |
-
359,145.0
|
362 |
-
360,189.0
|
363 |
-
361,200.0
|
364 |
-
362,144.0
|
365 |
-
363,200.0
|
366 |
-
364,178.0
|
367 |
-
365,200.0
|
368 |
-
366,179.0
|
369 |
-
367,200.0
|
370 |
-
368,177.0
|
371 |
-
369,200.0
|
372 |
-
370,185.0
|
373 |
-
371,195.0
|
374 |
-
372,200.0
|
375 |
-
373,200.0
|
376 |
-
374,190.0
|
377 |
-
375,200.0
|
378 |
-
376,200.0
|
379 |
-
377,200.0
|
380 |
-
378,200.0
|
381 |
-
379,200.0
|
382 |
-
380,200.0
|
383 |
-
381,200.0
|
384 |
-
382,170.0
|
385 |
-
383,173.0
|
386 |
-
384,162.0
|
387 |
-
385,162.0
|
388 |
-
386,149.0
|
389 |
-
387,173.0
|
390 |
-
388,200.0
|
391 |
-
389,200.0
|
392 |
-
390,200.0
|
393 |
-
391,156.0
|
394 |
-
392,157.0
|
395 |
-
393,169.0
|
396 |
-
394,182.0
|
397 |
-
395,154.0
|
398 |
-
396,200.0
|
399 |
-
397,200.0
|
400 |
-
398,200.0
|
401 |
-
399,200.0
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
CartPole-v1/{Train_CartPole-v1_DuelingDQN_ray_20230407-153236 → Train_ray_CartPole-v1_DuelingDQN_20230517-224129}/config.yaml
RENAMED
@@ -1,47 +1,44 @@
|
|
1 |
general_cfg:
|
2 |
algo_name: DuelingDQN
|
|
|
3 |
device: cpu
|
4 |
-
env_name:
|
5 |
-
eval_eps: 10
|
6 |
-
eval_per_episode: 5
|
7 |
load_checkpoint: false
|
8 |
-
|
9 |
-
|
|
|
|
|
10 |
mode: train
|
|
|
11 |
mp_backend: ray
|
12 |
n_workers: 2
|
13 |
-
|
14 |
-
|
15 |
-
render_mode: human
|
16 |
-
save_fig: true
|
17 |
seed: 1
|
18 |
-
show_fig: false
|
19 |
-
test_eps: 10
|
20 |
-
train_eps: 400
|
21 |
-
wrapper: null
|
22 |
algo_cfg:
|
23 |
batch_size: 64
|
24 |
buffer_size: 100000
|
|
|
|
|
25 |
epsilon_decay: 500
|
26 |
epsilon_end: 0.01
|
27 |
epsilon_start: 0.95
|
28 |
-
gamma: 0.
|
29 |
-
hidden_dim: 256
|
30 |
lr: 0.0001
|
31 |
target_update: 4
|
32 |
value_layers:
|
33 |
- activation: relu
|
34 |
layer_dim:
|
35 |
-
- n_states
|
36 |
- 256
|
37 |
layer_type: linear
|
38 |
- activation: relu
|
39 |
layer_dim:
|
40 |
- 256
|
41 |
-
- 256
|
42 |
-
layer_type: linear
|
43 |
-
- activation: none
|
44 |
-
layer_dim:
|
45 |
-
- 256
|
46 |
-
- n_actions
|
47 |
layer_type: linear
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
general_cfg:
|
2 |
algo_name: DuelingDQN
|
3 |
+
collect_traj: false
|
4 |
device: cpu
|
5 |
+
env_name: gym
|
|
|
|
|
6 |
load_checkpoint: false
|
7 |
+
load_model_step: best
|
8 |
+
load_path: Train_single_CartPole-v1_DQN_20230515-211721
|
9 |
+
max_episode: 100
|
10 |
+
max_step: 200
|
11 |
mode: train
|
12 |
+
model_save_fre: 500
|
13 |
mp_backend: ray
|
14 |
n_workers: 2
|
15 |
+
online_eval: true
|
16 |
+
online_eval_episode: 10
|
|
|
|
|
17 |
seed: 1
|
|
|
|
|
|
|
|
|
18 |
algo_cfg:
|
19 |
batch_size: 64
|
20 |
buffer_size: 100000
|
21 |
+
buffer_type: REPLAY_QUE
|
22 |
+
dueling: true
|
23 |
epsilon_decay: 500
|
24 |
epsilon_end: 0.01
|
25 |
epsilon_start: 0.95
|
26 |
+
gamma: 0.95
|
|
|
27 |
lr: 0.0001
|
28 |
target_update: 4
|
29 |
value_layers:
|
30 |
- activation: relu
|
31 |
layer_dim:
|
|
|
32 |
- 256
|
33 |
layer_type: linear
|
34 |
- activation: relu
|
35 |
layer_dim:
|
36 |
- 256
|
|
|
|
|
|
|
|
|
|
|
|
|
37 |
layer_type: linear
|
38 |
+
env_cfg:
|
39 |
+
id: CartPole-v1
|
40 |
+
ignore_params:
|
41 |
+
- wrapper
|
42 |
+
- ignore_params
|
43 |
+
render_mode: null
|
44 |
+
wrapper: null
|
CartPole-v1/Train_ray_CartPole-v1_DuelingDQN_20230517-224129/logs/log.txt
ADDED
@@ -0,0 +1,169 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
2023-05-17 22:41:29 - SimpleLog - INFO: - General Configs:
|
2 |
+
2023-05-17 22:41:29 - SimpleLog - INFO: - ================================================================================
|
3 |
+
2023-05-17 22:41:29 - SimpleLog - INFO: - Name Value Type
|
4 |
+
2023-05-17 22:41:29 - SimpleLog - INFO: - env_name gym <class 'str'>
|
5 |
+
2023-05-17 22:41:29 - SimpleLog - INFO: - algo_name DuelingDQN <class 'str'>
|
6 |
+
2023-05-17 22:41:29 - SimpleLog - INFO: - mode train <class 'str'>
|
7 |
+
2023-05-17 22:41:29 - SimpleLog - INFO: - device cpu <class 'str'>
|
8 |
+
2023-05-17 22:41:29 - SimpleLog - INFO: - seed 1 <class 'int'>
|
9 |
+
2023-05-17 22:41:29 - SimpleLog - INFO: - max_episode 100 <class 'int'>
|
10 |
+
2023-05-17 22:41:29 - SimpleLog - INFO: - max_step 200 <class 'int'>
|
11 |
+
2023-05-17 22:41:29 - SimpleLog - INFO: - collect_traj 0 <class 'bool'>
|
12 |
+
2023-05-17 22:41:29 - SimpleLog - INFO: - mp_backend ray <class 'str'>
|
13 |
+
2023-05-17 22:41:29 - SimpleLog - INFO: - n_workers 2 <class 'int'>
|
14 |
+
2023-05-17 22:41:29 - SimpleLog - INFO: - online_eval 1 <class 'bool'>
|
15 |
+
2023-05-17 22:41:29 - SimpleLog - INFO: - online_eval_episode 10 <class 'int'>
|
16 |
+
2023-05-17 22:41:29 - SimpleLog - INFO: - model_save_fre 500 <class 'int'>
|
17 |
+
2023-05-17 22:41:29 - SimpleLog - INFO: - load_checkpoint 0 <class 'bool'>
|
18 |
+
2023-05-17 22:41:29 - SimpleLog - INFO: - load_path Train_single_CartPole-v1_DQN_20230515-211721 <class 'str'>
|
19 |
+
2023-05-17 22:41:29 - SimpleLog - INFO: - load_model_step best <class 'str'>
|
20 |
+
2023-05-17 22:41:29 - SimpleLog - INFO: - ================================================================================
|
21 |
+
2023-05-17 22:41:29 - SimpleLog - INFO: - Algo Configs:
|
22 |
+
2023-05-17 22:41:29 - SimpleLog - INFO: - ================================================================================
|
23 |
+
2023-05-17 22:41:29 - SimpleLog - INFO: - Name Value Type
|
24 |
+
2023-05-17 22:41:29 - SimpleLog - INFO: - dueling 1 <class 'bool'>
|
25 |
+
2023-05-17 22:41:29 - SimpleLog - INFO: - epsilon_start 0.95 <class 'float'>
|
26 |
+
2023-05-17 22:41:29 - SimpleLog - INFO: - epsilon_end 0.01 <class 'float'>
|
27 |
+
2023-05-17 22:41:29 - SimpleLog - INFO: - epsilon_decay 500 <class 'int'>
|
28 |
+
2023-05-17 22:41:29 - SimpleLog - INFO: - gamma 0.95 <class 'float'>
|
29 |
+
2023-05-17 22:41:29 - SimpleLog - INFO: - lr 0.0001 <class 'float'>
|
30 |
+
2023-05-17 22:41:29 - SimpleLog - INFO: - buffer_size 100000 <class 'int'>
|
31 |
+
2023-05-17 22:41:29 - SimpleLog - INFO: - batch_size 64 <class 'int'>
|
32 |
+
2023-05-17 22:41:29 - SimpleLog - INFO: - target_update 4 <class 'int'>
|
33 |
+
2023-05-17 22:41:29 - SimpleLog - INFO: - value_layers [{'layer_type': 'linear', 'layer_dim': [256], 'activation': 'relu'}, {'layer_type': 'linear', 'layer_dim': [256], 'activation': 'relu'}] <class 'str'>
|
34 |
+
2023-05-17 22:41:29 - SimpleLog - INFO: - buffer_type REPLAY_QUE <class 'str'>
|
35 |
+
2023-05-17 22:41:29 - SimpleLog - INFO: - ================================================================================
|
36 |
+
2023-05-17 22:41:29 - SimpleLog - INFO: - Env Configs:
|
37 |
+
2023-05-17 22:41:29 - SimpleLog - INFO: - ================================================================================
|
38 |
+
2023-05-17 22:41:29 - SimpleLog - INFO: - Name Value Type
|
39 |
+
2023-05-17 22:41:29 - SimpleLog - INFO: - id CartPole-v1 <class 'str'>
|
40 |
+
2023-05-17 22:41:29 - SimpleLog - INFO: - render_mode None <class 'str'>
|
41 |
+
2023-05-17 22:41:29 - SimpleLog - INFO: - wrapper None <class 'str'>
|
42 |
+
2023-05-17 22:41:29 - SimpleLog - INFO: - ignore_params ['wrapper', 'ignore_params'] <class 'str'>
|
43 |
+
2023-05-17 22:41:29 - SimpleLog - INFO: - ================================================================================
|
44 |
+
2023-05-17 22:41:35 - SimpleLog - INFO: - obs_space: Box([-4.8000002e+00 -3.4028235e+38 -4.1887903e-01 -3.4028235e+38], [4.8000002e+00 3.4028235e+38 4.1887903e-01 3.4028235e+38], (4,), float32), n_actions: Discrete(2)
|
45 |
+
2023-05-17 22:41:38 - RayLog - INFO: - Worker 1 finished episode 0 with reward 22.0 in 22 steps
|
46 |
+
2023-05-17 22:41:38 - RayLog - INFO: - Worker 0 finished episode 0 with reward 23.0 in 23 steps
|
47 |
+
2023-05-17 22:41:38 - RayLog - INFO: - Worker 0 finished episode 2 with reward 10.0 in 10 steps
|
48 |
+
2023-05-17 22:41:38 - RayLog - INFO: - Worker 0 finished episode 3 with reward 9.0 in 9 steps
|
49 |
+
2023-05-17 22:41:38 - RayLog - INFO: - Worker 1 finished episode 2 with reward 29.0 in 29 steps
|
50 |
+
2023-05-17 22:41:38 - RayLog - INFO: - Worker 0 finished episode 4 with reward 11.0 in 11 steps
|
51 |
+
2023-05-17 22:41:39 - RayLog - INFO: - Worker 0 finished episode 6 with reward 15.0 in 15 steps
|
52 |
+
2023-05-17 22:41:39 - RayLog - INFO: - Worker 1 finished episode 5 with reward 18.0 in 18 steps
|
53 |
+
2023-05-17 22:41:39 - RayLog - INFO: - Worker 0 finished episode 7 with reward 9.0 in 9 steps
|
54 |
+
2023-05-17 22:41:39 - RayLog - INFO: - Worker 1 finished episode 8 with reward 11.0 in 11 steps
|
55 |
+
2023-05-17 22:41:39 - RayLog - INFO: - Worker 1 finished episode 10 with reward 13.0 in 13 steps
|
56 |
+
2023-05-17 22:41:39 - RayLog - INFO: - Worker 0 finished episode 9 with reward 25.0 in 25 steps
|
57 |
+
2023-05-17 22:41:40 - RayLog - INFO: - Worker 0 finished episode 12 with reward 12.0 in 12 steps
|
58 |
+
2023-05-17 22:41:40 - RayLog - INFO: - Worker 0 finished episode 13 with reward 10.0 in 10 steps
|
59 |
+
2023-05-17 22:41:40 - RayLog - INFO: - Worker 1 finished episode 11 with reward 33.0 in 33 steps
|
60 |
+
2023-05-17 22:41:40 - RayLog - INFO: - Worker 0 finished episode 14 with reward 9.0 in 9 steps
|
61 |
+
2023-05-17 22:41:40 - RayLog - INFO: - Worker 1 finished episode 15 with reward 10.0 in 10 steps
|
62 |
+
2023-05-17 22:41:40 - RayLog - INFO: - Worker 0 finished episode 16 with reward 13.0 in 13 steps
|
63 |
+
2023-05-17 22:41:40 - RayLog - INFO: - Worker 1 finished episode 17 with reward 16.0 in 16 steps
|
64 |
+
2023-05-17 22:41:40 - RayLog - INFO: - Worker 0 finished episode 18 with reward 9.0 in 9 steps
|
65 |
+
2023-05-17 22:41:41 - RayLog - INFO: - Worker 0 finished episode 20 with reward 11.0 in 11 steps
|
66 |
+
2023-05-17 22:41:41 - RayLog - INFO: - Worker 1 finished episode 19 with reward 16.0 in 16 steps
|
67 |
+
2023-05-17 22:41:41 - RayLog - INFO: - Worker 0 finished episode 21 with reward 18.0 in 18 steps
|
68 |
+
2023-05-17 22:41:41 - RayLog - INFO: - Worker 1 finished episode 22 with reward 18.0 in 18 steps
|
69 |
+
2023-05-17 22:41:41 - RayLog - INFO: - Worker 0 finished episode 23 with reward 11.0 in 11 steps
|
70 |
+
2023-05-17 22:41:41 - RayLog - INFO: - Worker 1 finished episode 24 with reward 9.0 in 9 steps
|
71 |
+
2023-05-17 22:41:41 - RayLog - INFO: - Worker 1 finished episode 26 with reward 9.0 in 9 steps
|
72 |
+
2023-05-17 22:41:41 - RayLog - INFO: - Worker 0 finished episode 25 with reward 10.0 in 10 steps
|
73 |
+
2023-05-17 22:41:42 - RayLog - INFO: - Worker 0 finished episode 28 with reward 11.0 in 11 steps
|
74 |
+
2023-05-17 22:41:42 - RayLog - INFO: - Worker 1 finished episode 27 with reward 12.0 in 12 steps
|
75 |
+
2023-05-17 22:41:42 - RayLog - INFO: - Worker 0 finished episode 29 with reward 15.0 in 15 steps
|
76 |
+
2023-05-17 22:41:42 - RayLog - INFO: - Worker 1 finished episode 30 with reward 19.0 in 19 steps
|
77 |
+
2023-05-17 22:41:42 - RayLog - INFO: - Worker 0 finished episode 31 with reward 10.0 in 10 steps
|
78 |
+
2023-05-17 22:41:42 - RayLog - INFO: - Worker 1 finished episode 32 with reward 13.0 in 13 steps
|
79 |
+
2023-05-17 22:41:44 - RayLog - INFO: - update_step: 500, online_eval_reward: 200.000
|
80 |
+
2023-05-17 22:41:44 - RayLog - INFO: - current update step obtain a better online_eval_reward: 200.000, save the best model!
|
81 |
+
2023-05-17 22:41:45 - RayLog - INFO: - Worker 0 finished episode 33 with reward 97.0 in 97 steps
|
82 |
+
2023-05-17 22:41:45 - RayLog - INFO: - Worker 1 finished episode 34 with reward 96.0 in 96 steps
|
83 |
+
2023-05-17 22:41:46 - RayLog - INFO: - Worker 1 finished episode 36 with reward 24.0 in 24 steps
|
84 |
+
2023-05-17 22:41:46 - RayLog - INFO: - Worker 0 finished episode 35 with reward 34.0 in 34 steps
|
85 |
+
2023-05-17 22:41:46 - RayLog - INFO: - Worker 1 finished episode 37 with reward 17.0 in 17 steps
|
86 |
+
2023-05-17 22:41:46 - RayLog - INFO: - Worker 0 finished episode 38 with reward 23.0 in 23 steps
|
87 |
+
2023-05-17 22:41:46 - RayLog - INFO: - Worker 1 finished episode 39 with reward 16.0 in 16 steps
|
88 |
+
2023-05-17 22:41:47 - RayLog - INFO: - Worker 1 finished episode 41 with reward 17.0 in 17 steps
|
89 |
+
2023-05-17 22:41:47 - RayLog - INFO: - Worker 0 finished episode 40 with reward 24.0 in 24 steps
|
90 |
+
2023-05-17 22:41:47 - RayLog - INFO: - Worker 1 finished episode 42 with reward 21.0 in 21 steps
|
91 |
+
2023-05-17 22:41:47 - RayLog - INFO: - Worker 0 finished episode 43 with reward 29.0 in 29 steps
|
92 |
+
2023-05-17 22:41:47 - RayLog - INFO: - Worker 1 finished episode 44 with reward 22.0 in 22 steps
|
93 |
+
2023-05-17 22:41:49 - RayLog - INFO: - update_step: 1000, online_eval_reward: 100.000
|
94 |
+
2023-05-17 22:41:49 - RayLog - INFO: - Worker 0 finished episode 45 with reward 84.0 in 84 steps
|
95 |
+
2023-05-17 22:41:49 - RayLog - INFO: - Worker 1 finished episode 46 with reward 75.0 in 75 steps
|
96 |
+
2023-05-17 22:41:50 - RayLog - INFO: - Worker 1 finished episode 48 with reward 52.0 in 52 steps
|
97 |
+
2023-05-17 22:41:50 - RayLog - INFO: - Worker 0 finished episode 47 with reward 66.0 in 66 steps
|
98 |
+
2023-05-17 22:41:51 - RayLog - INFO: - Worker 1 finished episode 49 with reward 63.0 in 63 steps
|
99 |
+
2023-05-17 22:41:52 - RayLog - INFO: - Worker 0 finished episode 50 with reward 94.0 in 94 steps
|
100 |
+
2023-05-17 22:41:53 - RayLog - INFO: - Worker 1 finished episode 51 with reward 75.0 in 75 steps
|
101 |
+
2023-05-17 22:41:54 - RayLog - INFO: - update_step: 1500, online_eval_reward: 120.000
|
102 |
+
2023-05-17 22:41:54 - RayLog - INFO: - Worker 0 finished episode 52 with reward 102.0 in 102 steps
|
103 |
+
2023-05-17 22:41:55 - RayLog - INFO: - Worker 1 finished episode 53 with reward 93.0 in 93 steps
|
104 |
+
2023-05-17 22:41:57 - RayLog - INFO: - Worker 1 finished episode 55 with reward 126.0 in 126 steps
|
105 |
+
2023-05-17 22:41:58 - RayLog - INFO: - Worker 0 finished episode 54 with reward 200.0 in 200 steps
|
106 |
+
2023-05-17 22:41:59 - RayLog - INFO: - update_step: 2000, online_eval_reward: 200.000
|
107 |
+
2023-05-17 22:42:01 - RayLog - INFO: - Worker 1 finished episode 56 with reward 200.0 in 200 steps
|
108 |
+
2023-05-17 22:42:02 - RayLog - INFO: - Worker 0 finished episode 57 with reward 200.0 in 200 steps
|
109 |
+
2023-05-17 22:42:04 - RayLog - INFO: - update_step: 2500, online_eval_reward: 167.000
|
110 |
+
2023-05-17 22:42:05 - RayLog - INFO: - Worker 1 finished episode 58 with reward 200.0 in 200 steps
|
111 |
+
2023-05-17 22:42:06 - RayLog - INFO: - Worker 0 finished episode 59 with reward 168.0 in 168 steps
|
112 |
+
2023-05-17 22:42:09 - RayLog - INFO: - Worker 0 finished episode 61 with reward 164.0 in 164 steps
|
113 |
+
2023-05-17 22:42:09 - RayLog - INFO: - update_step: 3000, online_eval_reward: 145.000
|
114 |
+
2023-05-17 22:42:09 - RayLog - INFO: - Worker 1 finished episode 60 with reward 189.0 in 189 steps
|
115 |
+
2023-05-17 22:42:12 - RayLog - INFO: - Worker 0 finished episode 62 with reward 152.0 in 152 steps
|
116 |
+
2023-05-17 22:42:12 - RayLog - INFO: - Worker 1 finished episode 63 with reward 162.0 in 162 steps
|
117 |
+
2023-05-17 22:42:14 - RayLog - INFO: - update_step: 3500, online_eval_reward: 151.000
|
118 |
+
2023-05-17 22:42:15 - RayLog - INFO: - Worker 0 finished episode 64 with reward 143.0 in 143 steps
|
119 |
+
2023-05-17 22:42:16 - RayLog - INFO: - Worker 1 finished episode 65 with reward 163.0 in 163 steps
|
120 |
+
2023-05-17 22:42:19 - RayLog - INFO: - Worker 0 finished episode 66 with reward 187.0 in 187 steps
|
121 |
+
2023-05-17 22:42:19 - RayLog - INFO: - update_step: 4000, online_eval_reward: 189.000
|
122 |
+
2023-05-17 22:42:20 - RayLog - INFO: - Worker 1 finished episode 67 with reward 200.0 in 200 steps
|
123 |
+
2023-05-17 22:42:22 - RayLog - INFO: - Worker 0 finished episode 68 with reward 173.0 in 173 steps
|
124 |
+
2023-05-17 22:42:23 - RayLog - INFO: - Worker 1 finished episode 69 with reward 170.0 in 170 steps
|
125 |
+
2023-05-17 22:42:24 - RayLog - INFO: - update_step: 4500, online_eval_reward: 178.000
|
126 |
+
2023-05-17 22:42:26 - RayLog - INFO: - Worker 0 finished episode 70 with reward 200.0 in 200 steps
|
127 |
+
2023-05-17 22:42:27 - RayLog - INFO: - Worker 1 finished episode 71 with reward 200.0 in 200 steps
|
128 |
+
2023-05-17 22:42:30 - RayLog - INFO: - update_step: 5000, online_eval_reward: 197.000
|
129 |
+
2023-05-17 22:42:30 - RayLog - INFO: - Worker 0 finished episode 72 with reward 200.0 in 200 steps
|
130 |
+
2023-05-17 22:42:31 - RayLog - INFO: - Worker 1 finished episode 73 with reward 200.0 in 200 steps
|
131 |
+
2023-05-17 22:42:35 - RayLog - INFO: - Worker 0 finished episode 74 with reward 197.0 in 197 steps
|
132 |
+
2023-05-17 22:42:35 - RayLog - INFO: - update_step: 5500, online_eval_reward: 200.000
|
133 |
+
2023-05-17 22:42:36 - RayLog - INFO: - Worker 1 finished episode 75 with reward 200.0 in 200 steps
|
134 |
+
2023-05-17 22:42:39 - RayLog - INFO: - Worker 0 finished episode 76 with reward 200.0 in 200 steps
|
135 |
+
2023-05-17 22:42:40 - RayLog - INFO: - Worker 1 finished episode 77 with reward 200.0 in 200 steps
|
136 |
+
2023-05-17 22:42:40 - RayLog - INFO: - update_step: 6000, online_eval_reward: 200.000
|
137 |
+
2023-05-17 22:42:43 - RayLog - INFO: - Worker 0 finished episode 78 with reward 200.0 in 200 steps
|
138 |
+
2023-05-17 22:42:44 - RayLog - INFO: - Worker 1 finished episode 79 with reward 200.0 in 200 steps
|
139 |
+
2023-05-17 22:42:45 - RayLog - INFO: - update_step: 6500, online_eval_reward: 200.000
|
140 |
+
2023-05-17 22:42:47 - RayLog - INFO: - Worker 0 finished episode 80 with reward 200.0 in 200 steps
|
141 |
+
2023-05-17 22:42:48 - RayLog - INFO: - Worker 1 finished episode 81 with reward 200.0 in 200 steps
|
142 |
+
2023-05-17 22:42:51 - RayLog - INFO: - update_step: 7000, online_eval_reward: 200.000
|
143 |
+
2023-05-17 22:42:52 - RayLog - INFO: - Worker 0 finished episode 82 with reward 200.0 in 200 steps
|
144 |
+
2023-05-17 22:42:53 - RayLog - INFO: - Worker 1 finished episode 83 with reward 200.0 in 200 steps
|
145 |
+
2023-05-17 22:42:56 - RayLog - INFO: - Worker 0 finished episode 84 with reward 200.0 in 200 steps
|
146 |
+
2023-05-17 22:42:56 - RayLog - INFO: - update_step: 7500, online_eval_reward: 200.000
|
147 |
+
2023-05-17 22:42:57 - RayLog - INFO: - Worker 1 finished episode 85 with reward 200.0 in 200 steps
|
148 |
+
2023-05-17 22:43:00 - RayLog - INFO: - Worker 0 finished episode 86 with reward 200.0 in 200 steps
|
149 |
+
2023-05-17 22:43:01 - RayLog - INFO: - Worker 1 finished episode 87 with reward 200.0 in 200 steps
|
150 |
+
2023-05-17 22:43:02 - RayLog - INFO: - update_step: 8000, online_eval_reward: 200.000
|
151 |
+
2023-05-17 22:43:05 - RayLog - INFO: - Worker 0 finished episode 88 with reward 200.0 in 200 steps
|
152 |
+
2023-05-17 22:43:06 - RayLog - INFO: - Worker 1 finished episode 89 with reward 200.0 in 200 steps
|
153 |
+
2023-05-17 22:43:07 - RayLog - INFO: - update_step: 8500, online_eval_reward: 200.000
|
154 |
+
2023-05-17 22:43:09 - RayLog - INFO: - Worker 0 finished episode 90 with reward 200.0 in 200 steps
|
155 |
+
2023-05-17 22:43:10 - RayLog - INFO: - Worker 1 finished episode 91 with reward 200.0 in 200 steps
|
156 |
+
2023-05-17 22:43:12 - RayLog - INFO: - update_step: 9000, online_eval_reward: 200.000
|
157 |
+
2023-05-17 22:43:13 - RayLog - INFO: - Worker 0 finished episode 92 with reward 200.0 in 200 steps
|
158 |
+
2023-05-17 22:43:14 - RayLog - INFO: - Worker 1 finished episode 93 with reward 200.0 in 200 steps
|
159 |
+
2023-05-17 22:43:18 - RayLog - INFO: - Worker 0 finished episode 94 with reward 200.0 in 200 steps
|
160 |
+
2023-05-17 22:43:18 - RayLog - INFO: - update_step: 9500, online_eval_reward: 200.000
|
161 |
+
2023-05-17 22:43:19 - RayLog - INFO: - Worker 1 finished episode 95 with reward 200.0 in 200 steps
|
162 |
+
2023-05-17 22:43:22 - RayLog - INFO: - Worker 0 finished episode 96 with reward 200.0 in 200 steps
|
163 |
+
2023-05-17 22:43:23 - RayLog - INFO: - Worker 1 finished episode 97 with reward 200.0 in 200 steps
|
164 |
+
2023-05-17 22:43:23 - RayLog - INFO: - update_step: 10000, online_eval_reward: 200.000
|
165 |
+
2023-05-17 22:43:26 - RayLog - INFO: - Worker 0 finished episode 98 with reward 200.0 in 200 steps
|
166 |
+
2023-05-17 22:43:27 - RayLog - INFO: - Worker 1 finished episode 99 with reward 200.0 in 200 steps
|
167 |
+
2023-05-17 22:43:29 - RayLog - INFO: - update_step: 10500, online_eval_reward: 200.000
|
168 |
+
2023-05-17 22:43:30 - RayLog - INFO: - Worker 0 finished episode 100 with reward 200.0 in 200 steps
|
169 |
+
2023-05-17 22:43:32 - SimpleLog - INFO: - Finish training! total time consumed: 122.69s
|
CartPole-v1/Train_ray_CartPole-v1_DuelingDQN_20230517-224129/models/1000
ADDED
Binary file (548 kB). View file
|
|
CartPole-v1/Train_ray_CartPole-v1_DuelingDQN_20230517-224129/models/10000
ADDED
Binary file (548 kB). View file
|
|
CartPole-v1/Train_ray_CartPole-v1_DuelingDQN_20230517-224129/models/10500
ADDED
Binary file (548 kB). View file
|
|
CartPole-v1/Train_ray_CartPole-v1_DuelingDQN_20230517-224129/models/1500
ADDED
Binary file (548 kB). View file
|
|
CartPole-v1/Train_ray_CartPole-v1_DuelingDQN_20230517-224129/models/2000
ADDED
Binary file (548 kB). View file
|
|
CartPole-v1/Train_ray_CartPole-v1_DuelingDQN_20230517-224129/models/2500
ADDED
Binary file (548 kB). View file
|
|
CartPole-v1/Train_ray_CartPole-v1_DuelingDQN_20230517-224129/models/3000
ADDED
Binary file (548 kB). View file
|
|
CartPole-v1/Train_ray_CartPole-v1_DuelingDQN_20230517-224129/models/3500
ADDED
Binary file (548 kB). View file
|
|
CartPole-v1/Train_ray_CartPole-v1_DuelingDQN_20230517-224129/models/4000
ADDED
Binary file (548 kB). View file
|
|
CartPole-v1/Train_ray_CartPole-v1_DuelingDQN_20230517-224129/models/4500
ADDED
Binary file (548 kB). View file
|
|
CartPole-v1/Train_ray_CartPole-v1_DuelingDQN_20230517-224129/models/500
ADDED
Binary file (548 kB). View file
|
|
CartPole-v1/Train_ray_CartPole-v1_DuelingDQN_20230517-224129/models/5000
ADDED
Binary file (548 kB). View file
|
|
CartPole-v1/Train_ray_CartPole-v1_DuelingDQN_20230517-224129/models/5500
ADDED
Binary file (548 kB). View file
|
|
CartPole-v1/Train_ray_CartPole-v1_DuelingDQN_20230517-224129/models/6000
ADDED
Binary file (548 kB). View file
|
|
CartPole-v1/Train_ray_CartPole-v1_DuelingDQN_20230517-224129/models/6500
ADDED
Binary file (548 kB). View file
|
|
CartPole-v1/Train_ray_CartPole-v1_DuelingDQN_20230517-224129/models/7000
ADDED
Binary file (548 kB). View file
|
|
CartPole-v1/Train_ray_CartPole-v1_DuelingDQN_20230517-224129/models/7500
ADDED
Binary file (548 kB). View file
|
|
CartPole-v1/Train_ray_CartPole-v1_DuelingDQN_20230517-224129/models/8000
ADDED
Binary file (548 kB). View file
|
|
CartPole-v1/Train_ray_CartPole-v1_DuelingDQN_20230517-224129/models/8500
ADDED
Binary file (548 kB). View file
|
|
CartPole-v1/Train_ray_CartPole-v1_DuelingDQN_20230517-224129/models/9000
ADDED
Binary file (548 kB). View file
|
|
CartPole-v1/Train_ray_CartPole-v1_DuelingDQN_20230517-224129/models/9500
ADDED
Binary file (548 kB). View file
|
|
CartPole-v1/Train_ray_CartPole-v1_DuelingDQN_20230517-224129/models/best
ADDED
Binary file (548 kB). View file
|
|
CartPole-v1/{Test_CartPole-v1_DuelingDQN_mp_20230407-171120/models/checkpoint.pt → Train_ray_CartPole-v1_DuelingDQN_20230517-224129/tb_logs/interact/events.out.tfevents.1684334489.DESKTOP-H34HQIQ.80856.0}
RENAMED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
-
size
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:1c9b4a566642bacd5610c3e7b42d10f1feb9704e2a4cb2c004a7d85f75a0aba9
|
3 |
+
size 40
|
CartPole-v1/{Test_CartPole-v1_DuelingDQN_ray_20230407-165208/models/checkpoint.pt → Train_ray_CartPole-v1_DuelingDQN_20230517-224129/tb_logs/interact/events.out.tfevents.1684334497.DESKTOP-H34HQIQ.84100.0}
RENAMED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
-
size
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:9867609ac2d205c8c66fe7bc380a67b26f152a046fb5e97d523f5b2bf1c147fd
|
3 |
+
size 10028
|