johnjim0816
committed on
Commit
•
ccb908b
1
Parent(s):
989b5fc
update CartPole-v1 DoubleDQN
Browse files
This view is limited to 50 files because it contains too many changes.
See raw diff
- CartPole-v1/Test_CartPole-v1_DoubleDQN_20221122-125611/config.yaml +0 -40
- CartPole-v1/Test_CartPole-v1_DoubleDQN_20221122-125611/logs/log.txt +0 -14
- CartPole-v1/Test_CartPole-v1_DoubleDQN_20221122-125611/results/learning_curve.png +0 -0
- CartPole-v1/Test_CartPole-v1_DoubleDQN_20221122-125611/results/res.csv +0 -11
- CartPole-v1/Test_CartPole-v1_DoubleDQN_mp_20230406-160410/logs/log.txt +0 -52
- CartPole-v1/Test_CartPole-v1_DoubleDQN_mp_20230406-160410/results/learning_curve.png +0 -0
- CartPole-v1/Test_CartPole-v1_DoubleDQN_mp_20230406-160410/results/res.csv +0 -11
- CartPole-v1/Test_CartPole-v1_DoubleDQN_ray_20230406-170348/config.yaml +0 -46
- CartPole-v1/Test_CartPole-v1_DoubleDQN_ray_20230406-170348/logs/log.txt +0 -52
- CartPole-v1/Test_CartPole-v1_DoubleDQN_ray_20230406-170348/results/learning_curve.png +0 -0
- CartPole-v1/Test_CartPole-v1_DoubleDQN_ray_20230406-170348/results/res.csv +0 -11
- CartPole-v1/{Test_CartPole-v1_DoubleDQN_mp_20230406-160410 → Test_single_CartPole-v1_DoubleDQN_20230516-115305}/config.yaml +20 -23
- CartPole-v1/Test_single_CartPole-v1_DoubleDQN_20230516-115305/logs/log.txt +55 -0
- CartPole-v1/{Test_CartPole-v1_DoubleDQN_mp_20230406-160410/models/checkpoint.pth → Test_single_CartPole-v1_DoubleDQN_20230516-115305/tb_logs/interact/events.out.tfevents.1684209185.JMac.local.52313.0} +2 -2
- CartPole-v1/{Test_CartPole-v1_DoubleDQN_ray_20230406-170348/models/checkpoint.pt → Test_single_CartPole-v1_DoubleDQN_20230516-115305/tb_logs/model/events.out.tfevents.1684209185.JMac.local.52313.1} +2 -2
- CartPole-v1/Train_CartPole-v1_DoubleDQN_20221122-125516/config.yaml +0 -40
- CartPole-v1/Train_CartPole-v1_DoubleDQN_20221122-125516/logs/log.txt +0 -116
- CartPole-v1/Train_CartPole-v1_DoubleDQN_20221122-125516/results/learning_curve.png +0 -0
- CartPole-v1/Train_CartPole-v1_DoubleDQN_20221122-125516/results/res.csv +0 -101
- CartPole-v1/Train_CartPole-v1_DoubleDQN_mp_20230406-160028/logs/log.txt +0 -42
- CartPole-v1/Train_CartPole-v1_DoubleDQN_mp_20230406-160028/models/checkpoint.pth +0 -3
- CartPole-v1/Train_CartPole-v1_DoubleDQN_mp_20230406-160028/results/learning_curve.png +0 -0
- CartPole-v1/Train_CartPole-v1_DoubleDQN_mp_20230406-160028/results/res.csv +0 -402
- CartPole-v1/Train_CartPole-v1_DoubleDQN_ray_20230406-162938/logs/log.txt +0 -42
- CartPole-v1/Train_CartPole-v1_DoubleDQN_ray_20230406-162938/models/checkpoint.pt +0 -3
- CartPole-v1/Train_CartPole-v1_DoubleDQN_ray_20230406-162938/results/learning_curve.png +0 -0
- CartPole-v1/Train_CartPole-v1_DoubleDQN_ray_20230406-162938/results/res.csv +0 -401
- CartPole-v1/{Train_CartPole-v1_DoubleDQN_ray_20230406-162938 → Train_ray_CartPole-v1_DoubleDQN_20230516-115126}/config.yaml +17 -20
- CartPole-v1/Train_ray_CartPole-v1_DoubleDQN_20230516-115126/logs/log.txt +157 -0
- CartPole-v1/Train_ray_CartPole-v1_DoubleDQN_20230516-115126/models/1000 +0 -0
- CartPole-v1/Train_ray_CartPole-v1_DoubleDQN_20230516-115126/models/1500 +0 -0
- CartPole-v1/Train_ray_CartPole-v1_DoubleDQN_20230516-115126/models/2000 +0 -0
- CartPole-v1/Train_ray_CartPole-v1_DoubleDQN_20230516-115126/models/2500 +0 -0
- CartPole-v1/Train_ray_CartPole-v1_DoubleDQN_20230516-115126/models/3000 +0 -0
- CartPole-v1/Train_ray_CartPole-v1_DoubleDQN_20230516-115126/models/3500 +0 -0
- CartPole-v1/Train_ray_CartPole-v1_DoubleDQN_20230516-115126/models/500 +0 -0
- CartPole-v1/Train_ray_CartPole-v1_DoubleDQN_20230516-115126/models/best +0 -0
- CartPole-v1/{Test_CartPole-v1_DoubleDQN_20221122-125611/models/checkpoint.pth → Train_ray_CartPole-v1_DoubleDQN_20230516-115126/tb_logs/interact/events.out.tfevents.1684209086.JMac.local.52110.0} +2 -2
- CartPole-v1/{Train_CartPole-v1_DoubleDQN_20221122-125516/models/checkpoint.pth → Train_ray_CartPole-v1_DoubleDQN_20230516-115126/tb_logs/interact/events.out.tfevents.1684209096.JMac.local.52161.0} +2 -2
- CartPole-v1/Train_ray_CartPole-v1_DoubleDQN_20230516-115126/tb_logs/model/events.out.tfevents.1684209086.JMac.local.52110.1 +3 -0
- CartPole-v1/Train_ray_CartPole-v1_DoubleDQN_20230516-115126/tb_logs/model/events.out.tfevents.1684209096.JMac.local.52161.1 +3 -0
- CartPole-v1/{Train_CartPole-v1_DoubleDQN_mp_20230406-160028 → Train_single_CartPole-v1_DoubleDQN_20230516-114540}/config.yaml +18 -21
- CartPole-v1/Train_single_CartPole-v1_DoubleDQN_20230516-114540/logs/log.txt +162 -0
- CartPole-v1/Train_single_CartPole-v1_DoubleDQN_20230516-114540/models/1000 +0 -0
- CartPole-v1/Train_single_CartPole-v1_DoubleDQN_20230516-114540/models/1500 +0 -0
- CartPole-v1/Train_single_CartPole-v1_DoubleDQN_20230516-114540/models/2000 +0 -0
- CartPole-v1/Train_single_CartPole-v1_DoubleDQN_20230516-114540/models/2500 +0 -0
- CartPole-v1/Train_single_CartPole-v1_DoubleDQN_20230516-114540/models/3000 +0 -0
- CartPole-v1/Train_single_CartPole-v1_DoubleDQN_20230516-114540/models/3500 +0 -0
- CartPole-v1/Train_single_CartPole-v1_DoubleDQN_20230516-114540/models/4000 +0 -0
CartPole-v1/Test_CartPole-v1_DoubleDQN_20221122-125611/config.yaml
DELETED
@@ -1,40 +0,0 @@
|
|
1 |
-
general_cfg:
|
2 |
-
algo_name: DoubleDQN
|
3 |
-
device: cuda
|
4 |
-
env_name: CartPole-v1
|
5 |
-
eval_eps: 10
|
6 |
-
eval_per_episode: 5
|
7 |
-
load_checkpoint: true
|
8 |
-
load_path: Train_CartPole-v1_DoubleDQN_20221122-125516
|
9 |
-
max_steps: 200
|
10 |
-
mode: test
|
11 |
-
save_fig: true
|
12 |
-
seed: 1
|
13 |
-
show_fig: false
|
14 |
-
test_eps: 10
|
15 |
-
train_eps: 100
|
16 |
-
algo_cfg:
|
17 |
-
batch_size: 64
|
18 |
-
buffer_size: 100000
|
19 |
-
epsilon_decay: 500
|
20 |
-
epsilon_end: 0.01
|
21 |
-
epsilon_start: 0.95
|
22 |
-
gamma: 0.99
|
23 |
-
lr: 0.0001
|
24 |
-
target_update: 4
|
25 |
-
value_layers:
|
26 |
-
- activation: relu
|
27 |
-
layer_dim:
|
28 |
-
- n_states
|
29 |
-
- 256
|
30 |
-
layer_type: linear
|
31 |
-
- activation: relu
|
32 |
-
layer_dim:
|
33 |
-
- 256
|
34 |
-
- 256
|
35 |
-
layer_type: linear
|
36 |
-
- activation: none
|
37 |
-
layer_dim:
|
38 |
-
- 256
|
39 |
-
- n_actions
|
40 |
-
layer_type: linear
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
CartPole-v1/Test_CartPole-v1_DoubleDQN_20221122-125611/logs/log.txt
DELETED
@@ -1,14 +0,0 @@
|
|
1 |
-
2022-11-22 12:56:12 - r - INFO: - n_states: 4, n_actions: 2
|
2 |
-
2022-11-22 12:56:14 - r - INFO: - Start testing!
|
3 |
-
2022-11-22 12:56:14 - r - INFO: - Env: CartPole-v1, Algorithm: DoubleDQN, Device: cuda
|
4 |
-
2022-11-22 12:56:14 - r - INFO: - Episode: 1/10, Reward: 200.000, Step: 200
|
5 |
-
2022-11-22 12:56:15 - r - INFO: - Episode: 2/10, Reward: 200.000, Step: 200
|
6 |
-
2022-11-22 12:56:15 - r - INFO: - Episode: 3/10, Reward: 200.000, Step: 200
|
7 |
-
2022-11-22 12:56:15 - r - INFO: - Episode: 4/10, Reward: 200.000, Step: 200
|
8 |
-
2022-11-22 12:56:15 - r - INFO: - Episode: 5/10, Reward: 138.000, Step: 138
|
9 |
-
2022-11-22 12:56:15 - r - INFO: - Episode: 6/10, Reward: 200.000, Step: 200
|
10 |
-
2022-11-22 12:56:15 - r - INFO: - Episode: 7/10, Reward: 200.000, Step: 200
|
11 |
-
2022-11-22 12:56:15 - r - INFO: - Episode: 8/10, Reward: 200.000, Step: 200
|
12 |
-
2022-11-22 12:56:15 - r - INFO: - Episode: 9/10, Reward: 200.000, Step: 200
|
13 |
-
2022-11-22 12:56:15 - r - INFO: - Episode: 10/10, Reward: 200.000, Step: 200
|
14 |
-
2022-11-22 12:56:15 - r - INFO: - Finish testing!
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
CartPole-v1/Test_CartPole-v1_DoubleDQN_20221122-125611/results/learning_curve.png
DELETED
Binary file (31.4 kB)
|
|
CartPole-v1/Test_CartPole-v1_DoubleDQN_20221122-125611/results/res.csv
DELETED
@@ -1,11 +0,0 @@
|
|
1 |
-
episodes,rewards,steps
|
2 |
-
0,200.0,200
|
3 |
-
1,200.0,200
|
4 |
-
2,200.0,200
|
5 |
-
3,200.0,200
|
6 |
-
4,138.0,138
|
7 |
-
5,200.0,200
|
8 |
-
6,200.0,200
|
9 |
-
7,200.0,200
|
10 |
-
8,200.0,200
|
11 |
-
9,200.0,200
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
CartPole-v1/Test_CartPole-v1_DoubleDQN_mp_20230406-160410/logs/log.txt
DELETED
@@ -1,52 +0,0 @@
|
|
1 |
-
2023-04-06 16:04:10 - r - INFO: - Hyperparameters:
|
2 |
-
2023-04-06 16:04:10 - r - INFO: - ================================================================================
|
3 |
-
2023-04-06 16:04:10 - r - INFO: - Name Value Type
|
4 |
-
2023-04-06 16:04:10 - r - INFO: - env_name CartPole-v1 <class 'str'>
|
5 |
-
2023-04-06 16:04:10 - r - INFO: - new_step_api 1 <class 'bool'>
|
6 |
-
2023-04-06 16:04:10 - r - INFO: - wrapper None <class 'str'>
|
7 |
-
2023-04-06 16:04:10 - r - INFO: - render 0 <class 'bool'>
|
8 |
-
2023-04-06 16:04:10 - r - INFO: - render_mode human <class 'str'>
|
9 |
-
2023-04-06 16:04:10 - r - INFO: - algo_name DoubleDQN <class 'str'>
|
10 |
-
2023-04-06 16:04:10 - r - INFO: - mode test <class 'str'>
|
11 |
-
2023-04-06 16:04:10 - r - INFO: - mp_backend mp <class 'str'>
|
12 |
-
2023-04-06 16:04:10 - r - INFO: - seed 1 <class 'int'>
|
13 |
-
2023-04-06 16:04:10 - r - INFO: - device cpu <class 'str'>
|
14 |
-
2023-04-06 16:04:10 - r - INFO: - train_eps 400 <class 'int'>
|
15 |
-
2023-04-06 16:04:10 - r - INFO: - test_eps 10 <class 'int'>
|
16 |
-
2023-04-06 16:04:10 - r - INFO: - eval_eps 10 <class 'int'>
|
17 |
-
2023-04-06 16:04:10 - r - INFO: - eval_per_episode 5 <class 'int'>
|
18 |
-
2023-04-06 16:04:10 - r - INFO: - max_steps 200 <class 'int'>
|
19 |
-
2023-04-06 16:04:10 - r - INFO: - load_checkpoint 1 <class 'bool'>
|
20 |
-
2023-04-06 16:04:10 - r - INFO: - load_path Train_CartPole-v1_DoubleDQN_mp_20230406-160028 <class 'str'>
|
21 |
-
2023-04-06 16:04:10 - r - INFO: - show_fig 0 <class 'bool'>
|
22 |
-
2023-04-06 16:04:10 - r - INFO: - save_fig 1 <class 'bool'>
|
23 |
-
2023-04-06 16:04:10 - r - INFO: - n_workers 1 <class 'int'>
|
24 |
-
2023-04-06 16:04:10 - r - INFO: - epsilon_start 0.95 <class 'float'>
|
25 |
-
2023-04-06 16:04:10 - r - INFO: - epsilon_end 0.01 <class 'float'>
|
26 |
-
2023-04-06 16:04:10 - r - INFO: - epsilon_decay 500 <class 'int'>
|
27 |
-
2023-04-06 16:04:10 - r - INFO: - gamma 0.95 <class 'float'>
|
28 |
-
2023-04-06 16:04:10 - r - INFO: - lr 0.0001 <class 'float'>
|
29 |
-
2023-04-06 16:04:10 - r - INFO: - buffer_size 100000 <class 'int'>
|
30 |
-
2023-04-06 16:04:10 - r - INFO: - batch_size 64 <class 'int'>
|
31 |
-
2023-04-06 16:04:10 - r - INFO: - target_update 4 <class 'int'>
|
32 |
-
2023-04-06 16:04:10 - r - INFO: - value_layers [{'layer_type': 'linear', 'layer_dim': ['n_states', 256], 'activation': 'relu'}, {'layer_type': 'linear', 'layer_dim': [256, 256], 'activation': 'relu'}, {'layer_type': 'linear', 'layer_dim': [256, 'n_actions'], 'activation': 'none'}] <class 'str'>
|
33 |
-
2023-04-06 16:04:10 - r - INFO: - task_dir /media/disk/gsc/joyrl-offline/tasks/Test_CartPole-v1_DoubleDQN_20230406-160410 <class 'str'>
|
34 |
-
2023-04-06 16:04:10 - r - INFO: - res_dir /media/disk/gsc/joyrl-offline/tasks/Test_CartPole-v1_DoubleDQN_20230406-160410/results <class 'str'>
|
35 |
-
2023-04-06 16:04:10 - r - INFO: - log_dir /media/disk/gsc/joyrl-offline/tasks/Test_CartPole-v1_DoubleDQN_20230406-160410/logs <class 'str'>
|
36 |
-
2023-04-06 16:04:10 - r - INFO: - traj_dir /media/disk/gsc/joyrl-offline/tasks/Test_CartPole-v1_DoubleDQN_20230406-160410/traj <class 'str'>
|
37 |
-
2023-04-06 16:04:10 - r - INFO: - video_dir /media/disk/gsc/joyrl-offline/tasks/Test_CartPole-v1_DoubleDQN_20230406-160410/videos <class 'str'>
|
38 |
-
2023-04-06 16:04:10 - r - INFO: - ================================================================================
|
39 |
-
2023-04-06 16:04:10 - r - INFO: - n_states: 4, n_actions: 2
|
40 |
-
2023-04-06 16:04:10 - r - INFO: - Start testing!
|
41 |
-
2023-04-06 16:04:10 - r - INFO: - Env: CartPole-v1, Algorithm: DoubleDQN, Device: cpu
|
42 |
-
2023-04-06 16:04:10 - r - INFO: - Episode: 1/10, Reward: 200.000, Step: 200
|
43 |
-
2023-04-06 16:04:10 - r - INFO: - Episode: 2/10, Reward: 200.000, Step: 200
|
44 |
-
2023-04-06 16:04:10 - r - INFO: - Episode: 3/10, Reward: 200.000, Step: 200
|
45 |
-
2023-04-06 16:04:10 - r - INFO: - Episode: 4/10, Reward: 200.000, Step: 200
|
46 |
-
2023-04-06 16:04:10 - r - INFO: - Episode: 5/10, Reward: 200.000, Step: 200
|
47 |
-
2023-04-06 16:04:10 - r - INFO: - Episode: 6/10, Reward: 200.000, Step: 200
|
48 |
-
2023-04-06 16:04:10 - r - INFO: - Episode: 7/10, Reward: 200.000, Step: 200
|
49 |
-
2023-04-06 16:04:10 - r - INFO: - Episode: 8/10, Reward: 200.000, Step: 200
|
50 |
-
2023-04-06 16:04:10 - r - INFO: - Episode: 9/10, Reward: 200.000, Step: 200
|
51 |
-
2023-04-06 16:04:10 - r - INFO: - Episode: 10/10, Reward: 198.000, Step: 198
|
52 |
-
2023-04-06 16:04:10 - r - INFO: - Finish testing!
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
CartPole-v1/Test_CartPole-v1_DoubleDQN_mp_20230406-160410/results/learning_curve.png
DELETED
Binary file (34.7 kB)
|
|
CartPole-v1/Test_CartPole-v1_DoubleDQN_mp_20230406-160410/results/res.csv
DELETED
@@ -1,11 +0,0 @@
|
|
1 |
-
episodes,rewards,steps
|
2 |
-
0,200.0,200
|
3 |
-
1,200.0,200
|
4 |
-
2,200.0,200
|
5 |
-
3,200.0,200
|
6 |
-
4,200.0,200
|
7 |
-
5,200.0,200
|
8 |
-
6,200.0,200
|
9 |
-
7,200.0,200
|
10 |
-
8,200.0,200
|
11 |
-
9,198.0,198
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
CartPole-v1/Test_CartPole-v1_DoubleDQN_ray_20230406-170348/config.yaml
DELETED
@@ -1,46 +0,0 @@
|
|
1 |
-
general_cfg:
|
2 |
-
algo_name: DoubleDQN
|
3 |
-
device: cpu
|
4 |
-
env_name: CartPole-v1
|
5 |
-
eval_eps: 10
|
6 |
-
eval_per_episode: 5
|
7 |
-
load_checkpoint: true
|
8 |
-
load_path: Train_CartPole-v1_DoubleDQN_ray_20230406-162938
|
9 |
-
max_steps: 200
|
10 |
-
mode: test
|
11 |
-
mp_backend: ray
|
12 |
-
n_workers: 1
|
13 |
-
new_step_api: true
|
14 |
-
render: false
|
15 |
-
render_mode: human
|
16 |
-
save_fig: true
|
17 |
-
seed: 1
|
18 |
-
show_fig: false
|
19 |
-
test_eps: 10
|
20 |
-
train_eps: 400
|
21 |
-
wrapper: null
|
22 |
-
algo_cfg:
|
23 |
-
batch_size: 64
|
24 |
-
buffer_size: 100000
|
25 |
-
epsilon_decay: 500
|
26 |
-
epsilon_end: 0.01
|
27 |
-
epsilon_start: 0.95
|
28 |
-
gamma: 0.95
|
29 |
-
lr: 0.0001
|
30 |
-
target_update: 4
|
31 |
-
value_layers:
|
32 |
-
- activation: relu
|
33 |
-
layer_dim:
|
34 |
-
- n_states
|
35 |
-
- 256
|
36 |
-
layer_type: linear
|
37 |
-
- activation: relu
|
38 |
-
layer_dim:
|
39 |
-
- 256
|
40 |
-
- 256
|
41 |
-
layer_type: linear
|
42 |
-
- activation: none
|
43 |
-
layer_dim:
|
44 |
-
- 256
|
45 |
-
- n_actions
|
46 |
-
layer_type: linear
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
CartPole-v1/Test_CartPole-v1_DoubleDQN_ray_20230406-170348/logs/log.txt
DELETED
@@ -1,52 +0,0 @@
|
|
1 |
-
2023-04-06 17:03:48 - r - INFO: - Hyperparameters:
|
2 |
-
2023-04-06 17:03:48 - r - INFO: - ================================================================================
|
3 |
-
2023-04-06 17:03:48 - r - INFO: - Name Value Type
|
4 |
-
2023-04-06 17:03:48 - r - INFO: - env_name CartPole-v1 <class 'str'>
|
5 |
-
2023-04-06 17:03:48 - r - INFO: - new_step_api 1 <class 'bool'>
|
6 |
-
2023-04-06 17:03:48 - r - INFO: - wrapper None <class 'str'>
|
7 |
-
2023-04-06 17:03:48 - r - INFO: - render 0 <class 'bool'>
|
8 |
-
2023-04-06 17:03:48 - r - INFO: - render_mode human <class 'str'>
|
9 |
-
2023-04-06 17:03:48 - r - INFO: - algo_name DoubleDQN <class 'str'>
|
10 |
-
2023-04-06 17:03:48 - r - INFO: - mode test <class 'str'>
|
11 |
-
2023-04-06 17:03:48 - r - INFO: - mp_backend ray <class 'str'>
|
12 |
-
2023-04-06 17:03:48 - r - INFO: - seed 1 <class 'int'>
|
13 |
-
2023-04-06 17:03:48 - r - INFO: - device cpu <class 'str'>
|
14 |
-
2023-04-06 17:03:48 - r - INFO: - train_eps 400 <class 'int'>
|
15 |
-
2023-04-06 17:03:48 - r - INFO: - test_eps 10 <class 'int'>
|
16 |
-
2023-04-06 17:03:48 - r - INFO: - eval_eps 10 <class 'int'>
|
17 |
-
2023-04-06 17:03:48 - r - INFO: - eval_per_episode 5 <class 'int'>
|
18 |
-
2023-04-06 17:03:48 - r - INFO: - max_steps 200 <class 'int'>
|
19 |
-
2023-04-06 17:03:48 - r - INFO: - load_checkpoint 1 <class 'bool'>
|
20 |
-
2023-04-06 17:03:48 - r - INFO: - load_path Train_CartPole-v1_DoubleDQN_ray_20230406-162938 <class 'str'>
|
21 |
-
2023-04-06 17:03:48 - r - INFO: - show_fig 0 <class 'bool'>
|
22 |
-
2023-04-06 17:03:48 - r - INFO: - save_fig 1 <class 'bool'>
|
23 |
-
2023-04-06 17:03:48 - r - INFO: - n_workers 1 <class 'int'>
|
24 |
-
2023-04-06 17:03:48 - r - INFO: - epsilon_start 0.95 <class 'float'>
|
25 |
-
2023-04-06 17:03:48 - r - INFO: - epsilon_end 0.01 <class 'float'>
|
26 |
-
2023-04-06 17:03:48 - r - INFO: - epsilon_decay 500 <class 'int'>
|
27 |
-
2023-04-06 17:03:48 - r - INFO: - gamma 0.95 <class 'float'>
|
28 |
-
2023-04-06 17:03:48 - r - INFO: - lr 0.0001 <class 'float'>
|
29 |
-
2023-04-06 17:03:48 - r - INFO: - buffer_size 100000 <class 'int'>
|
30 |
-
2023-04-06 17:03:48 - r - INFO: - batch_size 64 <class 'int'>
|
31 |
-
2023-04-06 17:03:48 - r - INFO: - target_update 4 <class 'int'>
|
32 |
-
2023-04-06 17:03:48 - r - INFO: - value_layers [{'layer_type': 'linear', 'layer_dim': ['n_states', 256], 'activation': 'relu'}, {'layer_type': 'linear', 'layer_dim': [256, 256], 'activation': 'relu'}, {'layer_type': 'linear', 'layer_dim': [256, 'n_actions'], 'activation': 'none'}] <class 'str'>
|
33 |
-
2023-04-06 17:03:48 - r - INFO: - task_dir /media/disk/gsc/joyrl-offline/tasks/Test_CartPole-v1_DoubleDQN_20230406-170348 <class 'str'>
|
34 |
-
2023-04-06 17:03:48 - r - INFO: - res_dir /media/disk/gsc/joyrl-offline/tasks/Test_CartPole-v1_DoubleDQN_20230406-170348/results <class 'str'>
|
35 |
-
2023-04-06 17:03:48 - r - INFO: - log_dir /media/disk/gsc/joyrl-offline/tasks/Test_CartPole-v1_DoubleDQN_20230406-170348/logs <class 'str'>
|
36 |
-
2023-04-06 17:03:48 - r - INFO: - traj_dir /media/disk/gsc/joyrl-offline/tasks/Test_CartPole-v1_DoubleDQN_20230406-170348/traj <class 'str'>
|
37 |
-
2023-04-06 17:03:48 - r - INFO: - video_dir /media/disk/gsc/joyrl-offline/tasks/Test_CartPole-v1_DoubleDQN_20230406-170348/videos <class 'str'>
|
38 |
-
2023-04-06 17:03:48 - r - INFO: - ================================================================================
|
39 |
-
2023-04-06 17:03:48 - r - INFO: - n_states: 4, n_actions: 2
|
40 |
-
2023-04-06 17:03:48 - r - INFO: - Start testing!
|
41 |
-
2023-04-06 17:03:48 - r - INFO: - Env: CartPole-v1, Algorithm: DoubleDQN, Device: cpu
|
42 |
-
2023-04-06 17:03:48 - r - INFO: - Episode: 1/10, Reward: 200.000, Step: 200
|
43 |
-
2023-04-06 17:03:48 - r - INFO: - Episode: 2/10, Reward: 200.000, Step: 200
|
44 |
-
2023-04-06 17:03:48 - r - INFO: - Episode: 3/10, Reward: 200.000, Step: 200
|
45 |
-
2023-04-06 17:03:48 - r - INFO: - Episode: 4/10, Reward: 200.000, Step: 200
|
46 |
-
2023-04-06 17:03:48 - r - INFO: - Episode: 5/10, Reward: 200.000, Step: 200
|
47 |
-
2023-04-06 17:03:48 - r - INFO: - Episode: 6/10, Reward: 200.000, Step: 200
|
48 |
-
2023-04-06 17:03:48 - r - INFO: - Episode: 7/10, Reward: 200.000, Step: 200
|
49 |
-
2023-04-06 17:03:48 - r - INFO: - Episode: 8/10, Reward: 200.000, Step: 200
|
50 |
-
2023-04-06 17:03:48 - r - INFO: - Episode: 9/10, Reward: 200.000, Step: 200
|
51 |
-
2023-04-06 17:03:49 - r - INFO: - Episode: 10/10, Reward: 200.000, Step: 200
|
52 |
-
2023-04-06 17:03:49 - r - INFO: - Finish testing!
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
CartPole-v1/Test_CartPole-v1_DoubleDQN_ray_20230406-170348/results/learning_curve.png
DELETED
Binary file (27.7 kB)
|
|
CartPole-v1/Test_CartPole-v1_DoubleDQN_ray_20230406-170348/results/res.csv
DELETED
@@ -1,11 +0,0 @@
|
|
1 |
-
episodes,rewards,steps
|
2 |
-
0,200.0,200
|
3 |
-
1,200.0,200
|
4 |
-
2,200.0,200
|
5 |
-
3,200.0,200
|
6 |
-
4,200.0,200
|
7 |
-
5,200.0,200
|
8 |
-
6,200.0,200
|
9 |
-
7,200.0,200
|
10 |
-
8,200.0,200
|
11 |
-
9,200.0,200
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
CartPole-v1/{Test_CartPole-v1_DoubleDQN_mp_20230406-160410 → Test_single_CartPole-v1_DoubleDQN_20230516-115305}/config.yaml
RENAMED
@@ -1,46 +1,43 @@
|
|
1 |
general_cfg:
|
2 |
algo_name: DoubleDQN
|
|
|
3 |
device: cpu
|
4 |
-
env_name:
|
5 |
-
eval_eps: 10
|
6 |
-
eval_per_episode: 5
|
7 |
load_checkpoint: true
|
8 |
-
|
9 |
-
|
|
|
|
|
10 |
mode: test
|
11 |
-
|
12 |
-
|
13 |
-
|
14 |
-
|
15 |
-
|
16 |
-
save_fig: true
|
17 |
seed: 1
|
18 |
-
show_fig: false
|
19 |
-
test_eps: 10
|
20 |
-
train_eps: 400
|
21 |
-
wrapper: null
|
22 |
algo_cfg:
|
23 |
batch_size: 64
|
24 |
buffer_size: 100000
|
|
|
25 |
epsilon_decay: 500
|
26 |
epsilon_end: 0.01
|
27 |
epsilon_start: 0.95
|
28 |
-
gamma: 0.
|
29 |
lr: 0.0001
|
30 |
target_update: 4
|
31 |
value_layers:
|
32 |
- activation: relu
|
33 |
layer_dim:
|
34 |
-
- n_states
|
35 |
- 256
|
36 |
layer_type: linear
|
37 |
- activation: relu
|
38 |
layer_dim:
|
39 |
- 256
|
40 |
-
- 256
|
41 |
-
layer_type: linear
|
42 |
-
- activation: none
|
43 |
-
layer_dim:
|
44 |
-
- 256
|
45 |
-
- n_actions
|
46 |
layer_type: linear
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
general_cfg:
|
2 |
algo_name: DoubleDQN
|
3 |
+
collect_traj: false
|
4 |
device: cpu
|
5 |
+
env_name: gym
|
|
|
|
|
6 |
load_checkpoint: true
|
7 |
+
load_model_step: best
|
8 |
+
load_path: Train_single_CartPole-v1_DoubleDQN_20230516-114540
|
9 |
+
max_episode: 10
|
10 |
+
max_step: 200
|
11 |
mode: test
|
12 |
+
model_save_fre: 500
|
13 |
+
mp_backend: single
|
14 |
+
n_workers: 2
|
15 |
+
online_eval: true
|
16 |
+
online_eval_episode: 10
|
|
|
17 |
seed: 1
|
|
|
|
|
|
|
|
|
18 |
algo_cfg:
|
19 |
batch_size: 64
|
20 |
buffer_size: 100000
|
21 |
+
buffer_type: REPLAY_QUE
|
22 |
epsilon_decay: 500
|
23 |
epsilon_end: 0.01
|
24 |
epsilon_start: 0.95
|
25 |
+
gamma: 0.99
|
26 |
lr: 0.0001
|
27 |
target_update: 4
|
28 |
value_layers:
|
29 |
- activation: relu
|
30 |
layer_dim:
|
|
|
31 |
- 256
|
32 |
layer_type: linear
|
33 |
- activation: relu
|
34 |
layer_dim:
|
35 |
- 256
|
|
|
|
|
|
|
|
|
|
|
|
|
36 |
layer_type: linear
|
37 |
+
env_cfg:
|
38 |
+
id: CartPole-v1
|
39 |
+
ignore_params:
|
40 |
+
- wrapper
|
41 |
+
- ignore_params
|
42 |
+
render_mode: null
|
43 |
+
wrapper: null
|
CartPole-v1/Test_single_CartPole-v1_DoubleDQN_20230516-115305/logs/log.txt
ADDED
@@ -0,0 +1,55 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
2023-05-16 11:53:05 - SimpleLog - INFO: - General Configs:
|
2 |
+
2023-05-16 11:53:05 - SimpleLog - INFO: - ================================================================================
|
3 |
+
2023-05-16 11:53:05 - SimpleLog - INFO: - Name Value Type
|
4 |
+
2023-05-16 11:53:05 - SimpleLog - INFO: - env_name gym <class 'str'>
|
5 |
+
2023-05-16 11:53:05 - SimpleLog - INFO: - algo_name DoubleDQN <class 'str'>
|
6 |
+
2023-05-16 11:53:05 - SimpleLog - INFO: - mode test <class 'str'>
|
7 |
+
2023-05-16 11:53:05 - SimpleLog - INFO: - device cpu <class 'str'>
|
8 |
+
2023-05-16 11:53:05 - SimpleLog - INFO: - seed 1 <class 'int'>
|
9 |
+
2023-05-16 11:53:05 - SimpleLog - INFO: - max_episode 10 <class 'int'>
|
10 |
+
2023-05-16 11:53:05 - SimpleLog - INFO: - max_step 200 <class 'int'>
|
11 |
+
2023-05-16 11:53:05 - SimpleLog - INFO: - collect_traj 0 <class 'bool'>
|
12 |
+
2023-05-16 11:53:05 - SimpleLog - INFO: - mp_backend single <class 'str'>
|
13 |
+
2023-05-16 11:53:05 - SimpleLog - INFO: - n_workers 2 <class 'int'>
|
14 |
+
2023-05-16 11:53:05 - SimpleLog - INFO: - online_eval 1 <class 'bool'>
|
15 |
+
2023-05-16 11:53:05 - SimpleLog - INFO: - online_eval_episode 10 <class 'int'>
|
16 |
+
2023-05-16 11:53:05 - SimpleLog - INFO: - model_save_fre 500 <class 'int'>
|
17 |
+
2023-05-16 11:53:05 - SimpleLog - INFO: - load_checkpoint 1 <class 'bool'>
|
18 |
+
2023-05-16 11:53:05 - SimpleLog - INFO: - load_path Train_single_CartPole-v1_DoubleDQN_20230516-114540 <class 'str'>
|
19 |
+
2023-05-16 11:53:05 - SimpleLog - INFO: - load_model_step best <class 'str'>
|
20 |
+
2023-05-16 11:53:05 - SimpleLog - INFO: - ================================================================================
|
21 |
+
2023-05-16 11:53:05 - SimpleLog - INFO: - Algo Configs:
|
22 |
+
2023-05-16 11:53:05 - SimpleLog - INFO: - ================================================================================
|
23 |
+
2023-05-16 11:53:05 - SimpleLog - INFO: - Name Value Type
|
24 |
+
2023-05-16 11:53:05 - SimpleLog - INFO: - epsilon_start 0.95 <class 'float'>
|
25 |
+
2023-05-16 11:53:05 - SimpleLog - INFO: - epsilon_end 0.01 <class 'float'>
|
26 |
+
2023-05-16 11:53:05 - SimpleLog - INFO: - epsilon_decay 500 <class 'int'>
|
27 |
+
2023-05-16 11:53:05 - SimpleLog - INFO: - gamma 0.99 <class 'float'>
|
28 |
+
2023-05-16 11:53:05 - SimpleLog - INFO: - lr 0.0001 <class 'float'>
|
29 |
+
2023-05-16 11:53:05 - SimpleLog - INFO: - buffer_size 100000 <class 'int'>
|
30 |
+
2023-05-16 11:53:05 - SimpleLog - INFO: - batch_size 64 <class 'int'>
|
31 |
+
2023-05-16 11:53:05 - SimpleLog - INFO: - target_update 4 <class 'int'>
|
32 |
+
2023-05-16 11:53:05 - SimpleLog - INFO: - value_layers [{'layer_type': 'linear', 'layer_dim': [256], 'activation': 'relu'}, {'layer_type': 'linear', 'layer_dim': [256], 'activation': 'relu'}] <class 'str'>
|
33 |
+
2023-05-16 11:53:05 - SimpleLog - INFO: - buffer_type REPLAY_QUE <class 'str'>
|
34 |
+
2023-05-16 11:53:05 - SimpleLog - INFO: - ================================================================================
|
35 |
+
2023-05-16 11:53:05 - SimpleLog - INFO: - Env Configs:
|
36 |
+
2023-05-16 11:53:05 - SimpleLog - INFO: - ================================================================================
|
37 |
+
2023-05-16 11:53:05 - SimpleLog - INFO: - Name Value Type
|
38 |
+
2023-05-16 11:53:05 - SimpleLog - INFO: - id CartPole-v1 <class 'str'>
|
39 |
+
2023-05-16 11:53:05 - SimpleLog - INFO: - render_mode None <class 'str'>
|
40 |
+
2023-05-16 11:53:05 - SimpleLog - INFO: - wrapper None <class 'str'>
|
41 |
+
2023-05-16 11:53:05 - SimpleLog - INFO: - ignore_params ['wrapper', 'ignore_params'] <class 'str'>
|
42 |
+
2023-05-16 11:53:05 - SimpleLog - INFO: - ================================================================================
|
43 |
+
2023-05-16 11:53:05 - SimpleLog - INFO: - obs_space: Box([-4.8000002e+00 -3.4028235e+38 -4.1887903e-01 -3.4028235e+38], [4.8000002e+00 3.4028235e+38 4.1887903e-01 3.4028235e+38], (4,), float32), n_actions: Discrete(2)
|
44 |
+
2023-05-16 11:53:05 - SimpleLog - INFO: - Start testing!
|
45 |
+
2023-05-16 11:53:05 - SimpleLog - INFO: - episode: 0, ep_reward: 200.0, ep_step: 200
|
46 |
+
2023-05-16 11:53:05 - SimpleLog - INFO: - episode: 1, ep_reward: 200.0, ep_step: 200
|
47 |
+
2023-05-16 11:53:05 - SimpleLog - INFO: - episode: 2, ep_reward: 200.0, ep_step: 200
|
48 |
+
2023-05-16 11:53:05 - SimpleLog - INFO: - episode: 3, ep_reward: 200.0, ep_step: 200
|
49 |
+
2023-05-16 11:53:05 - SimpleLog - INFO: - episode: 4, ep_reward: 200.0, ep_step: 200
|
50 |
+
2023-05-16 11:53:05 - SimpleLog - INFO: - episode: 5, ep_reward: 200.0, ep_step: 200
|
51 |
+
2023-05-16 11:53:05 - SimpleLog - INFO: - episode: 6, ep_reward: 200.0, ep_step: 200
|
52 |
+
2023-05-16 11:53:05 - SimpleLog - INFO: - episode: 7, ep_reward: 200.0, ep_step: 200
|
53 |
+
2023-05-16 11:53:05 - SimpleLog - INFO: - episode: 8, ep_reward: 200.0, ep_step: 200
|
54 |
+
2023-05-16 11:53:05 - SimpleLog - INFO: - episode: 9, ep_reward: 200.0, ep_step: 200
|
55 |
+
2023-05-16 11:53:05 - SimpleLog - INFO: - Finish testing! total time consumed: 0.24s
|
CartPole-v1/{Test_CartPole-v1_DoubleDQN_mp_20230406-160410/models/checkpoint.pth → Test_single_CartPole-v1_DoubleDQN_20230516-115305/tb_logs/interact/events.out.tfevents.1684209185.JMac.local.52313.0}
RENAMED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
-
size
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:350321a3436f2c600f7c9a0f8ba02ba28a6ad9c6e949481d6926ca5daf32d79e
|
3 |
+
size 1056
|
CartPole-v1/{Test_CartPole-v1_DoubleDQN_ray_20230406-170348/models/checkpoint.pt → Test_single_CartPole-v1_DoubleDQN_20230516-115305/tb_logs/model/events.out.tfevents.1684209185.JMac.local.52313.1}
RENAMED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
-
size
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:25447bc5c9337e3d33f456f66eb8230e83dcc359ad3630edde9c63f21baefd4d
|
3 |
+
size 40
|
CartPole-v1/Train_CartPole-v1_DoubleDQN_20221122-125516/config.yaml
DELETED
@@ -1,40 +0,0 @@
|
|
1 |
-
general_cfg:
|
2 |
-
algo_name: DoubleDQN
|
3 |
-
device: cuda
|
4 |
-
env_name: CartPole-v1
|
5 |
-
eval_eps: 10
|
6 |
-
eval_per_episode: 5
|
7 |
-
load_checkpoint: false
|
8 |
-
load_path: Train_CartPole-v1_DQN_20221026-054757
|
9 |
-
max_steps: 200
|
10 |
-
mode: train
|
11 |
-
save_fig: true
|
12 |
-
seed: 1
|
13 |
-
show_fig: false
|
14 |
-
test_eps: 10
|
15 |
-
train_eps: 100
|
16 |
-
algo_cfg:
|
17 |
-
batch_size: 64
|
18 |
-
buffer_size: 100000
|
19 |
-
epsilon_decay: 500
|
20 |
-
epsilon_end: 0.01
|
21 |
-
epsilon_start: 0.95
|
22 |
-
gamma: 0.99
|
23 |
-
lr: 0.0001
|
24 |
-
target_update: 4
|
25 |
-
value_layers:
|
26 |
-
- activation: relu
|
27 |
-
layer_dim:
|
28 |
-
- n_states
|
29 |
-
- 256
|
30 |
-
layer_type: linear
|
31 |
-
- activation: relu
|
32 |
-
layer_dim:
|
33 |
-
- 256
|
34 |
-
- 256
|
35 |
-
layer_type: linear
|
36 |
-
- activation: none
|
37 |
-
layer_dim:
|
38 |
-
- 256
|
39 |
-
- n_actions
|
40 |
-
layer_type: linear
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
CartPole-v1/Train_CartPole-v1_DoubleDQN_20221122-125516/logs/log.txt
DELETED
@@ -1,116 +0,0 @@
|
|
1 |
-
2022-11-22 12:55:16 - r - INFO: - n_states: 4, n_actions: 2
|
2 |
-
2022-11-22 12:55:19 - r - INFO: - Start training!
|
3 |
-
2022-11-22 12:55:19 - r - INFO: - Env: CartPole-v1, Algorithm: DoubleDQN, Device: cuda
|
4 |
-
2022-11-22 12:55:19 - r - INFO: - Episode: 1/100, Reward: 18.000, Step: 18
|
5 |
-
2022-11-22 12:55:19 - r - INFO: - Episode: 2/100, Reward: 35.000, Step: 35
|
6 |
-
2022-11-22 12:55:19 - r - INFO: - Episode: 3/100, Reward: 13.000, Step: 13
|
7 |
-
2022-11-22 12:55:19 - r - INFO: - Episode: 4/100, Reward: 32.000, Step: 32
|
8 |
-
2022-11-22 12:55:19 - r - INFO: - Episode: 5/100, Reward: 16.000, Step: 16
|
9 |
-
2022-11-22 12:55:19 - r - INFO: - Current episode 5 has the best eval reward: 9.100
|
10 |
-
2022-11-22 12:55:19 - r - INFO: - Episode: 6/100, Reward: 9.000, Step: 9
|
11 |
-
2022-11-22 12:55:19 - r - INFO: - Episode: 7/100, Reward: 12.000, Step: 12
|
12 |
-
2022-11-22 12:55:19 - r - INFO: - Episode: 8/100, Reward: 16.000, Step: 16
|
13 |
-
2022-11-22 12:55:19 - r - INFO: - Episode: 9/100, Reward: 14.000, Step: 14
|
14 |
-
2022-11-22 12:55:19 - r - INFO: - Episode: 10/100, Reward: 12.000, Step: 12
|
15 |
-
2022-11-22 12:55:19 - r - INFO: - Current episode 10 has the best eval reward: 9.200
|
16 |
-
2022-11-22 12:55:19 - r - INFO: - Episode: 11/100, Reward: 13.000, Step: 13
|
17 |
-
2022-11-22 12:55:19 - r - INFO: - Episode: 12/100, Reward: 14.000, Step: 14
|
18 |
-
2022-11-22 12:55:19 - r - INFO: - Episode: 13/100, Reward: 19.000, Step: 19
|
19 |
-
2022-11-22 12:55:19 - r - INFO: - Episode: 14/100, Reward: 9.000, Step: 9
|
20 |
-
2022-11-22 12:55:19 - r - INFO: - Episode: 15/100, Reward: 15.000, Step: 15
|
21 |
-
2022-11-22 12:55:19 - r - INFO: - Current episode 15 has the best eval reward: 9.300
|
22 |
-
2022-11-22 12:55:19 - r - INFO: - Episode: 16/100, Reward: 12.000, Step: 12
|
23 |
-
2022-11-22 12:55:19 - r - INFO: - Episode: 17/100, Reward: 11.000, Step: 11
|
24 |
-
2022-11-22 12:55:19 - r - INFO: - Episode: 18/100, Reward: 9.000, Step: 9
|
25 |
-
2022-11-22 12:55:19 - r - INFO: - Episode: 19/100, Reward: 13.000, Step: 13
|
26 |
-
2022-11-22 12:55:19 - r - INFO: - Episode: 20/100, Reward: 17.000, Step: 17
|
27 |
-
2022-11-22 12:55:19 - r - INFO: - Episode: 21/100, Reward: 13.000, Step: 13
|
28 |
-
2022-11-22 12:55:19 - r - INFO: - Episode: 22/100, Reward: 15.000, Step: 15
|
29 |
-
2022-11-22 12:55:19 - r - INFO: - Episode: 23/100, Reward: 22.000, Step: 22
|
30 |
-
2022-11-22 12:55:20 - r - INFO: - Episode: 24/100, Reward: 26.000, Step: 26
|
31 |
-
2022-11-22 12:55:20 - r - INFO: - Episode: 25/100, Reward: 19.000, Step: 19
|
32 |
-
2022-11-22 12:55:20 - r - INFO: - Current episode 25 has the best eval reward: 9.800
|
33 |
-
2022-11-22 12:55:20 - r - INFO: - Episode: 26/100, Reward: 10.000, Step: 10
|
34 |
-
2022-11-22 12:55:20 - r - INFO: - Episode: 27/100, Reward: 10.000, Step: 10
|
35 |
-
2022-11-22 12:55:20 - r - INFO: - Episode: 28/100, Reward: 11.000, Step: 11
|
36 |
-
2022-11-22 12:55:20 - r - INFO: - Episode: 29/100, Reward: 13.000, Step: 13
|
37 |
-
2022-11-22 12:55:20 - r - INFO: - Episode: 30/100, Reward: 16.000, Step: 16
|
38 |
-
2022-11-22 12:55:20 - r - INFO: - Episode: 31/100, Reward: 13.000, Step: 13
|
39 |
-
2022-11-22 12:55:20 - r - INFO: - Episode: 32/100, Reward: 15.000, Step: 15
|
40 |
-
2022-11-22 12:55:20 - r - INFO: - Episode: 33/100, Reward: 12.000, Step: 12
|
41 |
-
2022-11-22 12:55:20 - r - INFO: - Episode: 34/100, Reward: 13.000, Step: 13
|
42 |
-
2022-11-22 12:55:20 - r - INFO: - Episode: 35/100, Reward: 13.000, Step: 13
|
43 |
-
2022-11-22 12:55:20 - r - INFO: - Episode: 36/100, Reward: 11.000, Step: 11
|
44 |
-
2022-11-22 12:55:20 - r - INFO: - Episode: 37/100, Reward: 9.000, Step: 9
|
45 |
-
2022-11-22 12:55:20 - r - INFO: - Episode: 38/100, Reward: 9.000, Step: 9
|
46 |
-
2022-11-22 12:55:20 - r - INFO: - Episode: 39/100, Reward: 10.000, Step: 10
|
47 |
-
2022-11-22 12:55:20 - r - INFO: - Episode: 40/100, Reward: 14.000, Step: 14
|
48 |
-
2022-11-22 12:55:20 - r - INFO: - Episode: 41/100, Reward: 9.000, Step: 9
|
49 |
-
2022-11-22 12:55:20 - r - INFO: - Episode: 42/100, Reward: 10.000, Step: 10
|
50 |
-
2022-11-22 12:55:20 - r - INFO: - Episode: 43/100, Reward: 9.000, Step: 9
|
51 |
-
2022-11-22 12:55:20 - r - INFO: - Episode: 44/100, Reward: 14.000, Step: 14
|
52 |
-
2022-11-22 12:55:20 - r - INFO: - Episode: 45/100, Reward: 10.000, Step: 10
|
53 |
-
2022-11-22 12:55:20 - r - INFO: - Episode: 46/100, Reward: 19.000, Step: 19
|
54 |
-
2022-11-22 12:55:20 - r - INFO: - Episode: 47/100, Reward: 10.000, Step: 10
|
55 |
-
2022-11-22 12:55:20 - r - INFO: - Episode: 48/100, Reward: 14.000, Step: 14
|
56 |
-
2022-11-22 12:55:20 - r - INFO: - Episode: 49/100, Reward: 18.000, Step: 18
|
57 |
-
2022-11-22 12:55:20 - r - INFO: - Episode: 50/100, Reward: 32.000, Step: 32
|
58 |
-
2022-11-22 12:55:20 - r - INFO: - Current episode 50 has the best eval reward: 24.300
|
59 |
-
2022-11-22 12:55:21 - r - INFO: - Episode: 51/100, Reward: 17.000, Step: 17
|
60 |
-
2022-11-22 12:55:21 - r - INFO: - Episode: 52/100, Reward: 15.000, Step: 15
|
61 |
-
2022-11-22 12:55:21 - r - INFO: - Episode: 53/100, Reward: 18.000, Step: 18
|
62 |
-
2022-11-22 12:55:21 - r - INFO: - Episode: 54/100, Reward: 14.000, Step: 14
|
63 |
-
2022-11-22 12:55:21 - r - INFO: - Episode: 55/100, Reward: 22.000, Step: 22
|
64 |
-
2022-11-22 12:55:21 - r - INFO: - Episode: 56/100, Reward: 14.000, Step: 14
|
65 |
-
2022-11-22 12:55:21 - r - INFO: - Episode: 57/100, Reward: 21.000, Step: 21
|
66 |
-
2022-11-22 12:55:21 - r - INFO: - Episode: 58/100, Reward: 21.000, Step: 21
|
67 |
-
2022-11-22 12:55:21 - r - INFO: - Episode: 59/100, Reward: 23.000, Step: 23
|
68 |
-
2022-11-22 12:55:21 - r - INFO: - Episode: 60/100, Reward: 21.000, Step: 21
|
69 |
-
2022-11-22 12:55:21 - r - INFO: - Episode: 61/100, Reward: 21.000, Step: 21
|
70 |
-
2022-11-22 12:55:21 - r - INFO: - Episode: 62/100, Reward: 35.000, Step: 35
|
71 |
-
2022-11-22 12:55:21 - r - INFO: - Episode: 63/100, Reward: 23.000, Step: 23
|
72 |
-
2022-11-22 12:55:21 - r - INFO: - Episode: 64/100, Reward: 27.000, Step: 27
|
73 |
-
2022-11-22 12:55:21 - r - INFO: - Episode: 65/100, Reward: 24.000, Step: 24
|
74 |
-
2022-11-22 12:55:21 - r - INFO: - Current episode 65 has the best eval reward: 29.700
|
75 |
-
2022-11-22 12:55:21 - r - INFO: - Episode: 66/100, Reward: 28.000, Step: 28
|
76 |
-
2022-11-22 12:55:21 - r - INFO: - Episode: 67/100, Reward: 30.000, Step: 30
|
77 |
-
2022-11-22 12:55:22 - r - INFO: - Episode: 68/100, Reward: 33.000, Step: 33
|
78 |
-
2022-11-22 12:55:22 - r - INFO: - Episode: 69/100, Reward: 33.000, Step: 33
|
79 |
-
2022-11-22 12:55:22 - r - INFO: - Episode: 70/100, Reward: 26.000, Step: 26
|
80 |
-
2022-11-22 12:55:22 - r - INFO: - Current episode 70 has the best eval reward: 34.400
|
81 |
-
2022-11-22 12:55:22 - r - INFO: - Episode: 71/100, Reward: 37.000, Step: 37
|
82 |
-
2022-11-22 12:55:22 - r - INFO: - Episode: 72/100, Reward: 28.000, Step: 28
|
83 |
-
2022-11-22 12:55:22 - r - INFO: - Episode: 73/100, Reward: 30.000, Step: 30
|
84 |
-
2022-11-22 12:55:22 - r - INFO: - Episode: 74/100, Reward: 41.000, Step: 41
|
85 |
-
2022-11-22 12:55:22 - r - INFO: - Episode: 75/100, Reward: 45.000, Step: 45
|
86 |
-
2022-11-22 12:55:22 - r - INFO: - Current episode 75 has the best eval reward: 35.600
|
87 |
-
2022-11-22 12:55:23 - r - INFO: - Episode: 76/100, Reward: 68.000, Step: 68
|
88 |
-
2022-11-22 12:55:23 - r - INFO: - Episode: 77/100, Reward: 33.000, Step: 33
|
89 |
-
2022-11-22 12:55:23 - r - INFO: - Episode: 78/100, Reward: 46.000, Step: 46
|
90 |
-
2022-11-22 12:55:23 - r - INFO: - Episode: 79/100, Reward: 54.000, Step: 54
|
91 |
-
2022-11-22 12:55:23 - r - INFO: - Episode: 80/100, Reward: 37.000, Step: 37
|
92 |
-
2022-11-22 12:55:23 - r - INFO: - Current episode 80 has the best eval reward: 42.800
|
93 |
-
2022-11-22 12:55:23 - r - INFO: - Episode: 81/100, Reward: 43.000, Step: 43
|
94 |
-
2022-11-22 12:55:23 - r - INFO: - Episode: 82/100, Reward: 79.000, Step: 79
|
95 |
-
2022-11-22 12:55:23 - r - INFO: - Episode: 83/100, Reward: 36.000, Step: 36
|
96 |
-
2022-11-22 12:55:24 - r - INFO: - Episode: 84/100, Reward: 58.000, Step: 58
|
97 |
-
2022-11-22 12:55:24 - r - INFO: - Episode: 85/100, Reward: 42.000, Step: 42
|
98 |
-
2022-11-22 12:55:24 - r - INFO: - Current episode 85 has the best eval reward: 62.100
|
99 |
-
2022-11-22 12:55:24 - r - INFO: - Episode: 86/100, Reward: 136.000, Step: 136
|
100 |
-
2022-11-22 12:55:24 - r - INFO: - Episode: 87/100, Reward: 57.000, Step: 57
|
101 |
-
2022-11-22 12:55:24 - r - INFO: - Episode: 88/100, Reward: 46.000, Step: 46
|
102 |
-
2022-11-22 12:55:25 - r - INFO: - Episode: 89/100, Reward: 105.000, Step: 105
|
103 |
-
2022-11-22 12:55:25 - r - INFO: - Episode: 90/100, Reward: 63.000, Step: 63
|
104 |
-
2022-11-22 12:55:25 - r - INFO: - Current episode 90 has the best eval reward: 76.600
|
105 |
-
2022-11-22 12:55:25 - r - INFO: - Episode: 91/100, Reward: 84.000, Step: 84
|
106 |
-
2022-11-22 12:55:26 - r - INFO: - Episode: 92/100, Reward: 136.000, Step: 136
|
107 |
-
2022-11-22 12:55:26 - r - INFO: - Episode: 93/100, Reward: 121.000, Step: 121
|
108 |
-
2022-11-22 12:55:26 - r - INFO: - Episode: 94/100, Reward: 96.000, Step: 96
|
109 |
-
2022-11-22 12:55:26 - r - INFO: - Episode: 95/100, Reward: 106.000, Step: 106
|
110 |
-
2022-11-22 12:55:27 - r - INFO: - Current episode 95 has the best eval reward: 187.300
|
111 |
-
2022-11-22 12:55:27 - r - INFO: - Episode: 96/100, Reward: 200.000, Step: 200
|
112 |
-
2022-11-22 12:55:28 - r - INFO: - Episode: 97/100, Reward: 200.000, Step: 200
|
113 |
-
2022-11-22 12:55:28 - r - INFO: - Episode: 98/100, Reward: 113.000, Step: 113
|
114 |
-
2022-11-22 12:55:28 - r - INFO: - Episode: 99/100, Reward: 113.000, Step: 113
|
115 |
-
2022-11-22 12:55:29 - r - INFO: - Episode: 100/100, Reward: 132.000, Step: 132
|
116 |
-
2022-11-22 12:55:29 - r - INFO: - Finish training!
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
CartPole-v1/Train_CartPole-v1_DoubleDQN_20221122-125516/results/learning_curve.png
DELETED
Binary file (47.3 kB)
|
|
CartPole-v1/Train_CartPole-v1_DoubleDQN_20221122-125516/results/res.csv
DELETED
@@ -1,101 +0,0 @@
|
|
1 |
-
episodes,rewards,steps
|
2 |
-
0,18.0,18
|
3 |
-
1,35.0,35
|
4 |
-
2,13.0,13
|
5 |
-
3,32.0,32
|
6 |
-
4,16.0,16
|
7 |
-
5,9.0,9
|
8 |
-
6,12.0,12
|
9 |
-
7,16.0,16
|
10 |
-
8,14.0,14
|
11 |
-
9,12.0,12
|
12 |
-
10,13.0,13
|
13 |
-
11,14.0,14
|
14 |
-
12,19.0,19
|
15 |
-
13,9.0,9
|
16 |
-
14,15.0,15
|
17 |
-
15,12.0,12
|
18 |
-
16,11.0,11
|
19 |
-
17,9.0,9
|
20 |
-
18,13.0,13
|
21 |
-
19,17.0,17
|
22 |
-
20,13.0,13
|
23 |
-
21,15.0,15
|
24 |
-
22,22.0,22
|
25 |
-
23,26.0,26
|
26 |
-
24,19.0,19
|
27 |
-
25,10.0,10
|
28 |
-
26,10.0,10
|
29 |
-
27,11.0,11
|
30 |
-
28,13.0,13
|
31 |
-
29,16.0,16
|
32 |
-
30,13.0,13
|
33 |
-
31,15.0,15
|
34 |
-
32,12.0,12
|
35 |
-
33,13.0,13
|
36 |
-
34,13.0,13
|
37 |
-
35,11.0,11
|
38 |
-
36,9.0,9
|
39 |
-
37,9.0,9
|
40 |
-
38,10.0,10
|
41 |
-
39,14.0,14
|
42 |
-
40,9.0,9
|
43 |
-
41,10.0,10
|
44 |
-
42,9.0,9
|
45 |
-
43,14.0,14
|
46 |
-
44,10.0,10
|
47 |
-
45,19.0,19
|
48 |
-
46,10.0,10
|
49 |
-
47,14.0,14
|
50 |
-
48,18.0,18
|
51 |
-
49,32.0,32
|
52 |
-
50,17.0,17
|
53 |
-
51,15.0,15
|
54 |
-
52,18.0,18
|
55 |
-
53,14.0,14
|
56 |
-
54,22.0,22
|
57 |
-
55,14.0,14
|
58 |
-
56,21.0,21
|
59 |
-
57,21.0,21
|
60 |
-
58,23.0,23
|
61 |
-
59,21.0,21
|
62 |
-
60,21.0,21
|
63 |
-
61,35.0,35
|
64 |
-
62,23.0,23
|
65 |
-
63,27.0,27
|
66 |
-
64,24.0,24
|
67 |
-
65,28.0,28
|
68 |
-
66,30.0,30
|
69 |
-
67,33.0,33
|
70 |
-
68,33.0,33
|
71 |
-
69,26.0,26
|
72 |
-
70,37.0,37
|
73 |
-
71,28.0,28
|
74 |
-
72,30.0,30
|
75 |
-
73,41.0,41
|
76 |
-
74,45.0,45
|
77 |
-
75,68.0,68
|
78 |
-
76,33.0,33
|
79 |
-
77,46.0,46
|
80 |
-
78,54.0,54
|
81 |
-
79,37.0,37
|
82 |
-
80,43.0,43
|
83 |
-
81,79.0,79
|
84 |
-
82,36.0,36
|
85 |
-
83,58.0,58
|
86 |
-
84,42.0,42
|
87 |
-
85,136.0,136
|
88 |
-
86,57.0,57
|
89 |
-
87,46.0,46
|
90 |
-
88,105.0,105
|
91 |
-
89,63.0,63
|
92 |
-
90,84.0,84
|
93 |
-
91,136.0,136
|
94 |
-
92,121.0,121
|
95 |
-
93,96.0,96
|
96 |
-
94,106.0,106
|
97 |
-
95,200.0,200
|
98 |
-
96,200.0,200
|
99 |
-
97,113.0,113
|
100 |
-
98,113.0,113
|
101 |
-
99,132.0,132
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
CartPole-v1/Train_CartPole-v1_DoubleDQN_mp_20230406-160028/logs/log.txt
DELETED
@@ -1,42 +0,0 @@
|
|
1 |
-
2023-04-06 16:00:28 - r - INFO: - Hyperparameters:
|
2 |
-
2023-04-06 16:00:28 - r - INFO: - ================================================================================
|
3 |
-
2023-04-06 16:00:28 - r - INFO: - Name Value Type
|
4 |
-
2023-04-06 16:00:28 - r - INFO: - env_name CartPole-v1 <class 'str'>
|
5 |
-
2023-04-06 16:00:28 - r - INFO: - new_step_api 1 <class 'bool'>
|
6 |
-
2023-04-06 16:00:28 - r - INFO: - wrapper None <class 'str'>
|
7 |
-
2023-04-06 16:00:28 - r - INFO: - render 0 <class 'bool'>
|
8 |
-
2023-04-06 16:00:28 - r - INFO: - render_mode human <class 'str'>
|
9 |
-
2023-04-06 16:00:28 - r - INFO: - algo_name DoubleDQN <class 'str'>
|
10 |
-
2023-04-06 16:00:28 - r - INFO: - mode train <class 'str'>
|
11 |
-
2023-04-06 16:00:28 - r - INFO: - mp_backend mp <class 'str'>
|
12 |
-
2023-04-06 16:00:28 - r - INFO: - seed 1 <class 'int'>
|
13 |
-
2023-04-06 16:00:28 - r - INFO: - device cpu <class 'str'>
|
14 |
-
2023-04-06 16:00:28 - r - INFO: - train_eps 400 <class 'int'>
|
15 |
-
2023-04-06 16:00:28 - r - INFO: - test_eps 10 <class 'int'>
|
16 |
-
2023-04-06 16:00:28 - r - INFO: - eval_eps 10 <class 'int'>
|
17 |
-
2023-04-06 16:00:28 - r - INFO: - eval_per_episode 5 <class 'int'>
|
18 |
-
2023-04-06 16:00:28 - r - INFO: - max_steps 200 <class 'int'>
|
19 |
-
2023-04-06 16:00:28 - r - INFO: - load_checkpoint 0 <class 'bool'>
|
20 |
-
2023-04-06 16:00:28 - r - INFO: - load_path Train_CartPole-v1_DQN_20221026-054757 <class 'str'>
|
21 |
-
2023-04-06 16:00:28 - r - INFO: - show_fig 0 <class 'bool'>
|
22 |
-
2023-04-06 16:00:28 - r - INFO: - save_fig 1 <class 'bool'>
|
23 |
-
2023-04-06 16:00:28 - r - INFO: - n_workers 2 <class 'int'>
|
24 |
-
2023-04-06 16:00:28 - r - INFO: - epsilon_start 0.95 <class 'float'>
|
25 |
-
2023-04-06 16:00:28 - r - INFO: - epsilon_end 0.01 <class 'float'>
|
26 |
-
2023-04-06 16:00:28 - r - INFO: - epsilon_decay 500 <class 'int'>
|
27 |
-
2023-04-06 16:00:28 - r - INFO: - gamma 0.95 <class 'float'>
|
28 |
-
2023-04-06 16:00:28 - r - INFO: - lr 0.0001 <class 'float'>
|
29 |
-
2023-04-06 16:00:28 - r - INFO: - buffer_size 100000 <class 'int'>
|
30 |
-
2023-04-06 16:00:28 - r - INFO: - batch_size 64 <class 'int'>
|
31 |
-
2023-04-06 16:00:28 - r - INFO: - target_update 4 <class 'int'>
|
32 |
-
2023-04-06 16:00:28 - r - INFO: - value_layers [{'layer_type': 'linear', 'layer_dim': ['n_states', 256], 'activation': 'relu'}, {'layer_type': 'linear', 'layer_dim': [256, 256], 'activation': 'relu'}, {'layer_type': 'linear', 'layer_dim': [256, 'n_actions'], 'activation': 'none'}] <class 'str'>
|
33 |
-
2023-04-06 16:00:28 - r - INFO: - task_dir /media/disk/gsc/joyrl-offline/tasks/Train_CartPole-v1_DoubleDQN_20230406-160028 <class 'str'>
|
34 |
-
2023-04-06 16:00:28 - r - INFO: - res_dir /media/disk/gsc/joyrl-offline/tasks/Train_CartPole-v1_DoubleDQN_20230406-160028/results <class 'str'>
|
35 |
-
2023-04-06 16:00:28 - r - INFO: - log_dir /media/disk/gsc/joyrl-offline/tasks/Train_CartPole-v1_DoubleDQN_20230406-160028/logs <class 'str'>
|
36 |
-
2023-04-06 16:00:28 - r - INFO: - traj_dir /media/disk/gsc/joyrl-offline/tasks/Train_CartPole-v1_DoubleDQN_20230406-160028/traj <class 'str'>
|
37 |
-
2023-04-06 16:00:28 - r - INFO: - video_dir /media/disk/gsc/joyrl-offline/tasks/Train_CartPole-v1_DoubleDQN_20230406-160028/videos <class 'str'>
|
38 |
-
2023-04-06 16:00:28 - r - INFO: - ================================================================================
|
39 |
-
2023-04-06 16:00:28 - r - INFO: - n_states: 4, n_actions: 2
|
40 |
-
2023-04-06 16:00:28 - r - INFO: - Start training!
|
41 |
-
2023-04-06 16:00:28 - r - INFO: - Env: CartPole-v1, Algorithm: DoubleDQN, Device: cpu
|
42 |
-
2023-04-06 16:01:56 - r - INFO: - Finish training!
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
CartPole-v1/Train_CartPole-v1_DoubleDQN_mp_20230406-160028/models/checkpoint.pth
DELETED
@@ -1,3 +0,0 @@
|
|
1 |
-
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:4cd537f6ab4ac0ff27caa323076685e3a3fff04b064dbdfc509baae76e9a9406
|
3 |
-
size 272407
|
|
|
|
|
|
|
|
CartPole-v1/Train_CartPole-v1_DoubleDQN_mp_20230406-160028/results/learning_curve.png
DELETED
Binary file (44.9 kB)
|
|
CartPole-v1/Train_CartPole-v1_DoubleDQN_mp_20230406-160028/results/res.csv
DELETED
@@ -1,402 +0,0 @@
|
|
1 |
-
episodes,rewards
|
2 |
-
0,18.0
|
3 |
-
1,39.0
|
4 |
-
2,18.0
|
5 |
-
3,28.0
|
6 |
-
4,15.0
|
7 |
-
5,12.0
|
8 |
-
6,15.0
|
9 |
-
7,40.0
|
10 |
-
8,31.0
|
11 |
-
9,17.0
|
12 |
-
10,17.0
|
13 |
-
11,31.0
|
14 |
-
12,13.0
|
15 |
-
13,14.0
|
16 |
-
14,15.0
|
17 |
-
15,21.0
|
18 |
-
16,9.0
|
19 |
-
17,10.0
|
20 |
-
18,22.0
|
21 |
-
19,19.0
|
22 |
-
20,11.0
|
23 |
-
21,13.0
|
24 |
-
22,15.0
|
25 |
-
23,20.0
|
26 |
-
24,14.0
|
27 |
-
25,12.0
|
28 |
-
26,11.0
|
29 |
-
27,10.0
|
30 |
-
28,14.0
|
31 |
-
29,12.0
|
32 |
-
30,9.0
|
33 |
-
31,10.0
|
34 |
-
32,16.0
|
35 |
-
33,13.0
|
36 |
-
34,12.0
|
37 |
-
35,15.0
|
38 |
-
36,10.0
|
39 |
-
37,17.0
|
40 |
-
38,10.0
|
41 |
-
39,10.0
|
42 |
-
40,13.0
|
43 |
-
41,12.0
|
44 |
-
42,10.0
|
45 |
-
43,12.0
|
46 |
-
44,12.0
|
47 |
-
45,12.0
|
48 |
-
46,9.0
|
49 |
-
47,10.0
|
50 |
-
48,10.0
|
51 |
-
49,11.0
|
52 |
-
50,9.0
|
53 |
-
51,13.0
|
54 |
-
52,9.0
|
55 |
-
53,10.0
|
56 |
-
54,10.0
|
57 |
-
55,11.0
|
58 |
-
56,10.0
|
59 |
-
57,10.0
|
60 |
-
58,14.0
|
61 |
-
59,9.0
|
62 |
-
60,10.0
|
63 |
-
61,10.0
|
64 |
-
62,9.0
|
65 |
-
63,12.0
|
66 |
-
64,10.0
|
67 |
-
65,13.0
|
68 |
-
66,12.0
|
69 |
-
67,15.0
|
70 |
-
68,10.0
|
71 |
-
69,13.0
|
72 |
-
70,14.0
|
73 |
-
71,10.0
|
74 |
-
72,30.0
|
75 |
-
73,11.0
|
76 |
-
74,9.0
|
77 |
-
75,11.0
|
78 |
-
76,9.0
|
79 |
-
77,11.0
|
80 |
-
78,9.0
|
81 |
-
79,11.0
|
82 |
-
80,16.0
|
83 |
-
81,21.0
|
84 |
-
82,10.0
|
85 |
-
83,9.0
|
86 |
-
84,14.0
|
87 |
-
85,9.0
|
88 |
-
86,13.0
|
89 |
-
87,9.0
|
90 |
-
88,13.0
|
91 |
-
89,17.0
|
92 |
-
90,26.0
|
93 |
-
91,32.0
|
94 |
-
92,14.0
|
95 |
-
93,17.0
|
96 |
-
94,11.0
|
97 |
-
95,34.0
|
98 |
-
96,10.0
|
99 |
-
97,23.0
|
100 |
-
98,14.0
|
101 |
-
99,37.0
|
102 |
-
100,27.0
|
103 |
-
101,34.0
|
104 |
-
102,23.0
|
105 |
-
103,59.0
|
106 |
-
104,9.0
|
107 |
-
105,91.0
|
108 |
-
106,61.0
|
109 |
-
107,47.0
|
110 |
-
108,21.0
|
111 |
-
109,27.0
|
112 |
-
110,22.0
|
113 |
-
111,30.0
|
114 |
-
112,22.0
|
115 |
-
113,29.0
|
116 |
-
114,25.0
|
117 |
-
115,68.0
|
118 |
-
116,102.0
|
119 |
-
117,54.0
|
120 |
-
118,46.0
|
121 |
-
119,34.0
|
122 |
-
120,61.0
|
123 |
-
121,81.0
|
124 |
-
122,55.0
|
125 |
-
123,67.0
|
126 |
-
124,71.0
|
127 |
-
125,46.0
|
128 |
-
126,88.0
|
129 |
-
127,90.0
|
130 |
-
128,68.0
|
131 |
-
129,114.0
|
132 |
-
130,66.0
|
133 |
-
131,102.0
|
134 |
-
132,100.0
|
135 |
-
133,88.0
|
136 |
-
134,80.0
|
137 |
-
135,81.0
|
138 |
-
136,49.0
|
139 |
-
137,123.0
|
140 |
-
138,197.0
|
141 |
-
139,146.0
|
142 |
-
140,93.0
|
143 |
-
141,135.0
|
144 |
-
142,117.0
|
145 |
-
143,104.0
|
146 |
-
144,168.0
|
147 |
-
145,114.0
|
148 |
-
146,82.0
|
149 |
-
147,153.0
|
150 |
-
148,106.0
|
151 |
-
149,140.0
|
152 |
-
150,100.0
|
153 |
-
151,120.0
|
154 |
-
152,117.0
|
155 |
-
153,173.0
|
156 |
-
154,200.0
|
157 |
-
155,142.0
|
158 |
-
156,180.0
|
159 |
-
157,156.0
|
160 |
-
158,149.0
|
161 |
-
159,173.0
|
162 |
-
160,187.0
|
163 |
-
161,200.0
|
164 |
-
162,188.0
|
165 |
-
163,156.0
|
166 |
-
164,170.0
|
167 |
-
165,158.0
|
168 |
-
166,200.0
|
169 |
-
167,152.0
|
170 |
-
168,194.0
|
171 |
-
169,196.0
|
172 |
-
170,189.0
|
173 |
-
171,200.0
|
174 |
-
172,173.0
|
175 |
-
173,200.0
|
176 |
-
174,154.0
|
177 |
-
175,200.0
|
178 |
-
176,200.0
|
179 |
-
177,200.0
|
180 |
-
178,189.0
|
181 |
-
179,194.0
|
182 |
-
180,199.0
|
183 |
-
181,200.0
|
184 |
-
182,200.0
|
185 |
-
183,189.0
|
186 |
-
184,200.0
|
187 |
-
185,200.0
|
188 |
-
186,200.0
|
189 |
-
187,200.0
|
190 |
-
188,200.0
|
191 |
-
189,200.0
|
192 |
-
190,200.0
|
193 |
-
191,200.0
|
194 |
-
192,200.0
|
195 |
-
193,200.0
|
196 |
-
194,200.0
|
197 |
-
195,189.0
|
198 |
-
196,198.0
|
199 |
-
197,195.0
|
200 |
-
198,199.0
|
201 |
-
199,200.0
|
202 |
-
200,200.0
|
203 |
-
201,200.0
|
204 |
-
202,198.0
|
205 |
-
203,196.0
|
206 |
-
204,200.0
|
207 |
-
205,200.0
|
208 |
-
206,200.0
|
209 |
-
207,200.0
|
210 |
-
208,200.0
|
211 |
-
209,200.0
|
212 |
-
210,195.0
|
213 |
-
211,198.0
|
214 |
-
212,200.0
|
215 |
-
213,200.0
|
216 |
-
214,200.0
|
217 |
-
215,200.0
|
218 |
-
216,200.0
|
219 |
-
217,194.0
|
220 |
-
218,200.0
|
221 |
-
219,200.0
|
222 |
-
220,200.0
|
223 |
-
221,200.0
|
224 |
-
222,200.0
|
225 |
-
223,200.0
|
226 |
-
224,197.0
|
227 |
-
225,200.0
|
228 |
-
226,200.0
|
229 |
-
227,200.0
|
230 |
-
228,199.0
|
231 |
-
229,200.0
|
232 |
-
230,200.0
|
233 |
-
231,198.0
|
234 |
-
232,200.0
|
235 |
-
233,200.0
|
236 |
-
234,197.0
|
237 |
-
235,200.0
|
238 |
-
236,200.0
|
239 |
-
237,200.0
|
240 |
-
238,200.0
|
241 |
-
239,196.0
|
242 |
-
240,200.0
|
243 |
-
241,200.0
|
244 |
-
242,195.0
|
245 |
-
243,200.0
|
246 |
-
244,200.0
|
247 |
-
245,200.0
|
248 |
-
246,200.0
|
249 |
-
247,200.0
|
250 |
-
248,200.0
|
251 |
-
249,200.0
|
252 |
-
250,200.0
|
253 |
-
251,200.0
|
254 |
-
252,200.0
|
255 |
-
253,200.0
|
256 |
-
254,200.0
|
257 |
-
255,199.0
|
258 |
-
256,200.0
|
259 |
-
257,200.0
|
260 |
-
258,200.0
|
261 |
-
259,200.0
|
262 |
-
260,200.0
|
263 |
-
261,200.0
|
264 |
-
262,200.0
|
265 |
-
263,200.0
|
266 |
-
264,200.0
|
267 |
-
265,200.0
|
268 |
-
266,200.0
|
269 |
-
267,200.0
|
270 |
-
268,200.0
|
271 |
-
269,200.0
|
272 |
-
270,200.0
|
273 |
-
271,200.0
|
274 |
-
272,200.0
|
275 |
-
273,200.0
|
276 |
-
274,200.0
|
277 |
-
275,200.0
|
278 |
-
276,200.0
|
279 |
-
277,200.0
|
280 |
-
278,200.0
|
281 |
-
279,200.0
|
282 |
-
280,200.0
|
283 |
-
281,200.0
|
284 |
-
282,200.0
|
285 |
-
283,200.0
|
286 |
-
284,200.0
|
287 |
-
285,200.0
|
288 |
-
286,200.0
|
289 |
-
287,200.0
|
290 |
-
288,200.0
|
291 |
-
289,200.0
|
292 |
-
290,200.0
|
293 |
-
291,200.0
|
294 |
-
292,200.0
|
295 |
-
293,200.0
|
296 |
-
294,200.0
|
297 |
-
295,200.0
|
298 |
-
296,200.0
|
299 |
-
297,200.0
|
300 |
-
298,200.0
|
301 |
-
299,200.0
|
302 |
-
300,200.0
|
303 |
-
301,200.0
|
304 |
-
302,200.0
|
305 |
-
303,200.0
|
306 |
-
304,200.0
|
307 |
-
305,200.0
|
308 |
-
306,200.0
|
309 |
-
307,200.0
|
310 |
-
308,200.0
|
311 |
-
309,200.0
|
312 |
-
310,200.0
|
313 |
-
311,200.0
|
314 |
-
312,200.0
|
315 |
-
313,200.0
|
316 |
-
314,200.0
|
317 |
-
315,200.0
|
318 |
-
316,200.0
|
319 |
-
317,200.0
|
320 |
-
318,200.0
|
321 |
-
319,200.0
|
322 |
-
320,200.0
|
323 |
-
321,200.0
|
324 |
-
322,200.0
|
325 |
-
323,200.0
|
326 |
-
324,200.0
|
327 |
-
325,200.0
|
328 |
-
326,200.0
|
329 |
-
327,200.0
|
330 |
-
328,200.0
|
331 |
-
329,200.0
|
332 |
-
330,200.0
|
333 |
-
331,200.0
|
334 |
-
332,200.0
|
335 |
-
333,200.0
|
336 |
-
334,200.0
|
337 |
-
335,200.0
|
338 |
-
336,200.0
|
339 |
-
337,200.0
|
340 |
-
338,200.0
|
341 |
-
339,200.0
|
342 |
-
340,200.0
|
343 |
-
341,200.0
|
344 |
-
342,200.0
|
345 |
-
343,200.0
|
346 |
-
344,200.0
|
347 |
-
345,200.0
|
348 |
-
346,200.0
|
349 |
-
347,200.0
|
350 |
-
348,200.0
|
351 |
-
349,200.0
|
352 |
-
350,200.0
|
353 |
-
351,200.0
|
354 |
-
352,200.0
|
355 |
-
353,200.0
|
356 |
-
354,200.0
|
357 |
-
355,200.0
|
358 |
-
356,200.0
|
359 |
-
357,200.0
|
360 |
-
358,200.0
|
361 |
-
359,200.0
|
362 |
-
360,200.0
|
363 |
-
361,200.0
|
364 |
-
362,200.0
|
365 |
-
363,200.0
|
366 |
-
364,200.0
|
367 |
-
365,200.0
|
368 |
-
366,200.0
|
369 |
-
367,200.0
|
370 |
-
368,200.0
|
371 |
-
369,200.0
|
372 |
-
370,200.0
|
373 |
-
371,200.0
|
374 |
-
372,200.0
|
375 |
-
373,200.0
|
376 |
-
374,200.0
|
377 |
-
375,200.0
|
378 |
-
376,200.0
|
379 |
-
377,200.0
|
380 |
-
378,200.0
|
381 |
-
379,200.0
|
382 |
-
380,200.0
|
383 |
-
381,200.0
|
384 |
-
382,200.0
|
385 |
-
383,200.0
|
386 |
-
384,200.0
|
387 |
-
385,200.0
|
388 |
-
386,200.0
|
389 |
-
387,200.0
|
390 |
-
388,200.0
|
391 |
-
389,200.0
|
392 |
-
390,200.0
|
393 |
-
391,200.0
|
394 |
-
392,200.0
|
395 |
-
393,200.0
|
396 |
-
394,200.0
|
397 |
-
395,200.0
|
398 |
-
396,200.0
|
399 |
-
397,200.0
|
400 |
-
398,200.0
|
401 |
-
399,200.0
|
402 |
-
400,200.0
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
CartPole-v1/Train_CartPole-v1_DoubleDQN_ray_20230406-162938/logs/log.txt
DELETED
@@ -1,42 +0,0 @@
|
|
1 |
-
2023-04-06 16:29:38 - r - INFO: - Hyperparameters:
|
2 |
-
2023-04-06 16:29:38 - r - INFO: - ================================================================================
|
3 |
-
2023-04-06 16:29:38 - r - INFO: - Name Value Type
|
4 |
-
2023-04-06 16:29:38 - r - INFO: - env_name CartPole-v1 <class 'str'>
|
5 |
-
2023-04-06 16:29:38 - r - INFO: - new_step_api 1 <class 'bool'>
|
6 |
-
2023-04-06 16:29:38 - r - INFO: - wrapper None <class 'str'>
|
7 |
-
2023-04-06 16:29:38 - r - INFO: - render 0 <class 'bool'>
|
8 |
-
2023-04-06 16:29:38 - r - INFO: - render_mode human <class 'str'>
|
9 |
-
2023-04-06 16:29:38 - r - INFO: - algo_name DoubleDQN <class 'str'>
|
10 |
-
2023-04-06 16:29:38 - r - INFO: - mode train <class 'str'>
|
11 |
-
2023-04-06 16:29:38 - r - INFO: - mp_backend ray <class 'str'>
|
12 |
-
2023-04-06 16:29:38 - r - INFO: - seed 1 <class 'int'>
|
13 |
-
2023-04-06 16:29:38 - r - INFO: - device cpu <class 'str'>
|
14 |
-
2023-04-06 16:29:38 - r - INFO: - train_eps 400 <class 'int'>
|
15 |
-
2023-04-06 16:29:38 - r - INFO: - test_eps 10 <class 'int'>
|
16 |
-
2023-04-06 16:29:38 - r - INFO: - eval_eps 10 <class 'int'>
|
17 |
-
2023-04-06 16:29:38 - r - INFO: - eval_per_episode 5 <class 'int'>
|
18 |
-
2023-04-06 16:29:38 - r - INFO: - max_steps 200 <class 'int'>
|
19 |
-
2023-04-06 16:29:38 - r - INFO: - load_checkpoint 0 <class 'bool'>
|
20 |
-
2023-04-06 16:29:38 - r - INFO: - load_path Train_CartPole-v1_DQN_20221026-054757 <class 'str'>
|
21 |
-
2023-04-06 16:29:38 - r - INFO: - show_fig 0 <class 'bool'>
|
22 |
-
2023-04-06 16:29:38 - r - INFO: - save_fig 1 <class 'bool'>
|
23 |
-
2023-04-06 16:29:38 - r - INFO: - n_workers 2 <class 'int'>
|
24 |
-
2023-04-06 16:29:38 - r - INFO: - epsilon_start 0.95 <class 'float'>
|
25 |
-
2023-04-06 16:29:38 - r - INFO: - epsilon_end 0.01 <class 'float'>
|
26 |
-
2023-04-06 16:29:38 - r - INFO: - epsilon_decay 500 <class 'int'>
|
27 |
-
2023-04-06 16:29:38 - r - INFO: - gamma 0.95 <class 'float'>
|
28 |
-
2023-04-06 16:29:38 - r - INFO: - lr 0.0001 <class 'float'>
|
29 |
-
2023-04-06 16:29:38 - r - INFO: - buffer_size 100000 <class 'int'>
|
30 |
-
2023-04-06 16:29:38 - r - INFO: - batch_size 64 <class 'int'>
|
31 |
-
2023-04-06 16:29:38 - r - INFO: - target_update 4 <class 'int'>
|
32 |
-
2023-04-06 16:29:38 - r - INFO: - value_layers [{'layer_type': 'linear', 'layer_dim': ['n_states', 256], 'activation': 'relu'}, {'layer_type': 'linear', 'layer_dim': [256, 256], 'activation': 'relu'}, {'layer_type': 'linear', 'layer_dim': [256, 'n_actions'], 'activation': 'none'}] <class 'str'>
|
33 |
-
2023-04-06 16:29:38 - r - INFO: - task_dir /media/disk/gsc/joyrl-offline/tasks/Train_CartPole-v1_DoubleDQN_20230406-162938 <class 'str'>
|
34 |
-
2023-04-06 16:29:38 - r - INFO: - res_dir /media/disk/gsc/joyrl-offline/tasks/Train_CartPole-v1_DoubleDQN_20230406-162938/results <class 'str'>
|
35 |
-
2023-04-06 16:29:38 - r - INFO: - log_dir /media/disk/gsc/joyrl-offline/tasks/Train_CartPole-v1_DoubleDQN_20230406-162938/logs <class 'str'>
|
36 |
-
2023-04-06 16:29:38 - r - INFO: - traj_dir /media/disk/gsc/joyrl-offline/tasks/Train_CartPole-v1_DoubleDQN_20230406-162938/traj <class 'str'>
|
37 |
-
2023-04-06 16:29:38 - r - INFO: - video_dir /media/disk/gsc/joyrl-offline/tasks/Train_CartPole-v1_DoubleDQN_20230406-162938/videos <class 'str'>
|
38 |
-
2023-04-06 16:29:38 - r - INFO: - ================================================================================
|
39 |
-
2023-04-06 16:29:40 - r - INFO: - n_states: 4, n_actions: 2
|
40 |
-
2023-04-06 16:29:40 - r - INFO: - Start training!
|
41 |
-
2023-04-06 16:29:40 - r - INFO: - Env: CartPole-v1, Algorithm: DoubleDQN, Device: cpu
|
42 |
-
2023-04-06 16:37:19 - r - INFO: - Finish training!
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
CartPole-v1/Train_CartPole-v1_DoubleDQN_ray_20230406-162938/models/checkpoint.pt
DELETED
@@ -1,3 +0,0 @@
|
|
1 |
-
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:7d3a724152cea263dec5b58d80bee101405e7b3268a34c265d414ebbd771c5ac
|
3 |
-
size 272407
|
|
|
|
|
|
|
|
CartPole-v1/Train_CartPole-v1_DoubleDQN_ray_20230406-162938/results/learning_curve.png
DELETED
Binary file (58.2 kB)
|
|
CartPole-v1/Train_CartPole-v1_DoubleDQN_ray_20230406-162938/results/res.csv
DELETED
@@ -1,401 +0,0 @@
|
|
1 |
-
episodes,rewards
|
2 |
-
0,16.0
|
3 |
-
1,18.0
|
4 |
-
2,18.0
|
5 |
-
3,28.0
|
6 |
-
4,16.0
|
7 |
-
5,15.0
|
8 |
-
6,14.0
|
9 |
-
7,15.0
|
10 |
-
8,11.0
|
11 |
-
9,49.0
|
12 |
-
10,31.0
|
13 |
-
11,31.0
|
14 |
-
12,11.0
|
15 |
-
13,14.0
|
16 |
-
14,13.0
|
17 |
-
15,21.0
|
18 |
-
16,9.0
|
19 |
-
17,28.0
|
20 |
-
18,10.0
|
21 |
-
19,19.0
|
22 |
-
20,15.0
|
23 |
-
21,12.0
|
24 |
-
22,13.0
|
25 |
-
23,18.0
|
26 |
-
24,15.0
|
27 |
-
25,14.0
|
28 |
-
26,13.0
|
29 |
-
27,11.0
|
30 |
-
28,12.0
|
31 |
-
29,14.0
|
32 |
-
30,12.0
|
33 |
-
31,10.0
|
34 |
-
32,9.0
|
35 |
-
33,13.0
|
36 |
-
34,16.0
|
37 |
-
35,17.0
|
38 |
-
36,15.0
|
39 |
-
37,12.0
|
40 |
-
38,10.0
|
41 |
-
39,18.0
|
42 |
-
40,10.0
|
43 |
-
41,11.0
|
44 |
-
42,21.0
|
45 |
-
43,16.0
|
46 |
-
44,15.0
|
47 |
-
45,19.0
|
48 |
-
46,15.0
|
49 |
-
47,20.0
|
50 |
-
48,15.0
|
51 |
-
49,14.0
|
52 |
-
50,12.0
|
53 |
-
51,10.0
|
54 |
-
52,12.0
|
55 |
-
53,11.0
|
56 |
-
54,12.0
|
57 |
-
55,10.0
|
58 |
-
56,11.0
|
59 |
-
57,9.0
|
60 |
-
58,15.0
|
61 |
-
59,10.0
|
62 |
-
60,15.0
|
63 |
-
61,11.0
|
64 |
-
62,12.0
|
65 |
-
63,15.0
|
66 |
-
64,13.0
|
67 |
-
65,11.0
|
68 |
-
66,12.0
|
69 |
-
67,12.0
|
70 |
-
68,10.0
|
71 |
-
69,11.0
|
72 |
-
70,11.0
|
73 |
-
71,9.0
|
74 |
-
72,9.0
|
75 |
-
73,11.0
|
76 |
-
74,9.0
|
77 |
-
75,10.0
|
78 |
-
76,9.0
|
79 |
-
77,10.0
|
80 |
-
78,9.0
|
81 |
-
79,10.0
|
82 |
-
80,10.0
|
83 |
-
81,9.0
|
84 |
-
82,15.0
|
85 |
-
83,10.0
|
86 |
-
84,10.0
|
87 |
-
85,11.0
|
88 |
-
86,11.0
|
89 |
-
87,13.0
|
90 |
-
88,13.0
|
91 |
-
89,9.0
|
92 |
-
90,16.0
|
93 |
-
91,12.0
|
94 |
-
92,15.0
|
95 |
-
93,9.0
|
96 |
-
94,10.0
|
97 |
-
95,10.0
|
98 |
-
96,11.0
|
99 |
-
97,11.0
|
100 |
-
98,9.0
|
101 |
-
99,12.0
|
102 |
-
100,16.0
|
103 |
-
101,10.0
|
104 |
-
102,15.0
|
105 |
-
103,9.0
|
106 |
-
104,9.0
|
107 |
-
105,10.0
|
108 |
-
106,11.0
|
109 |
-
107,10.0
|
110 |
-
108,13.0
|
111 |
-
109,11.0
|
112 |
-
110,9.0
|
113 |
-
111,14.0
|
114 |
-
112,10.0
|
115 |
-
113,12.0
|
116 |
-
114,10.0
|
117 |
-
115,10.0
|
118 |
-
116,10.0
|
119 |
-
117,13.0
|
120 |
-
118,10.0
|
121 |
-
119,11.0
|
122 |
-
120,9.0
|
123 |
-
121,11.0
|
124 |
-
122,10.0
|
125 |
-
123,10.0
|
126 |
-
124,12.0
|
127 |
-
125,41.0
|
128 |
-
126,9.0
|
129 |
-
127,31.0
|
130 |
-
128,14.0
|
131 |
-
129,14.0
|
132 |
-
130,14.0
|
133 |
-
131,11.0
|
134 |
-
132,14.0
|
135 |
-
133,12.0
|
136 |
-
134,16.0
|
137 |
-
135,11.0
|
138 |
-
136,12.0
|
139 |
-
137,16.0
|
140 |
-
138,12.0
|
141 |
-
139,14.0
|
142 |
-
140,12.0
|
143 |
-
141,18.0
|
144 |
-
142,15.0
|
145 |
-
143,18.0
|
146 |
-
144,14.0
|
147 |
-
145,14.0
|
148 |
-
146,18.0
|
149 |
-
147,17.0
|
150 |
-
148,24.0
|
151 |
-
149,13.0
|
152 |
-
150,18.0
|
153 |
-
151,15.0
|
154 |
-
152,20.0
|
155 |
-
153,17.0
|
156 |
-
154,16.0
|
157 |
-
155,17.0
|
158 |
-
156,14.0
|
159 |
-
157,26.0
|
160 |
-
158,26.0
|
161 |
-
159,42.0
|
162 |
-
160,25.0
|
163 |
-
161,58.0
|
164 |
-
162,48.0
|
165 |
-
163,48.0
|
166 |
-
164,61.0
|
167 |
-
165,115.0
|
168 |
-
166,156.0
|
169 |
-
167,56.0
|
170 |
-
168,61.0
|
171 |
-
169,83.0
|
172 |
-
170,36.0
|
173 |
-
171,47.0
|
174 |
-
172,31.0
|
175 |
-
173,27.0
|
176 |
-
174,50.0
|
177 |
-
175,34.0
|
178 |
-
176,32.0
|
179 |
-
177,49.0
|
180 |
-
178,30.0
|
181 |
-
179,50.0
|
182 |
-
180,34.0
|
183 |
-
181,27.0
|
184 |
-
182,49.0
|
185 |
-
183,35.0
|
186 |
-
184,52.0
|
187 |
-
185,35.0
|
188 |
-
186,47.0
|
189 |
-
187,50.0
|
190 |
-
188,35.0
|
191 |
-
189,54.0
|
192 |
-
190,33.0
|
193 |
-
191,50.0
|
194 |
-
192,63.0
|
195 |
-
193,121.0
|
196 |
-
194,86.0
|
197 |
-
195,46.0
|
198 |
-
196,54.0
|
199 |
-
197,42.0
|
200 |
-
198,73.0
|
201 |
-
199,45.0
|
202 |
-
200,48.0
|
203 |
-
201,72.0
|
204 |
-
202,60.0
|
205 |
-
203,96.0
|
206 |
-
204,40.0
|
207 |
-
205,46.0
|
208 |
-
206,65.0
|
209 |
-
207,84.0
|
210 |
-
208,115.0
|
211 |
-
209,78.0
|
212 |
-
210,33.0
|
213 |
-
211,40.0
|
214 |
-
212,32.0
|
215 |
-
213,39.0
|
216 |
-
214,47.0
|
217 |
-
215,37.0
|
218 |
-
216,53.0
|
219 |
-
217,37.0
|
220 |
-
218,56.0
|
221 |
-
219,36.0
|
222 |
-
220,101.0
|
223 |
-
221,105.0
|
224 |
-
222,172.0
|
225 |
-
223,116.0
|
226 |
-
224,200.0
|
227 |
-
225,162.0
|
228 |
-
226,200.0
|
229 |
-
227,200.0
|
230 |
-
228,200.0
|
231 |
-
229,200.0
|
232 |
-
230,200.0
|
233 |
-
231,200.0
|
234 |
-
232,200.0
|
235 |
-
233,200.0
|
236 |
-
234,200.0
|
237 |
-
235,200.0
|
238 |
-
236,200.0
|
239 |
-
237,200.0
|
240 |
-
238,200.0
|
241 |
-
239,200.0
|
242 |
-
240,200.0
|
243 |
-
241,200.0
|
244 |
-
242,200.0
|
245 |
-
243,200.0
|
246 |
-
244,200.0
|
247 |
-
245,200.0
|
248 |
-
246,200.0
|
249 |
-
247,200.0
|
250 |
-
248,200.0
|
251 |
-
249,200.0
|
252 |
-
250,200.0
|
253 |
-
251,200.0
|
254 |
-
252,200.0
|
255 |
-
253,200.0
|
256 |
-
254,200.0
|
257 |
-
255,200.0
|
258 |
-
256,200.0
|
259 |
-
257,200.0
|
260 |
-
258,200.0
|
261 |
-
259,200.0
|
262 |
-
260,200.0
|
263 |
-
261,200.0
|
264 |
-
262,200.0
|
265 |
-
263,200.0
|
266 |
-
264,200.0
|
267 |
-
265,200.0
|
268 |
-
266,200.0
|
269 |
-
267,200.0
|
270 |
-
268,200.0
|
271 |
-
269,200.0
|
272 |
-
270,200.0
|
273 |
-
271,200.0
|
274 |
-
272,200.0
|
275 |
-
273,200.0
|
276 |
-
274,200.0
|
277 |
-
275,200.0
|
278 |
-
276,200.0
|
279 |
-
277,200.0
|
280 |
-
278,200.0
|
281 |
-
279,200.0
|
282 |
-
280,200.0
|
283 |
-
281,200.0
|
284 |
-
282,200.0
|
285 |
-
283,200.0
|
286 |
-
284,200.0
|
287 |
-
285,200.0
|
288 |
-
286,200.0
|
289 |
-
287,200.0
|
290 |
-
288,200.0
|
291 |
-
289,199.0
|
292 |
-
290,200.0
|
293 |
-
291,190.0
|
294 |
-
292,179.0
|
295 |
-
293,189.0
|
296 |
-
294,193.0
|
297 |
-
295,200.0
|
298 |
-
296,200.0
|
299 |
-
297,200.0
|
300 |
-
298,195.0
|
301 |
-
299,200.0
|
302 |
-
300,186.0
|
303 |
-
301,175.0
|
304 |
-
302,177.0
|
305 |
-
303,185.0
|
306 |
-
304,167.0
|
307 |
-
305,172.0
|
308 |
-
306,164.0
|
309 |
-
307,146.0
|
310 |
-
308,187.0
|
311 |
-
309,150.0
|
312 |
-
310,146.0
|
313 |
-
311,165.0
|
314 |
-
312,200.0
|
315 |
-
313,200.0
|
316 |
-
314,200.0
|
317 |
-
315,200.0
|
318 |
-
316,200.0
|
319 |
-
317,158.0
|
320 |
-
318,181.0
|
321 |
-
319,174.0
|
322 |
-
320,175.0
|
323 |
-
321,176.0
|
324 |
-
322,170.0
|
325 |
-
323,161.0
|
326 |
-
324,180.0
|
327 |
-
325,200.0
|
328 |
-
326,198.0
|
329 |
-
327,179.0
|
330 |
-
328,192.0
|
331 |
-
329,157.0
|
332 |
-
330,151.0
|
333 |
-
331,198.0
|
334 |
-
332,154.0
|
335 |
-
333,165.0
|
336 |
-
334,200.0
|
337 |
-
335,179.0
|
338 |
-
336,200.0
|
339 |
-
337,191.0
|
340 |
-
338,177.0
|
341 |
-
339,200.0
|
342 |
-
340,171.0
|
343 |
-
341,200.0
|
344 |
-
342,200.0
|
345 |
-
343,200.0
|
346 |
-
344,200.0
|
347 |
-
345,200.0
|
348 |
-
346,200.0
|
349 |
-
347,163.0
|
350 |
-
348,134.0
|
351 |
-
349,200.0
|
352 |
-
350,140.0
|
353 |
-
351,200.0
|
354 |
-
352,200.0
|
355 |
-
353,139.0
|
356 |
-
354,152.0
|
357 |
-
355,136.0
|
358 |
-
356,200.0
|
359 |
-
357,200.0
|
360 |
-
358,173.0
|
361 |
-
359,200.0
|
362 |
-
360,155.0
|
363 |
-
361,134.0
|
364 |
-
362,200.0
|
365 |
-
363,186.0
|
366 |
-
364,142.0
|
367 |
-
365,200.0
|
368 |
-
366,200.0
|
369 |
-
367,128.0
|
370 |
-
368,200.0
|
371 |
-
369,200.0
|
372 |
-
370,200.0
|
373 |
-
371,200.0
|
374 |
-
372,200.0
|
375 |
-
373,200.0
|
376 |
-
374,200.0
|
377 |
-
375,142.0
|
378 |
-
376,162.0
|
379 |
-
377,180.0
|
380 |
-
378,120.0
|
381 |
-
379,190.0
|
382 |
-
380,169.0
|
383 |
-
381,125.0
|
384 |
-
382,189.0
|
385 |
-
383,158.0
|
386 |
-
384,197.0
|
387 |
-
385,200.0
|
388 |
-
386,200.0
|
389 |
-
387,139.0
|
390 |
-
388,158.0
|
391 |
-
389,165.0
|
392 |
-
390,200.0
|
393 |
-
391,200.0
|
394 |
-
392,113.0
|
395 |
-
393,115.0
|
396 |
-
394,117.0
|
397 |
-
395,119.0
|
398 |
-
396,110.0
|
399 |
-
397,119.0
|
400 |
-
398,200.0
|
401 |
-
399,133.0
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
CartPole-v1/{Train_CartPole-v1_DoubleDQN_ray_20230406-162938 β Train_ray_CartPole-v1_DoubleDQN_20230516-115126}/config.yaml
RENAMED
@@ -1,46 +1,43 @@
|
|
1 |
general_cfg:
|
2 |
algo_name: DoubleDQN
|
|
|
3 |
device: cpu
|
4 |
-
env_name:
|
5 |
-
eval_eps: 10
|
6 |
-
eval_per_episode: 5
|
7 |
load_checkpoint: false
|
|
|
8 |
load_path: Train_CartPole-v1_DQN_20221026-054757
|
9 |
-
|
|
|
10 |
mode: train
|
|
|
11 |
mp_backend: ray
|
12 |
n_workers: 2
|
13 |
-
|
14 |
-
|
15 |
-
render_mode: human
|
16 |
-
save_fig: true
|
17 |
seed: 1
|
18 |
-
show_fig: false
|
19 |
-
test_eps: 10
|
20 |
-
train_eps: 400
|
21 |
-
wrapper: null
|
22 |
algo_cfg:
|
23 |
batch_size: 64
|
24 |
buffer_size: 100000
|
|
|
25 |
epsilon_decay: 500
|
26 |
epsilon_end: 0.01
|
27 |
epsilon_start: 0.95
|
28 |
-
gamma: 0.
|
29 |
lr: 0.0001
|
30 |
target_update: 4
|
31 |
value_layers:
|
32 |
- activation: relu
|
33 |
layer_dim:
|
34 |
-
- n_states
|
35 |
- 256
|
36 |
layer_type: linear
|
37 |
- activation: relu
|
38 |
layer_dim:
|
39 |
- 256
|
40 |
-
- 256
|
41 |
-
layer_type: linear
|
42 |
-
- activation: none
|
43 |
-
layer_dim:
|
44 |
-
- 256
|
45 |
-
- n_actions
|
46 |
layer_type: linear
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
general_cfg:
|
2 |
algo_name: DoubleDQN
|
3 |
+
collect_traj: false
|
4 |
device: cpu
|
5 |
+
env_name: gym
|
|
|
|
|
6 |
load_checkpoint: false
|
7 |
+
load_model_step: best
|
8 |
load_path: Train_CartPole-v1_DQN_20221026-054757
|
9 |
+
max_episode: 100
|
10 |
+
max_step: 200
|
11 |
mode: train
|
12 |
+
model_save_fre: 500
|
13 |
mp_backend: ray
|
14 |
n_workers: 2
|
15 |
+
online_eval: true
|
16 |
+
online_eval_episode: 10
|
|
|
|
|
17 |
seed: 1
|
|
|
|
|
|
|
|
|
18 |
algo_cfg:
|
19 |
batch_size: 64
|
20 |
buffer_size: 100000
|
21 |
+
buffer_type: REPLAY_QUE
|
22 |
epsilon_decay: 500
|
23 |
epsilon_end: 0.01
|
24 |
epsilon_start: 0.95
|
25 |
+
gamma: 0.99
|
26 |
lr: 0.0001
|
27 |
target_update: 4
|
28 |
value_layers:
|
29 |
- activation: relu
|
30 |
layer_dim:
|
|
|
31 |
- 256
|
32 |
layer_type: linear
|
33 |
- activation: relu
|
34 |
layer_dim:
|
35 |
- 256
|
|
|
|
|
|
|
|
|
|
|
|
|
36 |
layer_type: linear
|
37 |
+
env_cfg:
|
38 |
+
id: CartPole-v1
|
39 |
+
ignore_params:
|
40 |
+
- wrapper
|
41 |
+
- ignore_params
|
42 |
+
render_mode: null
|
43 |
+
wrapper: null
|
CartPole-v1/Train_ray_CartPole-v1_DoubleDQN_20230516-115126/logs/log.txt
ADDED
@@ -0,0 +1,157 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
2023-05-16 11:51:26 - SimpleLog - INFO: - General Configs:
|
2 |
+
2023-05-16 11:51:26 - SimpleLog - INFO: - ================================================================================
|
3 |
+
2023-05-16 11:51:26 - SimpleLog - INFO: - Name Value Type
|
4 |
+
2023-05-16 11:51:26 - SimpleLog - INFO: - env_name gym <class 'str'>
|
5 |
+
2023-05-16 11:51:26 - SimpleLog - INFO: - algo_name DoubleDQN <class 'str'>
|
6 |
+
2023-05-16 11:51:26 - SimpleLog - INFO: - mode train <class 'str'>
|
7 |
+
2023-05-16 11:51:26 - SimpleLog - INFO: - device cpu <class 'str'>
|
8 |
+
2023-05-16 11:51:26 - SimpleLog - INFO: - seed 1 <class 'int'>
|
9 |
+
2023-05-16 11:51:26 - SimpleLog - INFO: - max_episode 100 <class 'int'>
|
10 |
+
2023-05-16 11:51:26 - SimpleLog - INFO: - max_step 200 <class 'int'>
|
11 |
+
2023-05-16 11:51:26 - SimpleLog - INFO: - collect_traj 0 <class 'bool'>
|
12 |
+
2023-05-16 11:51:26 - SimpleLog - INFO: - mp_backend ray <class 'str'>
|
13 |
+
2023-05-16 11:51:26 - SimpleLog - INFO: - n_workers 2 <class 'int'>
|
14 |
+
2023-05-16 11:51:26 - SimpleLog - INFO: - online_eval 1 <class 'bool'>
|
15 |
+
2023-05-16 11:51:26 - SimpleLog - INFO: - online_eval_episode 10 <class 'int'>
|
16 |
+
2023-05-16 11:51:26 - SimpleLog - INFO: - model_save_fre 500 <class 'int'>
|
17 |
+
2023-05-16 11:51:26 - SimpleLog - INFO: - load_checkpoint 0 <class 'bool'>
|
18 |
+
2023-05-16 11:51:26 - SimpleLog - INFO: - load_path Train_CartPole-v1_DQN_20221026-054757 <class 'str'>
|
19 |
+
2023-05-16 11:51:26 - SimpleLog - INFO: - load_model_step best <class 'str'>
|
20 |
+
2023-05-16 11:51:26 - SimpleLog - INFO: - ================================================================================
|
21 |
+
2023-05-16 11:51:26 - SimpleLog - INFO: - Algo Configs:
|
22 |
+
2023-05-16 11:51:26 - SimpleLog - INFO: - ================================================================================
|
23 |
+
2023-05-16 11:51:26 - SimpleLog - INFO: - Name Value Type
|
24 |
+
2023-05-16 11:51:26 - SimpleLog - INFO: - epsilon_start 0.95 <class 'float'>
|
25 |
+
2023-05-16 11:51:26 - SimpleLog - INFO: - epsilon_end 0.01 <class 'float'>
|
26 |
+
2023-05-16 11:51:26 - SimpleLog - INFO: - epsilon_decay 500 <class 'int'>
|
27 |
+
2023-05-16 11:51:26 - SimpleLog - INFO: - gamma 0.99 <class 'float'>
|
28 |
+
2023-05-16 11:51:26 - SimpleLog - INFO: - lr 0.0001 <class 'float'>
|
29 |
+
2023-05-16 11:51:26 - SimpleLog - INFO: - buffer_size 100000 <class 'int'>
|
30 |
+
2023-05-16 11:51:26 - SimpleLog - INFO: - batch_size 64 <class 'int'>
|
31 |
+
2023-05-16 11:51:26 - SimpleLog - INFO: - target_update 4 <class 'int'>
|
32 |
+
2023-05-16 11:51:26 - SimpleLog - INFO: - value_layers [{'layer_type': 'linear', 'layer_dim': [256], 'activation': 'relu'}, {'layer_type': 'linear', 'layer_dim': [256], 'activation': 'relu'}] <class 'str'>
|
33 |
+
2023-05-16 11:51:26 - SimpleLog - INFO: - buffer_type REPLAY_QUE <class 'str'>
|
34 |
+
2023-05-16 11:51:26 - SimpleLog - INFO: - ================================================================================
|
35 |
+
2023-05-16 11:51:26 - SimpleLog - INFO: - Env Configs:
|
36 |
+
2023-05-16 11:51:26 - SimpleLog - INFO: - ================================================================================
|
37 |
+
2023-05-16 11:51:26 - SimpleLog - INFO: - Name Value Type
|
38 |
+
2023-05-16 11:51:26 - SimpleLog - INFO: - id CartPole-v1 <class 'str'>
|
39 |
+
2023-05-16 11:51:26 - SimpleLog - INFO: - render_mode None <class 'str'>
|
40 |
+
2023-05-16 11:51:26 - SimpleLog - INFO: - wrapper None <class 'str'>
|
41 |
+
2023-05-16 11:51:26 - SimpleLog - INFO: - ignore_params ['wrapper', 'ignore_params'] <class 'str'>
|
42 |
+
2023-05-16 11:51:26 - SimpleLog - INFO: - ================================================================================
|
43 |
+
2023-05-16 11:51:32 - SimpleLog - INFO: - obs_space: Box([-4.8000002e+00 -3.4028235e+38 -4.1887903e-01 -3.4028235e+38], [4.8000002e+00 3.4028235e+38 4.1887903e-01 3.4028235e+38], (4,), float32), n_actions: Discrete(2)
|
44 |
+
2023-05-16 11:51:37 - RayLog - INFO: - Worker 0 finished episode 0 with reward 16.0 in 16 steps
|
45 |
+
2023-05-16 11:51:37 - RayLog - INFO: - Worker 1 finished episode 0 with reward 20.0 in 20 steps
|
46 |
+
2023-05-16 11:51:39 - RayLog - INFO: - Worker 0 finished episode 2 with reward 11.0 in 11 steps
|
47 |
+
2023-05-16 11:51:39 - RayLog - INFO: - Worker 1 finished episode 2 with reward 15.0 in 15 steps
|
48 |
+
2023-05-16 11:51:39 - RayLog - INFO: - Worker 1 finished episode 4 with reward 13.0 in 13 steps
|
49 |
+
2023-05-16 11:51:39 - RayLog - INFO: - Worker 0 finished episode 3 with reward 22.0 in 22 steps
|
50 |
+
2023-05-16 11:51:39 - RayLog - INFO: - Worker 1 finished episode 5 with reward 9.0 in 9 steps
|
51 |
+
2023-05-16 11:51:40 - RayLog - INFO: - Worker 0 finished episode 6 with reward 14.0 in 14 steps
|
52 |
+
2023-05-16 11:51:40 - RayLog - INFO: - Worker 1 finished episode 7 with reward 12.0 in 12 steps
|
53 |
+
2023-05-16 11:51:40 - RayLog - INFO: - Worker 1 finished episode 9 with reward 13.0 in 13 steps
|
54 |
+
2023-05-16 11:51:40 - RayLog - INFO: - Worker 1 finished episode 10 with reward 14.0 in 14 steps
|
55 |
+
2023-05-16 11:51:40 - RayLog - INFO: - Worker 0 finished episode 8 with reward 35.0 in 35 steps
|
56 |
+
2023-05-16 11:51:40 - RayLog - INFO: - Worker 1 finished episode 11 with reward 13.0 in 13 steps
|
57 |
+
2023-05-16 11:51:40 - RayLog - INFO: - Worker 0 finished episode 12 with reward 15.0 in 15 steps
|
58 |
+
2023-05-16 11:51:40 - RayLog - INFO: - Worker 1 finished episode 13 with reward 12.0 in 12 steps
|
59 |
+
2023-05-16 11:51:41 - RayLog - INFO: - Worker 0 finished episode 14 with reward 10.0 in 10 steps
|
60 |
+
2023-05-16 11:51:41 - RayLog - INFO: - Worker 1 finished episode 15 with reward 11.0 in 11 steps
|
61 |
+
2023-05-16 11:51:41 - RayLog - INFO: - Worker 0 finished episode 16 with reward 11.0 in 11 steps
|
62 |
+
2023-05-16 11:51:41 - RayLog - INFO: - Worker 0 finished episode 18 with reward 12.0 in 12 steps
|
63 |
+
2023-05-16 11:51:41 - RayLog - INFO: - Worker 1 finished episode 17 with reward 21.0 in 21 steps
|
64 |
+
2023-05-16 11:51:41 - RayLog - INFO: - Worker 0 finished episode 19 with reward 15.0 in 15 steps
|
65 |
+
2023-05-16 11:51:41 - RayLog - INFO: - Worker 1 finished episode 20 with reward 18.0 in 18 steps
|
66 |
+
2023-05-16 11:51:41 - RayLog - INFO: - Worker 0 finished episode 21 with reward 13.0 in 13 steps
|
67 |
+
2023-05-16 11:51:42 - RayLog - INFO: - Worker 1 finished episode 22 with reward 10.0 in 10 steps
|
68 |
+
2023-05-16 11:51:42 - RayLog - INFO: - Worker 0 finished episode 23 with reward 16.0 in 16 steps
|
69 |
+
2023-05-16 11:51:42 - RayLog - INFO: - Worker 1 finished episode 24 with reward 19.0 in 19 steps
|
70 |
+
2023-05-16 11:51:42 - RayLog - INFO: - Worker 0 finished episode 25 with reward 10.0 in 10 steps
|
71 |
+
2023-05-16 11:51:42 - RayLog - INFO: - Worker 0 finished episode 27 with reward 10.0 in 10 steps
|
72 |
+
2023-05-16 11:51:42 - RayLog - INFO: - Worker 0 finished episode 28 with reward 10.0 in 10 steps
|
73 |
+
2023-05-16 11:51:42 - RayLog - INFO: - Worker 1 finished episode 26 with reward 26.0 in 26 steps
|
74 |
+
2023-05-16 11:51:42 - RayLog - INFO: - Worker 0 finished episode 29 with reward 12.0 in 12 steps
|
75 |
+
2023-05-16 11:51:43 - RayLog - INFO: - Worker 1 finished episode 30 with reward 15.0 in 15 steps
|
76 |
+
2023-05-16 11:51:43 - RayLog - INFO: - Worker 1 finished episode 32 with reward 10.0 in 10 steps
|
77 |
+
2023-05-16 11:51:43 - RayLog - INFO: - Worker 0 finished episode 31 with reward 18.0 in 18 steps
|
78 |
+
2023-05-16 11:51:43 - RayLog - INFO: - Worker 1 finished episode 33 with reward 13.0 in 13 steps
|
79 |
+
2023-05-16 11:51:43 - RayLog - INFO: - Worker 0 finished episode 34 with reward 10.0 in 10 steps
|
80 |
+
2023-05-16 11:51:43 - RayLog - INFO: - Worker 0 finished episode 36 with reward 9.0 in 9 steps
|
81 |
+
2023-05-16 11:51:43 - RayLog - INFO: - Worker 1 finished episode 35 with reward 12.0 in 12 steps
|
82 |
+
2023-05-16 11:51:45 - RayLog - INFO: - update_step: 500, online_eval_reward: 9.000
|
83 |
+
2023-05-16 11:51:45 - RayLog - INFO: - current update step obtain a better online_eval_reward: 9.000, save the best model!
|
84 |
+
2023-05-16 11:51:45 - RayLog - INFO: - Worker 1 finished episode 38 with reward 10.0 in 10 steps
|
85 |
+
2023-05-16 11:51:45 - RayLog - INFO: - Worker 0 finished episode 37 with reward 13.0 in 13 steps
|
86 |
+
2023-05-16 11:51:45 - RayLog - INFO: - Worker 0 finished episode 40 with reward 9.0 in 9 steps
|
87 |
+
2023-05-16 11:51:45 - RayLog - INFO: - Worker 1 finished episode 39 with reward 14.0 in 14 steps
|
88 |
+
2023-05-16 11:51:45 - RayLog - INFO: - Worker 0 finished episode 41 with reward 14.0 in 14 steps
|
89 |
+
2023-05-16 11:51:45 - RayLog - INFO: - Worker 1 finished episode 42 with reward 20.0 in 20 steps
|
90 |
+
2023-05-16 11:51:45 - RayLog - INFO: - Worker 0 finished episode 43 with reward 15.0 in 15 steps
|
91 |
+
2023-05-16 11:51:46 - RayLog - INFO: - Worker 1 finished episode 44 with reward 19.0 in 19 steps
|
92 |
+
2023-05-16 11:51:46 - RayLog - INFO: - Worker 0 finished episode 45 with reward 17.0 in 17 steps
|
93 |
+
2023-05-16 11:51:46 - RayLog - INFO: - Worker 0 finished episode 47 with reward 12.0 in 12 steps
|
94 |
+
2023-05-16 11:51:46 - RayLog - INFO: - Worker 1 finished episode 46 with reward 15.0 in 15 steps
|
95 |
+
2023-05-16 11:51:46 - RayLog - INFO: - Worker 0 finished episode 48 with reward 14.0 in 14 steps
|
96 |
+
2023-05-16 11:51:46 - RayLog - INFO: - Worker 1 finished episode 49 with reward 16.0 in 16 steps
|
97 |
+
2023-05-16 11:51:46 - RayLog - INFO: - Worker 0 finished episode 50 with reward 9.0 in 9 steps
|
98 |
+
2023-05-16 11:51:46 - RayLog - INFO: - Worker 1 finished episode 51 with reward 13.0 in 13 steps
|
99 |
+
2023-05-16 11:51:46 - RayLog - INFO: - Worker 0 finished episode 52 with reward 10.0 in 10 steps
|
100 |
+
2023-05-16 11:51:47 - RayLog - INFO: - Worker 1 finished episode 53 with reward 13.0 in 13 steps
|
101 |
+
2023-05-16 11:51:47 - RayLog - INFO: - Worker 0 finished episode 54 with reward 13.0 in 13 steps
|
102 |
+
2023-05-16 11:51:47 - RayLog - INFO: - Worker 0 finished episode 56 with reward 14.0 in 14 steps
|
103 |
+
2023-05-16 11:51:47 - RayLog - INFO: - Worker 1 finished episode 55 with reward 20.0 in 20 steps
|
104 |
+
2023-05-16 11:51:47 - RayLog - INFO: - Worker 0 finished episode 57 with reward 14.0 in 14 steps
|
105 |
+
2023-05-16 11:51:47 - RayLog - INFO: - Worker 1 finished episode 58 with reward 16.0 in 16 steps
|
106 |
+
2023-05-16 11:51:47 - RayLog - INFO: - Worker 0 finished episode 59 with reward 11.0 in 11 steps
|
107 |
+
2023-05-16 11:51:48 - RayLog - INFO: - Worker 1 finished episode 60 with reward 14.0 in 14 steps
|
108 |
+
2023-05-16 11:51:48 - RayLog - INFO: - Worker 0 finished episode 61 with reward 9.0 in 9 steps
|
109 |
+
2023-05-16 11:51:48 - RayLog - INFO: - Worker 0 finished episode 63 with reward 19.0 in 19 steps
|
110 |
+
2023-05-16 11:51:48 - RayLog - INFO: - Worker 1 finished episode 62 with reward 23.0 in 23 steps
|
111 |
+
2023-05-16 11:51:48 - RayLog - INFO: - Worker 0 finished episode 64 with reward 18.0 in 18 steps
|
112 |
+
2023-05-16 11:51:49 - RayLog - INFO: - Worker 1 finished episode 65 with reward 26.0 in 26 steps
|
113 |
+
2023-05-16 11:51:49 - RayLog - INFO: - update_step: 1000, online_eval_reward: 69.000
|
114 |
+
2023-05-16 11:51:49 - RayLog - INFO: - current update step obtain a better online_eval_reward: 69.000, save the best model!
|
115 |
+
2023-05-16 11:51:50 - RayLog - INFO: - Worker 1 finished episode 67 with reward 93.0 in 93 steps
|
116 |
+
2023-05-16 11:51:51 - RayLog - INFO: - Worker 0 finished episode 66 with reward 127.0 in 127 steps
|
117 |
+
2023-05-16 11:51:51 - RayLog - INFO: - Worker 1 finished episode 68 with reward 40.0 in 40 steps
|
118 |
+
2023-05-16 11:51:52 - RayLog - INFO: - Worker 0 finished episode 69 with reward 54.0 in 54 steps
|
119 |
+
2023-05-16 11:51:52 - RayLog - INFO: - Worker 1 finished episode 70 with reward 48.0 in 48 steps
|
120 |
+
2023-05-16 11:51:53 - RayLog - INFO: - Worker 0 finished episode 71 with reward 62.0 in 62 steps
|
121 |
+
2023-05-16 11:51:53 - RayLog - INFO: - Worker 1 finished episode 72 with reward 60.0 in 60 steps
|
122 |
+
2023-05-16 11:51:54 - RayLog - INFO: - Worker 1 finished episode 74 with reward 35.0 in 35 steps
|
123 |
+
2023-05-16 11:51:54 - RayLog - INFO: - Worker 0 finished episode 73 with reward 47.0 in 47 steps
|
124 |
+
2023-05-16 11:51:54 - RayLog - INFO: - update_step: 1500, online_eval_reward: 63.000
|
125 |
+
2023-05-16 11:51:54 - RayLog - INFO: - Worker 1 finished episode 75 with reward 38.0 in 38 steps
|
126 |
+
2023-05-16 11:51:54 - RayLog - INFO: - Worker 0 finished episode 76 with reward 46.0 in 46 steps
|
127 |
+
2023-05-16 11:51:55 - RayLog - INFO: - Worker 1 finished episode 77 with reward 40.0 in 40 steps
|
128 |
+
2023-05-16 11:51:55 - RayLog - INFO: - Worker 0 finished episode 78 with reward 57.0 in 57 steps
|
129 |
+
2023-05-16 11:51:56 - RayLog - INFO: - Worker 1 finished episode 79 with reward 38.0 in 38 steps
|
130 |
+
2023-05-16 11:51:56 - RayLog - INFO: - Worker 1 finished episode 81 with reward 33.0 in 33 steps
|
131 |
+
2023-05-16 11:51:56 - RayLog - INFO: - Worker 0 finished episode 80 with reward 51.0 in 51 steps
|
132 |
+
2023-05-16 11:51:57 - RayLog - INFO: - Worker 1 finished episode 82 with reward 44.0 in 44 steps
|
133 |
+
2023-05-16 11:51:58 - RayLog - INFO: - Worker 0 finished episode 83 with reward 70.0 in 70 steps
|
134 |
+
2023-05-16 11:51:58 - RayLog - INFO: - Worker 1 finished episode 84 with reward 55.0 in 55 steps
|
135 |
+
2023-05-16 11:51:58 - RayLog - INFO: - update_step: 2000, online_eval_reward: 82.000
|
136 |
+
2023-05-16 11:51:58 - RayLog - INFO: - current update step obtain a better online_eval_reward: 82.000, save the best model!
|
137 |
+
2023-05-16 11:51:59 - RayLog - INFO: - Worker 0 finished episode 85 with reward 66.0 in 66 steps
|
138 |
+
2023-05-16 11:51:59 - RayLog - INFO: - Worker 1 finished episode 86 with reward 56.0 in 56 steps
|
139 |
+
2023-05-16 11:52:00 - RayLog - INFO: - Worker 1 finished episode 88 with reward 45.0 in 45 steps
|
140 |
+
2023-05-16 11:52:00 - RayLog - INFO: - Worker 0 finished episode 87 with reward 68.0 in 68 steps
|
141 |
+
2023-05-16 11:52:01 - RayLog - INFO: - Worker 1 finished episode 89 with reward 50.0 in 50 steps
|
142 |
+
2023-05-16 11:52:02 - RayLog - INFO: - Worker 0 finished episode 90 with reward 79.0 in 79 steps
|
143 |
+
2023-05-16 11:52:02 - RayLog - INFO: - Worker 1 finished episode 91 with reward 57.0 in 57 steps
|
144 |
+
2023-05-16 11:52:04 - RayLog - INFO: - update_step: 2500, online_eval_reward: 77.000
|
145 |
+
2023-05-16 11:52:04 - RayLog - INFO: - Worker 1 finished episode 93 with reward 66.0 in 66 steps
|
146 |
+
2023-05-16 11:52:04 - RayLog - INFO: - Worker 0 finished episode 92 with reward 84.0 in 84 steps
|
147 |
+
2023-05-16 11:52:05 - RayLog - INFO: - Worker 1 finished episode 94 with reward 56.0 in 56 steps
|
148 |
+
2023-05-16 11:52:07 - RayLog - INFO: - Worker 0 finished episode 95 with reward 134.0 in 134 steps
|
149 |
+
2023-05-16 11:52:08 - RayLog - INFO: - Worker 1 finished episode 96 with reward 115.0 in 115 steps
|
150 |
+
2023-05-16 11:52:10 - RayLog - INFO: - update_step: 3000, online_eval_reward: 200.000
|
151 |
+
2023-05-16 11:52:10 - RayLog - INFO: - current update step obtain a better online_eval_reward: 200.000, save the best model!
|
152 |
+
2023-05-16 11:52:12 - RayLog - INFO: - Worker 0 finished episode 97 with reward 200.0 in 200 steps
|
153 |
+
2023-05-16 11:52:13 - RayLog - INFO: - Worker 1 finished episode 98 with reward 200.0 in 200 steps
|
154 |
+
2023-05-16 11:52:15 - RayLog - INFO: - update_step: 3500, online_eval_reward: 200.000
|
155 |
+
2023-05-16 11:52:16 - RayLog - INFO: - Worker 0 finished episode 99 with reward 200.0 in 200 steps
|
156 |
+
2023-05-16 11:52:17 - RayLog - INFO: - Worker 1 finished episode 100 with reward 200.0 in 200 steps
|
157 |
+
2023-05-16 11:52:19 - SimpleLog - INFO: - Finish training! total time consumed: 53.70s
|
CartPole-v1/Train_ray_CartPole-v1_DoubleDQN_20230516-115126/models/1000
ADDED
Binary file (545 kB). View file
|
|
CartPole-v1/Train_ray_CartPole-v1_DoubleDQN_20230516-115126/models/1500
ADDED
Binary file (545 kB). View file
|
|
CartPole-v1/Train_ray_CartPole-v1_DoubleDQN_20230516-115126/models/2000
ADDED
Binary file (545 kB). View file
|
|
CartPole-v1/Train_ray_CartPole-v1_DoubleDQN_20230516-115126/models/2500
ADDED
Binary file (545 kB). View file
|
|
CartPole-v1/Train_ray_CartPole-v1_DoubleDQN_20230516-115126/models/3000
ADDED
Binary file (545 kB). View file
|
|
CartPole-v1/Train_ray_CartPole-v1_DoubleDQN_20230516-115126/models/3500
ADDED
Binary file (545 kB). View file
|
|
CartPole-v1/Train_ray_CartPole-v1_DoubleDQN_20230516-115126/models/500
ADDED
Binary file (545 kB). View file
|
|
CartPole-v1/Train_ray_CartPole-v1_DoubleDQN_20230516-115126/models/best
ADDED
Binary file (545 kB). View file
|
|
CartPole-v1/{Test_CartPole-v1_DoubleDQN_20221122-125611/models/checkpoint.pth β Train_ray_CartPole-v1_DoubleDQN_20230516-115126/tb_logs/interact/events.out.tfevents.1684209086.JMac.local.52110.0}
RENAMED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
-
size
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:be6be5f2b8ae4530630b850e07d2bbd6010678cb75c3d3050606cdfa0e1f6acd
|
3 |
+
size 40
|
CartPole-v1/{Train_CartPole-v1_DoubleDQN_20221122-125516/models/checkpoint.pth β Train_ray_CartPole-v1_DoubleDQN_20230516-115126/tb_logs/interact/events.out.tfevents.1684209096.JMac.local.52161.0}
RENAMED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
-
size
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:4aa028324617b734607430bc18aa93daae8536fcfea762ed7cdd92c65a472dd0
|
3 |
+
size 10028
|
CartPole-v1/Train_ray_CartPole-v1_DoubleDQN_20230516-115126/tb_logs/model/events.out.tfevents.1684209086.JMac.local.52110.1
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:37344be608143375d6347aff7b1395cef1e5b52479a11b5faf17e1f631046d1d
|
3 |
+
size 40
|
CartPole-v1/Train_ray_CartPole-v1_DoubleDQN_20230516-115126/tb_logs/model/events.out.tfevents.1684209096.JMac.local.52161.1
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:1c34b8e5d11e95894f9f70682c415e3475f765542af5aa31b961c6d0d8e11923
|
3 |
+
size 177587
|
CartPole-v1/{Train_CartPole-v1_DoubleDQN_mp_20230406-160028 β Train_single_CartPole-v1_DoubleDQN_20230516-114540}/config.yaml
RENAMED
@@ -1,46 +1,43 @@
|
|
1 |
general_cfg:
|
2 |
algo_name: DoubleDQN
|
|
|
3 |
device: cpu
|
4 |
-
env_name:
|
5 |
-
eval_eps: 10
|
6 |
-
eval_per_episode: 5
|
7 |
load_checkpoint: false
|
|
|
8 |
load_path: Train_CartPole-v1_DQN_20221026-054757
|
9 |
-
|
|
|
10 |
mode: train
|
11 |
-
|
|
|
12 |
n_workers: 2
|
13 |
-
|
14 |
-
|
15 |
-
render_mode: human
|
16 |
-
save_fig: true
|
17 |
seed: 1
|
18 |
-
show_fig: false
|
19 |
-
test_eps: 10
|
20 |
-
train_eps: 400
|
21 |
-
wrapper: null
|
22 |
algo_cfg:
|
23 |
batch_size: 64
|
24 |
buffer_size: 100000
|
|
|
25 |
epsilon_decay: 500
|
26 |
epsilon_end: 0.01
|
27 |
epsilon_start: 0.95
|
28 |
-
gamma: 0.
|
29 |
lr: 0.0001
|
30 |
target_update: 4
|
31 |
value_layers:
|
32 |
- activation: relu
|
33 |
layer_dim:
|
34 |
-
- n_states
|
35 |
- 256
|
36 |
layer_type: linear
|
37 |
- activation: relu
|
38 |
layer_dim:
|
39 |
- 256
|
40 |
-
- 256
|
41 |
-
layer_type: linear
|
42 |
-
- activation: none
|
43 |
-
layer_dim:
|
44 |
-
- 256
|
45 |
-
- n_actions
|
46 |
layer_type: linear
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
general_cfg:
|
2 |
algo_name: DoubleDQN
|
3 |
+
collect_traj: false
|
4 |
device: cpu
|
5 |
+
env_name: gym
|
|
|
|
|
6 |
load_checkpoint: false
|
7 |
+
load_model_step: best
|
8 |
load_path: Train_CartPole-v1_DQN_20221026-054757
|
9 |
+
max_episode: 100
|
10 |
+
max_step: 200
|
11 |
mode: train
|
12 |
+
model_save_fre: 500
|
13 |
+
mp_backend: single
|
14 |
n_workers: 2
|
15 |
+
online_eval: true
|
16 |
+
online_eval_episode: 10
|
|
|
|
|
17 |
seed: 1
|
|
|
|
|
|
|
|
|
18 |
algo_cfg:
|
19 |
batch_size: 64
|
20 |
buffer_size: 100000
|
21 |
+
buffer_type: REPLAY_QUE
|
22 |
epsilon_decay: 500
|
23 |
epsilon_end: 0.01
|
24 |
epsilon_start: 0.95
|
25 |
+
gamma: 0.99
|
26 |
lr: 0.0001
|
27 |
target_update: 4
|
28 |
value_layers:
|
29 |
- activation: relu
|
30 |
layer_dim:
|
|
|
31 |
- 256
|
32 |
layer_type: linear
|
33 |
- activation: relu
|
34 |
layer_dim:
|
35 |
- 256
|
|
|
|
|
|
|
|
|
|
|
|
|
36 |
layer_type: linear
|
37 |
+
env_cfg:
|
38 |
+
id: CartPole-v1
|
39 |
+
ignore_params:
|
40 |
+
- wrapper
|
41 |
+
- ignore_params
|
42 |
+
render_mode: null
|
43 |
+
wrapper: null
|
CartPole-v1/Train_single_CartPole-v1_DoubleDQN_20230516-114540/logs/log.txt
ADDED
@@ -0,0 +1,162 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
2023-05-16 11:45:40 - SimpleLog - INFO: - General Configs:
|
2 |
+
2023-05-16 11:45:40 - SimpleLog - INFO: - ================================================================================
|
3 |
+
2023-05-16 11:45:40 - SimpleLog - INFO: - Name Value Type
|
4 |
+
2023-05-16 11:45:40 - SimpleLog - INFO: - env_name gym <class 'str'>
|
5 |
+
2023-05-16 11:45:40 - SimpleLog - INFO: - algo_name DoubleDQN <class 'str'>
|
6 |
+
2023-05-16 11:45:40 - SimpleLog - INFO: - mode train <class 'str'>
|
7 |
+
2023-05-16 11:45:40 - SimpleLog - INFO: - device cpu <class 'str'>
|
8 |
+
2023-05-16 11:45:40 - SimpleLog - INFO: - seed 1 <class 'int'>
|
9 |
+
2023-05-16 11:45:40 - SimpleLog - INFO: - max_episode 100 <class 'int'>
|
10 |
+
2023-05-16 11:45:40 - SimpleLog - INFO: - max_step 200 <class 'int'>
|
11 |
+
2023-05-16 11:45:40 - SimpleLog - INFO: - collect_traj 0 <class 'bool'>
|
12 |
+
2023-05-16 11:45:40 - SimpleLog - INFO: - mp_backend single <class 'str'>
|
13 |
+
2023-05-16 11:45:40 - SimpleLog - INFO: - n_workers 2 <class 'int'>
|
14 |
+
2023-05-16 11:45:40 - SimpleLog - INFO: - online_eval 1 <class 'bool'>
|
15 |
+
2023-05-16 11:45:40 - SimpleLog - INFO: - online_eval_episode 10 <class 'int'>
|
16 |
+
2023-05-16 11:45:40 - SimpleLog - INFO: - model_save_fre 500 <class 'int'>
|
17 |
+
2023-05-16 11:45:40 - SimpleLog - INFO: - load_checkpoint 0 <class 'bool'>
|
18 |
+
2023-05-16 11:45:40 - SimpleLog - INFO: - load_path Train_CartPole-v1_DQN_20221026-054757 <class 'str'>
|
19 |
+
2023-05-16 11:45:40 - SimpleLog - INFO: - load_model_step best <class 'str'>
|
20 |
+
2023-05-16 11:45:40 - SimpleLog - INFO: - ================================================================================
|
21 |
+
2023-05-16 11:45:40 - SimpleLog - INFO: - Algo Configs:
|
22 |
+
2023-05-16 11:45:40 - SimpleLog - INFO: - ================================================================================
|
23 |
+
2023-05-16 11:45:40 - SimpleLog - INFO: - Name Value Type
|
24 |
+
2023-05-16 11:45:40 - SimpleLog - INFO: - epsilon_start 0.95 <class 'float'>
|
25 |
+
2023-05-16 11:45:40 - SimpleLog - INFO: - epsilon_end 0.01 <class 'float'>
|
26 |
+
2023-05-16 11:45:40 - SimpleLog - INFO: - epsilon_decay 500 <class 'int'>
|
27 |
+
2023-05-16 11:45:40 - SimpleLog - INFO: - gamma 0.99 <class 'float'>
|
28 |
+
2023-05-16 11:45:40 - SimpleLog - INFO: - lr 0.0001 <class 'float'>
|
29 |
+
2023-05-16 11:45:40 - SimpleLog - INFO: - buffer_size 100000 <class 'int'>
|
30 |
+
2023-05-16 11:45:40 - SimpleLog - INFO: - batch_size 64 <class 'int'>
|
31 |
+
2023-05-16 11:45:40 - SimpleLog - INFO: - target_update 4 <class 'int'>
|
32 |
+
2023-05-16 11:45:40 - SimpleLog - INFO: - value_layers [{'layer_type': 'linear', 'layer_dim': [256], 'activation': 'relu'}, {'layer_type': 'linear', 'layer_dim': [256], 'activation': 'relu'}] <class 'str'>
|
33 |
+
2023-05-16 11:45:40 - SimpleLog - INFO: - buffer_type REPLAY_QUE <class 'str'>
|
34 |
+
2023-05-16 11:45:40 - SimpleLog - INFO: - ================================================================================
|
35 |
+
2023-05-16 11:45:40 - SimpleLog - INFO: - Env Configs:
|
36 |
+
2023-05-16 11:45:40 - SimpleLog - INFO: - ================================================================================
|
37 |
+
2023-05-16 11:45:40 - SimpleLog - INFO: - Name Value Type
|
38 |
+
2023-05-16 11:45:40 - SimpleLog - INFO: - id CartPole-v1 <class 'str'>
|
39 |
+
2023-05-16 11:45:40 - SimpleLog - INFO: - render_mode None <class 'str'>
|
40 |
+
2023-05-16 11:45:40 - SimpleLog - INFO: - wrapper None <class 'str'>
|
41 |
+
2023-05-16 11:45:40 - SimpleLog - INFO: - ignore_params ['wrapper', 'ignore_params'] <class 'str'>
|
42 |
+
2023-05-16 11:45:40 - SimpleLog - INFO: - ================================================================================
|
43 |
+
2023-05-16 11:45:40 - SimpleLog - INFO: - obs_space: Box([-4.8000002e+00 -3.4028235e+38 -4.1887903e-01 -3.4028235e+38], [4.8000002e+00 3.4028235e+38 4.1887903e-01 3.4028235e+38], (4,), float32), n_actions: Discrete(2)
|
44 |
+
2023-05-16 11:45:40 - SimpleLog - INFO: - Start training!
|
45 |
+
2023-05-16 11:45:40 - SimpleLog - INFO: - episode: 0, ep_reward: 25.0, ep_step: 25
|
46 |
+
2023-05-16 11:45:40 - SimpleLog - INFO: - episode: 1, ep_reward: 17.0, ep_step: 17
|
47 |
+
2023-05-16 11:45:40 - SimpleLog - INFO: - episode: 2, ep_reward: 19.0, ep_step: 19
|
48 |
+
2023-05-16 11:45:40 - SimpleLog - INFO: - episode: 3, ep_reward: 14.0, ep_step: 14
|
49 |
+
2023-05-16 11:45:40 - SimpleLog - INFO: - episode: 4, ep_reward: 14.0, ep_step: 14
|
50 |
+
2023-05-16 11:45:40 - SimpleLog - INFO: - episode: 5, ep_reward: 21.0, ep_step: 21
|
51 |
+
2023-05-16 11:45:40 - SimpleLog - INFO: - episode: 6, ep_reward: 22.0, ep_step: 22
|
52 |
+
2023-05-16 11:45:40 - SimpleLog - INFO: - episode: 7, ep_reward: 13.0, ep_step: 13
|
53 |
+
2023-05-16 11:45:40 - SimpleLog - INFO: - episode: 8, ep_reward: 27.0, ep_step: 27
|
54 |
+
2023-05-16 11:45:40 - SimpleLog - INFO: - episode: 9, ep_reward: 11.0, ep_step: 11
|
55 |
+
2023-05-16 11:45:40 - SimpleLog - INFO: - episode: 10, ep_reward: 14.0, ep_step: 14
|
56 |
+
2023-05-16 11:45:40 - SimpleLog - INFO: - episode: 11, ep_reward: 24.0, ep_step: 24
|
57 |
+
2023-05-16 11:45:40 - SimpleLog - INFO: - episode: 12, ep_reward: 23.0, ep_step: 23
|
58 |
+
2023-05-16 11:45:40 - SimpleLog - INFO: - episode: 13, ep_reward: 12.0, ep_step: 12
|
59 |
+
2023-05-16 11:45:40 - SimpleLog - INFO: - episode: 14, ep_reward: 12.0, ep_step: 12
|
60 |
+
2023-05-16 11:45:40 - SimpleLog - INFO: - episode: 15, ep_reward: 13.0, ep_step: 13
|
61 |
+
2023-05-16 11:45:40 - SimpleLog - INFO: - episode: 16, ep_reward: 11.0, ep_step: 11
|
62 |
+
2023-05-16 11:45:40 - SimpleLog - INFO: - episode: 17, ep_reward: 15.0, ep_step: 15
|
63 |
+
2023-05-16 11:45:40 - SimpleLog - INFO: - episode: 18, ep_reward: 12.0, ep_step: 12
|
64 |
+
2023-05-16 11:45:41 - SimpleLog - INFO: - episode: 19, ep_reward: 27.0, ep_step: 27
|
65 |
+
2023-05-16 11:45:41 - SimpleLog - INFO: - episode: 20, ep_reward: 14.0, ep_step: 14
|
66 |
+
2023-05-16 11:45:41 - SimpleLog - INFO: - episode: 21, ep_reward: 19.0, ep_step: 19
|
67 |
+
2023-05-16 11:45:41 - SimpleLog - INFO: - episode: 22, ep_reward: 10.0, ep_step: 10
|
68 |
+
2023-05-16 11:45:41 - SimpleLog - INFO: - episode: 23, ep_reward: 10.0, ep_step: 10
|
69 |
+
2023-05-16 11:45:41 - SimpleLog - INFO: - episode: 24, ep_reward: 15.0, ep_step: 15
|
70 |
+
2023-05-16 11:45:41 - SimpleLog - INFO: - episode: 25, ep_reward: 15.0, ep_step: 15
|
71 |
+
2023-05-16 11:45:41 - SimpleLog - INFO: - episode: 26, ep_reward: 14.0, ep_step: 14
|
72 |
+
2023-05-16 11:45:41 - SimpleLog - INFO: - episode: 27, ep_reward: 11.0, ep_step: 11
|
73 |
+
2023-05-16 11:45:41 - SimpleLog - INFO: - episode: 28, ep_reward: 10.0, ep_step: 10
|
74 |
+
2023-05-16 11:45:41 - SimpleLog - INFO: - episode: 29, ep_reward: 12.0, ep_step: 12
|
75 |
+
2023-05-16 11:45:41 - SimpleLog - INFO: - episode: 30, ep_reward: 10.0, ep_step: 10
|
76 |
+
2023-05-16 11:45:41 - SimpleLog - INFO: - episode: 31, ep_reward: 9.0, ep_step: 9
|
77 |
+
2023-05-16 11:45:41 - SimpleLog - INFO: - episode: 32, ep_reward: 11.0, ep_step: 11
|
78 |
+
2023-05-16 11:45:41 - SimpleLog - INFO: - episode: 33, ep_reward: 9.0, ep_step: 9
|
79 |
+
2023-05-16 11:45:41 - SimpleLog - INFO: - episode: 34, ep_reward: 13.0, ep_step: 13
|
80 |
+
2023-05-16 11:45:41 - SimpleLog - INFO: - episode: 35, ep_reward: 10.0, ep_step: 10
|
81 |
+
2023-05-16 11:45:41 - SimpleLog - INFO: - episode: 36, ep_reward: 9.0, ep_step: 9
|
82 |
+
2023-05-16 11:45:41 - SimpleLog - INFO: - episode: 37, ep_reward: 10.0, ep_step: 10
|
83 |
+
2023-05-16 11:45:41 - SimpleLog - INFO: - update_step: 500, online_eval_reward: 9.000
|
84 |
+
2023-05-16 11:45:41 - SimpleLog - INFO: - current update step obtain a better online_eval_reward: 9.000, save the best model!
|
85 |
+
2023-05-16 11:45:41 - SimpleLog - INFO: - episode: 38, ep_reward: 14.0, ep_step: 14
|
86 |
+
2023-05-16 11:45:41 - SimpleLog - INFO: - episode: 39, ep_reward: 11.0, ep_step: 11
|
87 |
+
2023-05-16 11:45:41 - SimpleLog - INFO: - episode: 40, ep_reward: 9.0, ep_step: 9
|
88 |
+
2023-05-16 11:45:41 - SimpleLog - INFO: - episode: 41, ep_reward: 9.0, ep_step: 9
|
89 |
+
2023-05-16 11:45:41 - SimpleLog - INFO: - episode: 42, ep_reward: 9.0, ep_step: 9
|
90 |
+
2023-05-16 11:45:42 - SimpleLog - INFO: - episode: 43, ep_reward: 11.0, ep_step: 11
|
91 |
+
2023-05-16 11:45:42 - SimpleLog - INFO: - episode: 44, ep_reward: 21.0, ep_step: 21
|
92 |
+
2023-05-16 11:45:42 - SimpleLog - INFO: - episode: 45, ep_reward: 13.0, ep_step: 13
|
93 |
+
2023-05-16 11:45:42 - SimpleLog - INFO: - episode: 46, ep_reward: 12.0, ep_step: 12
|
94 |
+
2023-05-16 11:45:42 - SimpleLog - INFO: - episode: 47, ep_reward: 30.0, ep_step: 30
|
95 |
+
2023-05-16 11:45:42 - SimpleLog - INFO: - episode: 48, ep_reward: 20.0, ep_step: 20
|
96 |
+
2023-05-16 11:45:42 - SimpleLog - INFO: - episode: 49, ep_reward: 28.0, ep_step: 28
|
97 |
+
2023-05-16 11:45:42 - SimpleLog - INFO: - episode: 50, ep_reward: 22.0, ep_step: 22
|
98 |
+
2023-05-16 11:45:42 - SimpleLog - INFO: - episode: 51, ep_reward: 20.0, ep_step: 20
|
99 |
+
2023-05-16 11:45:42 - SimpleLog - INFO: - episode: 52, ep_reward: 26.0, ep_step: 26
|
100 |
+
2023-05-16 11:45:42 - SimpleLog - INFO: - episode: 53, ep_reward: 24.0, ep_step: 24
|
101 |
+
2023-05-16 11:45:42 - SimpleLog - INFO: - episode: 54, ep_reward: 30.0, ep_step: 30
|
102 |
+
2023-05-16 11:45:42 - SimpleLog - INFO: - episode: 55, ep_reward: 26.0, ep_step: 26
|
103 |
+
2023-05-16 11:45:43 - SimpleLog - INFO: - episode: 56, ep_reward: 41.0, ep_step: 41
|
104 |
+
2023-05-16 11:45:43 - SimpleLog - INFO: - episode: 57, ep_reward: 58.0, ep_step: 58
|
105 |
+
2023-05-16 11:45:43 - SimpleLog - INFO: - episode: 58, ep_reward: 59.0, ep_step: 59
|
106 |
+
2023-05-16 11:45:43 - SimpleLog - INFO: - update_step: 1000, online_eval_reward: 63.000
|
107 |
+
2023-05-16 11:45:43 - SimpleLog - INFO: - current update step obtain a better online_eval_reward: 63.000, save the best model!
|
108 |
+
2023-05-16 11:45:43 - SimpleLog - INFO: - episode: 59, ep_reward: 58.0, ep_step: 58
|
109 |
+
2023-05-16 11:45:43 - SimpleLog - INFO: - episode: 60, ep_reward: 47.0, ep_step: 47
|
110 |
+
2023-05-16 11:45:44 - SimpleLog - INFO: - episode: 61, ep_reward: 84.0, ep_step: 84
|
111 |
+
2023-05-16 11:45:44 - SimpleLog - INFO: - episode: 62, ep_reward: 44.0, ep_step: 44
|
112 |
+
2023-05-16 11:45:44 - SimpleLog - INFO: - episode: 63, ep_reward: 59.0, ep_step: 59
|
113 |
+
2023-05-16 11:45:44 - SimpleLog - INFO: - episode: 64, ep_reward: 39.0, ep_step: 39
|
114 |
+
2023-05-16 11:45:44 - SimpleLog - INFO: - episode: 65, ep_reward: 53.0, ep_step: 53
|
115 |
+
2023-05-16 11:45:44 - SimpleLog - INFO: - episode: 66, ep_reward: 70.0, ep_step: 70
|
116 |
+
2023-05-16 11:45:45 - SimpleLog - INFO: - episode: 67, ep_reward: 58.0, ep_step: 58
|
117 |
+
2023-05-16 11:45:45 - SimpleLog - INFO: - update_step: 1500, online_eval_reward: 65.000
|
118 |
+
2023-05-16 11:45:45 - SimpleLog - INFO: - current update step obtain a better online_eval_reward: 65.000, save the best model!
|
119 |
+
2023-05-16 11:45:45 - SimpleLog - INFO: - episode: 68, ep_reward: 101.0, ep_step: 101
|
120 |
+
2023-05-16 11:45:45 - SimpleLog - INFO: - episode: 69, ep_reward: 52.0, ep_step: 52
|
121 |
+
2023-05-16 11:45:45 - SimpleLog - INFO: - episode: 70, ep_reward: 58.0, ep_step: 58
|
122 |
+
2023-05-16 11:45:46 - SimpleLog - INFO: - episode: 71, ep_reward: 61.0, ep_step: 61
|
123 |
+
2023-05-16 11:45:46 - SimpleLog - INFO: - episode: 72, ep_reward: 91.0, ep_step: 91
|
124 |
+
2023-05-16 11:45:46 - SimpleLog - INFO: - episode: 73, ep_reward: 54.0, ep_step: 54
|
125 |
+
2023-05-16 11:45:46 - SimpleLog - INFO: - update_step: 2000, online_eval_reward: 65.000
|
126 |
+
2023-05-16 11:45:46 - SimpleLog - INFO: - episode: 74, ep_reward: 98.0, ep_step: 98
|
127 |
+
2023-05-16 11:45:47 - SimpleLog - INFO: - episode: 75, ep_reward: 67.0, ep_step: 67
|
128 |
+
2023-05-16 11:45:47 - SimpleLog - INFO: - episode: 76, ep_reward: 70.0, ep_step: 70
|
129 |
+
2023-05-16 11:45:47 - SimpleLog - INFO: - episode: 77, ep_reward: 74.0, ep_step: 74
|
130 |
+
2023-05-16 11:45:47 - SimpleLog - INFO: - episode: 78, ep_reward: 72.0, ep_step: 72
|
131 |
+
2023-05-16 11:45:48 - SimpleLog - INFO: - episode: 79, ep_reward: 81.0, ep_step: 81
|
132 |
+
2023-05-16 11:45:48 - SimpleLog - INFO: - episode: 80, ep_reward: 82.0, ep_step: 82
|
133 |
+
2023-05-16 11:45:48 - SimpleLog - INFO: - update_step: 2500, online_eval_reward: 94.000
|
134 |
+
2023-05-16 11:45:48 - SimpleLog - INFO: - current update step obtain a better online_eval_reward: 94.000, save the best model!
|
135 |
+
2023-05-16 11:45:48 - SimpleLog - INFO: - episode: 81, ep_reward: 97.0, ep_step: 97
|
136 |
+
2023-05-16 11:45:48 - SimpleLog - INFO: - episode: 82, ep_reward: 89.0, ep_step: 89
|
137 |
+
2023-05-16 11:45:49 - SimpleLog - INFO: - episode: 83, ep_reward: 200.0, ep_step: 200
|
138 |
+
2023-05-16 11:45:50 - SimpleLog - INFO: - episode: 84, ep_reward: 142.0, ep_step: 142
|
139 |
+
2023-05-16 11:45:50 - SimpleLog - INFO: - update_step: 3000, online_eval_reward: 153.000
|
140 |
+
2023-05-16 11:45:50 - SimpleLog - INFO: - current update step obtain a better online_eval_reward: 153.000, save the best model!
|
141 |
+
2023-05-16 11:45:50 - SimpleLog - INFO: - episode: 85, ep_reward: 114.0, ep_step: 114
|
142 |
+
2023-05-16 11:45:51 - SimpleLog - INFO: - episode: 86, ep_reward: 162.0, ep_step: 162
|
143 |
+
2023-05-16 11:45:51 - SimpleLog - INFO: - episode: 87, ep_reward: 200.0, ep_step: 200
|
144 |
+
2023-05-16 11:45:51 - SimpleLog - INFO: - update_step: 3500, online_eval_reward: 200.000
|
145 |
+
2023-05-16 11:45:51 - SimpleLog - INFO: - current update step obtain a better online_eval_reward: 200.000, save the best model!
|
146 |
+
2023-05-16 11:45:52 - SimpleLog - INFO: - episode: 88, ep_reward: 200.0, ep_step: 200
|
147 |
+
2023-05-16 11:45:53 - SimpleLog - INFO: - episode: 89, ep_reward: 200.0, ep_step: 200
|
148 |
+
2023-05-16 11:45:53 - SimpleLog - INFO: - update_step: 4000, online_eval_reward: 200.000
|
149 |
+
2023-05-16 11:45:53 - SimpleLog - INFO: - episode: 90, ep_reward: 200.0, ep_step: 200
|
150 |
+
2023-05-16 11:45:54 - SimpleLog - INFO: - episode: 91, ep_reward: 200.0, ep_step: 200
|
151 |
+
2023-05-16 11:45:55 - SimpleLog - INFO: - episode: 92, ep_reward: 200.0, ep_step: 200
|
152 |
+
2023-05-16 11:45:55 - SimpleLog - INFO: - update_step: 4500, online_eval_reward: 200.000
|
153 |
+
2023-05-16 11:45:55 - SimpleLog - INFO: - episode: 93, ep_reward: 200.0, ep_step: 200
|
154 |
+
2023-05-16 11:45:56 - SimpleLog - INFO: - episode: 94, ep_reward: 200.0, ep_step: 200
|
155 |
+
2023-05-16 11:45:57 - SimpleLog - INFO: - update_step: 5000, online_eval_reward: 200.000
|
156 |
+
2023-05-16 11:45:57 - SimpleLog - INFO: - episode: 95, ep_reward: 200.0, ep_step: 200
|
157 |
+
2023-05-16 11:45:58 - SimpleLog - INFO: - episode: 96, ep_reward: 200.0, ep_step: 200
|
158 |
+
2023-05-16 11:45:58 - SimpleLog - INFO: - episode: 97, ep_reward: 200.0, ep_step: 200
|
159 |
+
2023-05-16 11:45:58 - SimpleLog - INFO: - update_step: 5500, online_eval_reward: 200.000
|
160 |
+
2023-05-16 11:45:59 - SimpleLog - INFO: - episode: 98, ep_reward: 200.0, ep_step: 200
|
161 |
+
2023-05-16 11:46:00 - SimpleLog - INFO: - episode: 99, ep_reward: 200.0, ep_step: 200
|
162 |
+
2023-05-16 11:46:00 - SimpleLog - INFO: - Finish training! total time consumed: 20.03s
|
CartPole-v1/Train_single_CartPole-v1_DoubleDQN_20230516-114540/models/1000
ADDED
Binary file (545 kB). View file
|
|
CartPole-v1/Train_single_CartPole-v1_DoubleDQN_20230516-114540/models/1500
ADDED
Binary file (545 kB). View file
|
|
CartPole-v1/Train_single_CartPole-v1_DoubleDQN_20230516-114540/models/2000
ADDED
Binary file (545 kB). View file
|
|
CartPole-v1/Train_single_CartPole-v1_DoubleDQN_20230516-114540/models/2500
ADDED
Binary file (545 kB). View file
|
|
CartPole-v1/Train_single_CartPole-v1_DoubleDQN_20230516-114540/models/3000
ADDED
Binary file (545 kB). View file
|
|
CartPole-v1/Train_single_CartPole-v1_DoubleDQN_20230516-114540/models/3500
ADDED
Binary file (545 kB). View file
|
|
CartPole-v1/Train_single_CartPole-v1_DoubleDQN_20230516-114540/models/4000
ADDED
Binary file (545 kB). View file
|
|