johnjim0816 commited on
Commit
c93cde9
1 Parent(s): 7e0d2ec

update Cartpole-v1 DuelingDQN

Browse files
This view is limited to 50 files because it contains too many changes.   See raw diff
Files changed (50) hide show
  1. CartPole-v1/Test_CartPole-v1_DuelingDQN_20221122-125455/config.yaml +0 -41
  2. CartPole-v1/Test_CartPole-v1_DuelingDQN_20221122-125455/logs/log.txt +0 -14
  3. CartPole-v1/Test_CartPole-v1_DuelingDQN_20221122-125455/results/learning_curve.png +0 -0
  4. CartPole-v1/Test_CartPole-v1_DuelingDQN_20221122-125455/results/res.csv +0 -11
  5. CartPole-v1/Test_CartPole-v1_DuelingDQN_mp_20230407-171120/config.yaml +0 -47
  6. CartPole-v1/Test_CartPole-v1_DuelingDQN_mp_20230407-171120/logs/log.txt +0 -53
  7. CartPole-v1/Test_CartPole-v1_DuelingDQN_mp_20230407-171120/results/learning_curve.png +0 -0
  8. CartPole-v1/Test_CartPole-v1_DuelingDQN_mp_20230407-171120/results/res.csv +0 -11
  9. CartPole-v1/Test_CartPole-v1_DuelingDQN_ray_20230407-165208/config.yaml +0 -47
  10. CartPole-v1/Test_CartPole-v1_DuelingDQN_ray_20230407-165208/logs/log.txt +0 -53
  11. CartPole-v1/Test_CartPole-v1_DuelingDQN_ray_20230407-165208/results/learning_curve.png +0 -0
  12. CartPole-v1/Test_CartPole-v1_DuelingDQN_ray_20230407-165208/results/res.csv +0 -11
  13. CartPole-v1/Train_CartPole-v1_DuelingDQN_20221122-125403/config.yaml +0 -41
  14. CartPole-v1/Train_CartPole-v1_DuelingDQN_20221122-125403/logs/log.txt +0 -119
  15. CartPole-v1/Train_CartPole-v1_DuelingDQN_20221122-125403/results/learning_curve.png +0 -0
  16. CartPole-v1/Train_CartPole-v1_DuelingDQN_20221122-125403/results/res.csv +0 -101
  17. CartPole-v1/Train_CartPole-v1_DuelingDQN_mp_20230407-170853/logs/log.txt +0 -43
  18. CartPole-v1/Train_CartPole-v1_DuelingDQN_mp_20230407-170853/models/checkpoint.pt +0 -3
  19. CartPole-v1/Train_CartPole-v1_DuelingDQN_mp_20230407-170853/results/learning_curve.png +0 -0
  20. CartPole-v1/Train_CartPole-v1_DuelingDQN_mp_20230407-170853/results/res.csv +0 -202
  21. CartPole-v1/Train_CartPole-v1_DuelingDQN_ray_20230407-153236/logs/log.txt +0 -43
  22. CartPole-v1/Train_CartPole-v1_DuelingDQN_ray_20230407-153236/models/checkpoint.pt +0 -3
  23. CartPole-v1/Train_CartPole-v1_DuelingDQN_ray_20230407-153236/results/learning_curve.png +0 -0
  24. CartPole-v1/Train_CartPole-v1_DuelingDQN_ray_20230407-153236/results/res.csv +0 -401
  25. CartPole-v1/{Train_CartPole-v1_DuelingDQN_ray_20230407-153236 → Train_ray_CartPole-v1_DuelingDQN_20230517-224129}/config.yaml +19 -22
  26. CartPole-v1/Train_ray_CartPole-v1_DuelingDQN_20230517-224129/logs/log.txt +169 -0
  27. CartPole-v1/Train_ray_CartPole-v1_DuelingDQN_20230517-224129/models/1000 +0 -0
  28. CartPole-v1/Train_ray_CartPole-v1_DuelingDQN_20230517-224129/models/10000 +0 -0
  29. CartPole-v1/Train_ray_CartPole-v1_DuelingDQN_20230517-224129/models/10500 +0 -0
  30. CartPole-v1/Train_ray_CartPole-v1_DuelingDQN_20230517-224129/models/1500 +0 -0
  31. CartPole-v1/Train_ray_CartPole-v1_DuelingDQN_20230517-224129/models/2000 +0 -0
  32. CartPole-v1/Train_ray_CartPole-v1_DuelingDQN_20230517-224129/models/2500 +0 -0
  33. CartPole-v1/Train_ray_CartPole-v1_DuelingDQN_20230517-224129/models/3000 +0 -0
  34. CartPole-v1/Train_ray_CartPole-v1_DuelingDQN_20230517-224129/models/3500 +0 -0
  35. CartPole-v1/Train_ray_CartPole-v1_DuelingDQN_20230517-224129/models/4000 +0 -0
  36. CartPole-v1/Train_ray_CartPole-v1_DuelingDQN_20230517-224129/models/4500 +0 -0
  37. CartPole-v1/Train_ray_CartPole-v1_DuelingDQN_20230517-224129/models/500 +0 -0
  38. CartPole-v1/Train_ray_CartPole-v1_DuelingDQN_20230517-224129/models/5000 +0 -0
  39. CartPole-v1/Train_ray_CartPole-v1_DuelingDQN_20230517-224129/models/5500 +0 -0
  40. CartPole-v1/Train_ray_CartPole-v1_DuelingDQN_20230517-224129/models/6000 +0 -0
  41. CartPole-v1/Train_ray_CartPole-v1_DuelingDQN_20230517-224129/models/6500 +0 -0
  42. CartPole-v1/Train_ray_CartPole-v1_DuelingDQN_20230517-224129/models/7000 +0 -0
  43. CartPole-v1/Train_ray_CartPole-v1_DuelingDQN_20230517-224129/models/7500 +0 -0
  44. CartPole-v1/Train_ray_CartPole-v1_DuelingDQN_20230517-224129/models/8000 +0 -0
  45. CartPole-v1/Train_ray_CartPole-v1_DuelingDQN_20230517-224129/models/8500 +0 -0
  46. CartPole-v1/Train_ray_CartPole-v1_DuelingDQN_20230517-224129/models/9000 +0 -0
  47. CartPole-v1/Train_ray_CartPole-v1_DuelingDQN_20230517-224129/models/9500 +0 -0
  48. CartPole-v1/Train_ray_CartPole-v1_DuelingDQN_20230517-224129/models/best +0 -0
  49. CartPole-v1/{Test_CartPole-v1_DuelingDQN_mp_20230407-171120/models/checkpoint.pt → Train_ray_CartPole-v1_DuelingDQN_20230517-224129/tb_logs/interact/events.out.tfevents.1684334489.DESKTOP-H34HQIQ.80856.0} +2 -2
  50. CartPole-v1/{Test_CartPole-v1_DuelingDQN_ray_20230407-165208/models/checkpoint.pt → Train_ray_CartPole-v1_DuelingDQN_20230517-224129/tb_logs/interact/events.out.tfevents.1684334497.DESKTOP-H34HQIQ.84100.0} +2 -2
CartPole-v1/Test_CartPole-v1_DuelingDQN_20221122-125455/config.yaml DELETED
@@ -1,41 +0,0 @@
1
- general_cfg:
2
- algo_name: DuelingDQN
3
- device: cuda
4
- env_name: CartPole-v1
5
- eval_eps: 10
6
- eval_per_episode: 5
7
- load_checkpoint: true
8
- load_path: Train_CartPole-v1_DuelingDQN_20221122-125403
9
- max_steps: 200
10
- mode: test
11
- save_fig: true
12
- seed: 1
13
- show_fig: false
14
- test_eps: 10
15
- train_eps: 100
16
- algo_cfg:
17
- batch_size: 64
18
- buffer_size: 100000
19
- epsilon_decay: 500
20
- epsilon_end: 0.01
21
- epsilon_start: 0.95
22
- gamma: 0.99
23
- hidden_dim: 256
24
- lr: 0.0001
25
- target_update: 4
26
- value_layers:
27
- - activation: relu
28
- layer_dim:
29
- - n_states
30
- - 256
31
- layer_type: linear
32
- - activation: relu
33
- layer_dim:
34
- - 256
35
- - 256
36
- layer_type: linear
37
- - activation: none
38
- layer_dim:
39
- - 256
40
- - n_actions
41
- layer_type: linear
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
CartPole-v1/Test_CartPole-v1_DuelingDQN_20221122-125455/logs/log.txt DELETED
@@ -1,14 +0,0 @@
1
- 2022-11-22 12:54:55 - r - INFO: - n_states: 4, n_actions: 2
2
- 2022-11-22 12:54:58 - r - INFO: - Start testing!
3
- 2022-11-22 12:54:58 - r - INFO: - Env: CartPole-v1, Algorithm: DuelingDQN, Device: cuda
4
- 2022-11-22 12:54:58 - r - INFO: - Episode: 1/10, Reward: 200.000, Step: 200
5
- 2022-11-22 12:54:58 - r - INFO: - Episode: 2/10, Reward: 200.000, Step: 200
6
- 2022-11-22 12:54:58 - r - INFO: - Episode: 3/10, Reward: 200.000, Step: 200
7
- 2022-11-22 12:54:58 - r - INFO: - Episode: 4/10, Reward: 200.000, Step: 200
8
- 2022-11-22 12:54:58 - r - INFO: - Episode: 5/10, Reward: 200.000, Step: 200
9
- 2022-11-22 12:54:58 - r - INFO: - Episode: 6/10, Reward: 200.000, Step: 200
10
- 2022-11-22 12:54:58 - r - INFO: - Episode: 7/10, Reward: 200.000, Step: 200
11
- 2022-11-22 12:54:58 - r - INFO: - Episode: 8/10, Reward: 200.000, Step: 200
12
- 2022-11-22 12:54:58 - r - INFO: - Episode: 9/10, Reward: 200.000, Step: 200
13
- 2022-11-22 12:54:58 - r - INFO: - Episode: 10/10, Reward: 200.000, Step: 200
14
- 2022-11-22 12:54:58 - r - INFO: - Finish testing!
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
CartPole-v1/Test_CartPole-v1_DuelingDQN_20221122-125455/results/learning_curve.png DELETED
Binary file (27.6 kB)
 
CartPole-v1/Test_CartPole-v1_DuelingDQN_20221122-125455/results/res.csv DELETED
@@ -1,11 +0,0 @@
1
- episodes,rewards,steps
2
- 0,200.0,200
3
- 1,200.0,200
4
- 2,200.0,200
5
- 3,200.0,200
6
- 4,200.0,200
7
- 5,200.0,200
8
- 6,200.0,200
9
- 7,200.0,200
10
- 8,200.0,200
11
- 9,200.0,200
 
 
 
 
 
 
 
 
 
 
 
 
CartPole-v1/Test_CartPole-v1_DuelingDQN_mp_20230407-171120/config.yaml DELETED
@@ -1,47 +0,0 @@
1
- general_cfg:
2
- algo_name: DuelingDQN
3
- device: cpu
4
- env_name: CartPole-v1
5
- eval_eps: 10
6
- eval_per_episode: 5
7
- load_checkpoint: true
8
- load_path: Train_CartPole-v1_DuelingDQN_mp_20230407-170853
9
- max_steps: 200
10
- mode: test
11
- mp_backend: mp
12
- n_workers: 1
13
- new_step_api: true
14
- render: false
15
- render_mode: human
16
- save_fig: true
17
- seed: 1
18
- show_fig: false
19
- test_eps: 10
20
- train_eps: 200
21
- wrapper: null
22
- algo_cfg:
23
- batch_size: 64
24
- buffer_size: 100000
25
- epsilon_decay: 500
26
- epsilon_end: 0.01
27
- epsilon_start: 0.95
28
- gamma: 0.99
29
- hidden_dim: 256
30
- lr: 0.0001
31
- target_update: 4
32
- value_layers:
33
- - activation: relu
34
- layer_dim:
35
- - n_states
36
- - 256
37
- layer_type: linear
38
- - activation: relu
39
- layer_dim:
40
- - 256
41
- - 256
42
- layer_type: linear
43
- - activation: none
44
- layer_dim:
45
- - 256
46
- - n_actions
47
- layer_type: linear
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
CartPole-v1/Test_CartPole-v1_DuelingDQN_mp_20230407-171120/logs/log.txt DELETED
@@ -1,53 +0,0 @@
1
- 2023-04-07 17:11:20 - r - INFO: - Hyperparameters:
2
- 2023-04-07 17:11:20 - r - INFO: - ================================================================================
3
- 2023-04-07 17:11:20 - r - INFO: - Name Value Type
4
- 2023-04-07 17:11:20 - r - INFO: - env_name CartPole-v1 <class 'str'>
5
- 2023-04-07 17:11:20 - r - INFO: - new_step_api 1 <class 'bool'>
6
- 2023-04-07 17:11:20 - r - INFO: - wrapper None <class 'str'>
7
- 2023-04-07 17:11:20 - r - INFO: - render 0 <class 'bool'>
8
- 2023-04-07 17:11:20 - r - INFO: - render_mode human <class 'str'>
9
- 2023-04-07 17:11:20 - r - INFO: - algo_name DuelingDQN <class 'str'>
10
- 2023-04-07 17:11:20 - r - INFO: - mode test <class 'str'>
11
- 2023-04-07 17:11:20 - r - INFO: - mp_backend mp <class 'str'>
12
- 2023-04-07 17:11:20 - r - INFO: - seed 1 <class 'int'>
13
- 2023-04-07 17:11:20 - r - INFO: - device cpu <class 'str'>
14
- 2023-04-07 17:11:20 - r - INFO: - train_eps 200 <class 'int'>
15
- 2023-04-07 17:11:20 - r - INFO: - test_eps 10 <class 'int'>
16
- 2023-04-07 17:11:20 - r - INFO: - eval_eps 10 <class 'int'>
17
- 2023-04-07 17:11:20 - r - INFO: - eval_per_episode 5 <class 'int'>
18
- 2023-04-07 17:11:20 - r - INFO: - max_steps 200 <class 'int'>
19
- 2023-04-07 17:11:20 - r - INFO: - load_checkpoint 1 <class 'bool'>
20
- 2023-04-07 17:11:20 - r - INFO: - load_path Train_CartPole-v1_DuelingDQN_mp_20230407-170853 <class 'str'>
21
- 2023-04-07 17:11:20 - r - INFO: - show_fig 0 <class 'bool'>
22
- 2023-04-07 17:11:20 - r - INFO: - save_fig 1 <class 'bool'>
23
- 2023-04-07 17:11:20 - r - INFO: - n_workers 1 <class 'int'>
24
- 2023-04-07 17:11:20 - r - INFO: - epsilon_start 0.95 <class 'float'>
25
- 2023-04-07 17:11:20 - r - INFO: - epsilon_end 0.01 <class 'float'>
26
- 2023-04-07 17:11:20 - r - INFO: - epsilon_decay 500 <class 'int'>
27
- 2023-04-07 17:11:20 - r - INFO: - gamma 0.99 <class 'float'>
28
- 2023-04-07 17:11:20 - r - INFO: - lr 0.0001 <class 'float'>
29
- 2023-04-07 17:11:20 - r - INFO: - buffer_size 100000 <class 'int'>
30
- 2023-04-07 17:11:20 - r - INFO: - batch_size 64 <class 'int'>
31
- 2023-04-07 17:11:20 - r - INFO: - target_update 4 <class 'int'>
32
- 2023-04-07 17:11:20 - r - INFO: - value_layers [{'layer_type': 'linear', 'layer_dim': ['n_states', 256], 'activation': 'relu'}, {'layer_type': 'linear', 'layer_dim': [256, 256], 'activation': 'relu'}, {'layer_type': 'linear', 'layer_dim': [256, 'n_actions'], 'activation': 'none'}] <class 'str'>
33
- 2023-04-07 17:11:20 - r - INFO: - hidden_dim 256 <class 'int'>
34
- 2023-04-07 17:11:20 - r - INFO: - task_dir /media/disk/gsc/joyrl-offline/tasks/Test_CartPole-v1_DuelingDQN_20230407-171120 <class 'str'>
35
- 2023-04-07 17:11:20 - r - INFO: - res_dir /media/disk/gsc/joyrl-offline/tasks/Test_CartPole-v1_DuelingDQN_20230407-171120/results <class 'str'>
36
- 2023-04-07 17:11:20 - r - INFO: - log_dir /media/disk/gsc/joyrl-offline/tasks/Test_CartPole-v1_DuelingDQN_20230407-171120/logs <class 'str'>
37
- 2023-04-07 17:11:20 - r - INFO: - traj_dir /media/disk/gsc/joyrl-offline/tasks/Test_CartPole-v1_DuelingDQN_20230407-171120/traj <class 'str'>
38
- 2023-04-07 17:11:20 - r - INFO: - video_dir /media/disk/gsc/joyrl-offline/tasks/Test_CartPole-v1_DuelingDQN_20230407-171120/videos <class 'str'>
39
- 2023-04-07 17:11:20 - r - INFO: - ================================================================================
40
- 2023-04-07 17:11:20 - r - INFO: - n_states: 4, n_actions: 2
41
- 2023-04-07 17:11:20 - r - INFO: - Start testing!
42
- 2023-04-07 17:11:20 - r - INFO: - Env: CartPole-v1, Algorithm: DuelingDQN, Device: cpu
43
- 2023-04-07 17:11:20 - r - INFO: - Episode: 1/10, Reward: 200.000, Step: 200
44
- 2023-04-07 17:11:21 - r - INFO: - Episode: 2/10, Reward: 200.000, Step: 200
45
- 2023-04-07 17:11:21 - r - INFO: - Episode: 3/10, Reward: 190.000, Step: 190
46
- 2023-04-07 17:11:21 - r - INFO: - Episode: 4/10, Reward: 200.000, Step: 200
47
- 2023-04-07 17:11:21 - r - INFO: - Episode: 5/10, Reward: 187.000, Step: 187
48
- 2023-04-07 17:11:21 - r - INFO: - Episode: 6/10, Reward: 182.000, Step: 182
49
- 2023-04-07 17:11:21 - r - INFO: - Episode: 7/10, Reward: 200.000, Step: 200
50
- 2023-04-07 17:11:21 - r - INFO: - Episode: 8/10, Reward: 200.000, Step: 200
51
- 2023-04-07 17:11:21 - r - INFO: - Episode: 9/10, Reward: 196.000, Step: 196
52
- 2023-04-07 17:11:21 - r - INFO: - Episode: 10/10, Reward: 200.000, Step: 200
53
- 2023-04-07 17:11:21 - r - INFO: - Finish testing!
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
CartPole-v1/Test_CartPole-v1_DuelingDQN_mp_20230407-171120/results/learning_curve.png DELETED
Binary file (45.2 kB)
 
CartPole-v1/Test_CartPole-v1_DuelingDQN_mp_20230407-171120/results/res.csv DELETED
@@ -1,11 +0,0 @@
1
- episodes,rewards,steps
2
- 0,200.0,200
3
- 1,200.0,200
4
- 2,190.0,190
5
- 3,200.0,200
6
- 4,187.0,187
7
- 5,182.0,182
8
- 6,200.0,200
9
- 7,200.0,200
10
- 8,196.0,196
11
- 9,200.0,200
 
 
 
 
 
 
 
 
 
 
 
 
CartPole-v1/Test_CartPole-v1_DuelingDQN_ray_20230407-165208/config.yaml DELETED
@@ -1,47 +0,0 @@
1
- general_cfg:
2
- algo_name: DuelingDQN
3
- device: cpu
4
- env_name: CartPole-v1
5
- eval_eps: 10
6
- eval_per_episode: 5
7
- load_checkpoint: true
8
- load_path: Train_CartPole-v1_DuelingDQN_20230407-153236
9
- max_steps: 200
10
- mode: test
11
- mp_backend: ray
12
- n_workers: 1
13
- new_step_api: true
14
- render: false
15
- render_mode: human
16
- save_fig: true
17
- seed: 1
18
- show_fig: false
19
- test_eps: 10
20
- train_eps: 400
21
- wrapper: null
22
- algo_cfg:
23
- batch_size: 64
24
- buffer_size: 100000
25
- epsilon_decay: 500
26
- epsilon_end: 0.01
27
- epsilon_start: 0.95
28
- gamma: 0.99
29
- hidden_dim: 256
30
- lr: 0.0001
31
- target_update: 4
32
- value_layers:
33
- - activation: relu
34
- layer_dim:
35
- - n_states
36
- - 256
37
- layer_type: linear
38
- - activation: relu
39
- layer_dim:
40
- - 256
41
- - 256
42
- layer_type: linear
43
- - activation: none
44
- layer_dim:
45
- - 256
46
- - n_actions
47
- layer_type: linear
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
CartPole-v1/Test_CartPole-v1_DuelingDQN_ray_20230407-165208/logs/log.txt DELETED
@@ -1,53 +0,0 @@
1
- 2023-04-07 16:52:08 - r - INFO: - Hyperparameters:
2
- 2023-04-07 16:52:08 - r - INFO: - ================================================================================
3
- 2023-04-07 16:52:08 - r - INFO: - Name Value Type
4
- 2023-04-07 16:52:08 - r - INFO: - env_name CartPole-v1 <class 'str'>
5
- 2023-04-07 16:52:08 - r - INFO: - new_step_api 1 <class 'bool'>
6
- 2023-04-07 16:52:08 - r - INFO: - wrapper None <class 'str'>
7
- 2023-04-07 16:52:08 - r - INFO: - render 0 <class 'bool'>
8
- 2023-04-07 16:52:08 - r - INFO: - render_mode human <class 'str'>
9
- 2023-04-07 16:52:08 - r - INFO: - algo_name DuelingDQN <class 'str'>
10
- 2023-04-07 16:52:08 - r - INFO: - mode test <class 'str'>
11
- 2023-04-07 16:52:08 - r - INFO: - mp_backend ray <class 'str'>
12
- 2023-04-07 16:52:08 - r - INFO: - seed 1 <class 'int'>
13
- 2023-04-07 16:52:08 - r - INFO: - device cpu <class 'str'>
14
- 2023-04-07 16:52:08 - r - INFO: - train_eps 400 <class 'int'>
15
- 2023-04-07 16:52:08 - r - INFO: - test_eps 10 <class 'int'>
16
- 2023-04-07 16:52:08 - r - INFO: - eval_eps 10 <class 'int'>
17
- 2023-04-07 16:52:08 - r - INFO: - eval_per_episode 5 <class 'int'>
18
- 2023-04-07 16:52:08 - r - INFO: - max_steps 200 <class 'int'>
19
- 2023-04-07 16:52:08 - r - INFO: - load_checkpoint 1 <class 'bool'>
20
- 2023-04-07 16:52:08 - r - INFO: - load_path Train_CartPole-v1_DuelingDQN_20230407-153236 <class 'str'>
21
- 2023-04-07 16:52:08 - r - INFO: - show_fig 0 <class 'bool'>
22
- 2023-04-07 16:52:08 - r - INFO: - save_fig 1 <class 'bool'>
23
- 2023-04-07 16:52:08 - r - INFO: - n_workers 1 <class 'int'>
24
- 2023-04-07 16:52:08 - r - INFO: - epsilon_start 0.95 <class 'float'>
25
- 2023-04-07 16:52:08 - r - INFO: - epsilon_end 0.01 <class 'float'>
26
- 2023-04-07 16:52:08 - r - INFO: - epsilon_decay 500 <class 'int'>
27
- 2023-04-07 16:52:08 - r - INFO: - gamma 0.99 <class 'float'>
28
- 2023-04-07 16:52:08 - r - INFO: - lr 0.0001 <class 'float'>
29
- 2023-04-07 16:52:08 - r - INFO: - buffer_size 100000 <class 'int'>
30
- 2023-04-07 16:52:08 - r - INFO: - batch_size 64 <class 'int'>
31
- 2023-04-07 16:52:08 - r - INFO: - target_update 4 <class 'int'>
32
- 2023-04-07 16:52:08 - r - INFO: - value_layers [{'layer_type': 'linear', 'layer_dim': ['n_states', 256], 'activation': 'relu'}, {'layer_type': 'linear', 'layer_dim': [256, 256], 'activation': 'relu'}, {'layer_type': 'linear', 'layer_dim': [256, 'n_actions'], 'activation': 'none'}] <class 'str'>
33
- 2023-04-07 16:52:08 - r - INFO: - hidden_dim 256 <class 'int'>
34
- 2023-04-07 16:52:08 - r - INFO: - task_dir /media/disk/gsc/joyrl-offline/tasks/Test_CartPole-v1_DuelingDQN_20230407-165208 <class 'str'>
35
- 2023-04-07 16:52:08 - r - INFO: - res_dir /media/disk/gsc/joyrl-offline/tasks/Test_CartPole-v1_DuelingDQN_20230407-165208/results <class 'str'>
36
- 2023-04-07 16:52:08 - r - INFO: - log_dir /media/disk/gsc/joyrl-offline/tasks/Test_CartPole-v1_DuelingDQN_20230407-165208/logs <class 'str'>
37
- 2023-04-07 16:52:08 - r - INFO: - traj_dir /media/disk/gsc/joyrl-offline/tasks/Test_CartPole-v1_DuelingDQN_20230407-165208/traj <class 'str'>
38
- 2023-04-07 16:52:08 - r - INFO: - video_dir /media/disk/gsc/joyrl-offline/tasks/Test_CartPole-v1_DuelingDQN_20230407-165208/videos <class 'str'>
39
- 2023-04-07 16:52:08 - r - INFO: - ================================================================================
40
- 2023-04-07 16:52:08 - r - INFO: - n_states: 4, n_actions: 2
41
- 2023-04-07 16:52:08 - r - INFO: - Start testing!
42
- 2023-04-07 16:52:08 - r - INFO: - Env: CartPole-v1, Algorithm: DuelingDQN, Device: cpu
43
- 2023-04-07 16:52:08 - r - INFO: - Episode: 1/10, Reward: 171.000, Step: 171
44
- 2023-04-07 16:52:08 - r - INFO: - Episode: 2/10, Reward: 185.000, Step: 185
45
- 2023-04-07 16:52:08 - r - INFO: - Episode: 3/10, Reward: 159.000, Step: 159
46
- 2023-04-07 16:52:08 - r - INFO: - Episode: 4/10, Reward: 155.000, Step: 155
47
- 2023-04-07 16:52:08 - r - INFO: - Episode: 5/10, Reward: 200.000, Step: 200
48
- 2023-04-07 16:52:08 - r - INFO: - Episode: 6/10, Reward: 120.000, Step: 120
49
- 2023-04-07 16:52:08 - r - INFO: - Episode: 7/10, Reward: 200.000, Step: 200
50
- 2023-04-07 16:52:08 - r - INFO: - Episode: 8/10, Reward: 187.000, Step: 187
51
- 2023-04-07 16:52:08 - r - INFO: - Episode: 9/10, Reward: 154.000, Step: 154
52
- 2023-04-07 16:52:08 - r - INFO: - Episode: 10/10, Reward: 200.000, Step: 200
53
- 2023-04-07 16:52:08 - r - INFO: - Finish testing!
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
CartPole-v1/Test_CartPole-v1_DuelingDQN_ray_20230407-165208/results/learning_curve.png DELETED
Binary file (45.9 kB)
 
CartPole-v1/Test_CartPole-v1_DuelingDQN_ray_20230407-165208/results/res.csv DELETED
@@ -1,11 +0,0 @@
1
- episodes,rewards,steps
2
- 0,171.0,171
3
- 1,185.0,185
4
- 2,159.0,159
5
- 3,155.0,155
6
- 4,200.0,200
7
- 5,120.0,120
8
- 6,200.0,200
9
- 7,187.0,187
10
- 8,154.0,154
11
- 9,200.0,200
 
 
 
 
 
 
 
 
 
 
 
 
CartPole-v1/Train_CartPole-v1_DuelingDQN_20221122-125403/config.yaml DELETED
@@ -1,41 +0,0 @@
1
- general_cfg:
2
- algo_name: DuelingDQN
3
- device: cuda
4
- env_name: CartPole-v1
5
- eval_eps: 10
6
- eval_per_episode: 5
7
- load_checkpoint: false
8
- load_path: Train_CartPole-v1_DQN_20221026-054757
9
- max_steps: 200
10
- mode: train
11
- save_fig: true
12
- seed: 1
13
- show_fig: false
14
- test_eps: 10
15
- train_eps: 100
16
- algo_cfg:
17
- batch_size: 64
18
- buffer_size: 100000
19
- epsilon_decay: 500
20
- epsilon_end: 0.01
21
- epsilon_start: 0.95
22
- gamma: 0.99
23
- hidden_dim: 256
24
- lr: 0.0001
25
- target_update: 4
26
- value_layers:
27
- - activation: relu
28
- layer_dim:
29
- - n_states
30
- - 256
31
- layer_type: linear
32
- - activation: relu
33
- layer_dim:
34
- - 256
35
- - 256
36
- layer_type: linear
37
- - activation: none
38
- layer_dim:
39
- - 256
40
- - n_actions
41
- layer_type: linear
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
CartPole-v1/Train_CartPole-v1_DuelingDQN_20221122-125403/logs/log.txt DELETED
@@ -1,119 +0,0 @@
1
- 2022-11-22 12:54:03 - r - INFO: - n_states: 4, n_actions: 2
2
- 2022-11-22 12:54:06 - r - INFO: - Start training!
3
- 2022-11-22 12:54:06 - r - INFO: - Env: CartPole-v1, Algorithm: DuelingDQN, Device: cuda
4
- 2022-11-22 12:54:06 - r - INFO: - Episode: 1/100, Reward: 18.000, Step: 18
5
- 2022-11-22 12:54:06 - r - INFO: - Episode: 2/100, Reward: 35.000, Step: 35
6
- 2022-11-22 12:54:06 - r - INFO: - Episode: 3/100, Reward: 13.000, Step: 13
7
- 2022-11-22 12:54:06 - r - INFO: - Episode: 4/100, Reward: 32.000, Step: 32
8
- 2022-11-22 12:54:06 - r - INFO: - Episode: 5/100, Reward: 16.000, Step: 16
9
- 2022-11-22 12:54:06 - r - INFO: - Current episode 5 has the best eval reward: 9.100
10
- 2022-11-22 12:54:06 - r - INFO: - Episode: 6/100, Reward: 9.000, Step: 9
11
- 2022-11-22 12:54:06 - r - INFO: - Episode: 7/100, Reward: 12.000, Step: 12
12
- 2022-11-22 12:54:06 - r - INFO: - Episode: 8/100, Reward: 16.000, Step: 16
13
- 2022-11-22 12:54:06 - r - INFO: - Episode: 9/100, Reward: 14.000, Step: 14
14
- 2022-11-22 12:54:06 - r - INFO: - Episode: 10/100, Reward: 12.000, Step: 12
15
- 2022-11-22 12:54:06 - r - INFO: - Current episode 10 has the best eval reward: 9.200
16
- 2022-11-22 12:54:06 - r - INFO: - Episode: 11/100, Reward: 13.000, Step: 13
17
- 2022-11-22 12:54:06 - r - INFO: - Episode: 12/100, Reward: 14.000, Step: 14
18
- 2022-11-22 12:54:06 - r - INFO: - Episode: 13/100, Reward: 19.000, Step: 19
19
- 2022-11-22 12:54:06 - r - INFO: - Episode: 14/100, Reward: 9.000, Step: 9
20
- 2022-11-22 12:54:06 - r - INFO: - Episode: 15/100, Reward: 15.000, Step: 15
21
- 2022-11-22 12:54:06 - r - INFO: - Current episode 15 has the best eval reward: 9.300
22
- 2022-11-22 12:54:06 - r - INFO: - Episode: 16/100, Reward: 12.000, Step: 12
23
- 2022-11-22 12:54:06 - r - INFO: - Episode: 17/100, Reward: 11.000, Step: 11
24
- 2022-11-22 12:54:06 - r - INFO: - Episode: 18/100, Reward: 9.000, Step: 9
25
- 2022-11-22 12:54:07 - r - INFO: - Episode: 19/100, Reward: 13.000, Step: 13
26
- 2022-11-22 12:54:07 - r - INFO: - Episode: 20/100, Reward: 17.000, Step: 17
27
- 2022-11-22 12:54:07 - r - INFO: - Current episode 20 has the best eval reward: 9.900
28
- 2022-11-22 12:54:07 - r - INFO: - Episode: 21/100, Reward: 14.000, Step: 14
29
- 2022-11-22 12:54:07 - r - INFO: - Episode: 22/100, Reward: 20.000, Step: 20
30
- 2022-11-22 12:54:07 - r - INFO: - Episode: 23/100, Reward: 11.000, Step: 11
31
- 2022-11-22 12:54:07 - r - INFO: - Episode: 24/100, Reward: 24.000, Step: 24
32
- 2022-11-22 12:54:07 - r - INFO: - Episode: 25/100, Reward: 11.000, Step: 11
33
- 2022-11-22 12:54:07 - r - INFO: - Episode: 26/100, Reward: 11.000, Step: 11
34
- 2022-11-22 12:54:07 - r - INFO: - Episode: 27/100, Reward: 11.000, Step: 11
35
- 2022-11-22 12:54:07 - r - INFO: - Episode: 28/100, Reward: 13.000, Step: 13
36
- 2022-11-22 12:54:07 - r - INFO: - Episode: 29/100, Reward: 11.000, Step: 11
37
- 2022-11-22 12:54:07 - r - INFO: - Episode: 30/100, Reward: 8.000, Step: 8
38
- 2022-11-22 12:54:07 - r - INFO: - Episode: 31/100, Reward: 13.000, Step: 13
39
- 2022-11-22 12:54:07 - r - INFO: - Episode: 32/100, Reward: 9.000, Step: 9
40
- 2022-11-22 12:54:07 - r - INFO: - Episode: 33/100, Reward: 34.000, Step: 34
41
- 2022-11-22 12:54:07 - r - INFO: - Episode: 34/100, Reward: 10.000, Step: 10
42
- 2022-11-22 12:54:07 - r - INFO: - Episode: 35/100, Reward: 10.000, Step: 10
43
- 2022-11-22 12:54:07 - r - INFO: - Episode: 36/100, Reward: 10.000, Step: 10
44
- 2022-11-22 12:54:07 - r - INFO: - Episode: 37/100, Reward: 34.000, Step: 34
45
- 2022-11-22 12:54:07 - r - INFO: - Episode: 38/100, Reward: 35.000, Step: 35
46
- 2022-11-22 12:54:07 - r - INFO: - Episode: 39/100, Reward: 32.000, Step: 32
47
- 2022-11-22 12:54:08 - r - INFO: - Episode: 40/100, Reward: 37.000, Step: 37
48
- 2022-11-22 12:54:08 - r - INFO: - Current episode 40 has the best eval reward: 27.500
49
- 2022-11-22 12:54:08 - r - INFO: - Episode: 41/100, Reward: 29.000, Step: 29
50
- 2022-11-22 12:54:08 - r - INFO: - Episode: 42/100, Reward: 52.000, Step: 52
51
- 2022-11-22 12:54:08 - r - INFO: - Episode: 43/100, Reward: 54.000, Step: 54
52
- 2022-11-22 12:54:08 - r - INFO: - Episode: 44/100, Reward: 90.000, Step: 90
53
- 2022-11-22 12:54:08 - r - INFO: - Episode: 45/100, Reward: 91.000, Step: 91
54
- 2022-11-22 12:54:09 - r - INFO: - Current episode 45 has the best eval reward: 87.500
55
- 2022-11-22 12:54:09 - r - INFO: - Episode: 46/100, Reward: 51.000, Step: 51
56
- 2022-11-22 12:54:09 - r - INFO: - Episode: 47/100, Reward: 101.000, Step: 101
57
- 2022-11-22 12:54:09 - r - INFO: - Episode: 48/100, Reward: 67.000, Step: 67
58
- 2022-11-22 12:54:09 - r - INFO: - Episode: 49/100, Reward: 103.000, Step: 103
59
- 2022-11-22 12:54:10 - r - INFO: - Episode: 50/100, Reward: 45.000, Step: 45
60
- 2022-11-22 12:54:10 - r - INFO: - Episode: 51/100, Reward: 137.000, Step: 137
61
- 2022-11-22 12:54:10 - r - INFO: - Episode: 52/100, Reward: 47.000, Step: 47
62
- 2022-11-22 12:54:10 - r - INFO: - Episode: 53/100, Reward: 89.000, Step: 89
63
- 2022-11-22 12:54:11 - r - INFO: - Episode: 54/100, Reward: 95.000, Step: 95
64
- 2022-11-22 12:54:11 - r - INFO: - Episode: 55/100, Reward: 55.000, Step: 55
65
- 2022-11-22 12:54:11 - r - INFO: - Episode: 56/100, Reward: 92.000, Step: 92
66
- 2022-11-22 12:54:12 - r - INFO: - Episode: 57/100, Reward: 155.000, Step: 155
67
- 2022-11-22 12:54:12 - r - INFO: - Episode: 58/100, Reward: 125.000, Step: 125
68
- 2022-11-22 12:54:12 - r - INFO: - Episode: 59/100, Reward: 152.000, Step: 152
69
- 2022-11-22 12:54:13 - r - INFO: - Episode: 60/100, Reward: 199.000, Step: 199
70
- 2022-11-22 12:54:13 - r - INFO: - Current episode 60 has the best eval reward: 179.100
71
- 2022-11-22 12:54:14 - r - INFO: - Episode: 61/100, Reward: 88.000, Step: 88
72
- 2022-11-22 12:54:14 - r - INFO: - Episode: 62/100, Reward: 200.000, Step: 200
73
- 2022-11-22 12:54:14 - r - INFO: - Episode: 63/100, Reward: 176.000, Step: 176
74
- 2022-11-22 12:54:15 - r - INFO: - Episode: 64/100, Reward: 200.000, Step: 200
75
- 2022-11-22 12:54:15 - r - INFO: - Episode: 65/100, Reward: 200.000, Step: 200
76
- 2022-11-22 12:54:16 - r - INFO: - Current episode 65 has the best eval reward: 198.700
77
- 2022-11-22 12:54:16 - r - INFO: - Episode: 66/100, Reward: 193.000, Step: 193
78
- 2022-11-22 12:54:17 - r - INFO: - Episode: 67/100, Reward: 200.000, Step: 200
79
- 2022-11-22 12:54:17 - r - INFO: - Episode: 68/100, Reward: 200.000, Step: 200
80
- 2022-11-22 12:54:18 - r - INFO: - Episode: 69/100, Reward: 200.000, Step: 200
81
- 2022-11-22 12:54:18 - r - INFO: - Episode: 70/100, Reward: 200.000, Step: 200
82
- 2022-11-22 12:54:19 - r - INFO: - Current episode 70 has the best eval reward: 200.000
83
- 2022-11-22 12:54:20 - r - INFO: - Episode: 71/100, Reward: 200.000, Step: 200
84
- 2022-11-22 12:54:20 - r - INFO: - Episode: 72/100, Reward: 200.000, Step: 200
85
- 2022-11-22 12:54:20 - r - INFO: - Episode: 73/100, Reward: 200.000, Step: 200
86
- 2022-11-22 12:54:21 - r - INFO: - Episode: 74/100, Reward: 200.000, Step: 200
87
- 2022-11-22 12:54:21 - r - INFO: - Episode: 75/100, Reward: 200.000, Step: 200
88
- 2022-11-22 12:54:22 - r - INFO: - Current episode 75 has the best eval reward: 200.000
89
- 2022-11-22 12:54:23 - r - INFO: - Episode: 76/100, Reward: 200.000, Step: 200
90
- 2022-11-22 12:54:23 - r - INFO: - Episode: 77/100, Reward: 200.000, Step: 200
91
- 2022-11-22 12:54:24 - r - INFO: - Episode: 78/100, Reward: 200.000, Step: 200
92
- 2022-11-22 12:54:24 - r - INFO: - Episode: 79/100, Reward: 200.000, Step: 200
93
- 2022-11-22 12:54:24 - r - INFO: - Episode: 80/100, Reward: 200.000, Step: 200
94
- 2022-11-22 12:54:25 - r - INFO: - Current episode 80 has the best eval reward: 200.000
95
- 2022-11-22 12:54:26 - r - INFO: - Episode: 81/100, Reward: 200.000, Step: 200
96
- 2022-11-22 12:54:26 - r - INFO: - Episode: 82/100, Reward: 200.000, Step: 200
97
- 2022-11-22 12:54:27 - r - INFO: - Episode: 83/100, Reward: 200.000, Step: 200
98
- 2022-11-22 12:54:27 - r - INFO: - Episode: 84/100, Reward: 200.000, Step: 200
99
- 2022-11-22 12:54:27 - r - INFO: - Episode: 85/100, Reward: 200.000, Step: 200
100
- 2022-11-22 12:54:28 - r - INFO: - Current episode 85 has the best eval reward: 200.000
101
- 2022-11-22 12:54:29 - r - INFO: - Episode: 86/100, Reward: 200.000, Step: 200
102
- 2022-11-22 12:54:29 - r - INFO: - Episode: 87/100, Reward: 200.000, Step: 200
103
- 2022-11-22 12:54:30 - r - INFO: - Episode: 88/100, Reward: 200.000, Step: 200
104
- 2022-11-22 12:54:30 - r - INFO: - Episode: 89/100, Reward: 200.000, Step: 200
105
- 2022-11-22 12:54:30 - r - INFO: - Episode: 90/100, Reward: 200.000, Step: 200
106
- 2022-11-22 12:54:31 - r - INFO: - Current episode 90 has the best eval reward: 200.000
107
- 2022-11-22 12:54:32 - r - INFO: - Episode: 91/100, Reward: 200.000, Step: 200
108
- 2022-11-22 12:54:32 - r - INFO: - Episode: 92/100, Reward: 200.000, Step: 200
109
- 2022-11-22 12:54:33 - r - INFO: - Episode: 93/100, Reward: 200.000, Step: 200
110
- 2022-11-22 12:54:33 - r - INFO: - Episode: 94/100, Reward: 200.000, Step: 200
111
- 2022-11-22 12:54:34 - r - INFO: - Episode: 95/100, Reward: 200.000, Step: 200
112
- 2022-11-22 12:54:34 - r - INFO: - Current episode 95 has the best eval reward: 200.000
113
- 2022-11-22 12:54:35 - r - INFO: - Episode: 96/100, Reward: 200.000, Step: 200
114
- 2022-11-22 12:54:35 - r - INFO: - Episode: 97/100, Reward: 200.000, Step: 200
115
- 2022-11-22 12:54:36 - r - INFO: - Episode: 98/100, Reward: 200.000, Step: 200
116
- 2022-11-22 12:54:36 - r - INFO: - Episode: 99/100, Reward: 200.000, Step: 200
117
- 2022-11-22 12:54:37 - r - INFO: - Episode: 100/100, Reward: 200.000, Step: 200
118
- 2022-11-22 12:54:37 - r - INFO: - Current episode 100 has the best eval reward: 200.000
119
- 2022-11-22 12:54:37 - r - INFO: - Finish training!
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
CartPole-v1/Train_CartPole-v1_DuelingDQN_20221122-125403/results/learning_curve.png DELETED
Binary file (47.5 kB)
 
CartPole-v1/Train_CartPole-v1_DuelingDQN_20221122-125403/results/res.csv DELETED
@@ -1,101 +0,0 @@
1
- episodes,rewards,steps
2
- 0,18.0,18
3
- 1,35.0,35
4
- 2,13.0,13
5
- 3,32.0,32
6
- 4,16.0,16
7
- 5,9.0,9
8
- 6,12.0,12
9
- 7,16.0,16
10
- 8,14.0,14
11
- 9,12.0,12
12
- 10,13.0,13
13
- 11,14.0,14
14
- 12,19.0,19
15
- 13,9.0,9
16
- 14,15.0,15
17
- 15,12.0,12
18
- 16,11.0,11
19
- 17,9.0,9
20
- 18,13.0,13
21
- 19,17.0,17
22
- 20,14.0,14
23
- 21,20.0,20
24
- 22,11.0,11
25
- 23,24.0,24
26
- 24,11.0,11
27
- 25,11.0,11
28
- 26,11.0,11
29
- 27,13.0,13
30
- 28,11.0,11
31
- 29,8.0,8
32
- 30,13.0,13
33
- 31,9.0,9
34
- 32,34.0,34
35
- 33,10.0,10
36
- 34,10.0,10
37
- 35,10.0,10
38
- 36,34.0,34
39
- 37,35.0,35
40
- 38,32.0,32
41
- 39,37.0,37
42
- 40,29.0,29
43
- 41,52.0,52
44
- 42,54.0,54
45
- 43,90.0,90
46
- 44,91.0,91
47
- 45,51.0,51
48
- 46,101.0,101
49
- 47,67.0,67
50
- 48,103.0,103
51
- 49,45.0,45
52
- 50,137.0,137
53
- 51,47.0,47
54
- 52,89.0,89
55
- 53,95.0,95
56
- 54,55.0,55
57
- 55,92.0,92
58
- 56,155.0,155
59
- 57,125.0,125
60
- 58,152.0,152
61
- 59,199.0,199
62
- 60,88.0,88
63
- 61,200.0,200
64
- 62,176.0,176
65
- 63,200.0,200
66
- 64,200.0,200
67
- 65,193.0,193
68
- 66,200.0,200
69
- 67,200.0,200
70
- 68,200.0,200
71
- 69,200.0,200
72
- 70,200.0,200
73
- 71,200.0,200
74
- 72,200.0,200
75
- 73,200.0,200
76
- 74,200.0,200
77
- 75,200.0,200
78
- 76,200.0,200
79
- 77,200.0,200
80
- 78,200.0,200
81
- 79,200.0,200
82
- 80,200.0,200
83
- 81,200.0,200
84
- 82,200.0,200
85
- 83,200.0,200
86
- 84,200.0,200
87
- 85,200.0,200
88
- 86,200.0,200
89
- 87,200.0,200
90
- 88,200.0,200
91
- 89,200.0,200
92
- 90,200.0,200
93
- 91,200.0,200
94
- 92,200.0,200
95
- 93,200.0,200
96
- 94,200.0,200
97
- 95,200.0,200
98
- 96,200.0,200
99
- 97,200.0,200
100
- 98,200.0,200
101
- 99,200.0,200
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
CartPole-v1/Train_CartPole-v1_DuelingDQN_mp_20230407-170853/logs/log.txt DELETED
@@ -1,43 +0,0 @@
1
- 2023-04-07 17:08:53 - r - INFO: - Hyperparameters:
2
- 2023-04-07 17:08:53 - r - INFO: - ================================================================================
3
- 2023-04-07 17:08:53 - r - INFO: - Name Value Type
4
- 2023-04-07 17:08:53 - r - INFO: - env_name CartPole-v1 <class 'str'>
5
- 2023-04-07 17:08:53 - r - INFO: - new_step_api 1 <class 'bool'>
6
- 2023-04-07 17:08:53 - r - INFO: - wrapper None <class 'str'>
7
- 2023-04-07 17:08:53 - r - INFO: - render 0 <class 'bool'>
8
- 2023-04-07 17:08:53 - r - INFO: - render_mode human <class 'str'>
9
- 2023-04-07 17:08:53 - r - INFO: - algo_name DuelingDQN <class 'str'>
10
- 2023-04-07 17:08:53 - r - INFO: - mode train <class 'str'>
11
- 2023-04-07 17:08:53 - r - INFO: - mp_backend mp <class 'str'>
12
- 2023-04-07 17:08:53 - r - INFO: - seed 1 <class 'int'>
13
- 2023-04-07 17:08:53 - r - INFO: - device cpu <class 'str'>
14
- 2023-04-07 17:08:53 - r - INFO: - train_eps 200 <class 'int'>
15
- 2023-04-07 17:08:53 - r - INFO: - test_eps 10 <class 'int'>
16
- 2023-04-07 17:08:53 - r - INFO: - eval_eps 10 <class 'int'>
17
- 2023-04-07 17:08:53 - r - INFO: - eval_per_episode 5 <class 'int'>
18
- 2023-04-07 17:08:53 - r - INFO: - max_steps 200 <class 'int'>
19
- 2023-04-07 17:08:53 - r - INFO: - load_checkpoint 0 <class 'bool'>
20
- 2023-04-07 17:08:53 - r - INFO: - load_path Train_CartPole-v1_DQN_20221026-054757 <class 'str'>
21
- 2023-04-07 17:08:53 - r - INFO: - show_fig 0 <class 'bool'>
22
- 2023-04-07 17:08:53 - r - INFO: - save_fig 1 <class 'bool'>
23
- 2023-04-07 17:08:53 - r - INFO: - n_workers 4 <class 'int'>
24
- 2023-04-07 17:08:53 - r - INFO: - epsilon_start 0.95 <class 'float'>
25
- 2023-04-07 17:08:53 - r - INFO: - epsilon_end 0.01 <class 'float'>
26
- 2023-04-07 17:08:53 - r - INFO: - epsilon_decay 500 <class 'int'>
27
- 2023-04-07 17:08:53 - r - INFO: - gamma 0.99 <class 'float'>
28
- 2023-04-07 17:08:53 - r - INFO: - lr 0.0001 <class 'float'>
29
- 2023-04-07 17:08:53 - r - INFO: - buffer_size 100000 <class 'int'>
30
- 2023-04-07 17:08:53 - r - INFO: - batch_size 64 <class 'int'>
31
- 2023-04-07 17:08:53 - r - INFO: - target_update 4 <class 'int'>
32
- 2023-04-07 17:08:53 - r - INFO: - value_layers [{'layer_type': 'linear', 'layer_dim': ['n_states', 256], 'activation': 'relu'}, {'layer_type': 'linear', 'layer_dim': [256, 256], 'activation': 'relu'}, {'layer_type': 'linear', 'layer_dim': [256, 'n_actions'], 'activation': 'none'}] <class 'str'>
33
- 2023-04-07 17:08:53 - r - INFO: - hidden_dim 256 <class 'int'>
34
- 2023-04-07 17:08:53 - r - INFO: - task_dir /media/disk/gsc/joyrl-offline/tasks/Train_CartPole-v1_DuelingDQN_20230407-170853 <class 'str'>
35
- 2023-04-07 17:08:53 - r - INFO: - res_dir /media/disk/gsc/joyrl-offline/tasks/Train_CartPole-v1_DuelingDQN_20230407-170853/results <class 'str'>
36
- 2023-04-07 17:08:53 - r - INFO: - log_dir /media/disk/gsc/joyrl-offline/tasks/Train_CartPole-v1_DuelingDQN_20230407-170853/logs <class 'str'>
37
- 2023-04-07 17:08:53 - r - INFO: - traj_dir /media/disk/gsc/joyrl-offline/tasks/Train_CartPole-v1_DuelingDQN_20230407-170853/traj <class 'str'>
38
- 2023-04-07 17:08:53 - r - INFO: - video_dir /media/disk/gsc/joyrl-offline/tasks/Train_CartPole-v1_DuelingDQN_20230407-170853/videos <class 'str'>
39
- 2023-04-07 17:08:53 - r - INFO: - ================================================================================
40
- 2023-04-07 17:08:53 - r - INFO: - n_states: 4, n_actions: 2
41
- 2023-04-07 17:08:53 - r - INFO: - Start training!
42
- 2023-04-07 17:08:53 - r - INFO: - Env: CartPole-v1, Algorithm: DuelingDQN, Device: cpu
43
- 2023-04-07 17:10:11 - r - INFO: - Finish training!
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
CartPole-v1/Train_CartPole-v1_DuelingDQN_mp_20230407-170853/models/checkpoint.pt DELETED
@@ -1,3 +0,0 @@
1
- version https://git-lfs.github.com/spec/v1
2
- oid sha256:87aab291b33d6423c3c54eced436183398700a290427c1913be9d65f5503b5ae
3
- size 537607
 
 
 
 
CartPole-v1/Train_CartPole-v1_DuelingDQN_mp_20230407-170853/results/learning_curve.png DELETED
Binary file (55.3 kB)
 
CartPole-v1/Train_CartPole-v1_DuelingDQN_mp_20230407-170853/results/res.csv DELETED
@@ -1,202 +0,0 @@
1
- episodes,rewards
2
- 0,17.0
3
- 1,16.0
4
- 2,24.0
5
- 3,38.0
6
- 4,18.0
7
- 5,18.0
8
- 6,19.0
9
- 7,17.0
10
- 8,27.0
11
- 9,16.0
12
- 10,15.0
13
- 11,31.0
14
- 12,33.0
15
- 13,13.0
16
- 14,9.0
17
- 15,34.0
18
- 16,32.0
19
- 17,34.0
20
- 18,12.0
21
- 19,13.0
22
- 20,11.0
23
- 21,9.0
24
- 22,9.0
25
- 23,11.0
26
- 24,38.0
27
- 25,13.0
28
- 26,20.0
29
- 27,10.0
30
- 28,33.0
31
- 29,15.0
32
- 30,32.0
33
- 31,11.0
34
- 32,13.0
35
- 33,12.0
36
- 34,9.0
37
- 35,13.0
38
- 36,14.0
39
- 37,21.0
40
- 38,48.0
41
- 39,19.0
42
- 40,65.0
43
- 41,28.0
44
- 42,15.0
45
- 43,11.0
46
- 44,54.0
47
- 45,26.0
48
- 46,37.0
49
- 47,40.0
50
- 48,54.0
51
- 49,54.0
52
- 50,50.0
53
- 51,84.0
54
- 52,55.0
55
- 53,43.0
56
- 54,45.0
57
- 55,48.0
58
- 56,88.0
59
- 57,41.0
60
- 58,46.0
61
- 59,61.0
62
- 60,32.0
63
- 61,53.0
64
- 62,59.0
65
- 63,49.0
66
- 64,60.0
67
- 65,35.0
68
- 66,82.0
69
- 67,50.0
70
- 68,108.0
71
- 69,121.0
72
- 70,113.0
73
- 71,67.0
74
- 72,87.0
75
- 73,96.0
76
- 74,181.0
77
- 75,62.0
78
- 76,137.0
79
- 77,175.0
80
- 78,123.0
81
- 79,149.0
82
- 80,172.0
83
- 81,200.0
84
- 82,156.0
85
- 83,146.0
86
- 84,200.0
87
- 85,200.0
88
- 86,200.0
89
- 87,128.0
90
- 88,188.0
91
- 89,200.0
92
- 90,200.0
93
- 91,200.0
94
- 92,200.0
95
- 93,200.0
96
- 94,200.0
97
- 95,200.0
98
- 96,200.0
99
- 97,200.0
100
- 98,195.0
101
- 99,200.0
102
- 100,200.0
103
- 101,196.0
104
- 102,200.0
105
- 103,200.0
106
- 104,192.0
107
- 105,200.0
108
- 106,190.0
109
- 107,200.0
110
- 108,200.0
111
- 109,200.0
112
- 110,197.0
113
- 111,200.0
114
- 112,200.0
115
- 113,200.0
116
- 114,200.0
117
- 115,200.0
118
- 116,200.0
119
- 117,200.0
120
- 118,200.0
121
- 119,200.0
122
- 120,200.0
123
- 121,200.0
124
- 122,200.0
125
- 123,200.0
126
- 124,200.0
127
- 125,200.0
128
- 126,200.0
129
- 127,200.0
130
- 128,200.0
131
- 129,200.0
132
- 130,200.0
133
- 131,200.0
134
- 132,200.0
135
- 133,200.0
136
- 134,200.0
137
- 135,200.0
138
- 136,200.0
139
- 137,197.0
140
- 138,200.0
141
- 139,200.0
142
- 140,200.0
143
- 141,200.0
144
- 142,200.0
145
- 143,200.0
146
- 144,21.0
147
- 145,193.0
148
- 146,123.0
149
- 147,194.0
150
- 148,9.0
151
- 149,9.0
152
- 150,48.0
153
- 151,200.0
154
- 152,200.0
155
- 153,200.0
156
- 154,200.0
157
- 155,200.0
158
- 156,200.0
159
- 157,200.0
160
- 158,200.0
161
- 159,200.0
162
- 160,200.0
163
- 161,200.0
164
- 162,200.0
165
- 163,200.0
166
- 164,200.0
167
- 165,200.0
168
- 166,200.0
169
- 167,200.0
170
- 168,200.0
171
- 169,200.0
172
- 170,200.0
173
- 171,200.0
174
- 172,200.0
175
- 173,200.0
176
- 174,200.0
177
- 175,200.0
178
- 176,200.0
179
- 177,200.0
180
- 178,200.0
181
- 179,200.0
182
- 180,200.0
183
- 181,200.0
184
- 182,200.0
185
- 183,200.0
186
- 184,200.0
187
- 185,200.0
188
- 186,200.0
189
- 187,200.0
190
- 188,200.0
191
- 189,200.0
192
- 190,200.0
193
- 191,200.0
194
- 192,200.0
195
- 193,200.0
196
- 194,200.0
197
- 195,200.0
198
- 196,200.0
199
- 197,200.0
200
- 198,200.0
201
- 199,200.0
202
- 200,200.0
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
CartPole-v1/Train_CartPole-v1_DuelingDQN_ray_20230407-153236/logs/log.txt DELETED
@@ -1,43 +0,0 @@
1
- 2023-04-07 15:32:36 - r - INFO: - Hyperparameters:
2
- 2023-04-07 15:32:36 - r - INFO: - ================================================================================
3
- 2023-04-07 15:32:36 - r - INFO: - Name Value Type
4
- 2023-04-07 15:32:36 - r - INFO: - env_name CartPole-v1 <class 'str'>
5
- 2023-04-07 15:32:36 - r - INFO: - new_step_api 1 <class 'bool'>
6
- 2023-04-07 15:32:36 - r - INFO: - wrapper None <class 'str'>
7
- 2023-04-07 15:32:36 - r - INFO: - render 0 <class 'bool'>
8
- 2023-04-07 15:32:36 - r - INFO: - render_mode human <class 'str'>
9
- 2023-04-07 15:32:36 - r - INFO: - algo_name DuelingDQN <class 'str'>
10
- 2023-04-07 15:32:36 - r - INFO: - mode train <class 'str'>
11
- 2023-04-07 15:32:36 - r - INFO: - mp_backend ray <class 'str'>
12
- 2023-04-07 15:32:36 - r - INFO: - seed 1 <class 'int'>
13
- 2023-04-07 15:32:36 - r - INFO: - device cpu <class 'str'>
14
- 2023-04-07 15:32:36 - r - INFO: - train_eps 400 <class 'int'>
15
- 2023-04-07 15:32:36 - r - INFO: - test_eps 10 <class 'int'>
16
- 2023-04-07 15:32:36 - r - INFO: - eval_eps 10 <class 'int'>
17
- 2023-04-07 15:32:36 - r - INFO: - eval_per_episode 5 <class 'int'>
18
- 2023-04-07 15:32:36 - r - INFO: - max_steps 200 <class 'int'>
19
- 2023-04-07 15:32:36 - r - INFO: - load_checkpoint 0 <class 'bool'>
20
- 2023-04-07 15:32:36 - r - INFO: - load_path Train_CartPole-v1_DQN_20221026-054757 <class 'str'>
21
- 2023-04-07 15:32:36 - r - INFO: - show_fig 0 <class 'bool'>
22
- 2023-04-07 15:32:36 - r - INFO: - save_fig 1 <class 'bool'>
23
- 2023-04-07 15:32:36 - r - INFO: - n_workers 2 <class 'int'>
24
- 2023-04-07 15:32:36 - r - INFO: - epsilon_start 0.95 <class 'float'>
25
- 2023-04-07 15:32:36 - r - INFO: - epsilon_end 0.01 <class 'float'>
26
- 2023-04-07 15:32:36 - r - INFO: - epsilon_decay 500 <class 'int'>
27
- 2023-04-07 15:32:36 - r - INFO: - gamma 0.99 <class 'float'>
28
- 2023-04-07 15:32:36 - r - INFO: - lr 0.0001 <class 'float'>
29
- 2023-04-07 15:32:36 - r - INFO: - buffer_size 100000 <class 'int'>
30
- 2023-04-07 15:32:36 - r - INFO: - batch_size 64 <class 'int'>
31
- 2023-04-07 15:32:36 - r - INFO: - target_update 4 <class 'int'>
32
- 2023-04-07 15:32:36 - r - INFO: - value_layers [{'layer_type': 'linear', 'layer_dim': ['n_states', 256], 'activation': 'relu'}, {'layer_type': 'linear', 'layer_dim': [256, 256], 'activation': 'relu'}, {'layer_type': 'linear', 'layer_dim': [256, 'n_actions'], 'activation': 'none'}] <class 'str'>
33
- 2023-04-07 15:32:36 - r - INFO: - hidden_dim 256 <class 'int'>
34
- 2023-04-07 15:32:36 - r - INFO: - task_dir /media/disk/gsc/joyrl-offline/tasks/Train_CartPole-v1_DuelingDQN_20230407-153236 <class 'str'>
35
- 2023-04-07 15:32:36 - r - INFO: - res_dir /media/disk/gsc/joyrl-offline/tasks/Train_CartPole-v1_DuelingDQN_20230407-153236/results <class 'str'>
36
- 2023-04-07 15:32:36 - r - INFO: - log_dir /media/disk/gsc/joyrl-offline/tasks/Train_CartPole-v1_DuelingDQN_20230407-153236/logs <class 'str'>
37
- 2023-04-07 15:32:36 - r - INFO: - traj_dir /media/disk/gsc/joyrl-offline/tasks/Train_CartPole-v1_DuelingDQN_20230407-153236/traj <class 'str'>
38
- 2023-04-07 15:32:36 - r - INFO: - video_dir /media/disk/gsc/joyrl-offline/tasks/Train_CartPole-v1_DuelingDQN_20230407-153236/videos <class 'str'>
39
- 2023-04-07 15:32:36 - r - INFO: - ================================================================================
40
- 2023-04-07 15:32:39 - r - INFO: - n_states: 4, n_actions: 2
41
- 2023-04-07 15:32:39 - r - INFO: - Start training!
42
- 2023-04-07 15:32:39 - r - INFO: - Env: CartPole-v1, Algorithm: DuelingDQN, Device: cpu
43
- 2023-04-07 15:40:31 - r - INFO: - Finish training!
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
CartPole-v1/Train_CartPole-v1_DuelingDQN_ray_20230407-153236/models/checkpoint.pt DELETED
@@ -1,3 +0,0 @@
1
- version https://git-lfs.github.com/spec/v1
2
- oid sha256:03f1262598e3d636dd22e3b2fc0dfe52bf7a55348d54f51f02a8410682ec5a18
3
- size 537607
 
 
 
 
CartPole-v1/Train_CartPole-v1_DuelingDQN_ray_20230407-153236/results/learning_curve.png DELETED
Binary file (62.6 kB)
 
CartPole-v1/Train_CartPole-v1_DuelingDQN_ray_20230407-153236/results/res.csv DELETED
@@ -1,401 +0,0 @@
1
- episodes,rewards
2
- 0,18.0
3
- 1,18.0
4
- 2,19.0
5
- 3,28.0
6
- 4,17.0
7
- 5,15.0
8
- 6,13.0
9
- 7,15.0
10
- 8,38.0
11
- 9,31.0
12
- 10,11.0
13
- 11,31.0
14
- 12,10.0
15
- 13,17.0
16
- 14,14.0
17
- 15,13.0
18
- 16,21.0
19
- 17,15.0
20
- 18,9.0
21
- 19,10.0
22
- 20,22.0
23
- 21,19.0
24
- 22,11.0
25
- 23,13.0
26
- 24,20.0
27
- 25,15.0
28
- 26,14.0
29
- 27,12.0
30
- 28,10.0
31
- 29,11.0
32
- 30,12.0
33
- 31,14.0
34
- 32,9.0
35
- 33,10.0
36
- 34,16.0
37
- 35,13.0
38
- 36,15.0
39
- 37,12.0
40
- 38,14.0
41
- 39,10.0
42
- 40,14.0
43
- 41,10.0
44
- 42,11.0
45
- 43,16.0
46
- 44,16.0
47
- 45,12.0
48
- 46,15.0
49
- 47,19.0
50
- 48,15.0
51
- 49,20.0
52
- 50,15.0
53
- 51,11.0
54
- 52,13.0
55
- 53,12.0
56
- 54,12.0
57
- 55,12.0
58
- 56,12.0
59
- 57,12.0
60
- 58,11.0
61
- 59,10.0
62
- 60,13.0
63
- 61,11.0
64
- 62,12.0
65
- 63,9.0
66
- 64,11.0
67
- 65,11.0
68
- 66,10.0
69
- 67,9.0
70
- 68,11.0
71
- 69,11.0
72
- 70,11.0
73
- 71,12.0
74
- 72,10.0
75
- 73,12.0
76
- 74,9.0
77
- 75,10.0
78
- 76,9.0
79
- 77,10.0
80
- 78,9.0
81
- 79,10.0
82
- 80,11.0
83
- 81,9.0
84
- 82,12.0
85
- 83,11.0
86
- 84,12.0
87
- 85,10.0
88
- 86,9.0
89
- 87,11.0
90
- 88,9.0
91
- 89,9.0
92
- 90,10.0
93
- 91,15.0
94
- 92,11.0
95
- 93,9.0
96
- 94,10.0
97
- 95,16.0
98
- 96,13.0
99
- 97,9.0
100
- 98,10.0
101
- 99,10.0
102
- 100,13.0
103
- 101,11.0
104
- 102,10.0
105
- 103,9.0
106
- 104,13.0
107
- 105,16.0
108
- 106,12.0
109
- 107,9.0
110
- 108,11.0
111
- 109,9.0
112
- 110,13.0
113
- 111,11.0
114
- 112,18.0
115
- 113,13.0
116
- 114,9.0
117
- 115,12.0
118
- 116,10.0
119
- 117,10.0
120
- 118,10.0
121
- 119,13.0
122
- 120,10.0
123
- 121,11.0
124
- 122,10.0
125
- 123,10.0
126
- 124,9.0
127
- 125,10.0
128
- 126,11.0
129
- 127,14.0
130
- 128,12.0
131
- 129,9.0
132
- 130,11.0
133
- 131,14.0
134
- 132,11.0
135
- 133,10.0
136
- 134,13.0
137
- 135,9.0
138
- 136,11.0
139
- 137,11.0
140
- 138,11.0
141
- 139,9.0
142
- 140,10.0
143
- 141,9.0
144
- 142,9.0
145
- 143,12.0
146
- 144,9.0
147
- 145,10.0
148
- 146,9.0
149
- 147,10.0
150
- 148,9.0
151
- 149,10.0
152
- 150,9.0
153
- 151,12.0
154
- 152,9.0
155
- 153,9.0
156
- 154,10.0
157
- 155,9.0
158
- 156,10.0
159
- 157,13.0
160
- 158,14.0
161
- 159,10.0
162
- 160,12.0
163
- 161,11.0
164
- 162,10.0
165
- 163,11.0
166
- 164,11.0
167
- 165,9.0
168
- 166,31.0
169
- 167,39.0
170
- 168,18.0
171
- 169,24.0
172
- 170,18.0
173
- 171,18.0
174
- 172,24.0
175
- 173,16.0
176
- 174,25.0
177
- 175,23.0
178
- 176,26.0
179
- 177,23.0
180
- 178,26.0
181
- 179,21.0
182
- 180,28.0
183
- 181,20.0
184
- 182,22.0
185
- 183,30.0
186
- 184,27.0
187
- 185,34.0
188
- 186,31.0
189
- 187,39.0
190
- 188,29.0
191
- 189,29.0
192
- 190,37.0
193
- 191,27.0
194
- 192,36.0
195
- 193,34.0
196
- 194,46.0
197
- 195,35.0
198
- 196,52.0
199
- 197,32.0
200
- 198,30.0
201
- 199,69.0
202
- 200,38.0
203
- 201,39.0
204
- 202,57.0
205
- 203,38.0
206
- 204,68.0
207
- 205,47.0
208
- 206,45.0
209
- 207,63.0
210
- 208,47.0
211
- 209,86.0
212
- 210,67.0
213
- 211,60.0
214
- 212,48.0
215
- 213,55.0
216
- 214,95.0
217
- 215,58.0
218
- 216,70.0
219
- 217,58.0
220
- 218,42.0
221
- 219,69.0
222
- 220,47.0
223
- 221,109.0
224
- 222,70.0
225
- 223,80.0
226
- 224,77.0
227
- 225,61.0
228
- 226,72.0
229
- 227,55.0
230
- 228,77.0
231
- 229,61.0
232
- 230,79.0
233
- 231,66.0
234
- 232,68.0
235
- 233,99.0
236
- 234,143.0
237
- 235,82.0
238
- 236,85.0
239
- 237,103.0
240
- 238,99.0
241
- 239,93.0
242
- 240,100.0
243
- 241,101.0
244
- 242,151.0
245
- 243,195.0
246
- 244,100.0
247
- 245,99.0
248
- 246,127.0
249
- 247,105.0
250
- 248,127.0
251
- 249,142.0
252
- 250,169.0
253
- 251,108.0
254
- 252,128.0
255
- 253,123.0
256
- 254,134.0
257
- 255,126.0
258
- 256,114.0
259
- 257,200.0
260
- 258,123.0
261
- 259,159.0
262
- 260,125.0
263
- 261,142.0
264
- 262,178.0
265
- 263,96.0
266
- 264,200.0
267
- 265,200.0
268
- 266,113.0
269
- 267,90.0
270
- 268,200.0
271
- 269,122.0
272
- 270,140.0
273
- 271,116.0
274
- 272,128.0
275
- 273,190.0
276
- 274,170.0
277
- 275,96.0
278
- 276,126.0
279
- 277,200.0
280
- 278,88.0
281
- 279,76.0
282
- 280,74.0
283
- 281,84.0
284
- 282,130.0
285
- 283,200.0
286
- 284,86.0
287
- 285,153.0
288
- 286,200.0
289
- 287,59.0
290
- 288,135.0
291
- 289,62.0
292
- 290,200.0
293
- 291,182.0
294
- 292,138.0
295
- 293,200.0
296
- 294,118.0
297
- 295,50.0
298
- 296,74.0
299
- 297,62.0
300
- 298,200.0
301
- 299,124.0
302
- 300,111.0
303
- 301,61.0
304
- 302,132.0
305
- 303,200.0
306
- 304,80.0
307
- 305,60.0
308
- 306,77.0
309
- 307,47.0
310
- 308,80.0
311
- 309,64.0
312
- 310,96.0
313
- 311,200.0
314
- 312,200.0
315
- 313,133.0
316
- 314,200.0
317
- 315,188.0
318
- 316,132.0
319
- 317,150.0
320
- 318,135.0
321
- 319,184.0
322
- 320,138.0
323
- 321,176.0
324
- 322,200.0
325
- 323,161.0
326
- 324,158.0
327
- 325,142.0
328
- 326,133.0
329
- 327,151.0
330
- 328,143.0
331
- 329,160.0
332
- 330,150.0
333
- 331,134.0
334
- 332,147.0
335
- 333,132.0
336
- 334,143.0
337
- 335,137.0
338
- 336,155.0
339
- 337,138.0
340
- 338,138.0
341
- 339,130.0
342
- 340,148.0
343
- 341,146.0
344
- 342,152.0
345
- 343,135.0
346
- 344,175.0
347
- 345,153.0
348
- 346,155.0
349
- 347,131.0
350
- 348,156.0
351
- 349,138.0
352
- 350,151.0
353
- 351,162.0
354
- 352,200.0
355
- 353,175.0
356
- 354,156.0
357
- 355,145.0
358
- 356,168.0
359
- 357,200.0
360
- 358,181.0
361
- 359,145.0
362
- 360,189.0
363
- 361,200.0
364
- 362,144.0
365
- 363,200.0
366
- 364,178.0
367
- 365,200.0
368
- 366,179.0
369
- 367,200.0
370
- 368,177.0
371
- 369,200.0
372
- 370,185.0
373
- 371,195.0
374
- 372,200.0
375
- 373,200.0
376
- 374,190.0
377
- 375,200.0
378
- 376,200.0
379
- 377,200.0
380
- 378,200.0
381
- 379,200.0
382
- 380,200.0
383
- 381,200.0
384
- 382,170.0
385
- 383,173.0
386
- 384,162.0
387
- 385,162.0
388
- 386,149.0
389
- 387,173.0
390
- 388,200.0
391
- 389,200.0
392
- 390,200.0
393
- 391,156.0
394
- 392,157.0
395
- 393,169.0
396
- 394,182.0
397
- 395,154.0
398
- 396,200.0
399
- 397,200.0
400
- 398,200.0
401
- 399,200.0
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
CartPole-v1/{Train_CartPole-v1_DuelingDQN_ray_20230407-153236 → Train_ray_CartPole-v1_DuelingDQN_20230517-224129}/config.yaml RENAMED
@@ -1,47 +1,44 @@
1
  general_cfg:
2
  algo_name: DuelingDQN
 
3
  device: cpu
4
- env_name: CartPole-v1
5
- eval_eps: 10
6
- eval_per_episode: 5
7
  load_checkpoint: false
8
- load_path: Train_CartPole-v1_DQN_20221026-054757
9
- max_steps: 200
 
 
10
  mode: train
 
11
  mp_backend: ray
12
  n_workers: 2
13
- new_step_api: true
14
- render: false
15
- render_mode: human
16
- save_fig: true
17
  seed: 1
18
- show_fig: false
19
- test_eps: 10
20
- train_eps: 400
21
- wrapper: null
22
  algo_cfg:
23
  batch_size: 64
24
  buffer_size: 100000
 
 
25
  epsilon_decay: 500
26
  epsilon_end: 0.01
27
  epsilon_start: 0.95
28
- gamma: 0.99
29
- hidden_dim: 256
30
  lr: 0.0001
31
  target_update: 4
32
  value_layers:
33
  - activation: relu
34
  layer_dim:
35
- - n_states
36
  - 256
37
  layer_type: linear
38
  - activation: relu
39
  layer_dim:
40
  - 256
41
- - 256
42
- layer_type: linear
43
- - activation: none
44
- layer_dim:
45
- - 256
46
- - n_actions
47
  layer_type: linear
 
 
 
 
 
 
 
 
1
  general_cfg:
2
  algo_name: DuelingDQN
3
+ collect_traj: false
4
  device: cpu
5
+ env_name: gym
 
 
6
  load_checkpoint: false
7
+ load_model_step: best
8
+ load_path: Train_single_CartPole-v1_DQN_20230515-211721
9
+ max_episode: 100
10
+ max_step: 200
11
  mode: train
12
+ model_save_fre: 500
13
  mp_backend: ray
14
  n_workers: 2
15
+ online_eval: true
16
+ online_eval_episode: 10
 
 
17
  seed: 1
 
 
 
 
18
  algo_cfg:
19
  batch_size: 64
20
  buffer_size: 100000
21
+ buffer_type: REPLAY_QUE
22
+ dueling: true
23
  epsilon_decay: 500
24
  epsilon_end: 0.01
25
  epsilon_start: 0.95
26
+ gamma: 0.95
 
27
  lr: 0.0001
28
  target_update: 4
29
  value_layers:
30
  - activation: relu
31
  layer_dim:
 
32
  - 256
33
  layer_type: linear
34
  - activation: relu
35
  layer_dim:
36
  - 256
 
 
 
 
 
 
37
  layer_type: linear
38
+ env_cfg:
39
+ id: CartPole-v1
40
+ ignore_params:
41
+ - wrapper
42
+ - ignore_params
43
+ render_mode: null
44
+ wrapper: null
CartPole-v1/Train_ray_CartPole-v1_DuelingDQN_20230517-224129/logs/log.txt ADDED
@@ -0,0 +1,169 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ 2023-05-17 22:41:29 - SimpleLog - INFO: - General Configs:
2
+ 2023-05-17 22:41:29 - SimpleLog - INFO: - ================================================================================
3
+ 2023-05-17 22:41:29 - SimpleLog - INFO: - Name Value Type
4
+ 2023-05-17 22:41:29 - SimpleLog - INFO: - env_name gym <class 'str'>
5
+ 2023-05-17 22:41:29 - SimpleLog - INFO: - algo_name DuelingDQN <class 'str'>
6
+ 2023-05-17 22:41:29 - SimpleLog - INFO: - mode train <class 'str'>
7
+ 2023-05-17 22:41:29 - SimpleLog - INFO: - device cpu <class 'str'>
8
+ 2023-05-17 22:41:29 - SimpleLog - INFO: - seed 1 <class 'int'>
9
+ 2023-05-17 22:41:29 - SimpleLog - INFO: - max_episode 100 <class 'int'>
10
+ 2023-05-17 22:41:29 - SimpleLog - INFO: - max_step 200 <class 'int'>
11
+ 2023-05-17 22:41:29 - SimpleLog - INFO: - collect_traj 0 <class 'bool'>
12
+ 2023-05-17 22:41:29 - SimpleLog - INFO: - mp_backend ray <class 'str'>
13
+ 2023-05-17 22:41:29 - SimpleLog - INFO: - n_workers 2 <class 'int'>
14
+ 2023-05-17 22:41:29 - SimpleLog - INFO: - online_eval 1 <class 'bool'>
15
+ 2023-05-17 22:41:29 - SimpleLog - INFO: - online_eval_episode 10 <class 'int'>
16
+ 2023-05-17 22:41:29 - SimpleLog - INFO: - model_save_fre 500 <class 'int'>
17
+ 2023-05-17 22:41:29 - SimpleLog - INFO: - load_checkpoint 0 <class 'bool'>
18
+ 2023-05-17 22:41:29 - SimpleLog - INFO: - load_path Train_single_CartPole-v1_DQN_20230515-211721 <class 'str'>
19
+ 2023-05-17 22:41:29 - SimpleLog - INFO: - load_model_step best <class 'str'>
20
+ 2023-05-17 22:41:29 - SimpleLog - INFO: - ================================================================================
21
+ 2023-05-17 22:41:29 - SimpleLog - INFO: - Algo Configs:
22
+ 2023-05-17 22:41:29 - SimpleLog - INFO: - ================================================================================
23
+ 2023-05-17 22:41:29 - SimpleLog - INFO: - Name Value Type
24
+ 2023-05-17 22:41:29 - SimpleLog - INFO: - dueling 1 <class 'bool'>
25
+ 2023-05-17 22:41:29 - SimpleLog - INFO: - epsilon_start 0.95 <class 'float'>
26
+ 2023-05-17 22:41:29 - SimpleLog - INFO: - epsilon_end 0.01 <class 'float'>
27
+ 2023-05-17 22:41:29 - SimpleLog - INFO: - epsilon_decay 500 <class 'int'>
28
+ 2023-05-17 22:41:29 - SimpleLog - INFO: - gamma 0.95 <class 'float'>
29
+ 2023-05-17 22:41:29 - SimpleLog - INFO: - lr 0.0001 <class 'float'>
30
+ 2023-05-17 22:41:29 - SimpleLog - INFO: - buffer_size 100000 <class 'int'>
31
+ 2023-05-17 22:41:29 - SimpleLog - INFO: - batch_size 64 <class 'int'>
32
+ 2023-05-17 22:41:29 - SimpleLog - INFO: - target_update 4 <class 'int'>
33
+ 2023-05-17 22:41:29 - SimpleLog - INFO: - value_layers [{'layer_type': 'linear', 'layer_dim': [256], 'activation': 'relu'}, {'layer_type': 'linear', 'layer_dim': [256], 'activation': 'relu'}] <class 'str'>
34
+ 2023-05-17 22:41:29 - SimpleLog - INFO: - buffer_type REPLAY_QUE <class 'str'>
35
+ 2023-05-17 22:41:29 - SimpleLog - INFO: - ================================================================================
36
+ 2023-05-17 22:41:29 - SimpleLog - INFO: - Env Configs:
37
+ 2023-05-17 22:41:29 - SimpleLog - INFO: - ================================================================================
38
+ 2023-05-17 22:41:29 - SimpleLog - INFO: - Name Value Type
39
+ 2023-05-17 22:41:29 - SimpleLog - INFO: - id CartPole-v1 <class 'str'>
40
+ 2023-05-17 22:41:29 - SimpleLog - INFO: - render_mode None <class 'str'>
41
+ 2023-05-17 22:41:29 - SimpleLog - INFO: - wrapper None <class 'str'>
42
+ 2023-05-17 22:41:29 - SimpleLog - INFO: - ignore_params ['wrapper', 'ignore_params'] <class 'str'>
43
+ 2023-05-17 22:41:29 - SimpleLog - INFO: - ================================================================================
44
+ 2023-05-17 22:41:35 - SimpleLog - INFO: - obs_space: Box([-4.8000002e+00 -3.4028235e+38 -4.1887903e-01 -3.4028235e+38], [4.8000002e+00 3.4028235e+38 4.1887903e-01 3.4028235e+38], (4,), float32), n_actions: Discrete(2)
45
+ 2023-05-17 22:41:38 - RayLog - INFO: - Worker 1 finished episode 0 with reward 22.0 in 22 steps
46
+ 2023-05-17 22:41:38 - RayLog - INFO: - Worker 0 finished episode 0 with reward 23.0 in 23 steps
47
+ 2023-05-17 22:41:38 - RayLog - INFO: - Worker 0 finished episode 2 with reward 10.0 in 10 steps
48
+ 2023-05-17 22:41:38 - RayLog - INFO: - Worker 0 finished episode 3 with reward 9.0 in 9 steps
49
+ 2023-05-17 22:41:38 - RayLog - INFO: - Worker 1 finished episode 2 with reward 29.0 in 29 steps
50
+ 2023-05-17 22:41:38 - RayLog - INFO: - Worker 0 finished episode 4 with reward 11.0 in 11 steps
51
+ 2023-05-17 22:41:39 - RayLog - INFO: - Worker 0 finished episode 6 with reward 15.0 in 15 steps
52
+ 2023-05-17 22:41:39 - RayLog - INFO: - Worker 1 finished episode 5 with reward 18.0 in 18 steps
53
+ 2023-05-17 22:41:39 - RayLog - INFO: - Worker 0 finished episode 7 with reward 9.0 in 9 steps
54
+ 2023-05-17 22:41:39 - RayLog - INFO: - Worker 1 finished episode 8 with reward 11.0 in 11 steps
55
+ 2023-05-17 22:41:39 - RayLog - INFO: - Worker 1 finished episode 10 with reward 13.0 in 13 steps
56
+ 2023-05-17 22:41:39 - RayLog - INFO: - Worker 0 finished episode 9 with reward 25.0 in 25 steps
57
+ 2023-05-17 22:41:40 - RayLog - INFO: - Worker 0 finished episode 12 with reward 12.0 in 12 steps
58
+ 2023-05-17 22:41:40 - RayLog - INFO: - Worker 0 finished episode 13 with reward 10.0 in 10 steps
59
+ 2023-05-17 22:41:40 - RayLog - INFO: - Worker 1 finished episode 11 with reward 33.0 in 33 steps
60
+ 2023-05-17 22:41:40 - RayLog - INFO: - Worker 0 finished episode 14 with reward 9.0 in 9 steps
61
+ 2023-05-17 22:41:40 - RayLog - INFO: - Worker 1 finished episode 15 with reward 10.0 in 10 steps
62
+ 2023-05-17 22:41:40 - RayLog - INFO: - Worker 0 finished episode 16 with reward 13.0 in 13 steps
63
+ 2023-05-17 22:41:40 - RayLog - INFO: - Worker 1 finished episode 17 with reward 16.0 in 16 steps
64
+ 2023-05-17 22:41:40 - RayLog - INFO: - Worker 0 finished episode 18 with reward 9.0 in 9 steps
65
+ 2023-05-17 22:41:41 - RayLog - INFO: - Worker 0 finished episode 20 with reward 11.0 in 11 steps
66
+ 2023-05-17 22:41:41 - RayLog - INFO: - Worker 1 finished episode 19 with reward 16.0 in 16 steps
67
+ 2023-05-17 22:41:41 - RayLog - INFO: - Worker 0 finished episode 21 with reward 18.0 in 18 steps
68
+ 2023-05-17 22:41:41 - RayLog - INFO: - Worker 1 finished episode 22 with reward 18.0 in 18 steps
69
+ 2023-05-17 22:41:41 - RayLog - INFO: - Worker 0 finished episode 23 with reward 11.0 in 11 steps
70
+ 2023-05-17 22:41:41 - RayLog - INFO: - Worker 1 finished episode 24 with reward 9.0 in 9 steps
71
+ 2023-05-17 22:41:41 - RayLog - INFO: - Worker 1 finished episode 26 with reward 9.0 in 9 steps
72
+ 2023-05-17 22:41:41 - RayLog - INFO: - Worker 0 finished episode 25 with reward 10.0 in 10 steps
73
+ 2023-05-17 22:41:42 - RayLog - INFO: - Worker 0 finished episode 28 with reward 11.0 in 11 steps
74
+ 2023-05-17 22:41:42 - RayLog - INFO: - Worker 1 finished episode 27 with reward 12.0 in 12 steps
75
+ 2023-05-17 22:41:42 - RayLog - INFO: - Worker 0 finished episode 29 with reward 15.0 in 15 steps
76
+ 2023-05-17 22:41:42 - RayLog - INFO: - Worker 1 finished episode 30 with reward 19.0 in 19 steps
77
+ 2023-05-17 22:41:42 - RayLog - INFO: - Worker 0 finished episode 31 with reward 10.0 in 10 steps
78
+ 2023-05-17 22:41:42 - RayLog - INFO: - Worker 1 finished episode 32 with reward 13.0 in 13 steps
79
+ 2023-05-17 22:41:44 - RayLog - INFO: - update_step: 500, online_eval_reward: 200.000
80
+ 2023-05-17 22:41:44 - RayLog - INFO: - current update step obtain a better online_eval_reward: 200.000, save the best model!
81
+ 2023-05-17 22:41:45 - RayLog - INFO: - Worker 0 finished episode 33 with reward 97.0 in 97 steps
82
+ 2023-05-17 22:41:45 - RayLog - INFO: - Worker 1 finished episode 34 with reward 96.0 in 96 steps
83
+ 2023-05-17 22:41:46 - RayLog - INFO: - Worker 1 finished episode 36 with reward 24.0 in 24 steps
84
+ 2023-05-17 22:41:46 - RayLog - INFO: - Worker 0 finished episode 35 with reward 34.0 in 34 steps
85
+ 2023-05-17 22:41:46 - RayLog - INFO: - Worker 1 finished episode 37 with reward 17.0 in 17 steps
86
+ 2023-05-17 22:41:46 - RayLog - INFO: - Worker 0 finished episode 38 with reward 23.0 in 23 steps
87
+ 2023-05-17 22:41:46 - RayLog - INFO: - Worker 1 finished episode 39 with reward 16.0 in 16 steps
88
+ 2023-05-17 22:41:47 - RayLog - INFO: - Worker 1 finished episode 41 with reward 17.0 in 17 steps
89
+ 2023-05-17 22:41:47 - RayLog - INFO: - Worker 0 finished episode 40 with reward 24.0 in 24 steps
90
+ 2023-05-17 22:41:47 - RayLog - INFO: - Worker 1 finished episode 42 with reward 21.0 in 21 steps
91
+ 2023-05-17 22:41:47 - RayLog - INFO: - Worker 0 finished episode 43 with reward 29.0 in 29 steps
92
+ 2023-05-17 22:41:47 - RayLog - INFO: - Worker 1 finished episode 44 with reward 22.0 in 22 steps
93
+ 2023-05-17 22:41:49 - RayLog - INFO: - update_step: 1000, online_eval_reward: 100.000
94
+ 2023-05-17 22:41:49 - RayLog - INFO: - Worker 0 finished episode 45 with reward 84.0 in 84 steps
95
+ 2023-05-17 22:41:49 - RayLog - INFO: - Worker 1 finished episode 46 with reward 75.0 in 75 steps
96
+ 2023-05-17 22:41:50 - RayLog - INFO: - Worker 1 finished episode 48 with reward 52.0 in 52 steps
97
+ 2023-05-17 22:41:50 - RayLog - INFO: - Worker 0 finished episode 47 with reward 66.0 in 66 steps
98
+ 2023-05-17 22:41:51 - RayLog - INFO: - Worker 1 finished episode 49 with reward 63.0 in 63 steps
99
+ 2023-05-17 22:41:52 - RayLog - INFO: - Worker 0 finished episode 50 with reward 94.0 in 94 steps
100
+ 2023-05-17 22:41:53 - RayLog - INFO: - Worker 1 finished episode 51 with reward 75.0 in 75 steps
101
+ 2023-05-17 22:41:54 - RayLog - INFO: - update_step: 1500, online_eval_reward: 120.000
102
+ 2023-05-17 22:41:54 - RayLog - INFO: - Worker 0 finished episode 52 with reward 102.0 in 102 steps
103
+ 2023-05-17 22:41:55 - RayLog - INFO: - Worker 1 finished episode 53 with reward 93.0 in 93 steps
104
+ 2023-05-17 22:41:57 - RayLog - INFO: - Worker 1 finished episode 55 with reward 126.0 in 126 steps
105
+ 2023-05-17 22:41:58 - RayLog - INFO: - Worker 0 finished episode 54 with reward 200.0 in 200 steps
106
+ 2023-05-17 22:41:59 - RayLog - INFO: - update_step: 2000, online_eval_reward: 200.000
107
+ 2023-05-17 22:42:01 - RayLog - INFO: - Worker 1 finished episode 56 with reward 200.0 in 200 steps
108
+ 2023-05-17 22:42:02 - RayLog - INFO: - Worker 0 finished episode 57 with reward 200.0 in 200 steps
109
+ 2023-05-17 22:42:04 - RayLog - INFO: - update_step: 2500, online_eval_reward: 167.000
110
+ 2023-05-17 22:42:05 - RayLog - INFO: - Worker 1 finished episode 58 with reward 200.0 in 200 steps
111
+ 2023-05-17 22:42:06 - RayLog - INFO: - Worker 0 finished episode 59 with reward 168.0 in 168 steps
112
+ 2023-05-17 22:42:09 - RayLog - INFO: - Worker 0 finished episode 61 with reward 164.0 in 164 steps
113
+ 2023-05-17 22:42:09 - RayLog - INFO: - update_step: 3000, online_eval_reward: 145.000
114
+ 2023-05-17 22:42:09 - RayLog - INFO: - Worker 1 finished episode 60 with reward 189.0 in 189 steps
115
+ 2023-05-17 22:42:12 - RayLog - INFO: - Worker 0 finished episode 62 with reward 152.0 in 152 steps
116
+ 2023-05-17 22:42:12 - RayLog - INFO: - Worker 1 finished episode 63 with reward 162.0 in 162 steps
117
+ 2023-05-17 22:42:14 - RayLog - INFO: - update_step: 3500, online_eval_reward: 151.000
118
+ 2023-05-17 22:42:15 - RayLog - INFO: - Worker 0 finished episode 64 with reward 143.0 in 143 steps
119
+ 2023-05-17 22:42:16 - RayLog - INFO: - Worker 1 finished episode 65 with reward 163.0 in 163 steps
120
+ 2023-05-17 22:42:19 - RayLog - INFO: - Worker 0 finished episode 66 with reward 187.0 in 187 steps
121
+ 2023-05-17 22:42:19 - RayLog - INFO: - update_step: 4000, online_eval_reward: 189.000
122
+ 2023-05-17 22:42:20 - RayLog - INFO: - Worker 1 finished episode 67 with reward 200.0 in 200 steps
123
+ 2023-05-17 22:42:22 - RayLog - INFO: - Worker 0 finished episode 68 with reward 173.0 in 173 steps
124
+ 2023-05-17 22:42:23 - RayLog - INFO: - Worker 1 finished episode 69 with reward 170.0 in 170 steps
125
+ 2023-05-17 22:42:24 - RayLog - INFO: - update_step: 4500, online_eval_reward: 178.000
126
+ 2023-05-17 22:42:26 - RayLog - INFO: - Worker 0 finished episode 70 with reward 200.0 in 200 steps
127
+ 2023-05-17 22:42:27 - RayLog - INFO: - Worker 1 finished episode 71 with reward 200.0 in 200 steps
128
+ 2023-05-17 22:42:30 - RayLog - INFO: - update_step: 5000, online_eval_reward: 197.000
129
+ 2023-05-17 22:42:30 - RayLog - INFO: - Worker 0 finished episode 72 with reward 200.0 in 200 steps
130
+ 2023-05-17 22:42:31 - RayLog - INFO: - Worker 1 finished episode 73 with reward 200.0 in 200 steps
131
+ 2023-05-17 22:42:35 - RayLog - INFO: - Worker 0 finished episode 74 with reward 197.0 in 197 steps
132
+ 2023-05-17 22:42:35 - RayLog - INFO: - update_step: 5500, online_eval_reward: 200.000
133
+ 2023-05-17 22:42:36 - RayLog - INFO: - Worker 1 finished episode 75 with reward 200.0 in 200 steps
134
+ 2023-05-17 22:42:39 - RayLog - INFO: - Worker 0 finished episode 76 with reward 200.0 in 200 steps
135
+ 2023-05-17 22:42:40 - RayLog - INFO: - Worker 1 finished episode 77 with reward 200.0 in 200 steps
136
+ 2023-05-17 22:42:40 - RayLog - INFO: - update_step: 6000, online_eval_reward: 200.000
137
+ 2023-05-17 22:42:43 - RayLog - INFO: - Worker 0 finished episode 78 with reward 200.0 in 200 steps
138
+ 2023-05-17 22:42:44 - RayLog - INFO: - Worker 1 finished episode 79 with reward 200.0 in 200 steps
139
+ 2023-05-17 22:42:45 - RayLog - INFO: - update_step: 6500, online_eval_reward: 200.000
140
+ 2023-05-17 22:42:47 - RayLog - INFO: - Worker 0 finished episode 80 with reward 200.0 in 200 steps
141
+ 2023-05-17 22:42:48 - RayLog - INFO: - Worker 1 finished episode 81 with reward 200.0 in 200 steps
142
+ 2023-05-17 22:42:51 - RayLog - INFO: - update_step: 7000, online_eval_reward: 200.000
143
+ 2023-05-17 22:42:52 - RayLog - INFO: - Worker 0 finished episode 82 with reward 200.0 in 200 steps
144
+ 2023-05-17 22:42:53 - RayLog - INFO: - Worker 1 finished episode 83 with reward 200.0 in 200 steps
145
+ 2023-05-17 22:42:56 - RayLog - INFO: - Worker 0 finished episode 84 with reward 200.0 in 200 steps
146
+ 2023-05-17 22:42:56 - RayLog - INFO: - update_step: 7500, online_eval_reward: 200.000
147
+ 2023-05-17 22:42:57 - RayLog - INFO: - Worker 1 finished episode 85 with reward 200.0 in 200 steps
148
+ 2023-05-17 22:43:00 - RayLog - INFO: - Worker 0 finished episode 86 with reward 200.0 in 200 steps
149
+ 2023-05-17 22:43:01 - RayLog - INFO: - Worker 1 finished episode 87 with reward 200.0 in 200 steps
150
+ 2023-05-17 22:43:02 - RayLog - INFO: - update_step: 8000, online_eval_reward: 200.000
151
+ 2023-05-17 22:43:05 - RayLog - INFO: - Worker 0 finished episode 88 with reward 200.0 in 200 steps
152
+ 2023-05-17 22:43:06 - RayLog - INFO: - Worker 1 finished episode 89 with reward 200.0 in 200 steps
153
+ 2023-05-17 22:43:07 - RayLog - INFO: - update_step: 8500, online_eval_reward: 200.000
154
+ 2023-05-17 22:43:09 - RayLog - INFO: - Worker 0 finished episode 90 with reward 200.0 in 200 steps
155
+ 2023-05-17 22:43:10 - RayLog - INFO: - Worker 1 finished episode 91 with reward 200.0 in 200 steps
156
+ 2023-05-17 22:43:12 - RayLog - INFO: - update_step: 9000, online_eval_reward: 200.000
157
+ 2023-05-17 22:43:13 - RayLog - INFO: - Worker 0 finished episode 92 with reward 200.0 in 200 steps
158
+ 2023-05-17 22:43:14 - RayLog - INFO: - Worker 1 finished episode 93 with reward 200.0 in 200 steps
159
+ 2023-05-17 22:43:18 - RayLog - INFO: - Worker 0 finished episode 94 with reward 200.0 in 200 steps
160
+ 2023-05-17 22:43:18 - RayLog - INFO: - update_step: 9500, online_eval_reward: 200.000
161
+ 2023-05-17 22:43:19 - RayLog - INFO: - Worker 1 finished episode 95 with reward 200.0 in 200 steps
162
+ 2023-05-17 22:43:22 - RayLog - INFO: - Worker 0 finished episode 96 with reward 200.0 in 200 steps
163
+ 2023-05-17 22:43:23 - RayLog - INFO: - Worker 1 finished episode 97 with reward 200.0 in 200 steps
164
+ 2023-05-17 22:43:23 - RayLog - INFO: - update_step: 10000, online_eval_reward: 200.000
165
+ 2023-05-17 22:43:26 - RayLog - INFO: - Worker 0 finished episode 98 with reward 200.0 in 200 steps
166
+ 2023-05-17 22:43:27 - RayLog - INFO: - Worker 1 finished episode 99 with reward 200.0 in 200 steps
167
+ 2023-05-17 22:43:29 - RayLog - INFO: - update_step: 10500, online_eval_reward: 200.000
168
+ 2023-05-17 22:43:30 - RayLog - INFO: - Worker 0 finished episode 100 with reward 200.0 in 200 steps
169
+ 2023-05-17 22:43:32 - SimpleLog - INFO: - Finish training! total time consumed: 122.69s
CartPole-v1/Train_ray_CartPole-v1_DuelingDQN_20230517-224129/models/1000 ADDED
Binary file (548 kB). View file
 
CartPole-v1/Train_ray_CartPole-v1_DuelingDQN_20230517-224129/models/10000 ADDED
Binary file (548 kB). View file
 
CartPole-v1/Train_ray_CartPole-v1_DuelingDQN_20230517-224129/models/10500 ADDED
Binary file (548 kB). View file
 
CartPole-v1/Train_ray_CartPole-v1_DuelingDQN_20230517-224129/models/1500 ADDED
Binary file (548 kB). View file
 
CartPole-v1/Train_ray_CartPole-v1_DuelingDQN_20230517-224129/models/2000 ADDED
Binary file (548 kB). View file
 
CartPole-v1/Train_ray_CartPole-v1_DuelingDQN_20230517-224129/models/2500 ADDED
Binary file (548 kB). View file
 
CartPole-v1/Train_ray_CartPole-v1_DuelingDQN_20230517-224129/models/3000 ADDED
Binary file (548 kB). View file
 
CartPole-v1/Train_ray_CartPole-v1_DuelingDQN_20230517-224129/models/3500 ADDED
Binary file (548 kB). View file
 
CartPole-v1/Train_ray_CartPole-v1_DuelingDQN_20230517-224129/models/4000 ADDED
Binary file (548 kB). View file
 
CartPole-v1/Train_ray_CartPole-v1_DuelingDQN_20230517-224129/models/4500 ADDED
Binary file (548 kB). View file
 
CartPole-v1/Train_ray_CartPole-v1_DuelingDQN_20230517-224129/models/500 ADDED
Binary file (548 kB). View file
 
CartPole-v1/Train_ray_CartPole-v1_DuelingDQN_20230517-224129/models/5000 ADDED
Binary file (548 kB). View file
 
CartPole-v1/Train_ray_CartPole-v1_DuelingDQN_20230517-224129/models/5500 ADDED
Binary file (548 kB). View file
 
CartPole-v1/Train_ray_CartPole-v1_DuelingDQN_20230517-224129/models/6000 ADDED
Binary file (548 kB). View file
 
CartPole-v1/Train_ray_CartPole-v1_DuelingDQN_20230517-224129/models/6500 ADDED
Binary file (548 kB). View file
 
CartPole-v1/Train_ray_CartPole-v1_DuelingDQN_20230517-224129/models/7000 ADDED
Binary file (548 kB). View file
 
CartPole-v1/Train_ray_CartPole-v1_DuelingDQN_20230517-224129/models/7500 ADDED
Binary file (548 kB). View file
 
CartPole-v1/Train_ray_CartPole-v1_DuelingDQN_20230517-224129/models/8000 ADDED
Binary file (548 kB). View file
 
CartPole-v1/Train_ray_CartPole-v1_DuelingDQN_20230517-224129/models/8500 ADDED
Binary file (548 kB). View file
 
CartPole-v1/Train_ray_CartPole-v1_DuelingDQN_20230517-224129/models/9000 ADDED
Binary file (548 kB). View file
 
CartPole-v1/Train_ray_CartPole-v1_DuelingDQN_20230517-224129/models/9500 ADDED
Binary file (548 kB). View file
 
CartPole-v1/Train_ray_CartPole-v1_DuelingDQN_20230517-224129/models/best ADDED
Binary file (548 kB). View file
 
CartPole-v1/{Test_CartPole-v1_DuelingDQN_mp_20230407-171120/models/checkpoint.pt → Train_ray_CartPole-v1_DuelingDQN_20230517-224129/tb_logs/interact/events.out.tfevents.1684334489.DESKTOP-H34HQIQ.80856.0} RENAMED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:87aab291b33d6423c3c54eced436183398700a290427c1913be9d65f5503b5ae
3
- size 537607
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:1c9b4a566642bacd5610c3e7b42d10f1feb9704e2a4cb2c004a7d85f75a0aba9
3
+ size 40
CartPole-v1/{Test_CartPole-v1_DuelingDQN_ray_20230407-165208/models/checkpoint.pt → Train_ray_CartPole-v1_DuelingDQN_20230517-224129/tb_logs/interact/events.out.tfevents.1684334497.DESKTOP-H34HQIQ.84100.0} RENAMED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:03f1262598e3d636dd22e3b2fc0dfe52bf7a55348d54f51f02a8410682ec5a18
3
- size 537607
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:9867609ac2d205c8c66fe7bc380a67b26f152a046fb5e97d523f5b2bf1c147fd
3
+ size 10028