diff --git a/CartPole-v1/Test_CartPole-v1_DoubleDQN_20221122-125611/config.yaml b/CartPole-v1/Test_CartPole-v1_DoubleDQN_20221122-125611/config.yaml deleted file mode 100644 index e6762719835c73d20bafa5a2e0c20cbffcbeb24f..0000000000000000000000000000000000000000 --- a/CartPole-v1/Test_CartPole-v1_DoubleDQN_20221122-125611/config.yaml +++ /dev/null @@ -1,40 +0,0 @@ -general_cfg: - algo_name: DoubleDQN - device: cuda - env_name: CartPole-v1 - eval_eps: 10 - eval_per_episode: 5 - load_checkpoint: true - load_path: Train_CartPole-v1_DoubleDQN_20221122-125516 - max_steps: 200 - mode: test - save_fig: true - seed: 1 - show_fig: false - test_eps: 10 - train_eps: 100 -algo_cfg: - batch_size: 64 - buffer_size: 100000 - epsilon_decay: 500 - epsilon_end: 0.01 - epsilon_start: 0.95 - gamma: 0.99 - lr: 0.0001 - target_update: 4 - value_layers: - - activation: relu - layer_dim: - - n_states - - 256 - layer_type: linear - - activation: relu - layer_dim: - - 256 - - 256 - layer_type: linear - - activation: none - layer_dim: - - 256 - - n_actions - layer_type: linear diff --git a/CartPole-v1/Test_CartPole-v1_DoubleDQN_20221122-125611/logs/log.txt b/CartPole-v1/Test_CartPole-v1_DoubleDQN_20221122-125611/logs/log.txt deleted file mode 100644 index a40125e02a3b2b63bec1969abf59e09d8177382c..0000000000000000000000000000000000000000 --- a/CartPole-v1/Test_CartPole-v1_DoubleDQN_20221122-125611/logs/log.txt +++ /dev/null @@ -1,14 +0,0 @@ -2022-11-22 12:56:12 - r - INFO: - n_states: 4, n_actions: 2 -2022-11-22 12:56:14 - r - INFO: - Start testing! -2022-11-22 12:56:14 - r - INFO: - Env: CartPole-v1, Algorithm: DoubleDQN, Device: cuda -2022-11-22 12:56:14 - r - INFO: - Episode: 1/10, Reward: 200.000, Step: 200 -2022-11-22 12:56:15 - r - INFO: - Episode: 2/10, Reward: 200.000, Step: 200 -2022-11-22 12:56:15 - r - INFO: - Episode: 3/10, Reward: 200.000, Step: 200 -2022-11-22 12:56:15 - r - INFO: - Episode: 4/10, Reward: 200.000, Step: 200 -2022-11-22 12:56:15 - r - INFO: - Episode: 5/10, Reward: 138.000, Step: 138 -2022-11-22 12:56:15 - r - INFO: - Episode: 6/10, Reward: 200.000, Step: 200 -2022-11-22 12:56:15 - r - INFO: - Episode: 7/10, Reward: 200.000, Step: 200 -2022-11-22 12:56:15 - r - INFO: - Episode: 8/10, Reward: 200.000, Step: 200 -2022-11-22 12:56:15 - r - INFO: - Episode: 9/10, Reward: 200.000, Step: 200 -2022-11-22 12:56:15 - r - INFO: - Episode: 10/10, Reward: 200.000, Step: 200 -2022-11-22 12:56:15 - r - INFO: - Finish testing! diff --git a/CartPole-v1/Test_CartPole-v1_DoubleDQN_20221122-125611/models/checkpoint.pth b/CartPole-v1/Test_CartPole-v1_DoubleDQN_20221122-125611/models/checkpoint.pth deleted file mode 100644 index 27cfa80035673442926e5e8ae4a75214ae1e6aad..0000000000000000000000000000000000000000 --- a/CartPole-v1/Test_CartPole-v1_DoubleDQN_20221122-125611/models/checkpoint.pth +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:92c80e643e52ee0e109e55ba083247021287455374ec28f27c4f2705e51fee23 -size 272471 diff --git a/CartPole-v1/Test_CartPole-v1_DoubleDQN_20221122-125611/results/learning_curve.png b/CartPole-v1/Test_CartPole-v1_DoubleDQN_20221122-125611/results/learning_curve.png deleted file mode 100644 index e23e1854c6c6d9577ebf4b54c724d0238d167a37..0000000000000000000000000000000000000000 Binary files a/CartPole-v1/Test_CartPole-v1_DoubleDQN_20221122-125611/results/learning_curve.png and /dev/null differ diff --git a/CartPole-v1/Test_CartPole-v1_DoubleDQN_20221122-125611/results/res.csv b/CartPole-v1/Test_CartPole-v1_DoubleDQN_20221122-125611/results/res.csv deleted file mode 100644 index e167003140d6dce2789243c707e2d930452261d8..0000000000000000000000000000000000000000 --- a/CartPole-v1/Test_CartPole-v1_DoubleDQN_20221122-125611/results/res.csv +++ /dev/null @@ -1,11 +0,0 @@ -episodes,rewards,steps -0,200.0,200 -1,200.0,200 -2,200.0,200 -3,200.0,200 -4,138.0,138 -5,200.0,200 -6,200.0,200 -7,200.0,200 -8,200.0,200 -9,200.0,200 diff --git a/CartPole-v1/Test_CartPole-v1_DoubleDQN_mp_20230406-160410/config.yaml b/CartPole-v1/Test_CartPole-v1_DoubleDQN_mp_20230406-160410/config.yaml deleted file mode 100644 index 072d7483adcca10b5dd189b74cf30ae7cc5aa7a2..0000000000000000000000000000000000000000 --- a/CartPole-v1/Test_CartPole-v1_DoubleDQN_mp_20230406-160410/config.yaml +++ /dev/null @@ -1,46 +0,0 @@ -general_cfg: - algo_name: DoubleDQN - device: cpu - env_name: CartPole-v1 - eval_eps: 10 - eval_per_episode: 5 - load_checkpoint: true - load_path: Train_CartPole-v1_DoubleDQN_mp_20230406-160028 - max_steps: 200 - mode: test - mp_backend: mp - n_workers: 1 - new_step_api: true - render: false - render_mode: human - save_fig: true - seed: 1 - show_fig: false - test_eps: 10 - train_eps: 400 - wrapper: null -algo_cfg: - batch_size: 64 - buffer_size: 100000 - epsilon_decay: 500 - epsilon_end: 0.01 - epsilon_start: 0.95 - gamma: 0.95 - lr: 0.0001 - target_update: 4 - value_layers: - - activation: relu - layer_dim: - - n_states - - 256 - layer_type: linear - - activation: relu - layer_dim: - - 256 - - 256 - layer_type: linear - - activation: none - layer_dim: - - 256 - - n_actions - layer_type: linear diff --git a/CartPole-v1/Test_CartPole-v1_DoubleDQN_mp_20230406-160410/logs/log.txt b/CartPole-v1/Test_CartPole-v1_DoubleDQN_mp_20230406-160410/logs/log.txt deleted file mode 100644 index d7d28ca8cfd26732536dd28d1f4e413731193035..0000000000000000000000000000000000000000 --- a/CartPole-v1/Test_CartPole-v1_DoubleDQN_mp_20230406-160410/logs/log.txt +++ /dev/null @@ -1,52 +0,0 @@ -2023-04-06 16:04:10 - r - INFO: - Hyperparameters: -2023-04-06 16:04:10 - r - INFO: - ================================================================================ -2023-04-06 16:04:10 - r - INFO: - Name Value Type -2023-04-06 16:04:10 - r - INFO: - env_name CartPole-v1 -2023-04-06 16:04:10 - r - INFO: - new_step_api 1 -2023-04-06 16:04:10 - r - INFO: - wrapper None -2023-04-06 16:04:10 - r - INFO: - render 0 -2023-04-06 16:04:10 - r - INFO: - render_mode human -2023-04-06 16:04:10 - r - INFO: - algo_name DoubleDQN -2023-04-06 16:04:10 - r - INFO: - mode test -2023-04-06 16:04:10 - r - INFO: - mp_backend mp -2023-04-06 16:04:10 - r - INFO: - seed 1 -2023-04-06 16:04:10 - r - INFO: - device cpu -2023-04-06 16:04:10 - r - INFO: - train_eps 400 -2023-04-06 16:04:10 - r - INFO: - test_eps 10 -2023-04-06 16:04:10 - r - INFO: - eval_eps 10 -2023-04-06 16:04:10 - r - INFO: - eval_per_episode 5 -2023-04-06 16:04:10 - r - INFO: - max_steps 200 -2023-04-06 16:04:10 - r - INFO: - load_checkpoint 1 -2023-04-06 16:04:10 - r - INFO: - load_path Train_CartPole-v1_DoubleDQN_mp_20230406-160028 -2023-04-06 16:04:10 - r - INFO: - show_fig 0 -2023-04-06 16:04:10 - r - INFO: - save_fig 1 -2023-04-06 16:04:10 - r - INFO: - n_workers 1 -2023-04-06 16:04:10 - r - INFO: - epsilon_start 0.95 -2023-04-06 16:04:10 - r - INFO: - epsilon_end 0.01 -2023-04-06 16:04:10 - r - INFO: - epsilon_decay 500 -2023-04-06 16:04:10 - r - INFO: - gamma 0.95 -2023-04-06 16:04:10 - r - INFO: - lr 0.0001 -2023-04-06 16:04:10 - r - INFO: - buffer_size 100000 -2023-04-06 16:04:10 - r - INFO: - batch_size 64 -2023-04-06 16:04:10 - r - INFO: - target_update 4 -2023-04-06 16:04:10 - r - INFO: - value_layers [{'layer_type': 'linear', 'layer_dim': ['n_states', 256], 'activation': 'relu'}, {'layer_type': 'linear', 'layer_dim': [256, 256], 'activation': 'relu'}, {'layer_type': 'linear', 'layer_dim': [256, 'n_actions'], 'activation': 'none'}] -2023-04-06 16:04:10 - r - INFO: - task_dir /media/disk/gsc/joyrl-offline/tasks/Test_CartPole-v1_DoubleDQN_20230406-160410 -2023-04-06 16:04:10 - r - INFO: - res_dir /media/disk/gsc/joyrl-offline/tasks/Test_CartPole-v1_DoubleDQN_20230406-160410/results -2023-04-06 16:04:10 - r - INFO: - log_dir /media/disk/gsc/joyrl-offline/tasks/Test_CartPole-v1_DoubleDQN_20230406-160410/logs -2023-04-06 16:04:10 - r - INFO: - traj_dir /media/disk/gsc/joyrl-offline/tasks/Test_CartPole-v1_DoubleDQN_20230406-160410/traj -2023-04-06 16:04:10 - r - INFO: - video_dir /media/disk/gsc/joyrl-offline/tasks/Test_CartPole-v1_DoubleDQN_20230406-160410/videos -2023-04-06 16:04:10 - r - INFO: - ================================================================================ -2023-04-06 16:04:10 - r - INFO: - n_states: 4, n_actions: 2 -2023-04-06 16:04:10 - r - INFO: - Start testing! -2023-04-06 16:04:10 - r - INFO: - Env: CartPole-v1, Algorithm: DoubleDQN, Device: cpu -2023-04-06 16:04:10 - r - INFO: - Episode: 1/10, Reward: 200.000, Step: 200 -2023-04-06 16:04:10 - r - INFO: - Episode: 2/10, Reward: 200.000, Step: 200 -2023-04-06 16:04:10 - r - INFO: - Episode: 3/10, Reward: 200.000, Step: 200 -2023-04-06 16:04:10 - r - INFO: - Episode: 4/10, Reward: 200.000, Step: 200 -2023-04-06 16:04:10 - r - INFO: - Episode: 5/10, Reward: 200.000, Step: 200 -2023-04-06 16:04:10 - r - INFO: - Episode: 6/10, Reward: 200.000, Step: 200 -2023-04-06 16:04:10 - r - INFO: - Episode: 7/10, Reward: 200.000, Step: 200 -2023-04-06 16:04:10 - r - INFO: - Episode: 8/10, Reward: 200.000, Step: 200 -2023-04-06 16:04:10 - r - INFO: - Episode: 9/10, Reward: 200.000, Step: 200 -2023-04-06 16:04:10 - r - INFO: - Episode: 10/10, Reward: 198.000, Step: 198 -2023-04-06 16:04:10 - r - INFO: - Finish testing! diff --git a/CartPole-v1/Test_CartPole-v1_DoubleDQN_mp_20230406-160410/models/checkpoint.pth b/CartPole-v1/Test_CartPole-v1_DoubleDQN_mp_20230406-160410/models/checkpoint.pth deleted file mode 100644 index a145e71eceffd117546543c32d5e5214414c6eb2..0000000000000000000000000000000000000000 --- a/CartPole-v1/Test_CartPole-v1_DoubleDQN_mp_20230406-160410/models/checkpoint.pth +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:4cd537f6ab4ac0ff27caa323076685e3a3fff04b064dbdfc509baae76e9a9406 -size 272407 diff --git a/CartPole-v1/Test_CartPole-v1_DoubleDQN_mp_20230406-160410/results/learning_curve.png b/CartPole-v1/Test_CartPole-v1_DoubleDQN_mp_20230406-160410/results/learning_curve.png deleted file mode 100644 index 25a6f1170c6b7006af7cbbeb89719d0473d5d424..0000000000000000000000000000000000000000 Binary files a/CartPole-v1/Test_CartPole-v1_DoubleDQN_mp_20230406-160410/results/learning_curve.png and /dev/null differ diff --git a/CartPole-v1/Test_CartPole-v1_DoubleDQN_mp_20230406-160410/results/res.csv b/CartPole-v1/Test_CartPole-v1_DoubleDQN_mp_20230406-160410/results/res.csv deleted file mode 100644 index a5e22806d9977ec8a28d3da237cdd05d757ef4d9..0000000000000000000000000000000000000000 --- a/CartPole-v1/Test_CartPole-v1_DoubleDQN_mp_20230406-160410/results/res.csv +++ /dev/null @@ -1,11 +0,0 @@ -episodes,rewards,steps -0,200.0,200 -1,200.0,200 -2,200.0,200 -3,200.0,200 -4,200.0,200 -5,200.0,200 -6,200.0,200 -7,200.0,200 -8,200.0,200 -9,198.0,198 diff --git a/CartPole-v1/Test_CartPole-v1_DoubleDQN_ray_20230406-170348/config.yaml b/CartPole-v1/Test_CartPole-v1_DoubleDQN_ray_20230406-170348/config.yaml deleted file mode 100644 index b53ab4979916e32cdfbbb7f1af10a280b319963b..0000000000000000000000000000000000000000 --- a/CartPole-v1/Test_CartPole-v1_DoubleDQN_ray_20230406-170348/config.yaml +++ /dev/null @@ -1,46 +0,0 @@ -general_cfg: - algo_name: DoubleDQN - device: cpu - env_name: CartPole-v1 - eval_eps: 10 - eval_per_episode: 5 - load_checkpoint: true - load_path: Train_CartPole-v1_DoubleDQN_ray_20230406-162938 - max_steps: 200 - mode: test - mp_backend: ray - n_workers: 1 - new_step_api: true - render: false - render_mode: human - save_fig: true - seed: 1 - show_fig: false - test_eps: 10 - train_eps: 400 - wrapper: null -algo_cfg: - batch_size: 64 - buffer_size: 100000 - epsilon_decay: 500 - epsilon_end: 0.01 - epsilon_start: 0.95 - gamma: 0.95 - lr: 0.0001 - target_update: 4 - value_layers: - - activation: relu - layer_dim: - - n_states - - 256 - layer_type: linear - - activation: relu - layer_dim: - - 256 - - 256 - layer_type: linear - - activation: none - layer_dim: - - 256 - - n_actions - layer_type: linear diff --git a/CartPole-v1/Test_CartPole-v1_DoubleDQN_ray_20230406-170348/logs/log.txt b/CartPole-v1/Test_CartPole-v1_DoubleDQN_ray_20230406-170348/logs/log.txt deleted file mode 100644 index 2ffc7ffa9344bf08e6ef4582bc56185af96b4586..0000000000000000000000000000000000000000 --- a/CartPole-v1/Test_CartPole-v1_DoubleDQN_ray_20230406-170348/logs/log.txt +++ /dev/null @@ -1,52 +0,0 @@ -2023-04-06 17:03:48 - r - INFO: - Hyperparameters: -2023-04-06 17:03:48 - r - INFO: - ================================================================================ -2023-04-06 17:03:48 - r - INFO: - Name Value Type -2023-04-06 17:03:48 - r - INFO: - env_name CartPole-v1 -2023-04-06 17:03:48 - r - INFO: - new_step_api 1 -2023-04-06 17:03:48 - r - INFO: - wrapper None -2023-04-06 17:03:48 - r - INFO: - render 0 -2023-04-06 17:03:48 - r - INFO: - render_mode human -2023-04-06 17:03:48 - r - INFO: - algo_name DoubleDQN -2023-04-06 17:03:48 - r - INFO: - mode test -2023-04-06 17:03:48 - r - INFO: - mp_backend ray -2023-04-06 17:03:48 - r - INFO: - seed 1 -2023-04-06 17:03:48 - r - INFO: - device cpu -2023-04-06 17:03:48 - r - INFO: - train_eps 400 -2023-04-06 17:03:48 - r - INFO: - test_eps 10 -2023-04-06 17:03:48 - r - INFO: - eval_eps 10 -2023-04-06 17:03:48 - r - INFO: - eval_per_episode 5 -2023-04-06 17:03:48 - r - INFO: - max_steps 200 -2023-04-06 17:03:48 - r - INFO: - load_checkpoint 1 -2023-04-06 17:03:48 - r - INFO: - load_path Train_CartPole-v1_DoubleDQN_ray_20230406-162938 -2023-04-06 17:03:48 - r - INFO: - show_fig 0 -2023-04-06 17:03:48 - r - INFO: - save_fig 1 -2023-04-06 17:03:48 - r - INFO: - n_workers 1 -2023-04-06 17:03:48 - r - INFO: - epsilon_start 0.95 -2023-04-06 17:03:48 - r - INFO: - epsilon_end 0.01 -2023-04-06 17:03:48 - r - INFO: - epsilon_decay 500 -2023-04-06 17:03:48 - r - INFO: - gamma 0.95 -2023-04-06 17:03:48 - r - INFO: - lr 0.0001 -2023-04-06 17:03:48 - r - INFO: - buffer_size 100000 -2023-04-06 17:03:48 - r - INFO: - batch_size 64 -2023-04-06 17:03:48 - r - INFO: - target_update 4 -2023-04-06 17:03:48 - r - INFO: - value_layers [{'layer_type': 'linear', 'layer_dim': ['n_states', 256], 'activation': 'relu'}, {'layer_type': 'linear', 'layer_dim': [256, 256], 'activation': 'relu'}, {'layer_type': 'linear', 'layer_dim': [256, 'n_actions'], 'activation': 'none'}] -2023-04-06 17:03:48 - r - INFO: - task_dir /media/disk/gsc/joyrl-offline/tasks/Test_CartPole-v1_DoubleDQN_20230406-170348 -2023-04-06 17:03:48 - r - INFO: - res_dir /media/disk/gsc/joyrl-offline/tasks/Test_CartPole-v1_DoubleDQN_20230406-170348/results -2023-04-06 17:03:48 - r - INFO: - log_dir /media/disk/gsc/joyrl-offline/tasks/Test_CartPole-v1_DoubleDQN_20230406-170348/logs -2023-04-06 17:03:48 - r - INFO: - traj_dir /media/disk/gsc/joyrl-offline/tasks/Test_CartPole-v1_DoubleDQN_20230406-170348/traj -2023-04-06 17:03:48 - r - INFO: - video_dir /media/disk/gsc/joyrl-offline/tasks/Test_CartPole-v1_DoubleDQN_20230406-170348/videos -2023-04-06 17:03:48 - r - INFO: - ================================================================================ -2023-04-06 17:03:48 - r - INFO: - n_states: 4, n_actions: 2 -2023-04-06 17:03:48 - r - INFO: - Start testing! -2023-04-06 17:03:48 - r - INFO: - Env: CartPole-v1, Algorithm: DoubleDQN, Device: cpu -2023-04-06 17:03:48 - r - INFO: - Episode: 1/10, Reward: 200.000, Step: 200 -2023-04-06 17:03:48 - r - INFO: - Episode: 2/10, Reward: 200.000, Step: 200 -2023-04-06 17:03:48 - r - INFO: - Episode: 3/10, Reward: 200.000, Step: 200 -2023-04-06 17:03:48 - r - INFO: - Episode: 4/10, Reward: 200.000, Step: 200 -2023-04-06 17:03:48 - r - INFO: - Episode: 5/10, Reward: 200.000, Step: 200 -2023-04-06 17:03:48 - r - INFO: - Episode: 6/10, Reward: 200.000, Step: 200 -2023-04-06 17:03:48 - r - INFO: - Episode: 7/10, Reward: 200.000, Step: 200 -2023-04-06 17:03:48 - r - INFO: - Episode: 8/10, Reward: 200.000, Step: 200 -2023-04-06 17:03:48 - r - INFO: - Episode: 9/10, Reward: 200.000, Step: 200 -2023-04-06 17:03:49 - r - INFO: - Episode: 10/10, Reward: 200.000, Step: 200 -2023-04-06 17:03:49 - r - INFO: - Finish testing! diff --git a/CartPole-v1/Test_CartPole-v1_DoubleDQN_ray_20230406-170348/models/checkpoint.pt b/CartPole-v1/Test_CartPole-v1_DoubleDQN_ray_20230406-170348/models/checkpoint.pt deleted file mode 100644 index 2cfaf6fe119daae13b1e48521ac5f0a6dfd5731c..0000000000000000000000000000000000000000 --- a/CartPole-v1/Test_CartPole-v1_DoubleDQN_ray_20230406-170348/models/checkpoint.pt +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:7d3a724152cea263dec5b58d80bee101405e7b3268a34c265d414ebbd771c5ac -size 272407 diff --git a/CartPole-v1/Test_CartPole-v1_DoubleDQN_ray_20230406-170348/results/learning_curve.png b/CartPole-v1/Test_CartPole-v1_DoubleDQN_ray_20230406-170348/results/learning_curve.png deleted file mode 100644 index 3b7b4f7de0438536dbc520bdf9b56573679c088e..0000000000000000000000000000000000000000 Binary files a/CartPole-v1/Test_CartPole-v1_DoubleDQN_ray_20230406-170348/results/learning_curve.png and /dev/null differ diff --git a/CartPole-v1/Test_CartPole-v1_DoubleDQN_ray_20230406-170348/results/res.csv b/CartPole-v1/Test_CartPole-v1_DoubleDQN_ray_20230406-170348/results/res.csv deleted file mode 100644 index cbbcf2eb2cccfce2f3060e96b3484890fe578ac1..0000000000000000000000000000000000000000 --- a/CartPole-v1/Test_CartPole-v1_DoubleDQN_ray_20230406-170348/results/res.csv +++ /dev/null @@ -1,11 +0,0 @@ -episodes,rewards,steps -0,200.0,200 -1,200.0,200 -2,200.0,200 -3,200.0,200 -4,200.0,200 -5,200.0,200 -6,200.0,200 -7,200.0,200 -8,200.0,200 -9,200.0,200 diff --git a/CartPole-v1/Test_single_CartPole-v1_DoubleDQN_20230516-115305/config.yaml b/CartPole-v1/Test_single_CartPole-v1_DoubleDQN_20230516-115305/config.yaml new file mode 100644 index 0000000000000000000000000000000000000000..255734e31d4b8bc9b03222b9244586f7b4c5bac7 --- /dev/null +++ b/CartPole-v1/Test_single_CartPole-v1_DoubleDQN_20230516-115305/config.yaml @@ -0,0 +1,43 @@ +general_cfg: + algo_name: DoubleDQN + collect_traj: false + device: cpu + env_name: gym + load_checkpoint: true + load_model_step: best + load_path: Train_single_CartPole-v1_DoubleDQN_20230516-114540 + max_episode: 10 + max_step: 200 + mode: test + model_save_fre: 500 + mp_backend: single + n_workers: 2 + online_eval: true + online_eval_episode: 10 + seed: 1 +algo_cfg: + batch_size: 64 + buffer_size: 100000 + buffer_type: REPLAY_QUE + epsilon_decay: 500 + epsilon_end: 0.01 + epsilon_start: 0.95 + gamma: 0.99 + lr: 0.0001 + target_update: 4 + value_layers: + - activation: relu + layer_dim: + - 256 + layer_type: linear + - activation: relu + layer_dim: + - 256 + layer_type: linear +env_cfg: + id: CartPole-v1 + ignore_params: + - wrapper + - ignore_params + render_mode: null + wrapper: null diff --git a/CartPole-v1/Test_single_CartPole-v1_DoubleDQN_20230516-115305/logs/log.txt b/CartPole-v1/Test_single_CartPole-v1_DoubleDQN_20230516-115305/logs/log.txt new file mode 100644 index 0000000000000000000000000000000000000000..762e9c5f539a2c60f2067f4b702586d9c4aa580b --- /dev/null +++ b/CartPole-v1/Test_single_CartPole-v1_DoubleDQN_20230516-115305/logs/log.txt @@ -0,0 +1,55 @@ +2023-05-16 11:53:05 - SimpleLog - INFO: - General Configs: +2023-05-16 11:53:05 - SimpleLog - INFO: - ================================================================================ +2023-05-16 11:53:05 - SimpleLog - INFO: - Name Value Type +2023-05-16 11:53:05 - SimpleLog - INFO: - env_name gym +2023-05-16 11:53:05 - SimpleLog - INFO: - algo_name DoubleDQN +2023-05-16 11:53:05 - SimpleLog - INFO: - mode test +2023-05-16 11:53:05 - SimpleLog - INFO: - device cpu +2023-05-16 11:53:05 - SimpleLog - INFO: - seed 1 +2023-05-16 11:53:05 - SimpleLog - INFO: - max_episode 10 +2023-05-16 11:53:05 - SimpleLog - INFO: - max_step 200 +2023-05-16 11:53:05 - SimpleLog - INFO: - collect_traj 0 +2023-05-16 11:53:05 - SimpleLog - INFO: - mp_backend single +2023-05-16 11:53:05 - SimpleLog - INFO: - n_workers 2 +2023-05-16 11:53:05 - SimpleLog - INFO: - online_eval 1 +2023-05-16 11:53:05 - SimpleLog - INFO: - online_eval_episode 10 +2023-05-16 11:53:05 - SimpleLog - INFO: - model_save_fre 500 +2023-05-16 11:53:05 - SimpleLog - INFO: - load_checkpoint 1 +2023-05-16 11:53:05 - SimpleLog - INFO: - load_path Train_single_CartPole-v1_DoubleDQN_20230516-114540 +2023-05-16 11:53:05 - SimpleLog - INFO: - load_model_step best +2023-05-16 11:53:05 - SimpleLog - INFO: - ================================================================================ +2023-05-16 11:53:05 - SimpleLog - INFO: - Algo Configs: +2023-05-16 11:53:05 - SimpleLog - INFO: - ================================================================================ +2023-05-16 11:53:05 - SimpleLog - INFO: - Name Value Type +2023-05-16 11:53:05 - SimpleLog - INFO: - epsilon_start 0.95 +2023-05-16 11:53:05 - SimpleLog - INFO: - epsilon_end 0.01 +2023-05-16 11:53:05 - SimpleLog - INFO: - epsilon_decay 500 +2023-05-16 11:53:05 - SimpleLog - INFO: - gamma 0.99 +2023-05-16 11:53:05 - SimpleLog - INFO: - lr 0.0001 +2023-05-16 11:53:05 - SimpleLog - INFO: - buffer_size 100000 +2023-05-16 11:53:05 - SimpleLog - INFO: - batch_size 64 +2023-05-16 11:53:05 - SimpleLog - INFO: - target_update 4 +2023-05-16 11:53:05 - SimpleLog - INFO: - value_layers [{'layer_type': 'linear', 'layer_dim': [256], 'activation': 'relu'}, {'layer_type': 'linear', 'layer_dim': [256], 'activation': 'relu'}] +2023-05-16 11:53:05 - SimpleLog - INFO: - buffer_type REPLAY_QUE +2023-05-16 11:53:05 - SimpleLog - INFO: - ================================================================================ +2023-05-16 11:53:05 - SimpleLog - INFO: - Env Configs: +2023-05-16 11:53:05 - SimpleLog - INFO: - ================================================================================ +2023-05-16 11:53:05 - SimpleLog - INFO: - Name Value Type +2023-05-16 11:53:05 - SimpleLog - INFO: - id CartPole-v1 +2023-05-16 11:53:05 - SimpleLog - INFO: - render_mode None +2023-05-16 11:53:05 - SimpleLog - INFO: - wrapper None +2023-05-16 11:53:05 - SimpleLog - INFO: - ignore_params ['wrapper', 'ignore_params'] +2023-05-16 11:53:05 - SimpleLog - INFO: - ================================================================================ +2023-05-16 11:53:05 - SimpleLog - INFO: - obs_space: Box([-4.8000002e+00 -3.4028235e+38 -4.1887903e-01 -3.4028235e+38], [4.8000002e+00 3.4028235e+38 4.1887903e-01 3.4028235e+38], (4,), float32), n_actions: Discrete(2) +2023-05-16 11:53:05 - SimpleLog - INFO: - Start testing! +2023-05-16 11:53:05 - SimpleLog - INFO: - episode: 0, ep_reward: 200.0, ep_step: 200 +2023-05-16 11:53:05 - SimpleLog - INFO: - episode: 1, ep_reward: 200.0, ep_step: 200 +2023-05-16 11:53:05 - SimpleLog - INFO: - episode: 2, ep_reward: 200.0, ep_step: 200 +2023-05-16 11:53:05 - SimpleLog - INFO: - episode: 3, ep_reward: 200.0, ep_step: 200 +2023-05-16 11:53:05 - SimpleLog - INFO: - episode: 4, ep_reward: 200.0, ep_step: 200 +2023-05-16 11:53:05 - SimpleLog - INFO: - episode: 5, ep_reward: 200.0, ep_step: 200 +2023-05-16 11:53:05 - SimpleLog - INFO: - episode: 6, ep_reward: 200.0, ep_step: 200 +2023-05-16 11:53:05 - SimpleLog - INFO: - episode: 7, ep_reward: 200.0, ep_step: 200 +2023-05-16 11:53:05 - SimpleLog - INFO: - episode: 8, ep_reward: 200.0, ep_step: 200 +2023-05-16 11:53:05 - SimpleLog - INFO: - episode: 9, ep_reward: 200.0, ep_step: 200 +2023-05-16 11:53:05 - SimpleLog - INFO: - Finish testing! total time consumed: 0.24s diff --git a/CartPole-v1/Test_single_CartPole-v1_DoubleDQN_20230516-115305/tb_logs/interact/events.out.tfevents.1684209185.JMac.local.52313.0 b/CartPole-v1/Test_single_CartPole-v1_DoubleDQN_20230516-115305/tb_logs/interact/events.out.tfevents.1684209185.JMac.local.52313.0 new file mode 100644 index 0000000000000000000000000000000000000000..703cee19dd3705b22181035ae8f3f529172ad4a7 --- /dev/null +++ b/CartPole-v1/Test_single_CartPole-v1_DoubleDQN_20230516-115305/tb_logs/interact/events.out.tfevents.1684209185.JMac.local.52313.0 @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:350321a3436f2c600f7c9a0f8ba02ba28a6ad9c6e949481d6926ca5daf32d79e +size 1056 diff --git a/CartPole-v1/Test_single_CartPole-v1_DoubleDQN_20230516-115305/tb_logs/model/events.out.tfevents.1684209185.JMac.local.52313.1 b/CartPole-v1/Test_single_CartPole-v1_DoubleDQN_20230516-115305/tb_logs/model/events.out.tfevents.1684209185.JMac.local.52313.1 new file mode 100644 index 0000000000000000000000000000000000000000..f0f1ae52b024f18cfba77c479bd307fcea78c962 --- /dev/null +++ b/CartPole-v1/Test_single_CartPole-v1_DoubleDQN_20230516-115305/tb_logs/model/events.out.tfevents.1684209185.JMac.local.52313.1 @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:25447bc5c9337e3d33f456f66eb8230e83dcc359ad3630edde9c63f21baefd4d +size 40 diff --git a/CartPole-v1/Train_CartPole-v1_DoubleDQN_20221122-125516/config.yaml b/CartPole-v1/Train_CartPole-v1_DoubleDQN_20221122-125516/config.yaml deleted file mode 100644 index 3847978e720c68cf0fafd5dc6a10e2a39a2bbe79..0000000000000000000000000000000000000000 --- a/CartPole-v1/Train_CartPole-v1_DoubleDQN_20221122-125516/config.yaml +++ /dev/null @@ -1,40 +0,0 @@ -general_cfg: - algo_name: DoubleDQN - device: cuda - env_name: CartPole-v1 - eval_eps: 10 - eval_per_episode: 5 - load_checkpoint: false - load_path: Train_CartPole-v1_DQN_20221026-054757 - max_steps: 200 - mode: train - save_fig: true - seed: 1 - show_fig: false - test_eps: 10 - train_eps: 100 -algo_cfg: - batch_size: 64 - buffer_size: 100000 - epsilon_decay: 500 - epsilon_end: 0.01 - epsilon_start: 0.95 - gamma: 0.99 - lr: 0.0001 - target_update: 4 - value_layers: - - activation: relu - layer_dim: - - n_states - - 256 - layer_type: linear - - activation: relu - layer_dim: - - 256 - - 256 - layer_type: linear - - activation: none - layer_dim: - - 256 - - n_actions - layer_type: linear diff --git a/CartPole-v1/Train_CartPole-v1_DoubleDQN_20221122-125516/logs/log.txt b/CartPole-v1/Train_CartPole-v1_DoubleDQN_20221122-125516/logs/log.txt deleted file mode 100644 index 209a7ba57be2eca83009483d690e916c7257e755..0000000000000000000000000000000000000000 --- a/CartPole-v1/Train_CartPole-v1_DoubleDQN_20221122-125516/logs/log.txt +++ /dev/null @@ -1,116 +0,0 @@ -2022-11-22 12:55:16 - r - INFO: - n_states: 4, n_actions: 2 -2022-11-22 12:55:19 - r - INFO: - Start training! -2022-11-22 12:55:19 - r - INFO: - Env: CartPole-v1, Algorithm: DoubleDQN, Device: cuda -2022-11-22 12:55:19 - r - INFO: - Episode: 1/100, Reward: 18.000, Step: 18 -2022-11-22 12:55:19 - r - INFO: - Episode: 2/100, Reward: 35.000, Step: 35 -2022-11-22 12:55:19 - r - INFO: - Episode: 3/100, Reward: 13.000, Step: 13 -2022-11-22 12:55:19 - r - INFO: - Episode: 4/100, Reward: 32.000, Step: 32 -2022-11-22 12:55:19 - r - INFO: - Episode: 5/100, Reward: 16.000, Step: 16 -2022-11-22 12:55:19 - r - INFO: - Current episode 5 has the best eval reward: 9.100 -2022-11-22 12:55:19 - r - INFO: - Episode: 6/100, Reward: 9.000, Step: 9 -2022-11-22 12:55:19 - r - INFO: - Episode: 7/100, Reward: 12.000, Step: 12 -2022-11-22 12:55:19 - r - INFO: - Episode: 8/100, Reward: 16.000, Step: 16 -2022-11-22 12:55:19 - r - INFO: - Episode: 9/100, Reward: 14.000, Step: 14 -2022-11-22 12:55:19 - r - INFO: - Episode: 10/100, Reward: 12.000, Step: 12 -2022-11-22 12:55:19 - r - INFO: - Current episode 10 has the best eval reward: 9.200 -2022-11-22 12:55:19 - r - INFO: - Episode: 11/100, Reward: 13.000, Step: 13 -2022-11-22 12:55:19 - r - INFO: - Episode: 12/100, Reward: 14.000, Step: 14 -2022-11-22 12:55:19 - r - INFO: - Episode: 13/100, Reward: 19.000, Step: 19 -2022-11-22 12:55:19 - r - INFO: - Episode: 14/100, Reward: 9.000, Step: 9 -2022-11-22 12:55:19 - r - INFO: - Episode: 15/100, Reward: 15.000, Step: 15 -2022-11-22 12:55:19 - r - INFO: - Current episode 15 has the best eval reward: 9.300 -2022-11-22 12:55:19 - r - INFO: - Episode: 16/100, Reward: 12.000, Step: 12 -2022-11-22 12:55:19 - r - INFO: - Episode: 17/100, Reward: 11.000, Step: 11 -2022-11-22 12:55:19 - r - INFO: - Episode: 18/100, Reward: 9.000, Step: 9 -2022-11-22 12:55:19 - r - INFO: - Episode: 19/100, Reward: 13.000, Step: 13 -2022-11-22 12:55:19 - r - INFO: - Episode: 20/100, Reward: 17.000, Step: 17 -2022-11-22 12:55:19 - r - INFO: - Episode: 21/100, Reward: 13.000, Step: 13 -2022-11-22 12:55:19 - r - INFO: - Episode: 22/100, Reward: 15.000, Step: 15 -2022-11-22 12:55:19 - r - INFO: - Episode: 23/100, Reward: 22.000, Step: 22 -2022-11-22 12:55:20 - r - INFO: - Episode: 24/100, Reward: 26.000, Step: 26 -2022-11-22 12:55:20 - r - INFO: - Episode: 25/100, Reward: 19.000, Step: 19 -2022-11-22 12:55:20 - r - INFO: - Current episode 25 has the best eval reward: 9.800 -2022-11-22 12:55:20 - r - INFO: - Episode: 26/100, Reward: 10.000, Step: 10 -2022-11-22 12:55:20 - r - INFO: - Episode: 27/100, Reward: 10.000, Step: 10 -2022-11-22 12:55:20 - r - INFO: - Episode: 28/100, Reward: 11.000, Step: 11 -2022-11-22 12:55:20 - r - INFO: - Episode: 29/100, Reward: 13.000, Step: 13 -2022-11-22 12:55:20 - r - INFO: - Episode: 30/100, Reward: 16.000, Step: 16 -2022-11-22 12:55:20 - r - INFO: - Episode: 31/100, Reward: 13.000, Step: 13 -2022-11-22 12:55:20 - r - INFO: - Episode: 32/100, Reward: 15.000, Step: 15 -2022-11-22 12:55:20 - r - INFO: - Episode: 33/100, Reward: 12.000, Step: 12 -2022-11-22 12:55:20 - r - INFO: - Episode: 34/100, Reward: 13.000, Step: 13 -2022-11-22 12:55:20 - r - INFO: - Episode: 35/100, Reward: 13.000, Step: 13 -2022-11-22 12:55:20 - r - INFO: - Episode: 36/100, Reward: 11.000, Step: 11 -2022-11-22 12:55:20 - r - INFO: - Episode: 37/100, Reward: 9.000, Step: 9 -2022-11-22 12:55:20 - r - INFO: - Episode: 38/100, Reward: 9.000, Step: 9 -2022-11-22 12:55:20 - r - INFO: - Episode: 39/100, Reward: 10.000, Step: 10 -2022-11-22 12:55:20 - r - INFO: - Episode: 40/100, Reward: 14.000, Step: 14 -2022-11-22 12:55:20 - r - INFO: - Episode: 41/100, Reward: 9.000, Step: 9 -2022-11-22 12:55:20 - r - INFO: - Episode: 42/100, Reward: 10.000, Step: 10 -2022-11-22 12:55:20 - r - INFO: - Episode: 43/100, Reward: 9.000, Step: 9 -2022-11-22 12:55:20 - r - INFO: - Episode: 44/100, Reward: 14.000, Step: 14 -2022-11-22 12:55:20 - r - INFO: - Episode: 45/100, Reward: 10.000, Step: 10 -2022-11-22 12:55:20 - r - INFO: - Episode: 46/100, Reward: 19.000, Step: 19 -2022-11-22 12:55:20 - r - INFO: - Episode: 47/100, Reward: 10.000, Step: 10 -2022-11-22 12:55:20 - r - INFO: - Episode: 48/100, Reward: 14.000, Step: 14 -2022-11-22 12:55:20 - r - INFO: - Episode: 49/100, Reward: 18.000, Step: 18 -2022-11-22 12:55:20 - r - INFO: - Episode: 50/100, Reward: 32.000, Step: 32 -2022-11-22 12:55:20 - r - INFO: - Current episode 50 has the best eval reward: 24.300 -2022-11-22 12:55:21 - r - INFO: - Episode: 51/100, Reward: 17.000, Step: 17 -2022-11-22 12:55:21 - r - INFO: - Episode: 52/100, Reward: 15.000, Step: 15 -2022-11-22 12:55:21 - r - INFO: - Episode: 53/100, Reward: 18.000, Step: 18 -2022-11-22 12:55:21 - r - INFO: - Episode: 54/100, Reward: 14.000, Step: 14 -2022-11-22 12:55:21 - r - INFO: - Episode: 55/100, Reward: 22.000, Step: 22 -2022-11-22 12:55:21 - r - INFO: - Episode: 56/100, Reward: 14.000, Step: 14 -2022-11-22 12:55:21 - r - INFO: - Episode: 57/100, Reward: 21.000, Step: 21 -2022-11-22 12:55:21 - r - INFO: - Episode: 58/100, Reward: 21.000, Step: 21 -2022-11-22 12:55:21 - r - INFO: - Episode: 59/100, Reward: 23.000, Step: 23 -2022-11-22 12:55:21 - r - INFO: - Episode: 60/100, Reward: 21.000, Step: 21 -2022-11-22 12:55:21 - r - INFO: - Episode: 61/100, Reward: 21.000, Step: 21 -2022-11-22 12:55:21 - r - INFO: - Episode: 62/100, Reward: 35.000, Step: 35 -2022-11-22 12:55:21 - r - INFO: - Episode: 63/100, Reward: 23.000, Step: 23 -2022-11-22 12:55:21 - r - INFO: - Episode: 64/100, Reward: 27.000, Step: 27 -2022-11-22 12:55:21 - r - INFO: - Episode: 65/100, Reward: 24.000, Step: 24 -2022-11-22 12:55:21 - r - INFO: - Current episode 65 has the best eval reward: 29.700 -2022-11-22 12:55:21 - r - INFO: - Episode: 66/100, Reward: 28.000, Step: 28 -2022-11-22 12:55:21 - r - INFO: - Episode: 67/100, Reward: 30.000, Step: 30 -2022-11-22 12:55:22 - r - INFO: - Episode: 68/100, Reward: 33.000, Step: 33 -2022-11-22 12:55:22 - r - INFO: - Episode: 69/100, Reward: 33.000, Step: 33 -2022-11-22 12:55:22 - r - INFO: - Episode: 70/100, Reward: 26.000, Step: 26 -2022-11-22 12:55:22 - r - INFO: - Current episode 70 has the best eval reward: 34.400 -2022-11-22 12:55:22 - r - INFO: - Episode: 71/100, Reward: 37.000, Step: 37 -2022-11-22 12:55:22 - r - INFO: - Episode: 72/100, Reward: 28.000, Step: 28 -2022-11-22 12:55:22 - r - INFO: - Episode: 73/100, Reward: 30.000, Step: 30 -2022-11-22 12:55:22 - r - INFO: - Episode: 74/100, Reward: 41.000, Step: 41 -2022-11-22 12:55:22 - r - INFO: - Episode: 75/100, Reward: 45.000, Step: 45 -2022-11-22 12:55:22 - r - INFO: - Current episode 75 has the best eval reward: 35.600 -2022-11-22 12:55:23 - r - INFO: - Episode: 76/100, Reward: 68.000, Step: 68 -2022-11-22 12:55:23 - r - INFO: - Episode: 77/100, Reward: 33.000, Step: 33 -2022-11-22 12:55:23 - r - INFO: - Episode: 78/100, Reward: 46.000, Step: 46 -2022-11-22 12:55:23 - r - INFO: - Episode: 79/100, Reward: 54.000, Step: 54 -2022-11-22 12:55:23 - r - INFO: - Episode: 80/100, Reward: 37.000, Step: 37 -2022-11-22 12:55:23 - r - INFO: - Current episode 80 has the best eval reward: 42.800 -2022-11-22 12:55:23 - r - INFO: - Episode: 81/100, Reward: 43.000, Step: 43 -2022-11-22 12:55:23 - r - INFO: - Episode: 82/100, Reward: 79.000, Step: 79 -2022-11-22 12:55:23 - r - INFO: - Episode: 83/100, Reward: 36.000, Step: 36 -2022-11-22 12:55:24 - r - INFO: - Episode: 84/100, Reward: 58.000, Step: 58 -2022-11-22 12:55:24 - r - INFO: - Episode: 85/100, Reward: 42.000, Step: 42 -2022-11-22 12:55:24 - r - INFO: - Current episode 85 has the best eval reward: 62.100 -2022-11-22 12:55:24 - r - INFO: - Episode: 86/100, Reward: 136.000, Step: 136 -2022-11-22 12:55:24 - r - INFO: - Episode: 87/100, Reward: 57.000, Step: 57 -2022-11-22 12:55:24 - r - INFO: - Episode: 88/100, Reward: 46.000, Step: 46 -2022-11-22 12:55:25 - r - INFO: - Episode: 89/100, Reward: 105.000, Step: 105 -2022-11-22 12:55:25 - r - INFO: - Episode: 90/100, Reward: 63.000, Step: 63 -2022-11-22 12:55:25 - r - INFO: - Current episode 90 has the best eval reward: 76.600 -2022-11-22 12:55:25 - r - INFO: - Episode: 91/100, Reward: 84.000, Step: 84 -2022-11-22 12:55:26 - r - INFO: - Episode: 92/100, Reward: 136.000, Step: 136 -2022-11-22 12:55:26 - r - INFO: - Episode: 93/100, Reward: 121.000, Step: 121 -2022-11-22 12:55:26 - r - INFO: - Episode: 94/100, Reward: 96.000, Step: 96 -2022-11-22 12:55:26 - r - INFO: - Episode: 95/100, Reward: 106.000, Step: 106 -2022-11-22 12:55:27 - r - INFO: - Current episode 95 has the best eval reward: 187.300 -2022-11-22 12:55:27 - r - INFO: - Episode: 96/100, Reward: 200.000, Step: 200 -2022-11-22 12:55:28 - r - INFO: - Episode: 97/100, Reward: 200.000, Step: 200 -2022-11-22 12:55:28 - r - INFO: - Episode: 98/100, Reward: 113.000, Step: 113 -2022-11-22 12:55:28 - r - INFO: - Episode: 99/100, Reward: 113.000, Step: 113 -2022-11-22 12:55:29 - r - INFO: - Episode: 100/100, Reward: 132.000, Step: 132 -2022-11-22 12:55:29 - r - INFO: - Finish training! diff --git a/CartPole-v1/Train_CartPole-v1_DoubleDQN_20221122-125516/models/checkpoint.pth b/CartPole-v1/Train_CartPole-v1_DoubleDQN_20221122-125516/models/checkpoint.pth deleted file mode 100644 index 27cfa80035673442926e5e8ae4a75214ae1e6aad..0000000000000000000000000000000000000000 --- a/CartPole-v1/Train_CartPole-v1_DoubleDQN_20221122-125516/models/checkpoint.pth +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:92c80e643e52ee0e109e55ba083247021287455374ec28f27c4f2705e51fee23 -size 272471 diff --git a/CartPole-v1/Train_CartPole-v1_DoubleDQN_20221122-125516/results/learning_curve.png b/CartPole-v1/Train_CartPole-v1_DoubleDQN_20221122-125516/results/learning_curve.png deleted file mode 100644 index 960506f50ae56e8eaf3ccf1aaf5eab33a330df7a..0000000000000000000000000000000000000000 Binary files a/CartPole-v1/Train_CartPole-v1_DoubleDQN_20221122-125516/results/learning_curve.png and /dev/null differ diff --git a/CartPole-v1/Train_CartPole-v1_DoubleDQN_20221122-125516/results/res.csv b/CartPole-v1/Train_CartPole-v1_DoubleDQN_20221122-125516/results/res.csv deleted file mode 100644 index 63e820e8a20e94235a1ba88cf959752718c0d476..0000000000000000000000000000000000000000 --- a/CartPole-v1/Train_CartPole-v1_DoubleDQN_20221122-125516/results/res.csv +++ /dev/null @@ -1,101 +0,0 @@ -episodes,rewards,steps -0,18.0,18 -1,35.0,35 -2,13.0,13 -3,32.0,32 -4,16.0,16 -5,9.0,9 -6,12.0,12 -7,16.0,16 -8,14.0,14 -9,12.0,12 -10,13.0,13 -11,14.0,14 -12,19.0,19 -13,9.0,9 -14,15.0,15 -15,12.0,12 -16,11.0,11 -17,9.0,9 -18,13.0,13 -19,17.0,17 -20,13.0,13 -21,15.0,15 -22,22.0,22 -23,26.0,26 -24,19.0,19 -25,10.0,10 -26,10.0,10 -27,11.0,11 -28,13.0,13 -29,16.0,16 -30,13.0,13 -31,15.0,15 -32,12.0,12 -33,13.0,13 -34,13.0,13 -35,11.0,11 -36,9.0,9 -37,9.0,9 -38,10.0,10 -39,14.0,14 -40,9.0,9 -41,10.0,10 -42,9.0,9 -43,14.0,14 -44,10.0,10 -45,19.0,19 -46,10.0,10 -47,14.0,14 -48,18.0,18 -49,32.0,32 -50,17.0,17 -51,15.0,15 -52,18.0,18 -53,14.0,14 -54,22.0,22 -55,14.0,14 -56,21.0,21 -57,21.0,21 -58,23.0,23 -59,21.0,21 -60,21.0,21 -61,35.0,35 -62,23.0,23 -63,27.0,27 -64,24.0,24 -65,28.0,28 -66,30.0,30 -67,33.0,33 -68,33.0,33 -69,26.0,26 -70,37.0,37 -71,28.0,28 -72,30.0,30 -73,41.0,41 -74,45.0,45 -75,68.0,68 -76,33.0,33 -77,46.0,46 -78,54.0,54 -79,37.0,37 -80,43.0,43 -81,79.0,79 -82,36.0,36 -83,58.0,58 -84,42.0,42 -85,136.0,136 -86,57.0,57 -87,46.0,46 -88,105.0,105 -89,63.0,63 -90,84.0,84 -91,136.0,136 -92,121.0,121 -93,96.0,96 -94,106.0,106 -95,200.0,200 -96,200.0,200 -97,113.0,113 -98,113.0,113 -99,132.0,132 diff --git a/CartPole-v1/Train_CartPole-v1_DoubleDQN_mp_20230406-160028/logs/log.txt b/CartPole-v1/Train_CartPole-v1_DoubleDQN_mp_20230406-160028/logs/log.txt deleted file mode 100644 index be12ec011e5691053604d327e293f0de3e16e75b..0000000000000000000000000000000000000000 --- a/CartPole-v1/Train_CartPole-v1_DoubleDQN_mp_20230406-160028/logs/log.txt +++ /dev/null @@ -1,42 +0,0 @@ -2023-04-06 16:00:28 - r - INFO: - Hyperparameters: -2023-04-06 16:00:28 - r - INFO: - ================================================================================ -2023-04-06 16:00:28 - r - INFO: - Name Value Type -2023-04-06 16:00:28 - r - INFO: - env_name CartPole-v1 -2023-04-06 16:00:28 - r - INFO: - new_step_api 1 -2023-04-06 16:00:28 - r - INFO: - wrapper None -2023-04-06 16:00:28 - r - INFO: - render 0 -2023-04-06 16:00:28 - r - INFO: - render_mode human -2023-04-06 16:00:28 - r - INFO: - algo_name DoubleDQN -2023-04-06 16:00:28 - r - INFO: - mode train -2023-04-06 16:00:28 - r - INFO: - mp_backend mp -2023-04-06 16:00:28 - r - INFO: - seed 1 -2023-04-06 16:00:28 - r - INFO: - device cpu -2023-04-06 16:00:28 - r - INFO: - train_eps 400 -2023-04-06 16:00:28 - r - INFO: - test_eps 10 -2023-04-06 16:00:28 - r - INFO: - eval_eps 10 -2023-04-06 16:00:28 - r - INFO: - eval_per_episode 5 -2023-04-06 16:00:28 - r - INFO: - max_steps 200 -2023-04-06 16:00:28 - r - INFO: - load_checkpoint 0 -2023-04-06 16:00:28 - r - INFO: - load_path Train_CartPole-v1_DQN_20221026-054757 -2023-04-06 16:00:28 - r - INFO: - show_fig 0 -2023-04-06 16:00:28 - r - INFO: - save_fig 1 -2023-04-06 16:00:28 - r - INFO: - n_workers 2 -2023-04-06 16:00:28 - r - INFO: - epsilon_start 0.95 -2023-04-06 16:00:28 - r - INFO: - epsilon_end 0.01 -2023-04-06 16:00:28 - r - INFO: - epsilon_decay 500 -2023-04-06 16:00:28 - r - INFO: - gamma 0.95 -2023-04-06 16:00:28 - r - INFO: - lr 0.0001 -2023-04-06 16:00:28 - r - INFO: - buffer_size 100000 -2023-04-06 16:00:28 - r - INFO: - batch_size 64 -2023-04-06 16:00:28 - r - INFO: - target_update 4 -2023-04-06 16:00:28 - r - INFO: - value_layers [{'layer_type': 'linear', 'layer_dim': ['n_states', 256], 'activation': 'relu'}, {'layer_type': 'linear', 'layer_dim': [256, 256], 'activation': 'relu'}, {'layer_type': 'linear', 'layer_dim': [256, 'n_actions'], 'activation': 'none'}] -2023-04-06 16:00:28 - r - INFO: - task_dir /media/disk/gsc/joyrl-offline/tasks/Train_CartPole-v1_DoubleDQN_20230406-160028 -2023-04-06 16:00:28 - r - INFO: - res_dir /media/disk/gsc/joyrl-offline/tasks/Train_CartPole-v1_DoubleDQN_20230406-160028/results -2023-04-06 16:00:28 - r - INFO: - log_dir /media/disk/gsc/joyrl-offline/tasks/Train_CartPole-v1_DoubleDQN_20230406-160028/logs -2023-04-06 16:00:28 - r - INFO: - traj_dir /media/disk/gsc/joyrl-offline/tasks/Train_CartPole-v1_DoubleDQN_20230406-160028/traj -2023-04-06 16:00:28 - r - INFO: - video_dir /media/disk/gsc/joyrl-offline/tasks/Train_CartPole-v1_DoubleDQN_20230406-160028/videos -2023-04-06 16:00:28 - r - INFO: - ================================================================================ -2023-04-06 16:00:28 - r - INFO: - n_states: 4, n_actions: 2 -2023-04-06 16:00:28 - r - INFO: - Start training! -2023-04-06 16:00:28 - r - INFO: - Env: CartPole-v1, Algorithm: DoubleDQN, Device: cpu -2023-04-06 16:01:56 - r - INFO: - Finish training! diff --git a/CartPole-v1/Train_CartPole-v1_DoubleDQN_mp_20230406-160028/models/checkpoint.pth b/CartPole-v1/Train_CartPole-v1_DoubleDQN_mp_20230406-160028/models/checkpoint.pth deleted file mode 100644 index a145e71eceffd117546543c32d5e5214414c6eb2..0000000000000000000000000000000000000000 --- a/CartPole-v1/Train_CartPole-v1_DoubleDQN_mp_20230406-160028/models/checkpoint.pth +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:4cd537f6ab4ac0ff27caa323076685e3a3fff04b064dbdfc509baae76e9a9406 -size 272407 diff --git a/CartPole-v1/Train_CartPole-v1_DoubleDQN_mp_20230406-160028/results/learning_curve.png b/CartPole-v1/Train_CartPole-v1_DoubleDQN_mp_20230406-160028/results/learning_curve.png deleted file mode 100644 index d11ab17669185bc5bd7055fa42c99635af0d424c..0000000000000000000000000000000000000000 Binary files a/CartPole-v1/Train_CartPole-v1_DoubleDQN_mp_20230406-160028/results/learning_curve.png and /dev/null differ diff --git a/CartPole-v1/Train_CartPole-v1_DoubleDQN_mp_20230406-160028/results/res.csv b/CartPole-v1/Train_CartPole-v1_DoubleDQN_mp_20230406-160028/results/res.csv deleted file mode 100644 index d40ff05607ca48ad5e4d119a3bb9f3498c8a5440..0000000000000000000000000000000000000000 --- a/CartPole-v1/Train_CartPole-v1_DoubleDQN_mp_20230406-160028/results/res.csv +++ /dev/null @@ -1,402 +0,0 @@ -episodes,rewards -0,18.0 -1,39.0 -2,18.0 -3,28.0 -4,15.0 -5,12.0 -6,15.0 -7,40.0 -8,31.0 -9,17.0 -10,17.0 -11,31.0 -12,13.0 -13,14.0 -14,15.0 -15,21.0 -16,9.0 -17,10.0 -18,22.0 -19,19.0 -20,11.0 -21,13.0 -22,15.0 -23,20.0 -24,14.0 -25,12.0 -26,11.0 -27,10.0 -28,14.0 -29,12.0 -30,9.0 -31,10.0 -32,16.0 -33,13.0 -34,12.0 -35,15.0 -36,10.0 -37,17.0 -38,10.0 -39,10.0 -40,13.0 -41,12.0 -42,10.0 -43,12.0 -44,12.0 -45,12.0 -46,9.0 -47,10.0 -48,10.0 -49,11.0 -50,9.0 -51,13.0 -52,9.0 -53,10.0 -54,10.0 -55,11.0 -56,10.0 -57,10.0 -58,14.0 -59,9.0 -60,10.0 -61,10.0 -62,9.0 -63,12.0 -64,10.0 -65,13.0 -66,12.0 -67,15.0 -68,10.0 -69,13.0 -70,14.0 -71,10.0 -72,30.0 -73,11.0 -74,9.0 -75,11.0 -76,9.0 -77,11.0 -78,9.0 -79,11.0 -80,16.0 -81,21.0 -82,10.0 -83,9.0 -84,14.0 -85,9.0 -86,13.0 -87,9.0 -88,13.0 -89,17.0 -90,26.0 -91,32.0 -92,14.0 -93,17.0 -94,11.0 -95,34.0 -96,10.0 -97,23.0 -98,14.0 -99,37.0 -100,27.0 -101,34.0 -102,23.0 -103,59.0 -104,9.0 -105,91.0 -106,61.0 -107,47.0 -108,21.0 -109,27.0 -110,22.0 -111,30.0 -112,22.0 -113,29.0 -114,25.0 -115,68.0 -116,102.0 -117,54.0 -118,46.0 -119,34.0 -120,61.0 -121,81.0 -122,55.0 -123,67.0 -124,71.0 -125,46.0 -126,88.0 -127,90.0 -128,68.0 -129,114.0 -130,66.0 -131,102.0 -132,100.0 -133,88.0 -134,80.0 -135,81.0 -136,49.0 -137,123.0 -138,197.0 -139,146.0 -140,93.0 -141,135.0 -142,117.0 -143,104.0 -144,168.0 -145,114.0 -146,82.0 -147,153.0 -148,106.0 -149,140.0 -150,100.0 -151,120.0 -152,117.0 -153,173.0 -154,200.0 -155,142.0 -156,180.0 -157,156.0 -158,149.0 -159,173.0 -160,187.0 -161,200.0 -162,188.0 -163,156.0 -164,170.0 -165,158.0 -166,200.0 -167,152.0 -168,194.0 -169,196.0 -170,189.0 -171,200.0 -172,173.0 -173,200.0 -174,154.0 -175,200.0 -176,200.0 -177,200.0 -178,189.0 -179,194.0 -180,199.0 -181,200.0 -182,200.0 -183,189.0 -184,200.0 -185,200.0 -186,200.0 -187,200.0 -188,200.0 -189,200.0 -190,200.0 -191,200.0 -192,200.0 -193,200.0 -194,200.0 -195,189.0 -196,198.0 -197,195.0 -198,199.0 -199,200.0 -200,200.0 -201,200.0 -202,198.0 -203,196.0 -204,200.0 -205,200.0 -206,200.0 -207,200.0 -208,200.0 -209,200.0 -210,195.0 -211,198.0 -212,200.0 -213,200.0 -214,200.0 -215,200.0 -216,200.0 -217,194.0 -218,200.0 -219,200.0 -220,200.0 -221,200.0 -222,200.0 -223,200.0 -224,197.0 -225,200.0 -226,200.0 -227,200.0 -228,199.0 -229,200.0 -230,200.0 -231,198.0 -232,200.0 -233,200.0 -234,197.0 -235,200.0 -236,200.0 -237,200.0 -238,200.0 -239,196.0 -240,200.0 -241,200.0 -242,195.0 -243,200.0 -244,200.0 -245,200.0 -246,200.0 -247,200.0 -248,200.0 -249,200.0 -250,200.0 -251,200.0 -252,200.0 -253,200.0 -254,200.0 -255,199.0 -256,200.0 -257,200.0 -258,200.0 -259,200.0 -260,200.0 -261,200.0 -262,200.0 -263,200.0 -264,200.0 -265,200.0 -266,200.0 -267,200.0 -268,200.0 -269,200.0 -270,200.0 -271,200.0 -272,200.0 -273,200.0 -274,200.0 -275,200.0 -276,200.0 -277,200.0 -278,200.0 -279,200.0 -280,200.0 -281,200.0 -282,200.0 -283,200.0 -284,200.0 -285,200.0 -286,200.0 -287,200.0 -288,200.0 -289,200.0 -290,200.0 -291,200.0 -292,200.0 -293,200.0 -294,200.0 -295,200.0 -296,200.0 -297,200.0 -298,200.0 -299,200.0 -300,200.0 -301,200.0 -302,200.0 -303,200.0 -304,200.0 -305,200.0 -306,200.0 -307,200.0 -308,200.0 -309,200.0 -310,200.0 -311,200.0 -312,200.0 -313,200.0 -314,200.0 -315,200.0 -316,200.0 -317,200.0 -318,200.0 -319,200.0 -320,200.0 -321,200.0 -322,200.0 -323,200.0 -324,200.0 -325,200.0 -326,200.0 -327,200.0 -328,200.0 -329,200.0 -330,200.0 -331,200.0 -332,200.0 -333,200.0 -334,200.0 -335,200.0 -336,200.0 -337,200.0 -338,200.0 -339,200.0 -340,200.0 -341,200.0 -342,200.0 -343,200.0 -344,200.0 -345,200.0 -346,200.0 -347,200.0 -348,200.0 -349,200.0 -350,200.0 -351,200.0 -352,200.0 -353,200.0 -354,200.0 -355,200.0 -356,200.0 -357,200.0 -358,200.0 -359,200.0 -360,200.0 -361,200.0 -362,200.0 -363,200.0 -364,200.0 -365,200.0 -366,200.0 -367,200.0 -368,200.0 -369,200.0 -370,200.0 -371,200.0 -372,200.0 -373,200.0 -374,200.0 -375,200.0 -376,200.0 -377,200.0 -378,200.0 -379,200.0 -380,200.0 -381,200.0 -382,200.0 -383,200.0 -384,200.0 -385,200.0 -386,200.0 -387,200.0 -388,200.0 -389,200.0 -390,200.0 -391,200.0 -392,200.0 -393,200.0 -394,200.0 -395,200.0 -396,200.0 -397,200.0 -398,200.0 -399,200.0 -400,200.0 diff --git a/CartPole-v1/Train_CartPole-v1_DoubleDQN_ray_20230406-162938/logs/log.txt b/CartPole-v1/Train_CartPole-v1_DoubleDQN_ray_20230406-162938/logs/log.txt deleted file mode 100644 index e526872af487ae5b47029e37d58ad3067e04dccb..0000000000000000000000000000000000000000 --- a/CartPole-v1/Train_CartPole-v1_DoubleDQN_ray_20230406-162938/logs/log.txt +++ /dev/null @@ -1,42 +0,0 @@ -2023-04-06 16:29:38 - r - INFO: - Hyperparameters: -2023-04-06 16:29:38 - r - INFO: - ================================================================================ -2023-04-06 16:29:38 - r - INFO: - Name Value Type -2023-04-06 16:29:38 - r - INFO: - env_name CartPole-v1 -2023-04-06 16:29:38 - r - INFO: - new_step_api 1 -2023-04-06 16:29:38 - r - INFO: - wrapper None -2023-04-06 16:29:38 - r - INFO: - render 0 -2023-04-06 16:29:38 - r - INFO: - render_mode human -2023-04-06 16:29:38 - r - INFO: - algo_name DoubleDQN -2023-04-06 16:29:38 - r - INFO: - mode train -2023-04-06 16:29:38 - r - INFO: - mp_backend ray -2023-04-06 16:29:38 - r - INFO: - seed 1 -2023-04-06 16:29:38 - r - INFO: - device cpu -2023-04-06 16:29:38 - r - INFO: - train_eps 400 -2023-04-06 16:29:38 - r - INFO: - test_eps 10 -2023-04-06 16:29:38 - r - INFO: - eval_eps 10 -2023-04-06 16:29:38 - r - INFO: - eval_per_episode 5 -2023-04-06 16:29:38 - r - INFO: - max_steps 200 -2023-04-06 16:29:38 - r - INFO: - load_checkpoint 0 -2023-04-06 16:29:38 - r - INFO: - load_path Train_CartPole-v1_DQN_20221026-054757 -2023-04-06 16:29:38 - r - INFO: - show_fig 0 -2023-04-06 16:29:38 - r - INFO: - save_fig 1 -2023-04-06 16:29:38 - r - INFO: - n_workers 2 -2023-04-06 16:29:38 - r - INFO: - epsilon_start 0.95 -2023-04-06 16:29:38 - r - INFO: - epsilon_end 0.01 -2023-04-06 16:29:38 - r - INFO: - epsilon_decay 500 -2023-04-06 16:29:38 - r - INFO: - gamma 0.95 -2023-04-06 16:29:38 - r - INFO: - lr 0.0001 -2023-04-06 16:29:38 - r - INFO: - buffer_size 100000 -2023-04-06 16:29:38 - r - INFO: - batch_size 64 -2023-04-06 16:29:38 - r - INFO: - target_update 4 -2023-04-06 16:29:38 - r - INFO: - value_layers [{'layer_type': 'linear', 'layer_dim': ['n_states', 256], 'activation': 'relu'}, {'layer_type': 'linear', 'layer_dim': [256, 256], 'activation': 'relu'}, {'layer_type': 'linear', 'layer_dim': [256, 'n_actions'], 'activation': 'none'}] -2023-04-06 16:29:38 - r - INFO: - task_dir /media/disk/gsc/joyrl-offline/tasks/Train_CartPole-v1_DoubleDQN_20230406-162938 -2023-04-06 16:29:38 - r - INFO: - res_dir /media/disk/gsc/joyrl-offline/tasks/Train_CartPole-v1_DoubleDQN_20230406-162938/results -2023-04-06 16:29:38 - r - INFO: - log_dir /media/disk/gsc/joyrl-offline/tasks/Train_CartPole-v1_DoubleDQN_20230406-162938/logs -2023-04-06 16:29:38 - r - INFO: - traj_dir /media/disk/gsc/joyrl-offline/tasks/Train_CartPole-v1_DoubleDQN_20230406-162938/traj -2023-04-06 16:29:38 - r - INFO: - video_dir /media/disk/gsc/joyrl-offline/tasks/Train_CartPole-v1_DoubleDQN_20230406-162938/videos -2023-04-06 16:29:38 - r - INFO: - ================================================================================ -2023-04-06 16:29:40 - r - INFO: - n_states: 4, n_actions: 2 -2023-04-06 16:29:40 - r - INFO: - Start training! -2023-04-06 16:29:40 - r - INFO: - Env: CartPole-v1, Algorithm: DoubleDQN, Device: cpu -2023-04-06 16:37:19 - r - INFO: - Finish training! diff --git a/CartPole-v1/Train_CartPole-v1_DoubleDQN_ray_20230406-162938/models/checkpoint.pt b/CartPole-v1/Train_CartPole-v1_DoubleDQN_ray_20230406-162938/models/checkpoint.pt deleted file mode 100644 index 2cfaf6fe119daae13b1e48521ac5f0a6dfd5731c..0000000000000000000000000000000000000000 --- a/CartPole-v1/Train_CartPole-v1_DoubleDQN_ray_20230406-162938/models/checkpoint.pt +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:7d3a724152cea263dec5b58d80bee101405e7b3268a34c265d414ebbd771c5ac -size 272407 diff --git a/CartPole-v1/Train_CartPole-v1_DoubleDQN_ray_20230406-162938/results/learning_curve.png b/CartPole-v1/Train_CartPole-v1_DoubleDQN_ray_20230406-162938/results/learning_curve.png deleted file mode 100644 index b8c37adadf6279152bb177b581261610b76abe42..0000000000000000000000000000000000000000 Binary files a/CartPole-v1/Train_CartPole-v1_DoubleDQN_ray_20230406-162938/results/learning_curve.png and /dev/null differ diff --git a/CartPole-v1/Train_CartPole-v1_DoubleDQN_ray_20230406-162938/results/res.csv b/CartPole-v1/Train_CartPole-v1_DoubleDQN_ray_20230406-162938/results/res.csv deleted file mode 100644 index 041045a06958699a5cb9542d0184a015b947d90b..0000000000000000000000000000000000000000 --- a/CartPole-v1/Train_CartPole-v1_DoubleDQN_ray_20230406-162938/results/res.csv +++ /dev/null @@ -1,401 +0,0 @@ -episodes,rewards -0,16.0 -1,18.0 -2,18.0 -3,28.0 -4,16.0 -5,15.0 -6,14.0 -7,15.0 -8,11.0 -9,49.0 -10,31.0 -11,31.0 -12,11.0 -13,14.0 -14,13.0 -15,21.0 -16,9.0 -17,28.0 -18,10.0 -19,19.0 -20,15.0 -21,12.0 -22,13.0 -23,18.0 -24,15.0 -25,14.0 -26,13.0 -27,11.0 -28,12.0 -29,14.0 -30,12.0 -31,10.0 -32,9.0 -33,13.0 -34,16.0 -35,17.0 -36,15.0 -37,12.0 -38,10.0 -39,18.0 -40,10.0 -41,11.0 -42,21.0 -43,16.0 -44,15.0 -45,19.0 -46,15.0 -47,20.0 -48,15.0 -49,14.0 -50,12.0 -51,10.0 -52,12.0 -53,11.0 -54,12.0 -55,10.0 -56,11.0 -57,9.0 -58,15.0 -59,10.0 -60,15.0 -61,11.0 -62,12.0 -63,15.0 -64,13.0 -65,11.0 -66,12.0 -67,12.0 -68,10.0 -69,11.0 -70,11.0 -71,9.0 -72,9.0 -73,11.0 -74,9.0 -75,10.0 -76,9.0 -77,10.0 -78,9.0 -79,10.0 -80,10.0 -81,9.0 -82,15.0 -83,10.0 -84,10.0 -85,11.0 -86,11.0 -87,13.0 -88,13.0 -89,9.0 -90,16.0 -91,12.0 -92,15.0 -93,9.0 -94,10.0 -95,10.0 -96,11.0 -97,11.0 -98,9.0 -99,12.0 -100,16.0 -101,10.0 -102,15.0 -103,9.0 -104,9.0 -105,10.0 -106,11.0 -107,10.0 -108,13.0 -109,11.0 -110,9.0 -111,14.0 -112,10.0 -113,12.0 -114,10.0 -115,10.0 -116,10.0 -117,13.0 -118,10.0 -119,11.0 -120,9.0 -121,11.0 -122,10.0 -123,10.0 -124,12.0 -125,41.0 -126,9.0 -127,31.0 -128,14.0 -129,14.0 -130,14.0 -131,11.0 -132,14.0 -133,12.0 -134,16.0 -135,11.0 -136,12.0 -137,16.0 -138,12.0 -139,14.0 -140,12.0 -141,18.0 -142,15.0 -143,18.0 -144,14.0 -145,14.0 -146,18.0 -147,17.0 -148,24.0 -149,13.0 -150,18.0 -151,15.0 -152,20.0 -153,17.0 -154,16.0 -155,17.0 -156,14.0 -157,26.0 -158,26.0 -159,42.0 -160,25.0 -161,58.0 -162,48.0 -163,48.0 -164,61.0 -165,115.0 -166,156.0 -167,56.0 -168,61.0 -169,83.0 -170,36.0 -171,47.0 -172,31.0 -173,27.0 -174,50.0 -175,34.0 -176,32.0 -177,49.0 -178,30.0 -179,50.0 -180,34.0 -181,27.0 -182,49.0 -183,35.0 -184,52.0 -185,35.0 -186,47.0 -187,50.0 -188,35.0 -189,54.0 -190,33.0 -191,50.0 -192,63.0 -193,121.0 -194,86.0 -195,46.0 -196,54.0 -197,42.0 -198,73.0 -199,45.0 -200,48.0 -201,72.0 -202,60.0 -203,96.0 -204,40.0 -205,46.0 -206,65.0 -207,84.0 -208,115.0 -209,78.0 -210,33.0 -211,40.0 -212,32.0 -213,39.0 -214,47.0 -215,37.0 -216,53.0 -217,37.0 -218,56.0 -219,36.0 -220,101.0 -221,105.0 -222,172.0 -223,116.0 -224,200.0 -225,162.0 -226,200.0 -227,200.0 -228,200.0 -229,200.0 -230,200.0 -231,200.0 -232,200.0 -233,200.0 -234,200.0 -235,200.0 -236,200.0 -237,200.0 -238,200.0 -239,200.0 -240,200.0 -241,200.0 -242,200.0 -243,200.0 -244,200.0 -245,200.0 -246,200.0 -247,200.0 -248,200.0 -249,200.0 -250,200.0 -251,200.0 -252,200.0 -253,200.0 -254,200.0 -255,200.0 -256,200.0 -257,200.0 -258,200.0 -259,200.0 -260,200.0 -261,200.0 -262,200.0 -263,200.0 -264,200.0 -265,200.0 -266,200.0 -267,200.0 -268,200.0 -269,200.0 -270,200.0 -271,200.0 -272,200.0 -273,200.0 -274,200.0 -275,200.0 -276,200.0 -277,200.0 -278,200.0 -279,200.0 -280,200.0 -281,200.0 -282,200.0 -283,200.0 -284,200.0 -285,200.0 -286,200.0 -287,200.0 -288,200.0 -289,199.0 -290,200.0 -291,190.0 -292,179.0 -293,189.0 -294,193.0 -295,200.0 -296,200.0 -297,200.0 -298,195.0 -299,200.0 -300,186.0 -301,175.0 -302,177.0 -303,185.0 -304,167.0 -305,172.0 -306,164.0 -307,146.0 -308,187.0 -309,150.0 -310,146.0 -311,165.0 -312,200.0 -313,200.0 -314,200.0 -315,200.0 -316,200.0 -317,158.0 -318,181.0 -319,174.0 -320,175.0 -321,176.0 -322,170.0 -323,161.0 -324,180.0 -325,200.0 -326,198.0 -327,179.0 -328,192.0 -329,157.0 -330,151.0 -331,198.0 -332,154.0 -333,165.0 -334,200.0 -335,179.0 -336,200.0 -337,191.0 -338,177.0 -339,200.0 -340,171.0 -341,200.0 -342,200.0 -343,200.0 -344,200.0 -345,200.0 -346,200.0 -347,163.0 -348,134.0 -349,200.0 -350,140.0 -351,200.0 -352,200.0 -353,139.0 -354,152.0 -355,136.0 -356,200.0 -357,200.0 -358,173.0 -359,200.0 -360,155.0 -361,134.0 -362,200.0 -363,186.0 -364,142.0 -365,200.0 -366,200.0 -367,128.0 -368,200.0 -369,200.0 -370,200.0 -371,200.0 -372,200.0 -373,200.0 -374,200.0 -375,142.0 -376,162.0 -377,180.0 -378,120.0 -379,190.0 -380,169.0 -381,125.0 -382,189.0 -383,158.0 -384,197.0 -385,200.0 -386,200.0 -387,139.0 -388,158.0 -389,165.0 -390,200.0 -391,200.0 -392,113.0 -393,115.0 -394,117.0 -395,119.0 -396,110.0 -397,119.0 -398,200.0 -399,133.0 diff --git a/CartPole-v1/Train_CartPole-v1_DoubleDQN_ray_20230406-162938/config.yaml b/CartPole-v1/Train_ray_CartPole-v1_DoubleDQN_20230516-115126/config.yaml similarity index 60% rename from CartPole-v1/Train_CartPole-v1_DoubleDQN_ray_20230406-162938/config.yaml rename to CartPole-v1/Train_ray_CartPole-v1_DoubleDQN_20230516-115126/config.yaml index 277b45a4dae8725ccfe44c5cddf3dff9276250d7..99fc7538fe2c241c74fd5d8222d57310fbd9a70a 100644 --- a/CartPole-v1/Train_CartPole-v1_DoubleDQN_ray_20230406-162938/config.yaml +++ b/CartPole-v1/Train_ray_CartPole-v1_DoubleDQN_20230516-115126/config.yaml @@ -1,46 +1,43 @@ general_cfg: algo_name: DoubleDQN + collect_traj: false device: cpu - env_name: CartPole-v1 - eval_eps: 10 - eval_per_episode: 5 + env_name: gym load_checkpoint: false + load_model_step: best load_path: Train_CartPole-v1_DQN_20221026-054757 - max_steps: 200 + max_episode: 100 + max_step: 200 mode: train + model_save_fre: 500 mp_backend: ray n_workers: 2 - new_step_api: true - render: false - render_mode: human - save_fig: true + online_eval: true + online_eval_episode: 10 seed: 1 - show_fig: false - test_eps: 10 - train_eps: 400 - wrapper: null algo_cfg: batch_size: 64 buffer_size: 100000 + buffer_type: REPLAY_QUE epsilon_decay: 500 epsilon_end: 0.01 epsilon_start: 0.95 - gamma: 0.95 + gamma: 0.99 lr: 0.0001 target_update: 4 value_layers: - activation: relu layer_dim: - - n_states - 256 layer_type: linear - activation: relu layer_dim: - 256 - - 256 - layer_type: linear - - activation: none - layer_dim: - - 256 - - n_actions layer_type: linear +env_cfg: + id: CartPole-v1 + ignore_params: + - wrapper + - ignore_params + render_mode: null + wrapper: null diff --git a/CartPole-v1/Train_ray_CartPole-v1_DoubleDQN_20230516-115126/logs/log.txt b/CartPole-v1/Train_ray_CartPole-v1_DoubleDQN_20230516-115126/logs/log.txt new file mode 100644 index 0000000000000000000000000000000000000000..3ca00588e8aa08e930e3bf1954c1ab4e5401fb1b --- /dev/null +++ b/CartPole-v1/Train_ray_CartPole-v1_DoubleDQN_20230516-115126/logs/log.txt @@ -0,0 +1,157 @@ +2023-05-16 11:51:26 - SimpleLog - INFO: - General Configs: +2023-05-16 11:51:26 - SimpleLog - INFO: - ================================================================================ +2023-05-16 11:51:26 - SimpleLog - INFO: - Name Value Type +2023-05-16 11:51:26 - SimpleLog - INFO: - env_name gym +2023-05-16 11:51:26 - SimpleLog - INFO: - algo_name DoubleDQN +2023-05-16 11:51:26 - SimpleLog - INFO: - mode train +2023-05-16 11:51:26 - SimpleLog - INFO: - device cpu +2023-05-16 11:51:26 - SimpleLog - INFO: - seed 1 +2023-05-16 11:51:26 - SimpleLog - INFO: - max_episode 100 +2023-05-16 11:51:26 - SimpleLog - INFO: - max_step 200 +2023-05-16 11:51:26 - SimpleLog - INFO: - collect_traj 0 +2023-05-16 11:51:26 - SimpleLog - INFO: - mp_backend ray +2023-05-16 11:51:26 - SimpleLog - INFO: - n_workers 2 +2023-05-16 11:51:26 - SimpleLog - INFO: - online_eval 1 +2023-05-16 11:51:26 - SimpleLog - INFO: - online_eval_episode 10 +2023-05-16 11:51:26 - SimpleLog - INFO: - model_save_fre 500 +2023-05-16 11:51:26 - SimpleLog - INFO: - load_checkpoint 0 +2023-05-16 11:51:26 - SimpleLog - INFO: - load_path Train_CartPole-v1_DQN_20221026-054757 +2023-05-16 11:51:26 - SimpleLog - INFO: - load_model_step best +2023-05-16 11:51:26 - SimpleLog - INFO: - ================================================================================ +2023-05-16 11:51:26 - SimpleLog - INFO: - Algo Configs: +2023-05-16 11:51:26 - SimpleLog - INFO: - ================================================================================ +2023-05-16 11:51:26 - SimpleLog - INFO: - Name Value Type +2023-05-16 11:51:26 - SimpleLog - INFO: - epsilon_start 0.95 +2023-05-16 11:51:26 - SimpleLog - INFO: - epsilon_end 0.01 +2023-05-16 11:51:26 - SimpleLog - INFO: - epsilon_decay 500 +2023-05-16 11:51:26 - SimpleLog - INFO: - gamma 0.99 +2023-05-16 11:51:26 - SimpleLog - INFO: - lr 0.0001 +2023-05-16 11:51:26 - SimpleLog - INFO: - buffer_size 100000 +2023-05-16 11:51:26 - SimpleLog - INFO: - batch_size 64 +2023-05-16 11:51:26 - SimpleLog - INFO: - target_update 4 +2023-05-16 11:51:26 - SimpleLog - INFO: - value_layers [{'layer_type': 'linear', 'layer_dim': [256], 'activation': 'relu'}, {'layer_type': 'linear', 'layer_dim': [256], 'activation': 'relu'}] +2023-05-16 11:51:26 - SimpleLog - INFO: - buffer_type REPLAY_QUE +2023-05-16 11:51:26 - SimpleLog - INFO: - ================================================================================ +2023-05-16 11:51:26 - SimpleLog - INFO: - Env Configs: +2023-05-16 11:51:26 - SimpleLog - INFO: - ================================================================================ +2023-05-16 11:51:26 - SimpleLog - INFO: - Name Value Type +2023-05-16 11:51:26 - SimpleLog - INFO: - id CartPole-v1 +2023-05-16 11:51:26 - SimpleLog - INFO: - render_mode None +2023-05-16 11:51:26 - SimpleLog - INFO: - wrapper None +2023-05-16 11:51:26 - SimpleLog - INFO: - ignore_params ['wrapper', 'ignore_params'] +2023-05-16 11:51:26 - SimpleLog - INFO: - ================================================================================ +2023-05-16 11:51:32 - SimpleLog - INFO: - obs_space: Box([-4.8000002e+00 -3.4028235e+38 -4.1887903e-01 -3.4028235e+38], [4.8000002e+00 3.4028235e+38 4.1887903e-01 3.4028235e+38], (4,), float32), n_actions: Discrete(2) +2023-05-16 11:51:37 - RayLog - INFO: - Worker 0 finished episode 0 with reward 16.0 in 16 steps +2023-05-16 11:51:37 - RayLog - INFO: - Worker 1 finished episode 0 with reward 20.0 in 20 steps +2023-05-16 11:51:39 - RayLog - INFO: - Worker 0 finished episode 2 with reward 11.0 in 11 steps +2023-05-16 11:51:39 - RayLog - INFO: - Worker 1 finished episode 2 with reward 15.0 in 15 steps +2023-05-16 11:51:39 - RayLog - INFO: - Worker 1 finished episode 4 with reward 13.0 in 13 steps +2023-05-16 11:51:39 - RayLog - INFO: - Worker 0 finished episode 3 with reward 22.0 in 22 steps +2023-05-16 11:51:39 - RayLog - INFO: - Worker 1 finished episode 5 with reward 9.0 in 9 steps +2023-05-16 11:51:40 - RayLog - INFO: - Worker 0 finished episode 6 with reward 14.0 in 14 steps +2023-05-16 11:51:40 - RayLog - INFO: - Worker 1 finished episode 7 with reward 12.0 in 12 steps +2023-05-16 11:51:40 - RayLog - INFO: - Worker 1 finished episode 9 with reward 13.0 in 13 steps +2023-05-16 11:51:40 - RayLog - INFO: - Worker 1 finished episode 10 with reward 14.0 in 14 steps +2023-05-16 11:51:40 - RayLog - INFO: - Worker 0 finished episode 8 with reward 35.0 in 35 steps +2023-05-16 11:51:40 - RayLog - INFO: - Worker 1 finished episode 11 with reward 13.0 in 13 steps +2023-05-16 11:51:40 - RayLog - INFO: - Worker 0 finished episode 12 with reward 15.0 in 15 steps +2023-05-16 11:51:40 - RayLog - INFO: - Worker 1 finished episode 13 with reward 12.0 in 12 steps +2023-05-16 11:51:41 - RayLog - INFO: - Worker 0 finished episode 14 with reward 10.0 in 10 steps +2023-05-16 11:51:41 - RayLog - INFO: - Worker 1 finished episode 15 with reward 11.0 in 11 steps +2023-05-16 11:51:41 - RayLog - INFO: - Worker 0 finished episode 16 with reward 11.0 in 11 steps +2023-05-16 11:51:41 - RayLog - INFO: - Worker 0 finished episode 18 with reward 12.0 in 12 steps +2023-05-16 11:51:41 - RayLog - INFO: - Worker 1 finished episode 17 with reward 21.0 in 21 steps +2023-05-16 11:51:41 - RayLog - INFO: - Worker 0 finished episode 19 with reward 15.0 in 15 steps +2023-05-16 11:51:41 - RayLog - INFO: - Worker 1 finished episode 20 with reward 18.0 in 18 steps +2023-05-16 11:51:41 - RayLog - INFO: - Worker 0 finished episode 21 with reward 13.0 in 13 steps +2023-05-16 11:51:42 - RayLog - INFO: - Worker 1 finished episode 22 with reward 10.0 in 10 steps +2023-05-16 11:51:42 - RayLog - INFO: - Worker 0 finished episode 23 with reward 16.0 in 16 steps +2023-05-16 11:51:42 - RayLog - INFO: - Worker 1 finished episode 24 with reward 19.0 in 19 steps +2023-05-16 11:51:42 - RayLog - INFO: - Worker 0 finished episode 25 with reward 10.0 in 10 steps +2023-05-16 11:51:42 - RayLog - INFO: - Worker 0 finished episode 27 with reward 10.0 in 10 steps +2023-05-16 11:51:42 - RayLog - INFO: - Worker 0 finished episode 28 with reward 10.0 in 10 steps +2023-05-16 11:51:42 - RayLog - INFO: - Worker 1 finished episode 26 with reward 26.0 in 26 steps +2023-05-16 11:51:42 - RayLog - INFO: - Worker 0 finished episode 29 with reward 12.0 in 12 steps +2023-05-16 11:51:43 - RayLog - INFO: - Worker 1 finished episode 30 with reward 15.0 in 15 steps +2023-05-16 11:51:43 - RayLog - INFO: - Worker 1 finished episode 32 with reward 10.0 in 10 steps +2023-05-16 11:51:43 - RayLog - INFO: - Worker 0 finished episode 31 with reward 18.0 in 18 steps +2023-05-16 11:51:43 - RayLog - INFO: - Worker 1 finished episode 33 with reward 13.0 in 13 steps +2023-05-16 11:51:43 - RayLog - INFO: - Worker 0 finished episode 34 with reward 10.0 in 10 steps +2023-05-16 11:51:43 - RayLog - INFO: - Worker 0 finished episode 36 with reward 9.0 in 9 steps +2023-05-16 11:51:43 - RayLog - INFO: - Worker 1 finished episode 35 with reward 12.0 in 12 steps +2023-05-16 11:51:45 - RayLog - INFO: - update_step: 500, online_eval_reward: 9.000 +2023-05-16 11:51:45 - RayLog - INFO: - current update step obtain a better online_eval_reward: 9.000, save the best model! +2023-05-16 11:51:45 - RayLog - INFO: - Worker 1 finished episode 38 with reward 10.0 in 10 steps +2023-05-16 11:51:45 - RayLog - INFO: - Worker 0 finished episode 37 with reward 13.0 in 13 steps +2023-05-16 11:51:45 - RayLog - INFO: - Worker 0 finished episode 40 with reward 9.0 in 9 steps +2023-05-16 11:51:45 - RayLog - INFO: - Worker 1 finished episode 39 with reward 14.0 in 14 steps +2023-05-16 11:51:45 - RayLog - INFO: - Worker 0 finished episode 41 with reward 14.0 in 14 steps +2023-05-16 11:51:45 - RayLog - INFO: - Worker 1 finished episode 42 with reward 20.0 in 20 steps +2023-05-16 11:51:45 - RayLog - INFO: - Worker 0 finished episode 43 with reward 15.0 in 15 steps +2023-05-16 11:51:46 - RayLog - INFO: - Worker 1 finished episode 44 with reward 19.0 in 19 steps +2023-05-16 11:51:46 - RayLog - INFO: - Worker 0 finished episode 45 with reward 17.0 in 17 steps +2023-05-16 11:51:46 - RayLog - INFO: - Worker 0 finished episode 47 with reward 12.0 in 12 steps +2023-05-16 11:51:46 - RayLog - INFO: - Worker 1 finished episode 46 with reward 15.0 in 15 steps +2023-05-16 11:51:46 - RayLog - INFO: - Worker 0 finished episode 48 with reward 14.0 in 14 steps +2023-05-16 11:51:46 - RayLog - INFO: - Worker 1 finished episode 49 with reward 16.0 in 16 steps +2023-05-16 11:51:46 - RayLog - INFO: - Worker 0 finished episode 50 with reward 9.0 in 9 steps +2023-05-16 11:51:46 - RayLog - INFO: - Worker 1 finished episode 51 with reward 13.0 in 13 steps +2023-05-16 11:51:46 - RayLog - INFO: - Worker 0 finished episode 52 with reward 10.0 in 10 steps +2023-05-16 11:51:47 - RayLog - INFO: - Worker 1 finished episode 53 with reward 13.0 in 13 steps +2023-05-16 11:51:47 - RayLog - INFO: - Worker 0 finished episode 54 with reward 13.0 in 13 steps +2023-05-16 11:51:47 - RayLog - INFO: - Worker 0 finished episode 56 with reward 14.0 in 14 steps +2023-05-16 11:51:47 - RayLog - INFO: - Worker 1 finished episode 55 with reward 20.0 in 20 steps +2023-05-16 11:51:47 - RayLog - INFO: - Worker 0 finished episode 57 with reward 14.0 in 14 steps +2023-05-16 11:51:47 - RayLog - INFO: - Worker 1 finished episode 58 with reward 16.0 in 16 steps +2023-05-16 11:51:47 - RayLog - INFO: - Worker 0 finished episode 59 with reward 11.0 in 11 steps +2023-05-16 11:51:48 - RayLog - INFO: - Worker 1 finished episode 60 with reward 14.0 in 14 steps +2023-05-16 11:51:48 - RayLog - INFO: - Worker 0 finished episode 61 with reward 9.0 in 9 steps +2023-05-16 11:51:48 - RayLog - INFO: - Worker 0 finished episode 63 with reward 19.0 in 19 steps +2023-05-16 11:51:48 - RayLog - INFO: - Worker 1 finished episode 62 with reward 23.0 in 23 steps +2023-05-16 11:51:48 - RayLog - INFO: - Worker 0 finished episode 64 with reward 18.0 in 18 steps +2023-05-16 11:51:49 - RayLog - INFO: - Worker 1 finished episode 65 with reward 26.0 in 26 steps +2023-05-16 11:51:49 - RayLog - INFO: - update_step: 1000, online_eval_reward: 69.000 +2023-05-16 11:51:49 - RayLog - INFO: - current update step obtain a better online_eval_reward: 69.000, save the best model! +2023-05-16 11:51:50 - RayLog - INFO: - Worker 1 finished episode 67 with reward 93.0 in 93 steps +2023-05-16 11:51:51 - RayLog - INFO: - Worker 0 finished episode 66 with reward 127.0 in 127 steps +2023-05-16 11:51:51 - RayLog - INFO: - Worker 1 finished episode 68 with reward 40.0 in 40 steps +2023-05-16 11:51:52 - RayLog - INFO: - Worker 0 finished episode 69 with reward 54.0 in 54 steps +2023-05-16 11:51:52 - RayLog - INFO: - Worker 1 finished episode 70 with reward 48.0 in 48 steps +2023-05-16 11:51:53 - RayLog - INFO: - Worker 0 finished episode 71 with reward 62.0 in 62 steps +2023-05-16 11:51:53 - RayLog - INFO: - Worker 1 finished episode 72 with reward 60.0 in 60 steps +2023-05-16 11:51:54 - RayLog - INFO: - Worker 1 finished episode 74 with reward 35.0 in 35 steps +2023-05-16 11:51:54 - RayLog - INFO: - Worker 0 finished episode 73 with reward 47.0 in 47 steps +2023-05-16 11:51:54 - RayLog - INFO: - update_step: 1500, online_eval_reward: 63.000 +2023-05-16 11:51:54 - RayLog - INFO: - Worker 1 finished episode 75 with reward 38.0 in 38 steps +2023-05-16 11:51:54 - RayLog - INFO: - Worker 0 finished episode 76 with reward 46.0 in 46 steps +2023-05-16 11:51:55 - RayLog - INFO: - Worker 1 finished episode 77 with reward 40.0 in 40 steps +2023-05-16 11:51:55 - RayLog - INFO: - Worker 0 finished episode 78 with reward 57.0 in 57 steps +2023-05-16 11:51:56 - RayLog - INFO: - Worker 1 finished episode 79 with reward 38.0 in 38 steps +2023-05-16 11:51:56 - RayLog - INFO: - Worker 1 finished episode 81 with reward 33.0 in 33 steps +2023-05-16 11:51:56 - RayLog - INFO: - Worker 0 finished episode 80 with reward 51.0 in 51 steps +2023-05-16 11:51:57 - RayLog - INFO: - Worker 1 finished episode 82 with reward 44.0 in 44 steps +2023-05-16 11:51:58 - RayLog - INFO: - Worker 0 finished episode 83 with reward 70.0 in 70 steps +2023-05-16 11:51:58 - RayLog - INFO: - Worker 1 finished episode 84 with reward 55.0 in 55 steps +2023-05-16 11:51:58 - RayLog - INFO: - update_step: 2000, online_eval_reward: 82.000 +2023-05-16 11:51:58 - RayLog - INFO: - current update step obtain a better online_eval_reward: 82.000, save the best model! +2023-05-16 11:51:59 - RayLog - INFO: - Worker 0 finished episode 85 with reward 66.0 in 66 steps +2023-05-16 11:51:59 - RayLog - INFO: - Worker 1 finished episode 86 with reward 56.0 in 56 steps +2023-05-16 11:52:00 - RayLog - INFO: - Worker 1 finished episode 88 with reward 45.0 in 45 steps +2023-05-16 11:52:00 - RayLog - INFO: - Worker 0 finished episode 87 with reward 68.0 in 68 steps +2023-05-16 11:52:01 - RayLog - INFO: - Worker 1 finished episode 89 with reward 50.0 in 50 steps +2023-05-16 11:52:02 - RayLog - INFO: - Worker 0 finished episode 90 with reward 79.0 in 79 steps +2023-05-16 11:52:02 - RayLog - INFO: - Worker 1 finished episode 91 with reward 57.0 in 57 steps +2023-05-16 11:52:04 - RayLog - INFO: - update_step: 2500, online_eval_reward: 77.000 +2023-05-16 11:52:04 - RayLog - INFO: - Worker 1 finished episode 93 with reward 66.0 in 66 steps +2023-05-16 11:52:04 - RayLog - INFO: - Worker 0 finished episode 92 with reward 84.0 in 84 steps +2023-05-16 11:52:05 - RayLog - INFO: - Worker 1 finished episode 94 with reward 56.0 in 56 steps +2023-05-16 11:52:07 - RayLog - INFO: - Worker 0 finished episode 95 with reward 134.0 in 134 steps +2023-05-16 11:52:08 - RayLog - INFO: - Worker 1 finished episode 96 with reward 115.0 in 115 steps +2023-05-16 11:52:10 - RayLog - INFO: - update_step: 3000, online_eval_reward: 200.000 +2023-05-16 11:52:10 - RayLog - INFO: - current update step obtain a better online_eval_reward: 200.000, save the best model! +2023-05-16 11:52:12 - RayLog - INFO: - Worker 0 finished episode 97 with reward 200.0 in 200 steps +2023-05-16 11:52:13 - RayLog - INFO: - Worker 1 finished episode 98 with reward 200.0 in 200 steps +2023-05-16 11:52:15 - RayLog - INFO: - update_step: 3500, online_eval_reward: 200.000 +2023-05-16 11:52:16 - RayLog - INFO: - Worker 0 finished episode 99 with reward 200.0 in 200 steps +2023-05-16 11:52:17 - RayLog - INFO: - Worker 1 finished episode 100 with reward 200.0 in 200 steps +2023-05-16 11:52:19 - SimpleLog - INFO: - Finish training! total time consumed: 53.70s diff --git a/CartPole-v1/Train_ray_CartPole-v1_DoubleDQN_20230516-115126/models/1000 b/CartPole-v1/Train_ray_CartPole-v1_DoubleDQN_20230516-115126/models/1000 new file mode 100644 index 0000000000000000000000000000000000000000..d49d88ae4d331590fdbe3c21974fb24bf2cec08a Binary files /dev/null and b/CartPole-v1/Train_ray_CartPole-v1_DoubleDQN_20230516-115126/models/1000 differ diff --git a/CartPole-v1/Train_ray_CartPole-v1_DoubleDQN_20230516-115126/models/1500 b/CartPole-v1/Train_ray_CartPole-v1_DoubleDQN_20230516-115126/models/1500 new file mode 100644 index 0000000000000000000000000000000000000000..4c92a8780571c813b97bafe04f4d0535aa13e4d6 Binary files /dev/null and b/CartPole-v1/Train_ray_CartPole-v1_DoubleDQN_20230516-115126/models/1500 differ diff --git a/CartPole-v1/Train_ray_CartPole-v1_DoubleDQN_20230516-115126/models/2000 b/CartPole-v1/Train_ray_CartPole-v1_DoubleDQN_20230516-115126/models/2000 new file mode 100644 index 0000000000000000000000000000000000000000..63837b8e5e818b3784c8516027c26d7ada378291 Binary files /dev/null and b/CartPole-v1/Train_ray_CartPole-v1_DoubleDQN_20230516-115126/models/2000 differ diff --git a/CartPole-v1/Train_ray_CartPole-v1_DoubleDQN_20230516-115126/models/2500 b/CartPole-v1/Train_ray_CartPole-v1_DoubleDQN_20230516-115126/models/2500 new file mode 100644 index 0000000000000000000000000000000000000000..383360f27840ec420fff60c1ec8edc7b95c84ee9 Binary files /dev/null and b/CartPole-v1/Train_ray_CartPole-v1_DoubleDQN_20230516-115126/models/2500 differ diff --git a/CartPole-v1/Train_ray_CartPole-v1_DoubleDQN_20230516-115126/models/3000 b/CartPole-v1/Train_ray_CartPole-v1_DoubleDQN_20230516-115126/models/3000 new file mode 100644 index 0000000000000000000000000000000000000000..63bd22592cbdc053466578464d3157acc95093a3 Binary files /dev/null and b/CartPole-v1/Train_ray_CartPole-v1_DoubleDQN_20230516-115126/models/3000 differ diff --git a/CartPole-v1/Train_ray_CartPole-v1_DoubleDQN_20230516-115126/models/3500 b/CartPole-v1/Train_ray_CartPole-v1_DoubleDQN_20230516-115126/models/3500 new file mode 100644 index 0000000000000000000000000000000000000000..7a2638174ae33fd243e31bf237a1fb547db256ed Binary files /dev/null and b/CartPole-v1/Train_ray_CartPole-v1_DoubleDQN_20230516-115126/models/3500 differ diff --git a/CartPole-v1/Train_ray_CartPole-v1_DoubleDQN_20230516-115126/models/500 b/CartPole-v1/Train_ray_CartPole-v1_DoubleDQN_20230516-115126/models/500 new file mode 100644 index 0000000000000000000000000000000000000000..bb53c201e020ea92abc3d88ce97833ac56c68126 Binary files /dev/null and b/CartPole-v1/Train_ray_CartPole-v1_DoubleDQN_20230516-115126/models/500 differ diff --git a/CartPole-v1/Train_ray_CartPole-v1_DoubleDQN_20230516-115126/models/best b/CartPole-v1/Train_ray_CartPole-v1_DoubleDQN_20230516-115126/models/best new file mode 100644 index 0000000000000000000000000000000000000000..63bd22592cbdc053466578464d3157acc95093a3 Binary files /dev/null and b/CartPole-v1/Train_ray_CartPole-v1_DoubleDQN_20230516-115126/models/best differ diff --git a/CartPole-v1/Train_ray_CartPole-v1_DoubleDQN_20230516-115126/tb_logs/interact/events.out.tfevents.1684209086.JMac.local.52110.0 b/CartPole-v1/Train_ray_CartPole-v1_DoubleDQN_20230516-115126/tb_logs/interact/events.out.tfevents.1684209086.JMac.local.52110.0 new file mode 100644 index 0000000000000000000000000000000000000000..2131c7778eaeb069712a64836dbf8a0ee9d1c9b7 --- /dev/null +++ b/CartPole-v1/Train_ray_CartPole-v1_DoubleDQN_20230516-115126/tb_logs/interact/events.out.tfevents.1684209086.JMac.local.52110.0 @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:be6be5f2b8ae4530630b850e07d2bbd6010678cb75c3d3050606cdfa0e1f6acd +size 40 diff --git a/CartPole-v1/Train_ray_CartPole-v1_DoubleDQN_20230516-115126/tb_logs/interact/events.out.tfevents.1684209096.JMac.local.52161.0 b/CartPole-v1/Train_ray_CartPole-v1_DoubleDQN_20230516-115126/tb_logs/interact/events.out.tfevents.1684209096.JMac.local.52161.0 new file mode 100644 index 0000000000000000000000000000000000000000..664caca63294e36a2afce2b800e70c42397a6aa0 --- /dev/null +++ b/CartPole-v1/Train_ray_CartPole-v1_DoubleDQN_20230516-115126/tb_logs/interact/events.out.tfevents.1684209096.JMac.local.52161.0 @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4aa028324617b734607430bc18aa93daae8536fcfea762ed7cdd92c65a472dd0 +size 10028 diff --git a/CartPole-v1/Train_ray_CartPole-v1_DoubleDQN_20230516-115126/tb_logs/model/events.out.tfevents.1684209086.JMac.local.52110.1 b/CartPole-v1/Train_ray_CartPole-v1_DoubleDQN_20230516-115126/tb_logs/model/events.out.tfevents.1684209086.JMac.local.52110.1 new file mode 100644 index 0000000000000000000000000000000000000000..6b96310beaabfcd803a0aa203f5a1fb464684768 --- /dev/null +++ b/CartPole-v1/Train_ray_CartPole-v1_DoubleDQN_20230516-115126/tb_logs/model/events.out.tfevents.1684209086.JMac.local.52110.1 @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:37344be608143375d6347aff7b1395cef1e5b52479a11b5faf17e1f631046d1d +size 40 diff --git a/CartPole-v1/Train_ray_CartPole-v1_DoubleDQN_20230516-115126/tb_logs/model/events.out.tfevents.1684209096.JMac.local.52161.1 b/CartPole-v1/Train_ray_CartPole-v1_DoubleDQN_20230516-115126/tb_logs/model/events.out.tfevents.1684209096.JMac.local.52161.1 new file mode 100644 index 0000000000000000000000000000000000000000..0fe573ed3bbada497d8e24a4bbb1767110fd2a4f --- /dev/null +++ b/CartPole-v1/Train_ray_CartPole-v1_DoubleDQN_20230516-115126/tb_logs/model/events.out.tfevents.1684209096.JMac.local.52161.1 @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1c34b8e5d11e95894f9f70682c415e3475f765542af5aa31b961c6d0d8e11923 +size 177587 diff --git a/CartPole-v1/Train_CartPole-v1_DoubleDQN_mp_20230406-160028/config.yaml b/CartPole-v1/Train_single_CartPole-v1_DoubleDQN_20230516-114540/config.yaml similarity index 58% rename from CartPole-v1/Train_CartPole-v1_DoubleDQN_mp_20230406-160028/config.yaml rename to CartPole-v1/Train_single_CartPole-v1_DoubleDQN_20230516-114540/config.yaml index faa408b92309eec46d4404df33100e4a39696f7b..d04c425b9f652de9e273cbf2cca7c72a15eedd88 100644 --- a/CartPole-v1/Train_CartPole-v1_DoubleDQN_mp_20230406-160028/config.yaml +++ b/CartPole-v1/Train_single_CartPole-v1_DoubleDQN_20230516-114540/config.yaml @@ -1,46 +1,43 @@ general_cfg: algo_name: DoubleDQN + collect_traj: false device: cpu - env_name: CartPole-v1 - eval_eps: 10 - eval_per_episode: 5 + env_name: gym load_checkpoint: false + load_model_step: best load_path: Train_CartPole-v1_DQN_20221026-054757 - max_steps: 200 + max_episode: 100 + max_step: 200 mode: train - mp_backend: mp + model_save_fre: 500 + mp_backend: single n_workers: 2 - new_step_api: true - render: false - render_mode: human - save_fig: true + online_eval: true + online_eval_episode: 10 seed: 1 - show_fig: false - test_eps: 10 - train_eps: 400 - wrapper: null algo_cfg: batch_size: 64 buffer_size: 100000 + buffer_type: REPLAY_QUE epsilon_decay: 500 epsilon_end: 0.01 epsilon_start: 0.95 - gamma: 0.95 + gamma: 0.99 lr: 0.0001 target_update: 4 value_layers: - activation: relu layer_dim: - - n_states - 256 layer_type: linear - activation: relu layer_dim: - 256 - - 256 - layer_type: linear - - activation: none - layer_dim: - - 256 - - n_actions layer_type: linear +env_cfg: + id: CartPole-v1 + ignore_params: + - wrapper + - ignore_params + render_mode: null + wrapper: null diff --git a/CartPole-v1/Train_single_CartPole-v1_DoubleDQN_20230516-114540/logs/log.txt b/CartPole-v1/Train_single_CartPole-v1_DoubleDQN_20230516-114540/logs/log.txt new file mode 100644 index 0000000000000000000000000000000000000000..83796be5db8b23235533aeef287ef90bd5680c09 --- /dev/null +++ b/CartPole-v1/Train_single_CartPole-v1_DoubleDQN_20230516-114540/logs/log.txt @@ -0,0 +1,162 @@ +2023-05-16 11:45:40 - SimpleLog - INFO: - General Configs: +2023-05-16 11:45:40 - SimpleLog - INFO: - ================================================================================ +2023-05-16 11:45:40 - SimpleLog - INFO: - Name Value Type +2023-05-16 11:45:40 - SimpleLog - INFO: - env_name gym +2023-05-16 11:45:40 - SimpleLog - INFO: - algo_name DoubleDQN +2023-05-16 11:45:40 - SimpleLog - INFO: - mode train +2023-05-16 11:45:40 - SimpleLog - INFO: - device cpu +2023-05-16 11:45:40 - SimpleLog - INFO: - seed 1 +2023-05-16 11:45:40 - SimpleLog - INFO: - max_episode 100 +2023-05-16 11:45:40 - SimpleLog - INFO: - max_step 200 +2023-05-16 11:45:40 - SimpleLog - INFO: - collect_traj 0 +2023-05-16 11:45:40 - SimpleLog - INFO: - mp_backend single +2023-05-16 11:45:40 - SimpleLog - INFO: - n_workers 2 +2023-05-16 11:45:40 - SimpleLog - INFO: - online_eval 1 +2023-05-16 11:45:40 - SimpleLog - INFO: - online_eval_episode 10 +2023-05-16 11:45:40 - SimpleLog - INFO: - model_save_fre 500 +2023-05-16 11:45:40 - SimpleLog - INFO: - load_checkpoint 0 +2023-05-16 11:45:40 - SimpleLog - INFO: - load_path Train_CartPole-v1_DQN_20221026-054757 +2023-05-16 11:45:40 - SimpleLog - INFO: - load_model_step best +2023-05-16 11:45:40 - SimpleLog - INFO: - ================================================================================ +2023-05-16 11:45:40 - SimpleLog - INFO: - Algo Configs: +2023-05-16 11:45:40 - SimpleLog - INFO: - ================================================================================ +2023-05-16 11:45:40 - SimpleLog - INFO: - Name Value Type +2023-05-16 11:45:40 - SimpleLog - INFO: - epsilon_start 0.95 +2023-05-16 11:45:40 - SimpleLog - INFO: - epsilon_end 0.01 +2023-05-16 11:45:40 - SimpleLog - INFO: - epsilon_decay 500 +2023-05-16 11:45:40 - SimpleLog - INFO: - gamma 0.99 +2023-05-16 11:45:40 - SimpleLog - INFO: - lr 0.0001 +2023-05-16 11:45:40 - SimpleLog - INFO: - buffer_size 100000 +2023-05-16 11:45:40 - SimpleLog - INFO: - batch_size 64 +2023-05-16 11:45:40 - SimpleLog - INFO: - target_update 4 +2023-05-16 11:45:40 - SimpleLog - INFO: - value_layers [{'layer_type': 'linear', 'layer_dim': [256], 'activation': 'relu'}, {'layer_type': 'linear', 'layer_dim': [256], 'activation': 'relu'}] +2023-05-16 11:45:40 - SimpleLog - INFO: - buffer_type REPLAY_QUE +2023-05-16 11:45:40 - SimpleLog - INFO: - ================================================================================ +2023-05-16 11:45:40 - SimpleLog - INFO: - Env Configs: +2023-05-16 11:45:40 - SimpleLog - INFO: - ================================================================================ +2023-05-16 11:45:40 - SimpleLog - INFO: - Name Value Type +2023-05-16 11:45:40 - SimpleLog - INFO: - id CartPole-v1 +2023-05-16 11:45:40 - SimpleLog - INFO: - render_mode None +2023-05-16 11:45:40 - SimpleLog - INFO: - wrapper None +2023-05-16 11:45:40 - SimpleLog - INFO: - ignore_params ['wrapper', 'ignore_params'] +2023-05-16 11:45:40 - SimpleLog - INFO: - ================================================================================ +2023-05-16 11:45:40 - SimpleLog - INFO: - obs_space: Box([-4.8000002e+00 -3.4028235e+38 -4.1887903e-01 -3.4028235e+38], [4.8000002e+00 3.4028235e+38 4.1887903e-01 3.4028235e+38], (4,), float32), n_actions: Discrete(2) +2023-05-16 11:45:40 - SimpleLog - INFO: - Start training! +2023-05-16 11:45:40 - SimpleLog - INFO: - episode: 0, ep_reward: 25.0, ep_step: 25 +2023-05-16 11:45:40 - SimpleLog - INFO: - episode: 1, ep_reward: 17.0, ep_step: 17 +2023-05-16 11:45:40 - SimpleLog - INFO: - episode: 2, ep_reward: 19.0, ep_step: 19 +2023-05-16 11:45:40 - SimpleLog - INFO: - episode: 3, ep_reward: 14.0, ep_step: 14 +2023-05-16 11:45:40 - SimpleLog - INFO: - episode: 4, ep_reward: 14.0, ep_step: 14 +2023-05-16 11:45:40 - SimpleLog - INFO: - episode: 5, ep_reward: 21.0, ep_step: 21 +2023-05-16 11:45:40 - SimpleLog - INFO: - episode: 6, ep_reward: 22.0, ep_step: 22 +2023-05-16 11:45:40 - SimpleLog - INFO: - episode: 7, ep_reward: 13.0, ep_step: 13 +2023-05-16 11:45:40 - SimpleLog - INFO: - episode: 8, ep_reward: 27.0, ep_step: 27 +2023-05-16 11:45:40 - SimpleLog - INFO: - episode: 9, ep_reward: 11.0, ep_step: 11 +2023-05-16 11:45:40 - SimpleLog - INFO: - episode: 10, ep_reward: 14.0, ep_step: 14 +2023-05-16 11:45:40 - SimpleLog - INFO: - episode: 11, ep_reward: 24.0, ep_step: 24 +2023-05-16 11:45:40 - SimpleLog - INFO: - episode: 12, ep_reward: 23.0, ep_step: 23 +2023-05-16 11:45:40 - SimpleLog - INFO: - episode: 13, ep_reward: 12.0, ep_step: 12 +2023-05-16 11:45:40 - SimpleLog - INFO: - episode: 14, ep_reward: 12.0, ep_step: 12 +2023-05-16 11:45:40 - SimpleLog - INFO: - episode: 15, ep_reward: 13.0, ep_step: 13 +2023-05-16 11:45:40 - SimpleLog - INFO: - episode: 16, ep_reward: 11.0, ep_step: 11 +2023-05-16 11:45:40 - SimpleLog - INFO: - episode: 17, ep_reward: 15.0, ep_step: 15 +2023-05-16 11:45:40 - SimpleLog - INFO: - episode: 18, ep_reward: 12.0, ep_step: 12 +2023-05-16 11:45:41 - SimpleLog - INFO: - episode: 19, ep_reward: 27.0, ep_step: 27 +2023-05-16 11:45:41 - SimpleLog - INFO: - episode: 20, ep_reward: 14.0, ep_step: 14 +2023-05-16 11:45:41 - SimpleLog - INFO: - episode: 21, ep_reward: 19.0, ep_step: 19 +2023-05-16 11:45:41 - SimpleLog - INFO: - episode: 22, ep_reward: 10.0, ep_step: 10 +2023-05-16 11:45:41 - SimpleLog - INFO: - episode: 23, ep_reward: 10.0, ep_step: 10 +2023-05-16 11:45:41 - SimpleLog - INFO: - episode: 24, ep_reward: 15.0, ep_step: 15 +2023-05-16 11:45:41 - SimpleLog - INFO: - episode: 25, ep_reward: 15.0, ep_step: 15 +2023-05-16 11:45:41 - SimpleLog - INFO: - episode: 26, ep_reward: 14.0, ep_step: 14 +2023-05-16 11:45:41 - SimpleLog - INFO: - episode: 27, ep_reward: 11.0, ep_step: 11 +2023-05-16 11:45:41 - SimpleLog - INFO: - episode: 28, ep_reward: 10.0, ep_step: 10 +2023-05-16 11:45:41 - SimpleLog - INFO: - episode: 29, ep_reward: 12.0, ep_step: 12 +2023-05-16 11:45:41 - SimpleLog - INFO: - episode: 30, ep_reward: 10.0, ep_step: 10 +2023-05-16 11:45:41 - SimpleLog - INFO: - episode: 31, ep_reward: 9.0, ep_step: 9 +2023-05-16 11:45:41 - SimpleLog - INFO: - episode: 32, ep_reward: 11.0, ep_step: 11 +2023-05-16 11:45:41 - SimpleLog - INFO: - episode: 33, ep_reward: 9.0, ep_step: 9 +2023-05-16 11:45:41 - SimpleLog - INFO: - episode: 34, ep_reward: 13.0, ep_step: 13 +2023-05-16 11:45:41 - SimpleLog - INFO: - episode: 35, ep_reward: 10.0, ep_step: 10 +2023-05-16 11:45:41 - SimpleLog - INFO: - episode: 36, ep_reward: 9.0, ep_step: 9 +2023-05-16 11:45:41 - SimpleLog - INFO: - episode: 37, ep_reward: 10.0, ep_step: 10 +2023-05-16 11:45:41 - SimpleLog - INFO: - update_step: 500, online_eval_reward: 9.000 +2023-05-16 11:45:41 - SimpleLog - INFO: - current update step obtain a better online_eval_reward: 9.000, save the best model! +2023-05-16 11:45:41 - SimpleLog - INFO: - episode: 38, ep_reward: 14.0, ep_step: 14 +2023-05-16 11:45:41 - SimpleLog - INFO: - episode: 39, ep_reward: 11.0, ep_step: 11 +2023-05-16 11:45:41 - SimpleLog - INFO: - episode: 40, ep_reward: 9.0, ep_step: 9 +2023-05-16 11:45:41 - SimpleLog - INFO: - episode: 41, ep_reward: 9.0, ep_step: 9 +2023-05-16 11:45:41 - SimpleLog - INFO: - episode: 42, ep_reward: 9.0, ep_step: 9 +2023-05-16 11:45:42 - SimpleLog - INFO: - episode: 43, ep_reward: 11.0, ep_step: 11 +2023-05-16 11:45:42 - SimpleLog - INFO: - episode: 44, ep_reward: 21.0, ep_step: 21 +2023-05-16 11:45:42 - SimpleLog - INFO: - episode: 45, ep_reward: 13.0, ep_step: 13 +2023-05-16 11:45:42 - SimpleLog - INFO: - episode: 46, ep_reward: 12.0, ep_step: 12 +2023-05-16 11:45:42 - SimpleLog - INFO: - episode: 47, ep_reward: 30.0, ep_step: 30 +2023-05-16 11:45:42 - SimpleLog - INFO: - episode: 48, ep_reward: 20.0, ep_step: 20 +2023-05-16 11:45:42 - SimpleLog - INFO: - episode: 49, ep_reward: 28.0, ep_step: 28 +2023-05-16 11:45:42 - SimpleLog - INFO: - episode: 50, ep_reward: 22.0, ep_step: 22 +2023-05-16 11:45:42 - SimpleLog - INFO: - episode: 51, ep_reward: 20.0, ep_step: 20 +2023-05-16 11:45:42 - SimpleLog - INFO: - episode: 52, ep_reward: 26.0, ep_step: 26 +2023-05-16 11:45:42 - SimpleLog - INFO: - episode: 53, ep_reward: 24.0, ep_step: 24 +2023-05-16 11:45:42 - SimpleLog - INFO: - episode: 54, ep_reward: 30.0, ep_step: 30 +2023-05-16 11:45:42 - SimpleLog - INFO: - episode: 55, ep_reward: 26.0, ep_step: 26 +2023-05-16 11:45:43 - SimpleLog - INFO: - episode: 56, ep_reward: 41.0, ep_step: 41 +2023-05-16 11:45:43 - SimpleLog - INFO: - episode: 57, ep_reward: 58.0, ep_step: 58 +2023-05-16 11:45:43 - SimpleLog - INFO: - episode: 58, ep_reward: 59.0, ep_step: 59 +2023-05-16 11:45:43 - SimpleLog - INFO: - update_step: 1000, online_eval_reward: 63.000 +2023-05-16 11:45:43 - SimpleLog - INFO: - current update step obtain a better online_eval_reward: 63.000, save the best model! +2023-05-16 11:45:43 - SimpleLog - INFO: - episode: 59, ep_reward: 58.0, ep_step: 58 +2023-05-16 11:45:43 - SimpleLog - INFO: - episode: 60, ep_reward: 47.0, ep_step: 47 +2023-05-16 11:45:44 - SimpleLog - INFO: - episode: 61, ep_reward: 84.0, ep_step: 84 +2023-05-16 11:45:44 - SimpleLog - INFO: - episode: 62, ep_reward: 44.0, ep_step: 44 +2023-05-16 11:45:44 - SimpleLog - INFO: - episode: 63, ep_reward: 59.0, ep_step: 59 +2023-05-16 11:45:44 - SimpleLog - INFO: - episode: 64, ep_reward: 39.0, ep_step: 39 +2023-05-16 11:45:44 - SimpleLog - INFO: - episode: 65, ep_reward: 53.0, ep_step: 53 +2023-05-16 11:45:44 - SimpleLog - INFO: - episode: 66, ep_reward: 70.0, ep_step: 70 +2023-05-16 11:45:45 - SimpleLog - INFO: - episode: 67, ep_reward: 58.0, ep_step: 58 +2023-05-16 11:45:45 - SimpleLog - INFO: - update_step: 1500, online_eval_reward: 65.000 +2023-05-16 11:45:45 - SimpleLog - INFO: - current update step obtain a better online_eval_reward: 65.000, save the best model! +2023-05-16 11:45:45 - SimpleLog - INFO: - episode: 68, ep_reward: 101.0, ep_step: 101 +2023-05-16 11:45:45 - SimpleLog - INFO: - episode: 69, ep_reward: 52.0, ep_step: 52 +2023-05-16 11:45:45 - SimpleLog - INFO: - episode: 70, ep_reward: 58.0, ep_step: 58 +2023-05-16 11:45:46 - SimpleLog - INFO: - episode: 71, ep_reward: 61.0, ep_step: 61 +2023-05-16 11:45:46 - SimpleLog - INFO: - episode: 72, ep_reward: 91.0, ep_step: 91 +2023-05-16 11:45:46 - SimpleLog - INFO: - episode: 73, ep_reward: 54.0, ep_step: 54 +2023-05-16 11:45:46 - SimpleLog - INFO: - update_step: 2000, online_eval_reward: 65.000 +2023-05-16 11:45:46 - SimpleLog - INFO: - episode: 74, ep_reward: 98.0, ep_step: 98 +2023-05-16 11:45:47 - SimpleLog - INFO: - episode: 75, ep_reward: 67.0, ep_step: 67 +2023-05-16 11:45:47 - SimpleLog - INFO: - episode: 76, ep_reward: 70.0, ep_step: 70 +2023-05-16 11:45:47 - SimpleLog - INFO: - episode: 77, ep_reward: 74.0, ep_step: 74 +2023-05-16 11:45:47 - SimpleLog - INFO: - episode: 78, ep_reward: 72.0, ep_step: 72 +2023-05-16 11:45:48 - SimpleLog - INFO: - episode: 79, ep_reward: 81.0, ep_step: 81 +2023-05-16 11:45:48 - SimpleLog - INFO: - episode: 80, ep_reward: 82.0, ep_step: 82 +2023-05-16 11:45:48 - SimpleLog - INFO: - update_step: 2500, online_eval_reward: 94.000 +2023-05-16 11:45:48 - SimpleLog - INFO: - current update step obtain a better online_eval_reward: 94.000, save the best model! +2023-05-16 11:45:48 - SimpleLog - INFO: - episode: 81, ep_reward: 97.0, ep_step: 97 +2023-05-16 11:45:48 - SimpleLog - INFO: - episode: 82, ep_reward: 89.0, ep_step: 89 +2023-05-16 11:45:49 - SimpleLog - INFO: - episode: 83, ep_reward: 200.0, ep_step: 200 +2023-05-16 11:45:50 - SimpleLog - INFO: - episode: 84, ep_reward: 142.0, ep_step: 142 +2023-05-16 11:45:50 - SimpleLog - INFO: - update_step: 3000, online_eval_reward: 153.000 +2023-05-16 11:45:50 - SimpleLog - INFO: - current update step obtain a better online_eval_reward: 153.000, save the best model! +2023-05-16 11:45:50 - SimpleLog - INFO: - episode: 85, ep_reward: 114.0, ep_step: 114 +2023-05-16 11:45:51 - SimpleLog - INFO: - episode: 86, ep_reward: 162.0, ep_step: 162 +2023-05-16 11:45:51 - SimpleLog - INFO: - episode: 87, ep_reward: 200.0, ep_step: 200 +2023-05-16 11:45:51 - SimpleLog - INFO: - update_step: 3500, online_eval_reward: 200.000 +2023-05-16 11:45:51 - SimpleLog - INFO: - current update step obtain a better online_eval_reward: 200.000, save the best model! +2023-05-16 11:45:52 - SimpleLog - INFO: - episode: 88, ep_reward: 200.0, ep_step: 200 +2023-05-16 11:45:53 - SimpleLog - INFO: - episode: 89, ep_reward: 200.0, ep_step: 200 +2023-05-16 11:45:53 - SimpleLog - INFO: - update_step: 4000, online_eval_reward: 200.000 +2023-05-16 11:45:53 - SimpleLog - INFO: - episode: 90, ep_reward: 200.0, ep_step: 200 +2023-05-16 11:45:54 - SimpleLog - INFO: - episode: 91, ep_reward: 200.0, ep_step: 200 +2023-05-16 11:45:55 - SimpleLog - INFO: - episode: 92, ep_reward: 200.0, ep_step: 200 +2023-05-16 11:45:55 - SimpleLog - INFO: - update_step: 4500, online_eval_reward: 200.000 +2023-05-16 11:45:55 - SimpleLog - INFO: - episode: 93, ep_reward: 200.0, ep_step: 200 +2023-05-16 11:45:56 - SimpleLog - INFO: - episode: 94, ep_reward: 200.0, ep_step: 200 +2023-05-16 11:45:57 - SimpleLog - INFO: - update_step: 5000, online_eval_reward: 200.000 +2023-05-16 11:45:57 - SimpleLog - INFO: - episode: 95, ep_reward: 200.0, ep_step: 200 +2023-05-16 11:45:58 - SimpleLog - INFO: - episode: 96, ep_reward: 200.0, ep_step: 200 +2023-05-16 11:45:58 - SimpleLog - INFO: - episode: 97, ep_reward: 200.0, ep_step: 200 +2023-05-16 11:45:58 - SimpleLog - INFO: - update_step: 5500, online_eval_reward: 200.000 +2023-05-16 11:45:59 - SimpleLog - INFO: - episode: 98, ep_reward: 200.0, ep_step: 200 +2023-05-16 11:46:00 - SimpleLog - INFO: - episode: 99, ep_reward: 200.0, ep_step: 200 +2023-05-16 11:46:00 - SimpleLog - INFO: - Finish training! total time consumed: 20.03s diff --git a/CartPole-v1/Train_single_CartPole-v1_DoubleDQN_20230516-114540/models/1000 b/CartPole-v1/Train_single_CartPole-v1_DoubleDQN_20230516-114540/models/1000 new file mode 100644 index 0000000000000000000000000000000000000000..2c0f693341e7f1765a32a2c9e0a4fc2ba4bce063 Binary files /dev/null and b/CartPole-v1/Train_single_CartPole-v1_DoubleDQN_20230516-114540/models/1000 differ diff --git a/CartPole-v1/Train_single_CartPole-v1_DoubleDQN_20230516-114540/models/1500 b/CartPole-v1/Train_single_CartPole-v1_DoubleDQN_20230516-114540/models/1500 new file mode 100644 index 0000000000000000000000000000000000000000..52ca3c7e027c4f8e267dd07223001714447cda9d Binary files /dev/null and b/CartPole-v1/Train_single_CartPole-v1_DoubleDQN_20230516-114540/models/1500 differ diff --git a/CartPole-v1/Train_single_CartPole-v1_DoubleDQN_20230516-114540/models/2000 b/CartPole-v1/Train_single_CartPole-v1_DoubleDQN_20230516-114540/models/2000 new file mode 100644 index 0000000000000000000000000000000000000000..0711475446e7fd3f1427b0ca13f2c6c2da0e6730 Binary files /dev/null and b/CartPole-v1/Train_single_CartPole-v1_DoubleDQN_20230516-114540/models/2000 differ diff --git a/CartPole-v1/Train_single_CartPole-v1_DoubleDQN_20230516-114540/models/2500 b/CartPole-v1/Train_single_CartPole-v1_DoubleDQN_20230516-114540/models/2500 new file mode 100644 index 0000000000000000000000000000000000000000..4b866dd62b1ed2064cf5159663feabfd7b4ba6db Binary files /dev/null and b/CartPole-v1/Train_single_CartPole-v1_DoubleDQN_20230516-114540/models/2500 differ diff --git a/CartPole-v1/Train_single_CartPole-v1_DoubleDQN_20230516-114540/models/3000 b/CartPole-v1/Train_single_CartPole-v1_DoubleDQN_20230516-114540/models/3000 new file mode 100644 index 0000000000000000000000000000000000000000..547bc3b88a10df24eaf4669b81afc589a01705af Binary files /dev/null and b/CartPole-v1/Train_single_CartPole-v1_DoubleDQN_20230516-114540/models/3000 differ diff --git a/CartPole-v1/Train_single_CartPole-v1_DoubleDQN_20230516-114540/models/3500 b/CartPole-v1/Train_single_CartPole-v1_DoubleDQN_20230516-114540/models/3500 new file mode 100644 index 0000000000000000000000000000000000000000..cc0c5cddda2ad5a4aa464adb984a4c0b53626ace Binary files /dev/null and b/CartPole-v1/Train_single_CartPole-v1_DoubleDQN_20230516-114540/models/3500 differ diff --git a/CartPole-v1/Train_single_CartPole-v1_DoubleDQN_20230516-114540/models/4000 b/CartPole-v1/Train_single_CartPole-v1_DoubleDQN_20230516-114540/models/4000 new file mode 100644 index 0000000000000000000000000000000000000000..5d8a120bc65d688431124489b3daea7924708e80 Binary files /dev/null and b/CartPole-v1/Train_single_CartPole-v1_DoubleDQN_20230516-114540/models/4000 differ diff --git a/CartPole-v1/Train_single_CartPole-v1_DoubleDQN_20230516-114540/models/4500 b/CartPole-v1/Train_single_CartPole-v1_DoubleDQN_20230516-114540/models/4500 new file mode 100644 index 0000000000000000000000000000000000000000..8c91ab8980f18678bef22cd28c3cb7f4ec47d46b Binary files /dev/null and b/CartPole-v1/Train_single_CartPole-v1_DoubleDQN_20230516-114540/models/4500 differ diff --git a/CartPole-v1/Train_single_CartPole-v1_DoubleDQN_20230516-114540/models/500 b/CartPole-v1/Train_single_CartPole-v1_DoubleDQN_20230516-114540/models/500 new file mode 100644 index 0000000000000000000000000000000000000000..47d188cfffa939cbefc6018ec88e67447556e9e2 Binary files /dev/null and b/CartPole-v1/Train_single_CartPole-v1_DoubleDQN_20230516-114540/models/500 differ diff --git a/CartPole-v1/Train_single_CartPole-v1_DoubleDQN_20230516-114540/models/5000 b/CartPole-v1/Train_single_CartPole-v1_DoubleDQN_20230516-114540/models/5000 new file mode 100644 index 0000000000000000000000000000000000000000..bcd92f6fe94bde7429c6f58b14698fedd83543f8 Binary files /dev/null and b/CartPole-v1/Train_single_CartPole-v1_DoubleDQN_20230516-114540/models/5000 differ diff --git a/CartPole-v1/Train_single_CartPole-v1_DoubleDQN_20230516-114540/models/5500 b/CartPole-v1/Train_single_CartPole-v1_DoubleDQN_20230516-114540/models/5500 new file mode 100644 index 0000000000000000000000000000000000000000..1003cf45f042e85c49aa90be3afa26917de5854e Binary files /dev/null and b/CartPole-v1/Train_single_CartPole-v1_DoubleDQN_20230516-114540/models/5500 differ diff --git a/CartPole-v1/Train_single_CartPole-v1_DoubleDQN_20230516-114540/models/best b/CartPole-v1/Train_single_CartPole-v1_DoubleDQN_20230516-114540/models/best new file mode 100644 index 0000000000000000000000000000000000000000..cc0c5cddda2ad5a4aa464adb984a4c0b53626ace Binary files /dev/null and b/CartPole-v1/Train_single_CartPole-v1_DoubleDQN_20230516-114540/models/best differ diff --git a/CartPole-v1/Train_single_CartPole-v1_DoubleDQN_20230516-114540/tb_logs/interact/events.out.tfevents.1684208740.JMac.local.51823.0 b/CartPole-v1/Train_single_CartPole-v1_DoubleDQN_20230516-114540/tb_logs/interact/events.out.tfevents.1684208740.JMac.local.51823.0 new file mode 100644 index 0000000000000000000000000000000000000000..5dee29ded8431bd2181ed22554f8efff64681070 --- /dev/null +++ b/CartPole-v1/Train_single_CartPole-v1_DoubleDQN_20230516-114540/tb_logs/interact/events.out.tfevents.1684208740.JMac.local.51823.0 @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:24f7e483f09561c4736d71ed6c19268e5e5ed200081a72fdb53081997b87da0d +size 10436 diff --git a/CartPole-v1/Train_single_CartPole-v1_DoubleDQN_20230516-114540/tb_logs/model/events.out.tfevents.1684208740.JMac.local.51823.1 b/CartPole-v1/Train_single_CartPole-v1_DoubleDQN_20230516-114540/tb_logs/model/events.out.tfevents.1684208740.JMac.local.51823.1 new file mode 100644 index 0000000000000000000000000000000000000000..fa1d9d28f9a14e3bcff4da80ad17186969c1bd9c --- /dev/null +++ b/CartPole-v1/Train_single_CartPole-v1_DoubleDQN_20230516-114540/tb_logs/model/events.out.tfevents.1684208740.JMac.local.51823.1 @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5f2a23b249131f6170e5c0b3dff3b94a461c20bfd6ac77953f757388dbaa3d7b +size 287249