diff --git a/CartPole-v1/Test_CartPole-v1_DuelingDQN_20221122-125455/config.yaml b/CartPole-v1/Test_CartPole-v1_DuelingDQN_20221122-125455/config.yaml deleted file mode 100644 index 02c74e40f0a75da35a2e7df104d875ce793d3ec2..0000000000000000000000000000000000000000 --- a/CartPole-v1/Test_CartPole-v1_DuelingDQN_20221122-125455/config.yaml +++ /dev/null @@ -1,41 +0,0 @@ -general_cfg: - algo_name: DuelingDQN - device: cuda - env_name: CartPole-v1 - eval_eps: 10 - eval_per_episode: 5 - load_checkpoint: true - load_path: Train_CartPole-v1_DuelingDQN_20221122-125403 - max_steps: 200 - mode: test - save_fig: true - seed: 1 - show_fig: false - test_eps: 10 - train_eps: 100 -algo_cfg: - batch_size: 64 - buffer_size: 100000 - epsilon_decay: 500 - epsilon_end: 0.01 - epsilon_start: 0.95 - gamma: 0.99 - hidden_dim: 256 - lr: 0.0001 - target_update: 4 - value_layers: - - activation: relu - layer_dim: - - n_states - - 256 - layer_type: linear - - activation: relu - layer_dim: - - 256 - - 256 - layer_type: linear - - activation: none - layer_dim: - - 256 - - n_actions - layer_type: linear diff --git a/CartPole-v1/Test_CartPole-v1_DuelingDQN_20221122-125455/logs/log.txt b/CartPole-v1/Test_CartPole-v1_DuelingDQN_20221122-125455/logs/log.txt deleted file mode 100644 index b75371ca5c4aafaa594dc83020cbf19def1cdbe0..0000000000000000000000000000000000000000 --- a/CartPole-v1/Test_CartPole-v1_DuelingDQN_20221122-125455/logs/log.txt +++ /dev/null @@ -1,14 +0,0 @@ -2022-11-22 12:54:55 - r - INFO: - n_states: 4, n_actions: 2 -2022-11-22 12:54:58 - r - INFO: - Start testing! -2022-11-22 12:54:58 - r - INFO: - Env: CartPole-v1, Algorithm: DuelingDQN, Device: cuda -2022-11-22 12:54:58 - r - INFO: - Episode: 1/10, Reward: 200.000, Step: 200 -2022-11-22 12:54:58 - r - INFO: - Episode: 2/10, Reward: 200.000, Step: 200 -2022-11-22 12:54:58 - r - INFO: - Episode: 3/10, Reward: 200.000, Step: 200 -2022-11-22 12:54:58 - r - INFO: - Episode: 4/10, Reward: 200.000, Step: 200 -2022-11-22 12:54:58 - r - INFO: - Episode: 5/10, Reward: 200.000, Step: 200 -2022-11-22 12:54:58 - r - INFO: - Episode: 6/10, Reward: 200.000, Step: 200 -2022-11-22 12:54:58 - r - INFO: - Episode: 7/10, Reward: 200.000, Step: 200 -2022-11-22 12:54:58 - r - INFO: - Episode: 8/10, Reward: 200.000, Step: 200 -2022-11-22 12:54:58 - r - INFO: - Episode: 9/10, Reward: 200.000, Step: 200 -2022-11-22 12:54:58 - r - INFO: - Episode: 10/10, Reward: 200.000, Step: 200 -2022-11-22 12:54:58 - r - INFO: - Finish testing! diff --git a/CartPole-v1/Test_CartPole-v1_DuelingDQN_20221122-125455/models/checkpoint.pt b/CartPole-v1/Test_CartPole-v1_DuelingDQN_20221122-125455/models/checkpoint.pt deleted file mode 100644 index 2f8f1bb2765fa7bb055a8236cbc3aa26d8825a98..0000000000000000000000000000000000000000 --- a/CartPole-v1/Test_CartPole-v1_DuelingDQN_20221122-125455/models/checkpoint.pt +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:39a64f85a8848a1f4c937e359c4370458cf318c2bffbae406915ce7ee449dc50 -size 539719 diff --git a/CartPole-v1/Test_CartPole-v1_DuelingDQN_20221122-125455/results/learning_curve.png b/CartPole-v1/Test_CartPole-v1_DuelingDQN_20221122-125455/results/learning_curve.png deleted file mode 100644 index 5a8dd72d80717b4283b5c360717089e9874f50fc..0000000000000000000000000000000000000000 Binary files a/CartPole-v1/Test_CartPole-v1_DuelingDQN_20221122-125455/results/learning_curve.png and /dev/null differ diff --git a/CartPole-v1/Test_CartPole-v1_DuelingDQN_20221122-125455/results/res.csv b/CartPole-v1/Test_CartPole-v1_DuelingDQN_20221122-125455/results/res.csv deleted file mode 100644 index cbbcf2eb2cccfce2f3060e96b3484890fe578ac1..0000000000000000000000000000000000000000 --- a/CartPole-v1/Test_CartPole-v1_DuelingDQN_20221122-125455/results/res.csv +++ /dev/null @@ -1,11 +0,0 @@ -episodes,rewards,steps -0,200.0,200 -1,200.0,200 -2,200.0,200 -3,200.0,200 -4,200.0,200 -5,200.0,200 -6,200.0,200 -7,200.0,200 -8,200.0,200 -9,200.0,200 diff --git a/CartPole-v1/Test_CartPole-v1_DuelingDQN_mp_20230407-171120/config.yaml b/CartPole-v1/Test_CartPole-v1_DuelingDQN_mp_20230407-171120/config.yaml deleted file mode 100644 index 86012d297c175e34fdbca7d861d86c9f7fa97388..0000000000000000000000000000000000000000 --- a/CartPole-v1/Test_CartPole-v1_DuelingDQN_mp_20230407-171120/config.yaml +++ /dev/null @@ -1,47 +0,0 @@ -general_cfg: - algo_name: DuelingDQN - device: cpu - env_name: CartPole-v1 - eval_eps: 10 - eval_per_episode: 5 - load_checkpoint: true - load_path: Train_CartPole-v1_DuelingDQN_mp_20230407-170853 - max_steps: 200 - mode: test - mp_backend: mp - n_workers: 1 - new_step_api: true - render: false - render_mode: human - save_fig: true - seed: 1 - show_fig: false - test_eps: 10 - train_eps: 200 - wrapper: null -algo_cfg: - batch_size: 64 - buffer_size: 100000 - epsilon_decay: 500 - epsilon_end: 0.01 - epsilon_start: 0.95 - gamma: 0.99 - hidden_dim: 256 - lr: 0.0001 - target_update: 4 - value_layers: - - activation: relu - layer_dim: - - n_states - - 256 - layer_type: linear - - activation: relu - layer_dim: - - 256 - - 256 - layer_type: linear - - activation: none - layer_dim: - - 256 - - n_actions - layer_type: linear diff --git a/CartPole-v1/Test_CartPole-v1_DuelingDQN_mp_20230407-171120/logs/log.txt b/CartPole-v1/Test_CartPole-v1_DuelingDQN_mp_20230407-171120/logs/log.txt deleted file mode 100644 index 012765caa73c0ed489bcb948cdce74da3cd45f69..0000000000000000000000000000000000000000 --- a/CartPole-v1/Test_CartPole-v1_DuelingDQN_mp_20230407-171120/logs/log.txt +++ /dev/null @@ -1,53 +0,0 @@ -2023-04-07 17:11:20 - r - INFO: - Hyperparameters: -2023-04-07 17:11:20 - r - INFO: - ================================================================================ -2023-04-07 17:11:20 - r - INFO: - Name Value Type -2023-04-07 17:11:20 - r - INFO: - env_name CartPole-v1 -2023-04-07 17:11:20 - r - INFO: - new_step_api 1 -2023-04-07 17:11:20 - r - INFO: - wrapper None -2023-04-07 17:11:20 - r - INFO: - render 0 -2023-04-07 17:11:20 - r - INFO: - render_mode human -2023-04-07 17:11:20 - r - INFO: - algo_name DuelingDQN -2023-04-07 17:11:20 - r - INFO: - mode test -2023-04-07 17:11:20 - r - INFO: - mp_backend mp -2023-04-07 17:11:20 - r - INFO: - seed 1 -2023-04-07 17:11:20 - r - INFO: - device cpu -2023-04-07 17:11:20 - r - INFO: - train_eps 200 -2023-04-07 17:11:20 - r - INFO: - test_eps 10 -2023-04-07 17:11:20 - r - INFO: - eval_eps 10 -2023-04-07 17:11:20 - r - INFO: - eval_per_episode 5 -2023-04-07 17:11:20 - r - INFO: - max_steps 200 -2023-04-07 17:11:20 - r - INFO: - load_checkpoint 1 -2023-04-07 17:11:20 - r - INFO: - load_path Train_CartPole-v1_DuelingDQN_mp_20230407-170853 -2023-04-07 17:11:20 - r - INFO: - show_fig 0 -2023-04-07 17:11:20 - r - INFO: - save_fig 1 -2023-04-07 17:11:20 - r - INFO: - n_workers 1 -2023-04-07 17:11:20 - r - INFO: - epsilon_start 0.95 -2023-04-07 17:11:20 - r - INFO: - epsilon_end 0.01 -2023-04-07 17:11:20 - r - INFO: - epsilon_decay 500 -2023-04-07 17:11:20 - r - INFO: - gamma 0.99 -2023-04-07 17:11:20 - r - INFO: - lr 0.0001 -2023-04-07 17:11:20 - r - INFO: - buffer_size 100000 -2023-04-07 17:11:20 - r - INFO: - batch_size 64 -2023-04-07 17:11:20 - r - INFO: - target_update 4 -2023-04-07 17:11:20 - r - INFO: - value_layers [{'layer_type': 'linear', 'layer_dim': ['n_states', 256], 'activation': 'relu'}, {'layer_type': 'linear', 'layer_dim': [256, 256], 'activation': 'relu'}, {'layer_type': 'linear', 'layer_dim': [256, 'n_actions'], 'activation': 'none'}] -2023-04-07 17:11:20 - r - INFO: - hidden_dim 256 -2023-04-07 17:11:20 - r - INFO: - task_dir /media/disk/gsc/joyrl-offline/tasks/Test_CartPole-v1_DuelingDQN_20230407-171120 -2023-04-07 17:11:20 - r - INFO: - res_dir /media/disk/gsc/joyrl-offline/tasks/Test_CartPole-v1_DuelingDQN_20230407-171120/results -2023-04-07 17:11:20 - r - INFO: - log_dir /media/disk/gsc/joyrl-offline/tasks/Test_CartPole-v1_DuelingDQN_20230407-171120/logs -2023-04-07 17:11:20 - r - INFO: - traj_dir /media/disk/gsc/joyrl-offline/tasks/Test_CartPole-v1_DuelingDQN_20230407-171120/traj -2023-04-07 17:11:20 - r - INFO: - video_dir /media/disk/gsc/joyrl-offline/tasks/Test_CartPole-v1_DuelingDQN_20230407-171120/videos -2023-04-07 17:11:20 - r - INFO: - ================================================================================ -2023-04-07 17:11:20 - r - INFO: - n_states: 4, n_actions: 2 -2023-04-07 17:11:20 - r - INFO: - Start testing! -2023-04-07 17:11:20 - r - INFO: - Env: CartPole-v1, Algorithm: DuelingDQN, Device: cpu -2023-04-07 17:11:20 - r - INFO: - Episode: 1/10, Reward: 200.000, Step: 200 -2023-04-07 17:11:21 - r - INFO: - Episode: 2/10, Reward: 200.000, Step: 200 -2023-04-07 17:11:21 - r - INFO: - Episode: 3/10, Reward: 190.000, Step: 190 -2023-04-07 17:11:21 - r - INFO: - Episode: 4/10, Reward: 200.000, Step: 200 -2023-04-07 17:11:21 - r - INFO: - Episode: 5/10, Reward: 187.000, Step: 187 -2023-04-07 17:11:21 - r - INFO: - Episode: 6/10, Reward: 182.000, Step: 182 -2023-04-07 17:11:21 - r - INFO: - Episode: 7/10, Reward: 200.000, Step: 200 -2023-04-07 17:11:21 - r - INFO: - Episode: 8/10, Reward: 200.000, Step: 200 -2023-04-07 17:11:21 - r - INFO: - Episode: 9/10, Reward: 196.000, Step: 196 -2023-04-07 17:11:21 - r - INFO: - Episode: 10/10, Reward: 200.000, Step: 200 -2023-04-07 17:11:21 - r - INFO: - Finish testing! diff --git a/CartPole-v1/Test_CartPole-v1_DuelingDQN_mp_20230407-171120/models/checkpoint.pt b/CartPole-v1/Test_CartPole-v1_DuelingDQN_mp_20230407-171120/models/checkpoint.pt deleted file mode 100644 index 7f8c7f24c0c9fc6d4a576d479507b204876171ea..0000000000000000000000000000000000000000 --- a/CartPole-v1/Test_CartPole-v1_DuelingDQN_mp_20230407-171120/models/checkpoint.pt +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:87aab291b33d6423c3c54eced436183398700a290427c1913be9d65f5503b5ae -size 537607 diff --git a/CartPole-v1/Test_CartPole-v1_DuelingDQN_mp_20230407-171120/results/learning_curve.png b/CartPole-v1/Test_CartPole-v1_DuelingDQN_mp_20230407-171120/results/learning_curve.png deleted file mode 100644 index 40f1167bff6ebfc47a6bf79a42d8a2e8f8f56014..0000000000000000000000000000000000000000 Binary files a/CartPole-v1/Test_CartPole-v1_DuelingDQN_mp_20230407-171120/results/learning_curve.png and /dev/null differ diff --git a/CartPole-v1/Test_CartPole-v1_DuelingDQN_mp_20230407-171120/results/res.csv b/CartPole-v1/Test_CartPole-v1_DuelingDQN_mp_20230407-171120/results/res.csv deleted file mode 100644 index ac13e4c9167d41ac2fc6c3ed3640606a2eaae65b..0000000000000000000000000000000000000000 --- a/CartPole-v1/Test_CartPole-v1_DuelingDQN_mp_20230407-171120/results/res.csv +++ /dev/null @@ -1,11 +0,0 @@ -episodes,rewards,steps -0,200.0,200 -1,200.0,200 -2,190.0,190 -3,200.0,200 -4,187.0,187 -5,182.0,182 -6,200.0,200 -7,200.0,200 -8,196.0,196 -9,200.0,200 diff --git a/CartPole-v1/Test_CartPole-v1_DuelingDQN_ray_20230407-165208/config.yaml b/CartPole-v1/Test_CartPole-v1_DuelingDQN_ray_20230407-165208/config.yaml deleted file mode 100644 index f96c847df597c3bec2d1dfd5303825cd1370157a..0000000000000000000000000000000000000000 --- a/CartPole-v1/Test_CartPole-v1_DuelingDQN_ray_20230407-165208/config.yaml +++ /dev/null @@ -1,47 +0,0 @@ -general_cfg: - algo_name: DuelingDQN - device: cpu - env_name: CartPole-v1 - eval_eps: 10 - eval_per_episode: 5 - load_checkpoint: true - load_path: Train_CartPole-v1_DuelingDQN_20230407-153236 - max_steps: 200 - mode: test - mp_backend: ray - n_workers: 1 - new_step_api: true - render: false - render_mode: human - save_fig: true - seed: 1 - show_fig: false - test_eps: 10 - train_eps: 400 - wrapper: null -algo_cfg: - batch_size: 64 - buffer_size: 100000 - epsilon_decay: 500 - epsilon_end: 0.01 - epsilon_start: 0.95 - gamma: 0.99 - hidden_dim: 256 - lr: 0.0001 - target_update: 4 - value_layers: - - activation: relu - layer_dim: - - n_states - - 256 - layer_type: linear - - activation: relu - layer_dim: - - 256 - - 256 - layer_type: linear - - activation: none - layer_dim: - - 256 - - n_actions - layer_type: linear diff --git a/CartPole-v1/Test_CartPole-v1_DuelingDQN_ray_20230407-165208/logs/log.txt b/CartPole-v1/Test_CartPole-v1_DuelingDQN_ray_20230407-165208/logs/log.txt deleted file mode 100644 index d67567b83f689297f270ecc30ca910d4d40eeb46..0000000000000000000000000000000000000000 --- a/CartPole-v1/Test_CartPole-v1_DuelingDQN_ray_20230407-165208/logs/log.txt +++ /dev/null @@ -1,53 +0,0 @@ -2023-04-07 16:52:08 - r - INFO: - Hyperparameters: -2023-04-07 16:52:08 - r - INFO: - ================================================================================ -2023-04-07 16:52:08 - r - INFO: - Name Value Type -2023-04-07 16:52:08 - r - INFO: - env_name CartPole-v1 -2023-04-07 16:52:08 - r - INFO: - new_step_api 1 -2023-04-07 16:52:08 - r - INFO: - wrapper None -2023-04-07 16:52:08 - r - INFO: - render 0 -2023-04-07 16:52:08 - r - INFO: - render_mode human -2023-04-07 16:52:08 - r - INFO: - algo_name DuelingDQN -2023-04-07 16:52:08 - r - INFO: - mode test -2023-04-07 16:52:08 - r - INFO: - mp_backend ray -2023-04-07 16:52:08 - r - INFO: - seed 1 -2023-04-07 16:52:08 - r - INFO: - device cpu -2023-04-07 16:52:08 - r - INFO: - train_eps 400 -2023-04-07 16:52:08 - r - INFO: - test_eps 10 -2023-04-07 16:52:08 - r - INFO: - eval_eps 10 -2023-04-07 16:52:08 - r - INFO: - eval_per_episode 5 -2023-04-07 16:52:08 - r - INFO: - max_steps 200 -2023-04-07 16:52:08 - r - INFO: - load_checkpoint 1 -2023-04-07 16:52:08 - r - INFO: - load_path Train_CartPole-v1_DuelingDQN_20230407-153236 -2023-04-07 16:52:08 - r - INFO: - show_fig 0 -2023-04-07 16:52:08 - r - INFO: - save_fig 1 -2023-04-07 16:52:08 - r - INFO: - n_workers 1 -2023-04-07 16:52:08 - r - INFO: - epsilon_start 0.95 -2023-04-07 16:52:08 - r - INFO: - epsilon_end 0.01 -2023-04-07 16:52:08 - r - INFO: - epsilon_decay 500 -2023-04-07 16:52:08 - r - INFO: - gamma 0.99 -2023-04-07 16:52:08 - r - INFO: - lr 0.0001 -2023-04-07 16:52:08 - r - INFO: - buffer_size 100000 -2023-04-07 16:52:08 - r - INFO: - batch_size 64 -2023-04-07 16:52:08 - r - INFO: - target_update 4 -2023-04-07 16:52:08 - r - INFO: - value_layers [{'layer_type': 'linear', 'layer_dim': ['n_states', 256], 'activation': 'relu'}, {'layer_type': 'linear', 'layer_dim': [256, 256], 'activation': 'relu'}, {'layer_type': 'linear', 'layer_dim': [256, 'n_actions'], 'activation': 'none'}] -2023-04-07 16:52:08 - r - INFO: - hidden_dim 256 -2023-04-07 16:52:08 - r - INFO: - task_dir /media/disk/gsc/joyrl-offline/tasks/Test_CartPole-v1_DuelingDQN_20230407-165208 -2023-04-07 16:52:08 - r - INFO: - res_dir /media/disk/gsc/joyrl-offline/tasks/Test_CartPole-v1_DuelingDQN_20230407-165208/results -2023-04-07 16:52:08 - r - INFO: - log_dir /media/disk/gsc/joyrl-offline/tasks/Test_CartPole-v1_DuelingDQN_20230407-165208/logs -2023-04-07 16:52:08 - r - INFO: - traj_dir /media/disk/gsc/joyrl-offline/tasks/Test_CartPole-v1_DuelingDQN_20230407-165208/traj -2023-04-07 16:52:08 - r - INFO: - video_dir /media/disk/gsc/joyrl-offline/tasks/Test_CartPole-v1_DuelingDQN_20230407-165208/videos -2023-04-07 16:52:08 - r - INFO: - ================================================================================ -2023-04-07 16:52:08 - r - INFO: - n_states: 4, n_actions: 2 -2023-04-07 16:52:08 - r - INFO: - Start testing! -2023-04-07 16:52:08 - r - INFO: - Env: CartPole-v1, Algorithm: DuelingDQN, Device: cpu -2023-04-07 16:52:08 - r - INFO: - Episode: 1/10, Reward: 171.000, Step: 171 -2023-04-07 16:52:08 - r - INFO: - Episode: 2/10, Reward: 185.000, Step: 185 -2023-04-07 16:52:08 - r - INFO: - Episode: 3/10, Reward: 159.000, Step: 159 -2023-04-07 16:52:08 - r - INFO: - Episode: 4/10, Reward: 155.000, Step: 155 -2023-04-07 16:52:08 - r - INFO: - Episode: 5/10, Reward: 200.000, Step: 200 -2023-04-07 16:52:08 - r - INFO: - Episode: 6/10, Reward: 120.000, Step: 120 -2023-04-07 16:52:08 - r - INFO: - Episode: 7/10, Reward: 200.000, Step: 200 -2023-04-07 16:52:08 - r - INFO: - Episode: 8/10, Reward: 187.000, Step: 187 -2023-04-07 16:52:08 - r - INFO: - Episode: 9/10, Reward: 154.000, Step: 154 -2023-04-07 16:52:08 - r - INFO: - Episode: 10/10, Reward: 200.000, Step: 200 -2023-04-07 16:52:08 - r - INFO: - Finish testing! diff --git a/CartPole-v1/Test_CartPole-v1_DuelingDQN_ray_20230407-165208/models/checkpoint.pt b/CartPole-v1/Test_CartPole-v1_DuelingDQN_ray_20230407-165208/models/checkpoint.pt deleted file mode 100644 index d64126c7ac0958c7567fbb3cc692d7d11ed40efc..0000000000000000000000000000000000000000 --- a/CartPole-v1/Test_CartPole-v1_DuelingDQN_ray_20230407-165208/models/checkpoint.pt +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:03f1262598e3d636dd22e3b2fc0dfe52bf7a55348d54f51f02a8410682ec5a18 -size 537607 diff --git a/CartPole-v1/Test_CartPole-v1_DuelingDQN_ray_20230407-165208/results/learning_curve.png b/CartPole-v1/Test_CartPole-v1_DuelingDQN_ray_20230407-165208/results/learning_curve.png deleted file mode 100644 index 9bb4b42a700435f1b8dfd21b5f3e6c03821f5281..0000000000000000000000000000000000000000 Binary files a/CartPole-v1/Test_CartPole-v1_DuelingDQN_ray_20230407-165208/results/learning_curve.png and /dev/null differ diff --git a/CartPole-v1/Test_CartPole-v1_DuelingDQN_ray_20230407-165208/results/res.csv b/CartPole-v1/Test_CartPole-v1_DuelingDQN_ray_20230407-165208/results/res.csv deleted file mode 100644 index 4b5323e1519c0179ab0a52f1d89956a4f4de8a96..0000000000000000000000000000000000000000 --- a/CartPole-v1/Test_CartPole-v1_DuelingDQN_ray_20230407-165208/results/res.csv +++ /dev/null @@ -1,11 +0,0 @@ -episodes,rewards,steps -0,171.0,171 -1,185.0,185 -2,159.0,159 -3,155.0,155 -4,200.0,200 -5,120.0,120 -6,200.0,200 -7,187.0,187 -8,154.0,154 -9,200.0,200 diff --git a/CartPole-v1/Train_CartPole-v1_DuelingDQN_20221122-125403/config.yaml b/CartPole-v1/Train_CartPole-v1_DuelingDQN_20221122-125403/config.yaml deleted file mode 100644 index 9a67fa1f30ac96e885331c757c96522fd98e0a3e..0000000000000000000000000000000000000000 --- a/CartPole-v1/Train_CartPole-v1_DuelingDQN_20221122-125403/config.yaml +++ /dev/null @@ -1,41 +0,0 @@ -general_cfg: - algo_name: DuelingDQN - device: cuda - env_name: CartPole-v1 - eval_eps: 10 - eval_per_episode: 5 - load_checkpoint: false - load_path: Train_CartPole-v1_DQN_20221026-054757 - max_steps: 200 - mode: train - save_fig: true - seed: 1 - show_fig: false - test_eps: 10 - train_eps: 100 -algo_cfg: - batch_size: 64 - buffer_size: 100000 - epsilon_decay: 500 - epsilon_end: 0.01 - epsilon_start: 0.95 - gamma: 0.99 - hidden_dim: 256 - lr: 0.0001 - target_update: 4 - value_layers: - - activation: relu - layer_dim: - - n_states - - 256 - layer_type: linear - - activation: relu - layer_dim: - - 256 - - 256 - layer_type: linear - - activation: none - layer_dim: - - 256 - - n_actions - layer_type: linear diff --git a/CartPole-v1/Train_CartPole-v1_DuelingDQN_20221122-125403/logs/log.txt b/CartPole-v1/Train_CartPole-v1_DuelingDQN_20221122-125403/logs/log.txt deleted file mode 100644 index e7f22f9e0180b5a9c7236a7c7a3784c36f7c9cd7..0000000000000000000000000000000000000000 --- a/CartPole-v1/Train_CartPole-v1_DuelingDQN_20221122-125403/logs/log.txt +++ /dev/null @@ -1,119 +0,0 @@ -2022-11-22 12:54:03 - r - INFO: - n_states: 4, n_actions: 2 -2022-11-22 12:54:06 - r - INFO: - Start training! -2022-11-22 12:54:06 - r - INFO: - Env: CartPole-v1, Algorithm: DuelingDQN, Device: cuda -2022-11-22 12:54:06 - r - INFO: - Episode: 1/100, Reward: 18.000, Step: 18 -2022-11-22 12:54:06 - r - INFO: - Episode: 2/100, Reward: 35.000, Step: 35 -2022-11-22 12:54:06 - r - INFO: - Episode: 3/100, Reward: 13.000, Step: 13 -2022-11-22 12:54:06 - r - INFO: - Episode: 4/100, Reward: 32.000, Step: 32 -2022-11-22 12:54:06 - r - INFO: - Episode: 5/100, Reward: 16.000, Step: 16 -2022-11-22 12:54:06 - r - INFO: - Current episode 5 has the best eval reward: 9.100 -2022-11-22 12:54:06 - r - INFO: - Episode: 6/100, Reward: 9.000, Step: 9 -2022-11-22 12:54:06 - r - INFO: - Episode: 7/100, Reward: 12.000, Step: 12 -2022-11-22 12:54:06 - r - INFO: - Episode: 8/100, Reward: 16.000, Step: 16 -2022-11-22 12:54:06 - r - INFO: - Episode: 9/100, Reward: 14.000, Step: 14 -2022-11-22 12:54:06 - r - INFO: - Episode: 10/100, Reward: 12.000, Step: 12 -2022-11-22 12:54:06 - r - INFO: - Current episode 10 has the best eval reward: 9.200 -2022-11-22 12:54:06 - r - INFO: - Episode: 11/100, Reward: 13.000, Step: 13 -2022-11-22 12:54:06 - r - INFO: - Episode: 12/100, Reward: 14.000, Step: 14 -2022-11-22 12:54:06 - r - INFO: - Episode: 13/100, Reward: 19.000, Step: 19 -2022-11-22 12:54:06 - r - INFO: - Episode: 14/100, Reward: 9.000, Step: 9 -2022-11-22 12:54:06 - r - INFO: - Episode: 15/100, Reward: 15.000, Step: 15 -2022-11-22 12:54:06 - r - INFO: - Current episode 15 has the best eval reward: 9.300 -2022-11-22 12:54:06 - r - INFO: - Episode: 16/100, Reward: 12.000, Step: 12 -2022-11-22 12:54:06 - r - INFO: - Episode: 17/100, Reward: 11.000, Step: 11 -2022-11-22 12:54:06 - r - INFO: - Episode: 18/100, Reward: 9.000, Step: 9 -2022-11-22 12:54:07 - r - INFO: - Episode: 19/100, Reward: 13.000, Step: 13 -2022-11-22 12:54:07 - r - INFO: - Episode: 20/100, Reward: 17.000, Step: 17 -2022-11-22 12:54:07 - r - INFO: - Current episode 20 has the best eval reward: 9.900 -2022-11-22 12:54:07 - r - INFO: - Episode: 21/100, Reward: 14.000, Step: 14 -2022-11-22 12:54:07 - r - INFO: - Episode: 22/100, Reward: 20.000, Step: 20 -2022-11-22 12:54:07 - r - INFO: - Episode: 23/100, Reward: 11.000, Step: 11 -2022-11-22 12:54:07 - r - INFO: - Episode: 24/100, Reward: 24.000, Step: 24 -2022-11-22 12:54:07 - r - INFO: - Episode: 25/100, Reward: 11.000, Step: 11 -2022-11-22 12:54:07 - r - INFO: - Episode: 26/100, Reward: 11.000, Step: 11 -2022-11-22 12:54:07 - r - INFO: - Episode: 27/100, Reward: 11.000, Step: 11 -2022-11-22 12:54:07 - r - INFO: - Episode: 28/100, Reward: 13.000, Step: 13 -2022-11-22 12:54:07 - r - INFO: - Episode: 29/100, Reward: 11.000, Step: 11 -2022-11-22 12:54:07 - r - INFO: - Episode: 30/100, Reward: 8.000, Step: 8 -2022-11-22 12:54:07 - r - INFO: - Episode: 31/100, Reward: 13.000, Step: 13 -2022-11-22 12:54:07 - r - INFO: - Episode: 32/100, Reward: 9.000, Step: 9 -2022-11-22 12:54:07 - r - INFO: - Episode: 33/100, Reward: 34.000, Step: 34 -2022-11-22 12:54:07 - r - INFO: - Episode: 34/100, Reward: 10.000, Step: 10 -2022-11-22 12:54:07 - r - INFO: - Episode: 35/100, Reward: 10.000, Step: 10 -2022-11-22 12:54:07 - r - INFO: - Episode: 36/100, Reward: 10.000, Step: 10 -2022-11-22 12:54:07 - r - INFO: - Episode: 37/100, Reward: 34.000, Step: 34 -2022-11-22 12:54:07 - r - INFO: - Episode: 38/100, Reward: 35.000, Step: 35 -2022-11-22 12:54:07 - r - INFO: - Episode: 39/100, Reward: 32.000, Step: 32 -2022-11-22 12:54:08 - r - INFO: - Episode: 40/100, Reward: 37.000, Step: 37 -2022-11-22 12:54:08 - r - INFO: - Current episode 40 has the best eval reward: 27.500 -2022-11-22 12:54:08 - r - INFO: - Episode: 41/100, Reward: 29.000, Step: 29 -2022-11-22 12:54:08 - r - INFO: - Episode: 42/100, Reward: 52.000, Step: 52 -2022-11-22 12:54:08 - r - INFO: - Episode: 43/100, Reward: 54.000, Step: 54 -2022-11-22 12:54:08 - r - INFO: - Episode: 44/100, Reward: 90.000, Step: 90 -2022-11-22 12:54:08 - r - INFO: - Episode: 45/100, Reward: 91.000, Step: 91 -2022-11-22 12:54:09 - r - INFO: - Current episode 45 has the best eval reward: 87.500 -2022-11-22 12:54:09 - r - INFO: - Episode: 46/100, Reward: 51.000, Step: 51 -2022-11-22 12:54:09 - r - INFO: - Episode: 47/100, Reward: 101.000, Step: 101 -2022-11-22 12:54:09 - r - INFO: - Episode: 48/100, Reward: 67.000, Step: 67 -2022-11-22 12:54:09 - r - INFO: - Episode: 49/100, Reward: 103.000, Step: 103 -2022-11-22 12:54:10 - r - INFO: - Episode: 50/100, Reward: 45.000, Step: 45 -2022-11-22 12:54:10 - r - INFO: - Episode: 51/100, Reward: 137.000, Step: 137 -2022-11-22 12:54:10 - r - INFO: - Episode: 52/100, Reward: 47.000, Step: 47 -2022-11-22 12:54:10 - r - INFO: - Episode: 53/100, Reward: 89.000, Step: 89 -2022-11-22 12:54:11 - r - INFO: - Episode: 54/100, Reward: 95.000, Step: 95 -2022-11-22 12:54:11 - r - INFO: - Episode: 55/100, Reward: 55.000, Step: 55 -2022-11-22 12:54:11 - r - INFO: - Episode: 56/100, Reward: 92.000, Step: 92 -2022-11-22 12:54:12 - r - INFO: - Episode: 57/100, Reward: 155.000, Step: 155 -2022-11-22 12:54:12 - r - INFO: - Episode: 58/100, Reward: 125.000, Step: 125 -2022-11-22 12:54:12 - r - INFO: - Episode: 59/100, Reward: 152.000, Step: 152 -2022-11-22 12:54:13 - r - INFO: - Episode: 60/100, Reward: 199.000, Step: 199 -2022-11-22 12:54:13 - r - INFO: - Current episode 60 has the best eval reward: 179.100 -2022-11-22 12:54:14 - r - INFO: - Episode: 61/100, Reward: 88.000, Step: 88 -2022-11-22 12:54:14 - r - INFO: - Episode: 62/100, Reward: 200.000, Step: 200 -2022-11-22 12:54:14 - r - INFO: - Episode: 63/100, Reward: 176.000, Step: 176 -2022-11-22 12:54:15 - r - INFO: - Episode: 64/100, Reward: 200.000, Step: 200 -2022-11-22 12:54:15 - r - INFO: - Episode: 65/100, Reward: 200.000, Step: 200 -2022-11-22 12:54:16 - r - INFO: - Current episode 65 has the best eval reward: 198.700 -2022-11-22 12:54:16 - r - INFO: - Episode: 66/100, Reward: 193.000, Step: 193 -2022-11-22 12:54:17 - r - INFO: - Episode: 67/100, Reward: 200.000, Step: 200 -2022-11-22 12:54:17 - r - INFO: - Episode: 68/100, Reward: 200.000, Step: 200 -2022-11-22 12:54:18 - r - INFO: - Episode: 69/100, Reward: 200.000, Step: 200 -2022-11-22 12:54:18 - r - INFO: - Episode: 70/100, Reward: 200.000, Step: 200 -2022-11-22 12:54:19 - r - INFO: - Current episode 70 has the best eval reward: 200.000 -2022-11-22 12:54:20 - r - INFO: - Episode: 71/100, Reward: 200.000, Step: 200 -2022-11-22 12:54:20 - r - INFO: - Episode: 72/100, Reward: 200.000, Step: 200 -2022-11-22 12:54:20 - r - INFO: - Episode: 73/100, Reward: 200.000, Step: 200 -2022-11-22 12:54:21 - r - INFO: - Episode: 74/100, Reward: 200.000, Step: 200 -2022-11-22 12:54:21 - r - INFO: - Episode: 75/100, Reward: 200.000, Step: 200 -2022-11-22 12:54:22 - r - INFO: - Current episode 75 has the best eval reward: 200.000 -2022-11-22 12:54:23 - r - INFO: - Episode: 76/100, Reward: 200.000, Step: 200 -2022-11-22 12:54:23 - r - INFO: - Episode: 77/100, Reward: 200.000, Step: 200 -2022-11-22 12:54:24 - r - INFO: - Episode: 78/100, Reward: 200.000, Step: 200 -2022-11-22 12:54:24 - r - INFO: - Episode: 79/100, Reward: 200.000, Step: 200 -2022-11-22 12:54:24 - r - INFO: - Episode: 80/100, Reward: 200.000, Step: 200 -2022-11-22 12:54:25 - r - INFO: - Current episode 80 has the best eval reward: 200.000 -2022-11-22 12:54:26 - r - INFO: - Episode: 81/100, Reward: 200.000, Step: 200 -2022-11-22 12:54:26 - r - INFO: - Episode: 82/100, Reward: 200.000, Step: 200 -2022-11-22 12:54:27 - r - INFO: - Episode: 83/100, Reward: 200.000, Step: 200 -2022-11-22 12:54:27 - r - INFO: - Episode: 84/100, Reward: 200.000, Step: 200 -2022-11-22 12:54:27 - r - INFO: - Episode: 85/100, Reward: 200.000, Step: 200 -2022-11-22 12:54:28 - r - INFO: - Current episode 85 has the best eval reward: 200.000 -2022-11-22 12:54:29 - r - INFO: - Episode: 86/100, Reward: 200.000, Step: 200 -2022-11-22 12:54:29 - r - INFO: - Episode: 87/100, Reward: 200.000, Step: 200 -2022-11-22 12:54:30 - r - INFO: - Episode: 88/100, Reward: 200.000, Step: 200 -2022-11-22 12:54:30 - r - INFO: - Episode: 89/100, Reward: 200.000, Step: 200 -2022-11-22 12:54:30 - r - INFO: - Episode: 90/100, Reward: 200.000, Step: 200 -2022-11-22 12:54:31 - r - INFO: - Current episode 90 has the best eval reward: 200.000 -2022-11-22 12:54:32 - r - INFO: - Episode: 91/100, Reward: 200.000, Step: 200 -2022-11-22 12:54:32 - r - INFO: - Episode: 92/100, Reward: 200.000, Step: 200 -2022-11-22 12:54:33 - r - INFO: - Episode: 93/100, Reward: 200.000, Step: 200 -2022-11-22 12:54:33 - r - INFO: - Episode: 94/100, Reward: 200.000, Step: 200 -2022-11-22 12:54:34 - r - INFO: - Episode: 95/100, Reward: 200.000, Step: 200 -2022-11-22 12:54:34 - r - INFO: - Current episode 95 has the best eval reward: 200.000 -2022-11-22 12:54:35 - r - INFO: - Episode: 96/100, Reward: 200.000, Step: 200 -2022-11-22 12:54:35 - r - INFO: - Episode: 97/100, Reward: 200.000, Step: 200 -2022-11-22 12:54:36 - r - INFO: - Episode: 98/100, Reward: 200.000, Step: 200 -2022-11-22 12:54:36 - r - INFO: - Episode: 99/100, Reward: 200.000, Step: 200 -2022-11-22 12:54:37 - r - INFO: - Episode: 100/100, Reward: 200.000, Step: 200 -2022-11-22 12:54:37 - r - INFO: - Current episode 100 has the best eval reward: 200.000 -2022-11-22 12:54:37 - r - INFO: - Finish training! diff --git a/CartPole-v1/Train_CartPole-v1_DuelingDQN_20221122-125403/models/checkpoint.pt b/CartPole-v1/Train_CartPole-v1_DuelingDQN_20221122-125403/models/checkpoint.pt deleted file mode 100644 index 2f8f1bb2765fa7bb055a8236cbc3aa26d8825a98..0000000000000000000000000000000000000000 --- a/CartPole-v1/Train_CartPole-v1_DuelingDQN_20221122-125403/models/checkpoint.pt +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:39a64f85a8848a1f4c937e359c4370458cf318c2bffbae406915ce7ee449dc50 -size 539719 diff --git a/CartPole-v1/Train_CartPole-v1_DuelingDQN_20221122-125403/results/learning_curve.png b/CartPole-v1/Train_CartPole-v1_DuelingDQN_20221122-125403/results/learning_curve.png deleted file mode 100644 index 85e24583ce03fbb4f497cfffaf1cb2eb4637736b..0000000000000000000000000000000000000000 Binary files a/CartPole-v1/Train_CartPole-v1_DuelingDQN_20221122-125403/results/learning_curve.png and /dev/null differ diff --git a/CartPole-v1/Train_CartPole-v1_DuelingDQN_20221122-125403/results/res.csv b/CartPole-v1/Train_CartPole-v1_DuelingDQN_20221122-125403/results/res.csv deleted file mode 100644 index 0785d1aa07110fa323de7c6ed268527fc42555f3..0000000000000000000000000000000000000000 --- a/CartPole-v1/Train_CartPole-v1_DuelingDQN_20221122-125403/results/res.csv +++ /dev/null @@ -1,101 +0,0 @@ -episodes,rewards,steps -0,18.0,18 -1,35.0,35 -2,13.0,13 -3,32.0,32 -4,16.0,16 -5,9.0,9 -6,12.0,12 -7,16.0,16 -8,14.0,14 -9,12.0,12 -10,13.0,13 -11,14.0,14 -12,19.0,19 -13,9.0,9 -14,15.0,15 -15,12.0,12 -16,11.0,11 -17,9.0,9 -18,13.0,13 -19,17.0,17 -20,14.0,14 -21,20.0,20 -22,11.0,11 -23,24.0,24 -24,11.0,11 -25,11.0,11 -26,11.0,11 -27,13.0,13 -28,11.0,11 -29,8.0,8 -30,13.0,13 -31,9.0,9 -32,34.0,34 -33,10.0,10 -34,10.0,10 -35,10.0,10 -36,34.0,34 -37,35.0,35 -38,32.0,32 -39,37.0,37 -40,29.0,29 -41,52.0,52 -42,54.0,54 -43,90.0,90 -44,91.0,91 -45,51.0,51 -46,101.0,101 -47,67.0,67 -48,103.0,103 -49,45.0,45 -50,137.0,137 -51,47.0,47 -52,89.0,89 -53,95.0,95 -54,55.0,55 -55,92.0,92 -56,155.0,155 -57,125.0,125 -58,152.0,152 -59,199.0,199 -60,88.0,88 -61,200.0,200 -62,176.0,176 -63,200.0,200 -64,200.0,200 -65,193.0,193 -66,200.0,200 -67,200.0,200 -68,200.0,200 -69,200.0,200 -70,200.0,200 -71,200.0,200 -72,200.0,200 -73,200.0,200 -74,200.0,200 -75,200.0,200 -76,200.0,200 -77,200.0,200 -78,200.0,200 -79,200.0,200 -80,200.0,200 -81,200.0,200 -82,200.0,200 -83,200.0,200 -84,200.0,200 -85,200.0,200 -86,200.0,200 -87,200.0,200 -88,200.0,200 -89,200.0,200 -90,200.0,200 -91,200.0,200 -92,200.0,200 -93,200.0,200 -94,200.0,200 -95,200.0,200 -96,200.0,200 -97,200.0,200 -98,200.0,200 -99,200.0,200 diff --git a/CartPole-v1/Train_CartPole-v1_DuelingDQN_mp_20230407-170853/config.yaml b/CartPole-v1/Train_CartPole-v1_DuelingDQN_mp_20230407-170853/config.yaml deleted file mode 100644 index 568a83fd1348cc893db157b052213bb314d1d4a3..0000000000000000000000000000000000000000 --- a/CartPole-v1/Train_CartPole-v1_DuelingDQN_mp_20230407-170853/config.yaml +++ /dev/null @@ -1,47 +0,0 @@ -general_cfg: - algo_name: DuelingDQN - device: cpu - env_name: CartPole-v1 - eval_eps: 10 - eval_per_episode: 5 - load_checkpoint: false - load_path: Train_CartPole-v1_DQN_20221026-054757 - max_steps: 200 - mode: train - mp_backend: mp - n_workers: 4 - new_step_api: true - render: false - render_mode: human - save_fig: true - seed: 1 - show_fig: false - test_eps: 10 - train_eps: 200 - wrapper: null -algo_cfg: - batch_size: 64 - buffer_size: 100000 - epsilon_decay: 500 - epsilon_end: 0.01 - epsilon_start: 0.95 - gamma: 0.99 - hidden_dim: 256 - lr: 0.0001 - target_update: 4 - value_layers: - - activation: relu - layer_dim: - - n_states - - 256 - layer_type: linear - - activation: relu - layer_dim: - - 256 - - 256 - layer_type: linear - - activation: none - layer_dim: - - 256 - - n_actions - layer_type: linear diff --git a/CartPole-v1/Train_CartPole-v1_DuelingDQN_mp_20230407-170853/logs/log.txt b/CartPole-v1/Train_CartPole-v1_DuelingDQN_mp_20230407-170853/logs/log.txt deleted file mode 100644 index 48370a2194a38c23fce62f232cc066a9e00cb77c..0000000000000000000000000000000000000000 --- a/CartPole-v1/Train_CartPole-v1_DuelingDQN_mp_20230407-170853/logs/log.txt +++ /dev/null @@ -1,43 +0,0 @@ -2023-04-07 17:08:53 - r - INFO: - Hyperparameters: -2023-04-07 17:08:53 - r - INFO: - ================================================================================ -2023-04-07 17:08:53 - r - INFO: - Name Value Type -2023-04-07 17:08:53 - r - INFO: - env_name CartPole-v1 -2023-04-07 17:08:53 - r - INFO: - new_step_api 1 -2023-04-07 17:08:53 - r - INFO: - wrapper None -2023-04-07 17:08:53 - r - INFO: - render 0 -2023-04-07 17:08:53 - r - INFO: - render_mode human -2023-04-07 17:08:53 - r - INFO: - algo_name DuelingDQN -2023-04-07 17:08:53 - r - INFO: - mode train -2023-04-07 17:08:53 - r - INFO: - mp_backend mp -2023-04-07 17:08:53 - r - INFO: - seed 1 -2023-04-07 17:08:53 - r - INFO: - device cpu -2023-04-07 17:08:53 - r - INFO: - train_eps 200 -2023-04-07 17:08:53 - r - INFO: - test_eps 10 -2023-04-07 17:08:53 - r - INFO: - eval_eps 10 -2023-04-07 17:08:53 - r - INFO: - eval_per_episode 5 -2023-04-07 17:08:53 - r - INFO: - max_steps 200 -2023-04-07 17:08:53 - r - INFO: - load_checkpoint 0 -2023-04-07 17:08:53 - r - INFO: - load_path Train_CartPole-v1_DQN_20221026-054757 -2023-04-07 17:08:53 - r - INFO: - show_fig 0 -2023-04-07 17:08:53 - r - INFO: - save_fig 1 -2023-04-07 17:08:53 - r - INFO: - n_workers 4 -2023-04-07 17:08:53 - r - INFO: - epsilon_start 0.95 -2023-04-07 17:08:53 - r - INFO: - epsilon_end 0.01 -2023-04-07 17:08:53 - r - INFO: - epsilon_decay 500 -2023-04-07 17:08:53 - r - INFO: - gamma 0.99 -2023-04-07 17:08:53 - r - INFO: - lr 0.0001 -2023-04-07 17:08:53 - r - INFO: - buffer_size 100000 -2023-04-07 17:08:53 - r - INFO: - batch_size 64 -2023-04-07 17:08:53 - r - INFO: - target_update 4 -2023-04-07 17:08:53 - r - INFO: - value_layers [{'layer_type': 'linear', 'layer_dim': ['n_states', 256], 'activation': 'relu'}, {'layer_type': 'linear', 'layer_dim': [256, 256], 'activation': 'relu'}, {'layer_type': 'linear', 'layer_dim': [256, 'n_actions'], 'activation': 'none'}] -2023-04-07 17:08:53 - r - INFO: - hidden_dim 256 -2023-04-07 17:08:53 - r - INFO: - task_dir /media/disk/gsc/joyrl-offline/tasks/Train_CartPole-v1_DuelingDQN_20230407-170853 -2023-04-07 17:08:53 - r - INFO: - res_dir /media/disk/gsc/joyrl-offline/tasks/Train_CartPole-v1_DuelingDQN_20230407-170853/results -2023-04-07 17:08:53 - r - INFO: - log_dir /media/disk/gsc/joyrl-offline/tasks/Train_CartPole-v1_DuelingDQN_20230407-170853/logs -2023-04-07 17:08:53 - r - INFO: - traj_dir /media/disk/gsc/joyrl-offline/tasks/Train_CartPole-v1_DuelingDQN_20230407-170853/traj -2023-04-07 17:08:53 - r - INFO: - video_dir /media/disk/gsc/joyrl-offline/tasks/Train_CartPole-v1_DuelingDQN_20230407-170853/videos -2023-04-07 17:08:53 - r - INFO: - ================================================================================ -2023-04-07 17:08:53 - r - INFO: - n_states: 4, n_actions: 2 -2023-04-07 17:08:53 - r - INFO: - Start training! -2023-04-07 17:08:53 - r - INFO: - Env: CartPole-v1, Algorithm: DuelingDQN, Device: cpu -2023-04-07 17:10:11 - r - INFO: - Finish training! diff --git a/CartPole-v1/Train_CartPole-v1_DuelingDQN_mp_20230407-170853/models/checkpoint.pt b/CartPole-v1/Train_CartPole-v1_DuelingDQN_mp_20230407-170853/models/checkpoint.pt deleted file mode 100644 index 7f8c7f24c0c9fc6d4a576d479507b204876171ea..0000000000000000000000000000000000000000 --- a/CartPole-v1/Train_CartPole-v1_DuelingDQN_mp_20230407-170853/models/checkpoint.pt +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:87aab291b33d6423c3c54eced436183398700a290427c1913be9d65f5503b5ae -size 537607 diff --git a/CartPole-v1/Train_CartPole-v1_DuelingDQN_mp_20230407-170853/results/learning_curve.png b/CartPole-v1/Train_CartPole-v1_DuelingDQN_mp_20230407-170853/results/learning_curve.png deleted file mode 100644 index 8929d278ea2794d0b5f2a947e6a4611037515262..0000000000000000000000000000000000000000 Binary files a/CartPole-v1/Train_CartPole-v1_DuelingDQN_mp_20230407-170853/results/learning_curve.png and /dev/null differ diff --git a/CartPole-v1/Train_CartPole-v1_DuelingDQN_mp_20230407-170853/results/res.csv b/CartPole-v1/Train_CartPole-v1_DuelingDQN_mp_20230407-170853/results/res.csv deleted file mode 100644 index e5886e684e410c0116bce001ca27f99855ce62e7..0000000000000000000000000000000000000000 --- a/CartPole-v1/Train_CartPole-v1_DuelingDQN_mp_20230407-170853/results/res.csv +++ /dev/null @@ -1,202 +0,0 @@ -episodes,rewards -0,17.0 -1,16.0 -2,24.0 -3,38.0 -4,18.0 -5,18.0 -6,19.0 -7,17.0 -8,27.0 -9,16.0 -10,15.0 -11,31.0 -12,33.0 -13,13.0 -14,9.0 -15,34.0 -16,32.0 -17,34.0 -18,12.0 -19,13.0 -20,11.0 -21,9.0 -22,9.0 -23,11.0 -24,38.0 -25,13.0 -26,20.0 -27,10.0 -28,33.0 -29,15.0 -30,32.0 -31,11.0 -32,13.0 -33,12.0 -34,9.0 -35,13.0 -36,14.0 -37,21.0 -38,48.0 -39,19.0 -40,65.0 -41,28.0 -42,15.0 -43,11.0 -44,54.0 -45,26.0 -46,37.0 -47,40.0 -48,54.0 -49,54.0 -50,50.0 -51,84.0 -52,55.0 -53,43.0 -54,45.0 -55,48.0 -56,88.0 -57,41.0 -58,46.0 -59,61.0 -60,32.0 -61,53.0 -62,59.0 -63,49.0 -64,60.0 -65,35.0 -66,82.0 -67,50.0 -68,108.0 -69,121.0 -70,113.0 -71,67.0 -72,87.0 -73,96.0 -74,181.0 -75,62.0 -76,137.0 -77,175.0 -78,123.0 -79,149.0 -80,172.0 -81,200.0 -82,156.0 -83,146.0 -84,200.0 -85,200.0 -86,200.0 -87,128.0 -88,188.0 -89,200.0 -90,200.0 -91,200.0 -92,200.0 -93,200.0 -94,200.0 -95,200.0 -96,200.0 -97,200.0 -98,195.0 -99,200.0 -100,200.0 -101,196.0 -102,200.0 -103,200.0 -104,192.0 -105,200.0 -106,190.0 -107,200.0 -108,200.0 -109,200.0 -110,197.0 -111,200.0 -112,200.0 -113,200.0 -114,200.0 -115,200.0 -116,200.0 -117,200.0 -118,200.0 -119,200.0 -120,200.0 -121,200.0 -122,200.0 -123,200.0 -124,200.0 -125,200.0 -126,200.0 -127,200.0 -128,200.0 -129,200.0 -130,200.0 -131,200.0 -132,200.0 -133,200.0 -134,200.0 -135,200.0 -136,200.0 -137,197.0 -138,200.0 -139,200.0 -140,200.0 -141,200.0 -142,200.0 -143,200.0 -144,21.0 -145,193.0 -146,123.0 -147,194.0 -148,9.0 -149,9.0 -150,48.0 -151,200.0 -152,200.0 -153,200.0 -154,200.0 -155,200.0 -156,200.0 -157,200.0 -158,200.0 -159,200.0 -160,200.0 -161,200.0 -162,200.0 -163,200.0 -164,200.0 -165,200.0 -166,200.0 -167,200.0 -168,200.0 -169,200.0 -170,200.0 -171,200.0 -172,200.0 -173,200.0 -174,200.0 -175,200.0 -176,200.0 -177,200.0 -178,200.0 -179,200.0 -180,200.0 -181,200.0 -182,200.0 -183,200.0 -184,200.0 -185,200.0 -186,200.0 -187,200.0 -188,200.0 -189,200.0 -190,200.0 -191,200.0 -192,200.0 -193,200.0 -194,200.0 -195,200.0 -196,200.0 -197,200.0 -198,200.0 -199,200.0 -200,200.0 diff --git a/CartPole-v1/Train_CartPole-v1_DuelingDQN_ray_20230407-153236/logs/log.txt b/CartPole-v1/Train_CartPole-v1_DuelingDQN_ray_20230407-153236/logs/log.txt deleted file mode 100644 index 7c6331b3bfb85948bc7c3d56ce3596e8c4158760..0000000000000000000000000000000000000000 --- a/CartPole-v1/Train_CartPole-v1_DuelingDQN_ray_20230407-153236/logs/log.txt +++ /dev/null @@ -1,43 +0,0 @@ -2023-04-07 15:32:36 - r - INFO: - Hyperparameters: -2023-04-07 15:32:36 - r - INFO: - ================================================================================ -2023-04-07 15:32:36 - r - INFO: - Name Value Type -2023-04-07 15:32:36 - r - INFO: - env_name CartPole-v1 -2023-04-07 15:32:36 - r - INFO: - new_step_api 1 -2023-04-07 15:32:36 - r - INFO: - wrapper None -2023-04-07 15:32:36 - r - INFO: - render 0 -2023-04-07 15:32:36 - r - INFO: - render_mode human -2023-04-07 15:32:36 - r - INFO: - algo_name DuelingDQN -2023-04-07 15:32:36 - r - INFO: - mode train -2023-04-07 15:32:36 - r - INFO: - mp_backend ray -2023-04-07 15:32:36 - r - INFO: - seed 1 -2023-04-07 15:32:36 - r - INFO: - device cpu -2023-04-07 15:32:36 - r - INFO: - train_eps 400 -2023-04-07 15:32:36 - r - INFO: - test_eps 10 -2023-04-07 15:32:36 - r - INFO: - eval_eps 10 -2023-04-07 15:32:36 - r - INFO: - eval_per_episode 5 -2023-04-07 15:32:36 - r - INFO: - max_steps 200 -2023-04-07 15:32:36 - r - INFO: - load_checkpoint 0 -2023-04-07 15:32:36 - r - INFO: - load_path Train_CartPole-v1_DQN_20221026-054757 -2023-04-07 15:32:36 - r - INFO: - show_fig 0 -2023-04-07 15:32:36 - r - INFO: - save_fig 1 -2023-04-07 15:32:36 - r - INFO: - n_workers 2 -2023-04-07 15:32:36 - r - INFO: - epsilon_start 0.95 -2023-04-07 15:32:36 - r - INFO: - epsilon_end 0.01 -2023-04-07 15:32:36 - r - INFO: - epsilon_decay 500 -2023-04-07 15:32:36 - r - INFO: - gamma 0.99 -2023-04-07 15:32:36 - r - INFO: - lr 0.0001 -2023-04-07 15:32:36 - r - INFO: - buffer_size 100000 -2023-04-07 15:32:36 - r - INFO: - batch_size 64 -2023-04-07 15:32:36 - r - INFO: - target_update 4 -2023-04-07 15:32:36 - r - INFO: - value_layers [{'layer_type': 'linear', 'layer_dim': ['n_states', 256], 'activation': 'relu'}, {'layer_type': 'linear', 'layer_dim': [256, 256], 'activation': 'relu'}, {'layer_type': 'linear', 'layer_dim': [256, 'n_actions'], 'activation': 'none'}] -2023-04-07 15:32:36 - r - INFO: - hidden_dim 256 -2023-04-07 15:32:36 - r - INFO: - task_dir /media/disk/gsc/joyrl-offline/tasks/Train_CartPole-v1_DuelingDQN_20230407-153236 -2023-04-07 15:32:36 - r - INFO: - res_dir /media/disk/gsc/joyrl-offline/tasks/Train_CartPole-v1_DuelingDQN_20230407-153236/results -2023-04-07 15:32:36 - r - INFO: - log_dir /media/disk/gsc/joyrl-offline/tasks/Train_CartPole-v1_DuelingDQN_20230407-153236/logs -2023-04-07 15:32:36 - r - INFO: - traj_dir /media/disk/gsc/joyrl-offline/tasks/Train_CartPole-v1_DuelingDQN_20230407-153236/traj -2023-04-07 15:32:36 - r - INFO: - video_dir /media/disk/gsc/joyrl-offline/tasks/Train_CartPole-v1_DuelingDQN_20230407-153236/videos -2023-04-07 15:32:36 - r - INFO: - ================================================================================ -2023-04-07 15:32:39 - r - INFO: - n_states: 4, n_actions: 2 -2023-04-07 15:32:39 - r - INFO: - Start training! -2023-04-07 15:32:39 - r - INFO: - Env: CartPole-v1, Algorithm: DuelingDQN, Device: cpu -2023-04-07 15:40:31 - r - INFO: - Finish training! diff --git a/CartPole-v1/Train_CartPole-v1_DuelingDQN_ray_20230407-153236/models/checkpoint.pt b/CartPole-v1/Train_CartPole-v1_DuelingDQN_ray_20230407-153236/models/checkpoint.pt deleted file mode 100644 index d64126c7ac0958c7567fbb3cc692d7d11ed40efc..0000000000000000000000000000000000000000 --- a/CartPole-v1/Train_CartPole-v1_DuelingDQN_ray_20230407-153236/models/checkpoint.pt +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:03f1262598e3d636dd22e3b2fc0dfe52bf7a55348d54f51f02a8410682ec5a18 -size 537607 diff --git a/CartPole-v1/Train_CartPole-v1_DuelingDQN_ray_20230407-153236/results/learning_curve.png b/CartPole-v1/Train_CartPole-v1_DuelingDQN_ray_20230407-153236/results/learning_curve.png deleted file mode 100644 index cfdfafd18636c2948a7b26bfba0f154ba7391cd7..0000000000000000000000000000000000000000 Binary files a/CartPole-v1/Train_CartPole-v1_DuelingDQN_ray_20230407-153236/results/learning_curve.png and /dev/null differ diff --git a/CartPole-v1/Train_CartPole-v1_DuelingDQN_ray_20230407-153236/results/res.csv b/CartPole-v1/Train_CartPole-v1_DuelingDQN_ray_20230407-153236/results/res.csv deleted file mode 100644 index 7526f7040d7073e2ac8d4fb26a106b41cd50d186..0000000000000000000000000000000000000000 --- a/CartPole-v1/Train_CartPole-v1_DuelingDQN_ray_20230407-153236/results/res.csv +++ /dev/null @@ -1,401 +0,0 @@ -episodes,rewards -0,18.0 -1,18.0 -2,19.0 -3,28.0 -4,17.0 -5,15.0 -6,13.0 -7,15.0 -8,38.0 -9,31.0 -10,11.0 -11,31.0 -12,10.0 -13,17.0 -14,14.0 -15,13.0 -16,21.0 -17,15.0 -18,9.0 -19,10.0 -20,22.0 -21,19.0 -22,11.0 -23,13.0 -24,20.0 -25,15.0 -26,14.0 -27,12.0 -28,10.0 -29,11.0 -30,12.0 -31,14.0 -32,9.0 -33,10.0 -34,16.0 -35,13.0 -36,15.0 -37,12.0 -38,14.0 -39,10.0 -40,14.0 -41,10.0 -42,11.0 -43,16.0 -44,16.0 -45,12.0 -46,15.0 -47,19.0 -48,15.0 -49,20.0 -50,15.0 -51,11.0 -52,13.0 -53,12.0 -54,12.0 -55,12.0 -56,12.0 -57,12.0 -58,11.0 -59,10.0 -60,13.0 -61,11.0 -62,12.0 -63,9.0 -64,11.0 -65,11.0 -66,10.0 -67,9.0 -68,11.0 -69,11.0 -70,11.0 -71,12.0 -72,10.0 -73,12.0 -74,9.0 -75,10.0 -76,9.0 -77,10.0 -78,9.0 -79,10.0 -80,11.0 -81,9.0 -82,12.0 -83,11.0 -84,12.0 -85,10.0 -86,9.0 -87,11.0 -88,9.0 -89,9.0 -90,10.0 -91,15.0 -92,11.0 -93,9.0 -94,10.0 -95,16.0 -96,13.0 -97,9.0 -98,10.0 -99,10.0 -100,13.0 -101,11.0 -102,10.0 -103,9.0 -104,13.0 -105,16.0 -106,12.0 -107,9.0 -108,11.0 -109,9.0 -110,13.0 -111,11.0 -112,18.0 -113,13.0 -114,9.0 -115,12.0 -116,10.0 -117,10.0 -118,10.0 -119,13.0 -120,10.0 -121,11.0 -122,10.0 -123,10.0 -124,9.0 -125,10.0 -126,11.0 -127,14.0 -128,12.0 -129,9.0 -130,11.0 -131,14.0 -132,11.0 -133,10.0 -134,13.0 -135,9.0 -136,11.0 -137,11.0 -138,11.0 -139,9.0 -140,10.0 -141,9.0 -142,9.0 -143,12.0 -144,9.0 -145,10.0 -146,9.0 -147,10.0 -148,9.0 -149,10.0 -150,9.0 -151,12.0 -152,9.0 -153,9.0 -154,10.0 -155,9.0 -156,10.0 -157,13.0 -158,14.0 -159,10.0 -160,12.0 -161,11.0 -162,10.0 -163,11.0 -164,11.0 -165,9.0 -166,31.0 -167,39.0 -168,18.0 -169,24.0 -170,18.0 -171,18.0 -172,24.0 -173,16.0 -174,25.0 -175,23.0 -176,26.0 -177,23.0 -178,26.0 -179,21.0 -180,28.0 -181,20.0 -182,22.0 -183,30.0 -184,27.0 -185,34.0 -186,31.0 -187,39.0 -188,29.0 -189,29.0 -190,37.0 -191,27.0 -192,36.0 -193,34.0 -194,46.0 -195,35.0 -196,52.0 -197,32.0 -198,30.0 -199,69.0 -200,38.0 -201,39.0 -202,57.0 -203,38.0 -204,68.0 -205,47.0 -206,45.0 -207,63.0 -208,47.0 -209,86.0 -210,67.0 -211,60.0 -212,48.0 -213,55.0 -214,95.0 -215,58.0 -216,70.0 -217,58.0 -218,42.0 -219,69.0 -220,47.0 -221,109.0 -222,70.0 -223,80.0 -224,77.0 -225,61.0 -226,72.0 -227,55.0 -228,77.0 -229,61.0 -230,79.0 -231,66.0 -232,68.0 -233,99.0 -234,143.0 -235,82.0 -236,85.0 -237,103.0 -238,99.0 -239,93.0 -240,100.0 -241,101.0 -242,151.0 -243,195.0 -244,100.0 -245,99.0 -246,127.0 -247,105.0 -248,127.0 -249,142.0 -250,169.0 -251,108.0 -252,128.0 -253,123.0 -254,134.0 -255,126.0 -256,114.0 -257,200.0 -258,123.0 -259,159.0 -260,125.0 -261,142.0 -262,178.0 -263,96.0 -264,200.0 -265,200.0 -266,113.0 -267,90.0 -268,200.0 -269,122.0 -270,140.0 -271,116.0 -272,128.0 -273,190.0 -274,170.0 -275,96.0 -276,126.0 -277,200.0 -278,88.0 -279,76.0 -280,74.0 -281,84.0 -282,130.0 -283,200.0 -284,86.0 -285,153.0 -286,200.0 -287,59.0 -288,135.0 -289,62.0 -290,200.0 -291,182.0 -292,138.0 -293,200.0 -294,118.0 -295,50.0 -296,74.0 -297,62.0 -298,200.0 -299,124.0 -300,111.0 -301,61.0 -302,132.0 -303,200.0 -304,80.0 -305,60.0 -306,77.0 -307,47.0 -308,80.0 -309,64.0 -310,96.0 -311,200.0 -312,200.0 -313,133.0 -314,200.0 -315,188.0 -316,132.0 -317,150.0 -318,135.0 -319,184.0 -320,138.0 -321,176.0 -322,200.0 -323,161.0 -324,158.0 -325,142.0 -326,133.0 -327,151.0 -328,143.0 -329,160.0 -330,150.0 -331,134.0 -332,147.0 -333,132.0 -334,143.0 -335,137.0 -336,155.0 -337,138.0 -338,138.0 -339,130.0 -340,148.0 -341,146.0 -342,152.0 -343,135.0 -344,175.0 -345,153.0 -346,155.0 -347,131.0 -348,156.0 -349,138.0 -350,151.0 -351,162.0 -352,200.0 -353,175.0 -354,156.0 -355,145.0 -356,168.0 -357,200.0 -358,181.0 -359,145.0 -360,189.0 -361,200.0 -362,144.0 -363,200.0 -364,178.0 -365,200.0 -366,179.0 -367,200.0 -368,177.0 -369,200.0 -370,185.0 -371,195.0 -372,200.0 -373,200.0 -374,190.0 -375,200.0 -376,200.0 -377,200.0 -378,200.0 -379,200.0 -380,200.0 -381,200.0 -382,170.0 -383,173.0 -384,162.0 -385,162.0 -386,149.0 -387,173.0 -388,200.0 -389,200.0 -390,200.0 -391,156.0 -392,157.0 -393,169.0 -394,182.0 -395,154.0 -396,200.0 -397,200.0 -398,200.0 -399,200.0 diff --git a/CartPole-v1/Train_CartPole-v1_DuelingDQN_ray_20230407-153236/config.yaml b/CartPole-v1/Train_ray_CartPole-v1_DuelingDQN_20230517-224129/config.yaml similarity index 53% rename from CartPole-v1/Train_CartPole-v1_DuelingDQN_ray_20230407-153236/config.yaml rename to CartPole-v1/Train_ray_CartPole-v1_DuelingDQN_20230517-224129/config.yaml index cfb5a718011cda7f7c2d8c08ec0563c432ad955d..9e3e73ae6492da75e8f7939447e8829ae04954ff 100644 --- a/CartPole-v1/Train_CartPole-v1_DuelingDQN_ray_20230407-153236/config.yaml +++ b/CartPole-v1/Train_ray_CartPole-v1_DuelingDQN_20230517-224129/config.yaml @@ -1,47 +1,44 @@ general_cfg: algo_name: DuelingDQN + collect_traj: false device: cpu - env_name: CartPole-v1 - eval_eps: 10 - eval_per_episode: 5 + env_name: gym load_checkpoint: false - load_path: Train_CartPole-v1_DQN_20221026-054757 - max_steps: 200 + load_model_step: best + load_path: Train_single_CartPole-v1_DQN_20230515-211721 + max_episode: 100 + max_step: 200 mode: train + model_save_fre: 500 mp_backend: ray n_workers: 2 - new_step_api: true - render: false - render_mode: human - save_fig: true + online_eval: true + online_eval_episode: 10 seed: 1 - show_fig: false - test_eps: 10 - train_eps: 400 - wrapper: null algo_cfg: batch_size: 64 buffer_size: 100000 + buffer_type: REPLAY_QUE + dueling: true epsilon_decay: 500 epsilon_end: 0.01 epsilon_start: 0.95 - gamma: 0.99 - hidden_dim: 256 + gamma: 0.95 lr: 0.0001 target_update: 4 value_layers: - activation: relu layer_dim: - - n_states - 256 layer_type: linear - activation: relu layer_dim: - 256 - - 256 - layer_type: linear - - activation: none - layer_dim: - - 256 - - n_actions layer_type: linear +env_cfg: + id: CartPole-v1 + ignore_params: + - wrapper + - ignore_params + render_mode: null + wrapper: null diff --git a/CartPole-v1/Train_ray_CartPole-v1_DuelingDQN_20230517-224129/logs/log.txt b/CartPole-v1/Train_ray_CartPole-v1_DuelingDQN_20230517-224129/logs/log.txt new file mode 100644 index 0000000000000000000000000000000000000000..bcffce3fb5a1368423f613fdad8aeff5cb7734d6 --- /dev/null +++ b/CartPole-v1/Train_ray_CartPole-v1_DuelingDQN_20230517-224129/logs/log.txt @@ -0,0 +1,169 @@ +2023-05-17 22:41:29 - SimpleLog - INFO: - General Configs: +2023-05-17 22:41:29 - SimpleLog - INFO: - ================================================================================ +2023-05-17 22:41:29 - SimpleLog - INFO: - Name Value Type +2023-05-17 22:41:29 - SimpleLog - INFO: - env_name gym +2023-05-17 22:41:29 - SimpleLog - INFO: - algo_name DuelingDQN +2023-05-17 22:41:29 - SimpleLog - INFO: - mode train +2023-05-17 22:41:29 - SimpleLog - INFO: - device cpu +2023-05-17 22:41:29 - SimpleLog - INFO: - seed 1 +2023-05-17 22:41:29 - SimpleLog - INFO: - max_episode 100 +2023-05-17 22:41:29 - SimpleLog - INFO: - max_step 200 +2023-05-17 22:41:29 - SimpleLog - INFO: - collect_traj 0 +2023-05-17 22:41:29 - SimpleLog - INFO: - mp_backend ray +2023-05-17 22:41:29 - SimpleLog - INFO: - n_workers 2 +2023-05-17 22:41:29 - SimpleLog - INFO: - online_eval 1 +2023-05-17 22:41:29 - SimpleLog - INFO: - online_eval_episode 10 +2023-05-17 22:41:29 - SimpleLog - INFO: - model_save_fre 500 +2023-05-17 22:41:29 - SimpleLog - INFO: - load_checkpoint 0 +2023-05-17 22:41:29 - SimpleLog - INFO: - load_path Train_single_CartPole-v1_DQN_20230515-211721 +2023-05-17 22:41:29 - SimpleLog - INFO: - load_model_step best +2023-05-17 22:41:29 - SimpleLog - INFO: - ================================================================================ +2023-05-17 22:41:29 - SimpleLog - INFO: - Algo Configs: +2023-05-17 22:41:29 - SimpleLog - INFO: - ================================================================================ +2023-05-17 22:41:29 - SimpleLog - INFO: - Name Value Type +2023-05-17 22:41:29 - SimpleLog - INFO: - dueling 1 +2023-05-17 22:41:29 - SimpleLog - INFO: - epsilon_start 0.95 +2023-05-17 22:41:29 - SimpleLog - INFO: - epsilon_end 0.01 +2023-05-17 22:41:29 - SimpleLog - INFO: - epsilon_decay 500 +2023-05-17 22:41:29 - SimpleLog - INFO: - gamma 0.95 +2023-05-17 22:41:29 - SimpleLog - INFO: - lr 0.0001 +2023-05-17 22:41:29 - SimpleLog - INFO: - buffer_size 100000 +2023-05-17 22:41:29 - SimpleLog - INFO: - batch_size 64 +2023-05-17 22:41:29 - SimpleLog - INFO: - target_update 4 +2023-05-17 22:41:29 - SimpleLog - INFO: - value_layers [{'layer_type': 'linear', 'layer_dim': [256], 'activation': 'relu'}, {'layer_type': 'linear', 'layer_dim': [256], 'activation': 'relu'}] +2023-05-17 22:41:29 - SimpleLog - INFO: - buffer_type REPLAY_QUE +2023-05-17 22:41:29 - SimpleLog - INFO: - ================================================================================ +2023-05-17 22:41:29 - SimpleLog - INFO: - Env Configs: +2023-05-17 22:41:29 - SimpleLog - INFO: - ================================================================================ +2023-05-17 22:41:29 - SimpleLog - INFO: - Name Value Type +2023-05-17 22:41:29 - SimpleLog - INFO: - id CartPole-v1 +2023-05-17 22:41:29 - SimpleLog - INFO: - render_mode None +2023-05-17 22:41:29 - SimpleLog - INFO: - wrapper None +2023-05-17 22:41:29 - SimpleLog - INFO: - ignore_params ['wrapper', 'ignore_params'] +2023-05-17 22:41:29 - SimpleLog - INFO: - ================================================================================ +2023-05-17 22:41:35 - SimpleLog - INFO: - obs_space: Box([-4.8000002e+00 -3.4028235e+38 -4.1887903e-01 -3.4028235e+38], [4.8000002e+00 3.4028235e+38 4.1887903e-01 3.4028235e+38], (4,), float32), n_actions: Discrete(2) +2023-05-17 22:41:38 - RayLog - INFO: - Worker 1 finished episode 0 with reward 22.0 in 22 steps +2023-05-17 22:41:38 - RayLog - INFO: - Worker 0 finished episode 0 with reward 23.0 in 23 steps +2023-05-17 22:41:38 - RayLog - INFO: - Worker 0 finished episode 2 with reward 10.0 in 10 steps +2023-05-17 22:41:38 - RayLog - INFO: - Worker 0 finished episode 3 with reward 9.0 in 9 steps +2023-05-17 22:41:38 - RayLog - INFO: - Worker 1 finished episode 2 with reward 29.0 in 29 steps +2023-05-17 22:41:38 - RayLog - INFO: - Worker 0 finished episode 4 with reward 11.0 in 11 steps +2023-05-17 22:41:39 - RayLog - INFO: - Worker 0 finished episode 6 with reward 15.0 in 15 steps +2023-05-17 22:41:39 - RayLog - INFO: - Worker 1 finished episode 5 with reward 18.0 in 18 steps +2023-05-17 22:41:39 - RayLog - INFO: - Worker 0 finished episode 7 with reward 9.0 in 9 steps +2023-05-17 22:41:39 - RayLog - INFO: - Worker 1 finished episode 8 with reward 11.0 in 11 steps +2023-05-17 22:41:39 - RayLog - INFO: - Worker 1 finished episode 10 with reward 13.0 in 13 steps +2023-05-17 22:41:39 - RayLog - INFO: - Worker 0 finished episode 9 with reward 25.0 in 25 steps +2023-05-17 22:41:40 - RayLog - INFO: - Worker 0 finished episode 12 with reward 12.0 in 12 steps +2023-05-17 22:41:40 - RayLog - INFO: - Worker 0 finished episode 13 with reward 10.0 in 10 steps +2023-05-17 22:41:40 - RayLog - INFO: - Worker 1 finished episode 11 with reward 33.0 in 33 steps +2023-05-17 22:41:40 - RayLog - INFO: - Worker 0 finished episode 14 with reward 9.0 in 9 steps +2023-05-17 22:41:40 - RayLog - INFO: - Worker 1 finished episode 15 with reward 10.0 in 10 steps +2023-05-17 22:41:40 - RayLog - INFO: - Worker 0 finished episode 16 with reward 13.0 in 13 steps +2023-05-17 22:41:40 - RayLog - INFO: - Worker 1 finished episode 17 with reward 16.0 in 16 steps +2023-05-17 22:41:40 - RayLog - INFO: - Worker 0 finished episode 18 with reward 9.0 in 9 steps +2023-05-17 22:41:41 - RayLog - INFO: - Worker 0 finished episode 20 with reward 11.0 in 11 steps +2023-05-17 22:41:41 - RayLog - INFO: - Worker 1 finished episode 19 with reward 16.0 in 16 steps +2023-05-17 22:41:41 - RayLog - INFO: - Worker 0 finished episode 21 with reward 18.0 in 18 steps +2023-05-17 22:41:41 - RayLog - INFO: - Worker 1 finished episode 22 with reward 18.0 in 18 steps +2023-05-17 22:41:41 - RayLog - INFO: - Worker 0 finished episode 23 with reward 11.0 in 11 steps +2023-05-17 22:41:41 - RayLog - INFO: - Worker 1 finished episode 24 with reward 9.0 in 9 steps +2023-05-17 22:41:41 - RayLog - INFO: - Worker 1 finished episode 26 with reward 9.0 in 9 steps +2023-05-17 22:41:41 - RayLog - INFO: - Worker 0 finished episode 25 with reward 10.0 in 10 steps +2023-05-17 22:41:42 - RayLog - INFO: - Worker 0 finished episode 28 with reward 11.0 in 11 steps +2023-05-17 22:41:42 - RayLog - INFO: - Worker 1 finished episode 27 with reward 12.0 in 12 steps +2023-05-17 22:41:42 - RayLog - INFO: - Worker 0 finished episode 29 with reward 15.0 in 15 steps +2023-05-17 22:41:42 - RayLog - INFO: - Worker 1 finished episode 30 with reward 19.0 in 19 steps +2023-05-17 22:41:42 - RayLog - INFO: - Worker 0 finished episode 31 with reward 10.0 in 10 steps +2023-05-17 22:41:42 - RayLog - INFO: - Worker 1 finished episode 32 with reward 13.0 in 13 steps +2023-05-17 22:41:44 - RayLog - INFO: - update_step: 500, online_eval_reward: 200.000 +2023-05-17 22:41:44 - RayLog - INFO: - current update step obtain a better online_eval_reward: 200.000, save the best model! +2023-05-17 22:41:45 - RayLog - INFO: - Worker 0 finished episode 33 with reward 97.0 in 97 steps +2023-05-17 22:41:45 - RayLog - INFO: - Worker 1 finished episode 34 with reward 96.0 in 96 steps +2023-05-17 22:41:46 - RayLog - INFO: - Worker 1 finished episode 36 with reward 24.0 in 24 steps +2023-05-17 22:41:46 - RayLog - INFO: - Worker 0 finished episode 35 with reward 34.0 in 34 steps +2023-05-17 22:41:46 - RayLog - INFO: - Worker 1 finished episode 37 with reward 17.0 in 17 steps +2023-05-17 22:41:46 - RayLog - INFO: - Worker 0 finished episode 38 with reward 23.0 in 23 steps +2023-05-17 22:41:46 - RayLog - INFO: - Worker 1 finished episode 39 with reward 16.0 in 16 steps +2023-05-17 22:41:47 - RayLog - INFO: - Worker 1 finished episode 41 with reward 17.0 in 17 steps +2023-05-17 22:41:47 - RayLog - INFO: - Worker 0 finished episode 40 with reward 24.0 in 24 steps +2023-05-17 22:41:47 - RayLog - INFO: - Worker 1 finished episode 42 with reward 21.0 in 21 steps +2023-05-17 22:41:47 - RayLog - INFO: - Worker 0 finished episode 43 with reward 29.0 in 29 steps +2023-05-17 22:41:47 - RayLog - INFO: - Worker 1 finished episode 44 with reward 22.0 in 22 steps +2023-05-17 22:41:49 - RayLog - INFO: - update_step: 1000, online_eval_reward: 100.000 +2023-05-17 22:41:49 - RayLog - INFO: - Worker 0 finished episode 45 with reward 84.0 in 84 steps +2023-05-17 22:41:49 - RayLog - INFO: - Worker 1 finished episode 46 with reward 75.0 in 75 steps +2023-05-17 22:41:50 - RayLog - INFO: - Worker 1 finished episode 48 with reward 52.0 in 52 steps +2023-05-17 22:41:50 - RayLog - INFO: - Worker 0 finished episode 47 with reward 66.0 in 66 steps +2023-05-17 22:41:51 - RayLog - INFO: - Worker 1 finished episode 49 with reward 63.0 in 63 steps +2023-05-17 22:41:52 - RayLog - INFO: - Worker 0 finished episode 50 with reward 94.0 in 94 steps +2023-05-17 22:41:53 - RayLog - INFO: - Worker 1 finished episode 51 with reward 75.0 in 75 steps +2023-05-17 22:41:54 - RayLog - INFO: - update_step: 1500, online_eval_reward: 120.000 +2023-05-17 22:41:54 - RayLog - INFO: - Worker 0 finished episode 52 with reward 102.0 in 102 steps +2023-05-17 22:41:55 - RayLog - INFO: - Worker 1 finished episode 53 with reward 93.0 in 93 steps +2023-05-17 22:41:57 - RayLog - INFO: - Worker 1 finished episode 55 with reward 126.0 in 126 steps +2023-05-17 22:41:58 - RayLog - INFO: - Worker 0 finished episode 54 with reward 200.0 in 200 steps +2023-05-17 22:41:59 - RayLog - INFO: - update_step: 2000, online_eval_reward: 200.000 +2023-05-17 22:42:01 - RayLog - INFO: - Worker 1 finished episode 56 with reward 200.0 in 200 steps +2023-05-17 22:42:02 - RayLog - INFO: - Worker 0 finished episode 57 with reward 200.0 in 200 steps +2023-05-17 22:42:04 - RayLog - INFO: - update_step: 2500, online_eval_reward: 167.000 +2023-05-17 22:42:05 - RayLog - INFO: - Worker 1 finished episode 58 with reward 200.0 in 200 steps +2023-05-17 22:42:06 - RayLog - INFO: - Worker 0 finished episode 59 with reward 168.0 in 168 steps +2023-05-17 22:42:09 - RayLog - INFO: - Worker 0 finished episode 61 with reward 164.0 in 164 steps +2023-05-17 22:42:09 - RayLog - INFO: - update_step: 3000, online_eval_reward: 145.000 +2023-05-17 22:42:09 - RayLog - INFO: - Worker 1 finished episode 60 with reward 189.0 in 189 steps +2023-05-17 22:42:12 - RayLog - INFO: - Worker 0 finished episode 62 with reward 152.0 in 152 steps +2023-05-17 22:42:12 - RayLog - INFO: - Worker 1 finished episode 63 with reward 162.0 in 162 steps +2023-05-17 22:42:14 - RayLog - INFO: - update_step: 3500, online_eval_reward: 151.000 +2023-05-17 22:42:15 - RayLog - INFO: - Worker 0 finished episode 64 with reward 143.0 in 143 steps +2023-05-17 22:42:16 - RayLog - INFO: - Worker 1 finished episode 65 with reward 163.0 in 163 steps +2023-05-17 22:42:19 - RayLog - INFO: - Worker 0 finished episode 66 with reward 187.0 in 187 steps +2023-05-17 22:42:19 - RayLog - INFO: - update_step: 4000, online_eval_reward: 189.000 +2023-05-17 22:42:20 - RayLog - INFO: - Worker 1 finished episode 67 with reward 200.0 in 200 steps +2023-05-17 22:42:22 - RayLog - INFO: - Worker 0 finished episode 68 with reward 173.0 in 173 steps +2023-05-17 22:42:23 - RayLog - INFO: - Worker 1 finished episode 69 with reward 170.0 in 170 steps +2023-05-17 22:42:24 - RayLog - INFO: - update_step: 4500, online_eval_reward: 178.000 +2023-05-17 22:42:26 - RayLog - INFO: - Worker 0 finished episode 70 with reward 200.0 in 200 steps +2023-05-17 22:42:27 - RayLog - INFO: - Worker 1 finished episode 71 with reward 200.0 in 200 steps +2023-05-17 22:42:30 - RayLog - INFO: - update_step: 5000, online_eval_reward: 197.000 +2023-05-17 22:42:30 - RayLog - INFO: - Worker 0 finished episode 72 with reward 200.0 in 200 steps +2023-05-17 22:42:31 - RayLog - INFO: - Worker 1 finished episode 73 with reward 200.0 in 200 steps +2023-05-17 22:42:35 - RayLog - INFO: - Worker 0 finished episode 74 with reward 197.0 in 197 steps +2023-05-17 22:42:35 - RayLog - INFO: - update_step: 5500, online_eval_reward: 200.000 +2023-05-17 22:42:36 - RayLog - INFO: - Worker 1 finished episode 75 with reward 200.0 in 200 steps +2023-05-17 22:42:39 - RayLog - INFO: - Worker 0 finished episode 76 with reward 200.0 in 200 steps +2023-05-17 22:42:40 - RayLog - INFO: - Worker 1 finished episode 77 with reward 200.0 in 200 steps +2023-05-17 22:42:40 - RayLog - INFO: - update_step: 6000, online_eval_reward: 200.000 +2023-05-17 22:42:43 - RayLog - INFO: - Worker 0 finished episode 78 with reward 200.0 in 200 steps +2023-05-17 22:42:44 - RayLog - INFO: - Worker 1 finished episode 79 with reward 200.0 in 200 steps +2023-05-17 22:42:45 - RayLog - INFO: - update_step: 6500, online_eval_reward: 200.000 +2023-05-17 22:42:47 - RayLog - INFO: - Worker 0 finished episode 80 with reward 200.0 in 200 steps +2023-05-17 22:42:48 - RayLog - INFO: - Worker 1 finished episode 81 with reward 200.0 in 200 steps +2023-05-17 22:42:51 - RayLog - INFO: - update_step: 7000, online_eval_reward: 200.000 +2023-05-17 22:42:52 - RayLog - INFO: - Worker 0 finished episode 82 with reward 200.0 in 200 steps +2023-05-17 22:42:53 - RayLog - INFO: - Worker 1 finished episode 83 with reward 200.0 in 200 steps +2023-05-17 22:42:56 - RayLog - INFO: - Worker 0 finished episode 84 with reward 200.0 in 200 steps +2023-05-17 22:42:56 - RayLog - INFO: - update_step: 7500, online_eval_reward: 200.000 +2023-05-17 22:42:57 - RayLog - INFO: - Worker 1 finished episode 85 with reward 200.0 in 200 steps +2023-05-17 22:43:00 - RayLog - INFO: - Worker 0 finished episode 86 with reward 200.0 in 200 steps +2023-05-17 22:43:01 - RayLog - INFO: - Worker 1 finished episode 87 with reward 200.0 in 200 steps +2023-05-17 22:43:02 - RayLog - INFO: - update_step: 8000, online_eval_reward: 200.000 +2023-05-17 22:43:05 - RayLog - INFO: - Worker 0 finished episode 88 with reward 200.0 in 200 steps +2023-05-17 22:43:06 - RayLog - INFO: - Worker 1 finished episode 89 with reward 200.0 in 200 steps +2023-05-17 22:43:07 - RayLog - INFO: - update_step: 8500, online_eval_reward: 200.000 +2023-05-17 22:43:09 - RayLog - INFO: - Worker 0 finished episode 90 with reward 200.0 in 200 steps +2023-05-17 22:43:10 - RayLog - INFO: - Worker 1 finished episode 91 with reward 200.0 in 200 steps +2023-05-17 22:43:12 - RayLog - INFO: - update_step: 9000, online_eval_reward: 200.000 +2023-05-17 22:43:13 - RayLog - INFO: - Worker 0 finished episode 92 with reward 200.0 in 200 steps +2023-05-17 22:43:14 - RayLog - INFO: - Worker 1 finished episode 93 with reward 200.0 in 200 steps +2023-05-17 22:43:18 - RayLog - INFO: - Worker 0 finished episode 94 with reward 200.0 in 200 steps +2023-05-17 22:43:18 - RayLog - INFO: - update_step: 9500, online_eval_reward: 200.000 +2023-05-17 22:43:19 - RayLog - INFO: - Worker 1 finished episode 95 with reward 200.0 in 200 steps +2023-05-17 22:43:22 - RayLog - INFO: - Worker 0 finished episode 96 with reward 200.0 in 200 steps +2023-05-17 22:43:23 - RayLog - INFO: - Worker 1 finished episode 97 with reward 200.0 in 200 steps +2023-05-17 22:43:23 - RayLog - INFO: - update_step: 10000, online_eval_reward: 200.000 +2023-05-17 22:43:26 - RayLog - INFO: - Worker 0 finished episode 98 with reward 200.0 in 200 steps +2023-05-17 22:43:27 - RayLog - INFO: - Worker 1 finished episode 99 with reward 200.0 in 200 steps +2023-05-17 22:43:29 - RayLog - INFO: - update_step: 10500, online_eval_reward: 200.000 +2023-05-17 22:43:30 - RayLog - INFO: - Worker 0 finished episode 100 with reward 200.0 in 200 steps +2023-05-17 22:43:32 - SimpleLog - INFO: - Finish training! total time consumed: 122.69s diff --git a/CartPole-v1/Train_ray_CartPole-v1_DuelingDQN_20230517-224129/models/1000 b/CartPole-v1/Train_ray_CartPole-v1_DuelingDQN_20230517-224129/models/1000 new file mode 100644 index 0000000000000000000000000000000000000000..8a1a7fc585401da088a6bdea8d34199f552b87a1 Binary files /dev/null and b/CartPole-v1/Train_ray_CartPole-v1_DuelingDQN_20230517-224129/models/1000 differ diff --git a/CartPole-v1/Train_ray_CartPole-v1_DuelingDQN_20230517-224129/models/10000 b/CartPole-v1/Train_ray_CartPole-v1_DuelingDQN_20230517-224129/models/10000 new file mode 100644 index 0000000000000000000000000000000000000000..0c970675fae8a72051a34f62d7ea41e45fbcc903 Binary files /dev/null and b/CartPole-v1/Train_ray_CartPole-v1_DuelingDQN_20230517-224129/models/10000 differ diff --git a/CartPole-v1/Train_ray_CartPole-v1_DuelingDQN_20230517-224129/models/10500 b/CartPole-v1/Train_ray_CartPole-v1_DuelingDQN_20230517-224129/models/10500 new file mode 100644 index 0000000000000000000000000000000000000000..7c553e306350fa2c312319c174433ff548d18bfb Binary files /dev/null and b/CartPole-v1/Train_ray_CartPole-v1_DuelingDQN_20230517-224129/models/10500 differ diff --git a/CartPole-v1/Train_ray_CartPole-v1_DuelingDQN_20230517-224129/models/1500 b/CartPole-v1/Train_ray_CartPole-v1_DuelingDQN_20230517-224129/models/1500 new file mode 100644 index 0000000000000000000000000000000000000000..7b9ea91528f7d2f901b891ffe407ce1eecede849 Binary files /dev/null and b/CartPole-v1/Train_ray_CartPole-v1_DuelingDQN_20230517-224129/models/1500 differ diff --git a/CartPole-v1/Train_ray_CartPole-v1_DuelingDQN_20230517-224129/models/2000 b/CartPole-v1/Train_ray_CartPole-v1_DuelingDQN_20230517-224129/models/2000 new file mode 100644 index 0000000000000000000000000000000000000000..0d5ea43949d6581e17c12d1edcfd4957a49a1d0b Binary files /dev/null and b/CartPole-v1/Train_ray_CartPole-v1_DuelingDQN_20230517-224129/models/2000 differ diff --git a/CartPole-v1/Train_ray_CartPole-v1_DuelingDQN_20230517-224129/models/2500 b/CartPole-v1/Train_ray_CartPole-v1_DuelingDQN_20230517-224129/models/2500 new file mode 100644 index 0000000000000000000000000000000000000000..6c78b231f60c254d57f3a736465317bc5fdcf2ec Binary files /dev/null and b/CartPole-v1/Train_ray_CartPole-v1_DuelingDQN_20230517-224129/models/2500 differ diff --git a/CartPole-v1/Train_ray_CartPole-v1_DuelingDQN_20230517-224129/models/3000 b/CartPole-v1/Train_ray_CartPole-v1_DuelingDQN_20230517-224129/models/3000 new file mode 100644 index 0000000000000000000000000000000000000000..16ff119e33c1c01249bc7f329ff7ad3517f3bb70 Binary files /dev/null and b/CartPole-v1/Train_ray_CartPole-v1_DuelingDQN_20230517-224129/models/3000 differ diff --git a/CartPole-v1/Train_ray_CartPole-v1_DuelingDQN_20230517-224129/models/3500 b/CartPole-v1/Train_ray_CartPole-v1_DuelingDQN_20230517-224129/models/3500 new file mode 100644 index 0000000000000000000000000000000000000000..38e1eb98fd7a547b6e2ade9c92ae68b0001120f2 Binary files /dev/null and b/CartPole-v1/Train_ray_CartPole-v1_DuelingDQN_20230517-224129/models/3500 differ diff --git a/CartPole-v1/Train_ray_CartPole-v1_DuelingDQN_20230517-224129/models/4000 b/CartPole-v1/Train_ray_CartPole-v1_DuelingDQN_20230517-224129/models/4000 new file mode 100644 index 0000000000000000000000000000000000000000..111cc0b9915b87b31a4b9c03d2a1ff0dd6aee9f2 Binary files /dev/null and b/CartPole-v1/Train_ray_CartPole-v1_DuelingDQN_20230517-224129/models/4000 differ diff --git a/CartPole-v1/Train_ray_CartPole-v1_DuelingDQN_20230517-224129/models/4500 b/CartPole-v1/Train_ray_CartPole-v1_DuelingDQN_20230517-224129/models/4500 new file mode 100644 index 0000000000000000000000000000000000000000..861dfd6f85b9a98638b96711a5e8867af1f1dbc4 Binary files /dev/null and b/CartPole-v1/Train_ray_CartPole-v1_DuelingDQN_20230517-224129/models/4500 differ diff --git a/CartPole-v1/Train_ray_CartPole-v1_DuelingDQN_20230517-224129/models/500 b/CartPole-v1/Train_ray_CartPole-v1_DuelingDQN_20230517-224129/models/500 new file mode 100644 index 0000000000000000000000000000000000000000..70db3c549b999a0e295c54cb1d08c54d477c9d65 Binary files /dev/null and b/CartPole-v1/Train_ray_CartPole-v1_DuelingDQN_20230517-224129/models/500 differ diff --git a/CartPole-v1/Train_ray_CartPole-v1_DuelingDQN_20230517-224129/models/5000 b/CartPole-v1/Train_ray_CartPole-v1_DuelingDQN_20230517-224129/models/5000 new file mode 100644 index 0000000000000000000000000000000000000000..2abca6df474c2346e089135b23bb062dfd3d6e6c Binary files /dev/null and b/CartPole-v1/Train_ray_CartPole-v1_DuelingDQN_20230517-224129/models/5000 differ diff --git a/CartPole-v1/Train_ray_CartPole-v1_DuelingDQN_20230517-224129/models/5500 b/CartPole-v1/Train_ray_CartPole-v1_DuelingDQN_20230517-224129/models/5500 new file mode 100644 index 0000000000000000000000000000000000000000..2bb550dcabf521425701b68b0e9452557bd190e9 Binary files /dev/null and b/CartPole-v1/Train_ray_CartPole-v1_DuelingDQN_20230517-224129/models/5500 differ diff --git a/CartPole-v1/Train_ray_CartPole-v1_DuelingDQN_20230517-224129/models/6000 b/CartPole-v1/Train_ray_CartPole-v1_DuelingDQN_20230517-224129/models/6000 new file mode 100644 index 0000000000000000000000000000000000000000..50f1b3486d023d8822bd37b24b96da2f1d93332a Binary files /dev/null and b/CartPole-v1/Train_ray_CartPole-v1_DuelingDQN_20230517-224129/models/6000 differ diff --git a/CartPole-v1/Train_ray_CartPole-v1_DuelingDQN_20230517-224129/models/6500 b/CartPole-v1/Train_ray_CartPole-v1_DuelingDQN_20230517-224129/models/6500 new file mode 100644 index 0000000000000000000000000000000000000000..2ac84962a51dbd241e0ac6c8450777d0c7106b89 Binary files /dev/null and b/CartPole-v1/Train_ray_CartPole-v1_DuelingDQN_20230517-224129/models/6500 differ diff --git a/CartPole-v1/Train_ray_CartPole-v1_DuelingDQN_20230517-224129/models/7000 b/CartPole-v1/Train_ray_CartPole-v1_DuelingDQN_20230517-224129/models/7000 new file mode 100644 index 0000000000000000000000000000000000000000..85cdc9fd144d298ed35b0377bad48fd29d0d80d1 Binary files /dev/null and b/CartPole-v1/Train_ray_CartPole-v1_DuelingDQN_20230517-224129/models/7000 differ diff --git a/CartPole-v1/Train_ray_CartPole-v1_DuelingDQN_20230517-224129/models/7500 b/CartPole-v1/Train_ray_CartPole-v1_DuelingDQN_20230517-224129/models/7500 new file mode 100644 index 0000000000000000000000000000000000000000..4a03da0ae0deb5c577859b7536628633dbabe3bc Binary files /dev/null and b/CartPole-v1/Train_ray_CartPole-v1_DuelingDQN_20230517-224129/models/7500 differ diff --git a/CartPole-v1/Train_ray_CartPole-v1_DuelingDQN_20230517-224129/models/8000 b/CartPole-v1/Train_ray_CartPole-v1_DuelingDQN_20230517-224129/models/8000 new file mode 100644 index 0000000000000000000000000000000000000000..21e89314f50b1d5876d860cbfbd252edc0e82fbb Binary files /dev/null and b/CartPole-v1/Train_ray_CartPole-v1_DuelingDQN_20230517-224129/models/8000 differ diff --git a/CartPole-v1/Train_ray_CartPole-v1_DuelingDQN_20230517-224129/models/8500 b/CartPole-v1/Train_ray_CartPole-v1_DuelingDQN_20230517-224129/models/8500 new file mode 100644 index 0000000000000000000000000000000000000000..671d71639d2274a31f2b26df4377702bdf681556 Binary files /dev/null and b/CartPole-v1/Train_ray_CartPole-v1_DuelingDQN_20230517-224129/models/8500 differ diff --git a/CartPole-v1/Train_ray_CartPole-v1_DuelingDQN_20230517-224129/models/9000 b/CartPole-v1/Train_ray_CartPole-v1_DuelingDQN_20230517-224129/models/9000 new file mode 100644 index 0000000000000000000000000000000000000000..5a056e40d83af0709d0c8882f464021d8160024c Binary files /dev/null and b/CartPole-v1/Train_ray_CartPole-v1_DuelingDQN_20230517-224129/models/9000 differ diff --git a/CartPole-v1/Train_ray_CartPole-v1_DuelingDQN_20230517-224129/models/9500 b/CartPole-v1/Train_ray_CartPole-v1_DuelingDQN_20230517-224129/models/9500 new file mode 100644 index 0000000000000000000000000000000000000000..a4eda1f4cd16280d209cc4aa65e56c42da80854f Binary files /dev/null and b/CartPole-v1/Train_ray_CartPole-v1_DuelingDQN_20230517-224129/models/9500 differ diff --git a/CartPole-v1/Train_ray_CartPole-v1_DuelingDQN_20230517-224129/models/best b/CartPole-v1/Train_ray_CartPole-v1_DuelingDQN_20230517-224129/models/best new file mode 100644 index 0000000000000000000000000000000000000000..70db3c549b999a0e295c54cb1d08c54d477c9d65 Binary files /dev/null and b/CartPole-v1/Train_ray_CartPole-v1_DuelingDQN_20230517-224129/models/best differ diff --git a/CartPole-v1/Train_ray_CartPole-v1_DuelingDQN_20230517-224129/tb_logs/interact/events.out.tfevents.1684334489.DESKTOP-H34HQIQ.80856.0 b/CartPole-v1/Train_ray_CartPole-v1_DuelingDQN_20230517-224129/tb_logs/interact/events.out.tfevents.1684334489.DESKTOP-H34HQIQ.80856.0 new file mode 100644 index 0000000000000000000000000000000000000000..e777e27cd322c1959613e4d59dfbe5d943f9b957 --- /dev/null +++ b/CartPole-v1/Train_ray_CartPole-v1_DuelingDQN_20230517-224129/tb_logs/interact/events.out.tfevents.1684334489.DESKTOP-H34HQIQ.80856.0 @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1c9b4a566642bacd5610c3e7b42d10f1feb9704e2a4cb2c004a7d85f75a0aba9 +size 40 diff --git a/CartPole-v1/Train_ray_CartPole-v1_DuelingDQN_20230517-224129/tb_logs/interact/events.out.tfevents.1684334497.DESKTOP-H34HQIQ.84100.0 b/CartPole-v1/Train_ray_CartPole-v1_DuelingDQN_20230517-224129/tb_logs/interact/events.out.tfevents.1684334497.DESKTOP-H34HQIQ.84100.0 new file mode 100644 index 0000000000000000000000000000000000000000..75dc2ac024a92cba2c4a47ecd6997bd519a9abef --- /dev/null +++ b/CartPole-v1/Train_ray_CartPole-v1_DuelingDQN_20230517-224129/tb_logs/interact/events.out.tfevents.1684334497.DESKTOP-H34HQIQ.84100.0 @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9867609ac2d205c8c66fe7bc380a67b26f152a046fb5e97d523f5b2bf1c147fd +size 10028 diff --git a/CartPole-v1/Train_ray_CartPole-v1_DuelingDQN_20230517-224129/tb_logs/model/events.out.tfevents.1684334489.DESKTOP-H34HQIQ.80856.1 b/CartPole-v1/Train_ray_CartPole-v1_DuelingDQN_20230517-224129/tb_logs/model/events.out.tfevents.1684334489.DESKTOP-H34HQIQ.80856.1 new file mode 100644 index 0000000000000000000000000000000000000000..cf1543c241819ce51b7b0e93a227aa18686a85a1 --- /dev/null +++ b/CartPole-v1/Train_ray_CartPole-v1_DuelingDQN_20230517-224129/tb_logs/model/events.out.tfevents.1684334489.DESKTOP-H34HQIQ.80856.1 @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f4847f9588dc9c347debfc783cad98f92a9b0e1e5db600b4662da61e2dab30e6 +size 40 diff --git a/CartPole-v1/Train_ray_CartPole-v1_DuelingDQN_20230517-224129/tb_logs/model/events.out.tfevents.1684334497.DESKTOP-H34HQIQ.84100.1 b/CartPole-v1/Train_ray_CartPole-v1_DuelingDQN_20230517-224129/tb_logs/model/events.out.tfevents.1684334497.DESKTOP-H34HQIQ.84100.1 new file mode 100644 index 0000000000000000000000000000000000000000..6f91799c6abe4b9a427da88bc9e7a8f5675a606c --- /dev/null +++ b/CartPole-v1/Train_ray_CartPole-v1_DuelingDQN_20230517-224129/tb_logs/model/events.out.tfevents.1684334497.DESKTOP-H34HQIQ.84100.1 @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0f2e6e0d51dc6d1689b2eddc7d32991cac8108b99ee68d0750a53d25d5890403 +size 516618 diff --git a/CartPole-v1/Train_single_CartPole-v1_DuelingDQN_20230517-223843/config.yaml b/CartPole-v1/Train_single_CartPole-v1_DuelingDQN_20230517-223843/config.yaml new file mode 100644 index 0000000000000000000000000000000000000000..a203609467be220d76360ff3216a07ca4838dc0b --- /dev/null +++ b/CartPole-v1/Train_single_CartPole-v1_DuelingDQN_20230517-223843/config.yaml @@ -0,0 +1,44 @@ +general_cfg: + algo_name: DuelingDQN + collect_traj: false + device: cpu + env_name: gym + load_checkpoint: false + load_model_step: best + load_path: Train_single_CartPole-v1_DQN_20230515-211721 + max_episode: 100 + max_step: 200 + mode: train + model_save_fre: 500 + mp_backend: single + n_workers: 2 + online_eval: true + online_eval_episode: 10 + seed: 1 +algo_cfg: + batch_size: 64 + buffer_size: 100000 + buffer_type: REPLAY_QUE + dueling: true + epsilon_decay: 500 + epsilon_end: 0.01 + epsilon_start: 0.95 + gamma: 0.95 + lr: 0.0001 + target_update: 4 + value_layers: + - activation: relu + layer_dim: + - 256 + layer_type: linear + - activation: relu + layer_dim: + - 256 + layer_type: linear +env_cfg: + id: CartPole-v1 + ignore_params: + - wrapper + - ignore_params + render_mode: null + wrapper: null diff --git a/CartPole-v1/Train_single_CartPole-v1_DuelingDQN_20230517-223843/logs/log.txt b/CartPole-v1/Train_single_CartPole-v1_DuelingDQN_20230517-223843/logs/log.txt new file mode 100644 index 0000000000000000000000000000000000000000..96bfc997b328664ed7d2f2611d7429971bd3596f --- /dev/null +++ b/CartPole-v1/Train_single_CartPole-v1_DuelingDQN_20230517-223843/logs/log.txt @@ -0,0 +1,174 @@ +2023-05-17 22:38:43 - SimpleLog - INFO: - General Configs: +2023-05-17 22:38:43 - SimpleLog - INFO: - ================================================================================ +2023-05-17 22:38:43 - SimpleLog - INFO: - Name Value Type +2023-05-17 22:38:43 - SimpleLog - INFO: - env_name gym +2023-05-17 22:38:43 - SimpleLog - INFO: - algo_name DuelingDQN +2023-05-17 22:38:43 - SimpleLog - INFO: - mode train +2023-05-17 22:38:43 - SimpleLog - INFO: - device cpu +2023-05-17 22:38:43 - SimpleLog - INFO: - seed 1 +2023-05-17 22:38:43 - SimpleLog - INFO: - max_episode 100 +2023-05-17 22:38:43 - SimpleLog - INFO: - max_step 200 +2023-05-17 22:38:43 - SimpleLog - INFO: - collect_traj 0 +2023-05-17 22:38:43 - SimpleLog - INFO: - mp_backend single +2023-05-17 22:38:43 - SimpleLog - INFO: - n_workers 2 +2023-05-17 22:38:43 - SimpleLog - INFO: - online_eval 1 +2023-05-17 22:38:43 - SimpleLog - INFO: - online_eval_episode 10 +2023-05-17 22:38:43 - SimpleLog - INFO: - model_save_fre 500 +2023-05-17 22:38:43 - SimpleLog - INFO: - load_checkpoint 0 +2023-05-17 22:38:43 - SimpleLog - INFO: - load_path Train_single_CartPole-v1_DQN_20230515-211721 +2023-05-17 22:38:43 - SimpleLog - INFO: - load_model_step best +2023-05-17 22:38:43 - SimpleLog - INFO: - ================================================================================ +2023-05-17 22:38:43 - SimpleLog - INFO: - Algo Configs: +2023-05-17 22:38:43 - SimpleLog - INFO: - ================================================================================ +2023-05-17 22:38:43 - SimpleLog - INFO: - Name Value Type +2023-05-17 22:38:43 - SimpleLog - INFO: - dueling 1 +2023-05-17 22:38:43 - SimpleLog - INFO: - epsilon_start 0.95 +2023-05-17 22:38:43 - SimpleLog - INFO: - epsilon_end 0.01 +2023-05-17 22:38:43 - SimpleLog - INFO: - epsilon_decay 500 +2023-05-17 22:38:43 - SimpleLog - INFO: - gamma 0.95 +2023-05-17 22:38:43 - SimpleLog - INFO: - lr 0.0001 +2023-05-17 22:38:43 - SimpleLog - INFO: - buffer_size 100000 +2023-05-17 22:38:43 - SimpleLog - INFO: - batch_size 64 +2023-05-17 22:38:43 - SimpleLog - INFO: - target_update 4 +2023-05-17 22:38:43 - SimpleLog - INFO: - value_layers [{'layer_type': 'linear', 'layer_dim': [256], 'activation': 'relu'}, {'layer_type': 'linear', 'layer_dim': [256], 'activation': 'relu'}] +2023-05-17 22:38:43 - SimpleLog - INFO: - buffer_type REPLAY_QUE +2023-05-17 22:38:43 - SimpleLog - INFO: - ================================================================================ +2023-05-17 22:38:43 - SimpleLog - INFO: - Env Configs: +2023-05-17 22:38:43 - SimpleLog - INFO: - ================================================================================ +2023-05-17 22:38:43 - SimpleLog - INFO: - Name Value Type +2023-05-17 22:38:43 - SimpleLog - INFO: - id CartPole-v1 +2023-05-17 22:38:43 - SimpleLog - INFO: - render_mode None +2023-05-17 22:38:43 - SimpleLog - INFO: - wrapper None +2023-05-17 22:38:43 - SimpleLog - INFO: - ignore_params ['wrapper', 'ignore_params'] +2023-05-17 22:38:43 - SimpleLog - INFO: - ================================================================================ +2023-05-17 22:38:43 - SimpleLog - INFO: - obs_space: Box([-4.8000002e+00 -3.4028235e+38 -4.1887903e-01 -3.4028235e+38], [4.8000002e+00 3.4028235e+38 4.1887903e-01 3.4028235e+38], (4,), float32), n_actions: Discrete(2) +2023-05-17 22:38:43 - SimpleLog - INFO: - Start training! +2023-05-17 22:38:43 - SimpleLog - INFO: - episode: 0, ep_reward: 35.0, ep_step: 35 +2023-05-17 22:38:43 - SimpleLog - INFO: - episode: 1, ep_reward: 14.0, ep_step: 14 +2023-05-17 22:38:43 - SimpleLog - INFO: - episode: 2, ep_reward: 20.0, ep_step: 20 +2023-05-17 22:38:43 - SimpleLog - INFO: - episode: 3, ep_reward: 33.0, ep_step: 33 +2023-05-17 22:38:43 - SimpleLog - INFO: - episode: 4, ep_reward: 21.0, ep_step: 21 +2023-05-17 22:38:43 - SimpleLog - INFO: - episode: 5, ep_reward: 19.0, ep_step: 19 +2023-05-17 22:38:43 - SimpleLog - INFO: - episode: 6, ep_reward: 15.0, ep_step: 15 +2023-05-17 22:38:43 - SimpleLog - INFO: - episode: 7, ep_reward: 14.0, ep_step: 14 +2023-05-17 22:38:43 - SimpleLog - INFO: - episode: 8, ep_reward: 9.0, ep_step: 9 +2023-05-17 22:38:43 - SimpleLog - INFO: - episode: 9, ep_reward: 11.0, ep_step: 11 +2023-05-17 22:38:43 - SimpleLog - INFO: - episode: 10, ep_reward: 17.0, ep_step: 17 +2023-05-17 22:38:43 - SimpleLog - INFO: - episode: 11, ep_reward: 21.0, ep_step: 21 +2023-05-17 22:38:43 - SimpleLog - INFO: - episode: 12, ep_reward: 15.0, ep_step: 15 +2023-05-17 22:38:43 - SimpleLog - INFO: - episode: 13, ep_reward: 19.0, ep_step: 19 +2023-05-17 22:38:43 - SimpleLog - INFO: - episode: 14, ep_reward: 9.0, ep_step: 9 +2023-05-17 22:38:43 - SimpleLog - INFO: - episode: 15, ep_reward: 10.0, ep_step: 10 +2023-05-17 22:38:44 - SimpleLog - INFO: - episode: 16, ep_reward: 11.0, ep_step: 11 +2023-05-17 22:38:44 - SimpleLog - INFO: - episode: 17, ep_reward: 17.0, ep_step: 17 +2023-05-17 22:38:44 - SimpleLog - INFO: - episode: 18, ep_reward: 13.0, ep_step: 13 +2023-05-17 22:38:44 - SimpleLog - INFO: - episode: 19, ep_reward: 16.0, ep_step: 16 +2023-05-17 22:38:44 - SimpleLog - INFO: - episode: 20, ep_reward: 18.0, ep_step: 18 +2023-05-17 22:38:44 - SimpleLog - INFO: - episode: 21, ep_reward: 14.0, ep_step: 14 +2023-05-17 22:38:44 - SimpleLog - INFO: - episode: 22, ep_reward: 16.0, ep_step: 16 +2023-05-17 22:38:44 - SimpleLog - INFO: - episode: 23, ep_reward: 14.0, ep_step: 14 +2023-05-17 22:38:44 - SimpleLog - INFO: - episode: 24, ep_reward: 10.0, ep_step: 10 +2023-05-17 22:38:44 - SimpleLog - INFO: - episode: 25, ep_reward: 14.0, ep_step: 14 +2023-05-17 22:38:44 - SimpleLog - INFO: - episode: 26, ep_reward: 18.0, ep_step: 18 +2023-05-17 22:38:44 - SimpleLog - INFO: - episode: 27, ep_reward: 44.0, ep_step: 44 +2023-05-17 22:38:44 - SimpleLog - INFO: - episode: 28, ep_reward: 12.0, ep_step: 12 +2023-05-17 22:38:44 - SimpleLog - INFO: - episode: 29, ep_reward: 12.0, ep_step: 12 +2023-05-17 22:38:44 - SimpleLog - INFO: - episode: 30, ep_reward: 14.0, ep_step: 14 +2023-05-17 22:38:45 - SimpleLog - INFO: - episode: 31, ep_reward: 10.0, ep_step: 10 +2023-05-17 22:38:45 - SimpleLog - INFO: - episode: 32, ep_reward: 10.0, ep_step: 10 +2023-05-17 22:38:45 - SimpleLog - INFO: - episode: 33, ep_reward: 15.0, ep_step: 15 +2023-05-17 22:38:45 - SimpleLog - INFO: - update_step: 500, online_eval_reward: 10.000 +2023-05-17 22:38:45 - SimpleLog - INFO: - current update step obtain a better online_eval_reward: 10.000, save the best model! +2023-05-17 22:38:45 - SimpleLog - INFO: - episode: 34, ep_reward: 39.0, ep_step: 39 +2023-05-17 22:38:45 - SimpleLog - INFO: - episode: 35, ep_reward: 70.0, ep_step: 70 +2023-05-17 22:38:45 - SimpleLog - INFO: - episode: 36, ep_reward: 92.0, ep_step: 92 +2023-05-17 22:38:45 - SimpleLog - INFO: - episode: 37, ep_reward: 46.0, ep_step: 46 +2023-05-17 22:38:46 - SimpleLog - INFO: - episode: 38, ep_reward: 139.0, ep_step: 139 +2023-05-17 22:38:46 - SimpleLog - INFO: - episode: 39, ep_reward: 58.0, ep_step: 58 +2023-05-17 22:38:46 - SimpleLog - INFO: - episode: 40, ep_reward: 45.0, ep_step: 45 +2023-05-17 22:38:46 - SimpleLog - INFO: - update_step: 1000, online_eval_reward: 57.000 +2023-05-17 22:38:46 - SimpleLog - INFO: - current update step obtain a better online_eval_reward: 57.000, save the best model! +2023-05-17 22:38:47 - SimpleLog - INFO: - episode: 41, ep_reward: 70.0, ep_step: 70 +2023-05-17 22:38:47 - SimpleLog - INFO: - episode: 42, ep_reward: 131.0, ep_step: 131 +2023-05-17 22:38:47 - SimpleLog - INFO: - episode: 43, ep_reward: 67.0, ep_step: 67 +2023-05-17 22:38:48 - SimpleLog - INFO: - episode: 44, ep_reward: 111.0, ep_step: 111 +2023-05-17 22:38:48 - SimpleLog - INFO: - update_step: 1500, online_eval_reward: 158.000 +2023-05-17 22:38:48 - SimpleLog - INFO: - current update step obtain a better online_eval_reward: 158.000, save the best model! +2023-05-17 22:38:49 - SimpleLog - INFO: - episode: 45, ep_reward: 153.0, ep_step: 153 +2023-05-17 22:38:49 - SimpleLog - INFO: - episode: 46, ep_reward: 188.0, ep_step: 188 +2023-05-17 22:38:50 - SimpleLog - INFO: - episode: 47, ep_reward: 200.0, ep_step: 200 +2023-05-17 22:38:50 - SimpleLog - INFO: - update_step: 2000, online_eval_reward: 200.000 +2023-05-17 22:38:50 - SimpleLog - INFO: - current update step obtain a better online_eval_reward: 200.000, save the best model! +2023-05-17 22:38:51 - SimpleLog - INFO: - episode: 48, ep_reward: 200.0, ep_step: 200 +2023-05-17 22:38:52 - SimpleLog - INFO: - episode: 49, ep_reward: 200.0, ep_step: 200 +2023-05-17 22:38:53 - SimpleLog - INFO: - update_step: 2500, online_eval_reward: 200.000 +2023-05-17 22:38:53 - SimpleLog - INFO: - episode: 50, ep_reward: 200.0, ep_step: 200 +2023-05-17 22:38:53 - SimpleLog - INFO: - episode: 51, ep_reward: 200.0, ep_step: 200 +2023-05-17 22:38:54 - SimpleLog - INFO: - episode: 52, ep_reward: 200.0, ep_step: 200 +2023-05-17 22:38:55 - SimpleLog - INFO: - update_step: 3000, online_eval_reward: 200.000 +2023-05-17 22:38:55 - SimpleLog - INFO: - episode: 53, ep_reward: 200.0, ep_step: 200 +2023-05-17 22:38:56 - SimpleLog - INFO: - episode: 54, ep_reward: 200.0, ep_step: 200 +2023-05-17 22:38:57 - SimpleLog - INFO: - update_step: 3500, online_eval_reward: 200.000 +2023-05-17 22:38:57 - SimpleLog - INFO: - episode: 55, ep_reward: 200.0, ep_step: 200 +2023-05-17 22:38:57 - SimpleLog - INFO: - episode: 56, ep_reward: 200.0, ep_step: 200 +2023-05-17 22:38:58 - SimpleLog - INFO: - episode: 57, ep_reward: 200.0, ep_step: 200 +2023-05-17 22:38:59 - SimpleLog - INFO: - update_step: 4000, online_eval_reward: 200.000 +2023-05-17 22:38:59 - SimpleLog - INFO: - episode: 58, ep_reward: 200.0, ep_step: 200 +2023-05-17 22:39:00 - SimpleLog - INFO: - episode: 59, ep_reward: 200.0, ep_step: 200 +2023-05-17 22:39:01 - SimpleLog - INFO: - update_step: 4500, online_eval_reward: 200.000 +2023-05-17 22:39:01 - SimpleLog - INFO: - episode: 60, ep_reward: 200.0, ep_step: 200 +2023-05-17 22:39:02 - SimpleLog - INFO: - episode: 61, ep_reward: 200.0, ep_step: 200 +2023-05-17 22:39:02 - SimpleLog - INFO: - episode: 62, ep_reward: 200.0, ep_step: 200 +2023-05-17 22:39:03 - SimpleLog - INFO: - update_step: 5000, online_eval_reward: 200.000 +2023-05-17 22:39:03 - SimpleLog - INFO: - episode: 63, ep_reward: 200.0, ep_step: 200 +2023-05-17 22:39:04 - SimpleLog - INFO: - episode: 64, ep_reward: 200.0, ep_step: 200 +2023-05-17 22:39:05 - SimpleLog - INFO: - update_step: 5500, online_eval_reward: 200.000 +2023-05-17 22:39:05 - SimpleLog - INFO: - episode: 65, ep_reward: 200.0, ep_step: 200 +2023-05-17 22:39:06 - SimpleLog - INFO: - episode: 66, ep_reward: 200.0, ep_step: 200 +2023-05-17 22:39:07 - SimpleLog - INFO: - episode: 67, ep_reward: 200.0, ep_step: 200 +2023-05-17 22:39:07 - SimpleLog - INFO: - update_step: 6000, online_eval_reward: 200.000 +2023-05-17 22:39:08 - SimpleLog - INFO: - episode: 68, ep_reward: 200.0, ep_step: 200 +2023-05-17 22:39:08 - SimpleLog - INFO: - episode: 69, ep_reward: 200.0, ep_step: 200 +2023-05-17 22:39:09 - SimpleLog - INFO: - update_step: 6500, online_eval_reward: 200.000 +2023-05-17 22:39:09 - SimpleLog - INFO: - episode: 70, ep_reward: 200.0, ep_step: 200 +2023-05-17 22:39:10 - SimpleLog - INFO: - episode: 71, ep_reward: 200.0, ep_step: 200 +2023-05-17 22:39:11 - SimpleLog - INFO: - episode: 72, ep_reward: 200.0, ep_step: 200 +2023-05-17 22:39:12 - SimpleLog - INFO: - update_step: 7000, online_eval_reward: 200.000 +2023-05-17 22:39:12 - SimpleLog - INFO: - episode: 73, ep_reward: 200.0, ep_step: 200 +2023-05-17 22:39:13 - SimpleLog - INFO: - episode: 74, ep_reward: 200.0, ep_step: 200 +2023-05-17 22:39:13 - SimpleLog - INFO: - update_step: 7500, online_eval_reward: 200.000 +2023-05-17 22:39:13 - SimpleLog - INFO: - episode: 75, ep_reward: 200.0, ep_step: 200 +2023-05-17 22:39:15 - SimpleLog - INFO: - episode: 76, ep_reward: 200.0, ep_step: 200 +2023-05-17 22:39:15 - SimpleLog - INFO: - episode: 77, ep_reward: 200.0, ep_step: 200 +2023-05-17 22:39:16 - SimpleLog - INFO: - update_step: 8000, online_eval_reward: 200.000 +2023-05-17 22:39:16 - SimpleLog - INFO: - episode: 78, ep_reward: 200.0, ep_step: 200 +2023-05-17 22:39:17 - SimpleLog - INFO: - episode: 79, ep_reward: 200.0, ep_step: 200 +2023-05-17 22:39:18 - SimpleLog - INFO: - update_step: 8500, online_eval_reward: 200.000 +2023-05-17 22:39:18 - SimpleLog - INFO: - episode: 80, ep_reward: 200.0, ep_step: 200 +2023-05-17 22:39:19 - SimpleLog - INFO: - episode: 81, ep_reward: 200.0, ep_step: 200 +2023-05-17 22:39:19 - SimpleLog - INFO: - episode: 82, ep_reward: 200.0, ep_step: 200 +2023-05-17 22:39:20 - SimpleLog - INFO: - update_step: 9000, online_eval_reward: 200.000 +2023-05-17 22:39:20 - SimpleLog - INFO: - episode: 83, ep_reward: 200.0, ep_step: 200 +2023-05-17 22:39:21 - SimpleLog - INFO: - episode: 84, ep_reward: 200.0, ep_step: 200 +2023-05-17 22:39:22 - SimpleLog - INFO: - update_step: 9500, online_eval_reward: 200.000 +2023-05-17 22:39:22 - SimpleLog - INFO: - episode: 85, ep_reward: 200.0, ep_step: 200 +2023-05-17 22:39:23 - SimpleLog - INFO: - episode: 86, ep_reward: 200.0, ep_step: 200 +2023-05-17 22:39:23 - SimpleLog - INFO: - episode: 87, ep_reward: 200.0, ep_step: 200 +2023-05-17 22:39:24 - SimpleLog - INFO: - update_step: 10000, online_eval_reward: 200.000 +2023-05-17 22:39:25 - SimpleLog - INFO: - episode: 88, ep_reward: 200.0, ep_step: 200 +2023-05-17 22:39:25 - SimpleLog - INFO: - episode: 89, ep_reward: 200.0, ep_step: 200 +2023-05-17 22:39:26 - SimpleLog - INFO: - update_step: 10500, online_eval_reward: 200.000 +2023-05-17 22:39:26 - SimpleLog - INFO: - episode: 90, ep_reward: 200.0, ep_step: 200 +2023-05-17 22:39:27 - SimpleLog - INFO: - episode: 91, ep_reward: 200.0, ep_step: 200 +2023-05-17 22:39:28 - SimpleLog - INFO: - episode: 92, ep_reward: 200.0, ep_step: 200 +2023-05-17 22:39:28 - SimpleLog - INFO: - update_step: 11000, online_eval_reward: 200.000 +2023-05-17 22:39:29 - SimpleLog - INFO: - episode: 93, ep_reward: 200.0, ep_step: 200 +2023-05-17 22:39:30 - SimpleLog - INFO: - episode: 94, ep_reward: 200.0, ep_step: 200 +2023-05-17 22:39:31 - SimpleLog - INFO: - update_step: 11500, online_eval_reward: 200.000 +2023-05-17 22:39:31 - SimpleLog - INFO: - episode: 95, ep_reward: 200.0, ep_step: 200 +2023-05-17 22:39:32 - SimpleLog - INFO: - episode: 96, ep_reward: 200.0, ep_step: 200 +2023-05-17 22:39:32 - SimpleLog - INFO: - episode: 97, ep_reward: 200.0, ep_step: 200 +2023-05-17 22:39:33 - SimpleLog - INFO: - update_step: 12000, online_eval_reward: 200.000 +2023-05-17 22:39:34 - SimpleLog - INFO: - episode: 98, ep_reward: 200.0, ep_step: 200 +2023-05-17 22:39:34 - SimpleLog - INFO: - episode: 99, ep_reward: 200.0, ep_step: 200 +2023-05-17 22:39:34 - SimpleLog - INFO: - Finish training! total time consumed: 51.81s diff --git a/CartPole-v1/Train_single_CartPole-v1_DuelingDQN_20230517-223843/models/1000 b/CartPole-v1/Train_single_CartPole-v1_DuelingDQN_20230517-223843/models/1000 new file mode 100644 index 0000000000000000000000000000000000000000..7e7c2e4f6bb11f41ea2a29249e34ad03696aafde Binary files /dev/null and b/CartPole-v1/Train_single_CartPole-v1_DuelingDQN_20230517-223843/models/1000 differ diff --git a/CartPole-v1/Train_single_CartPole-v1_DuelingDQN_20230517-223843/models/10000 b/CartPole-v1/Train_single_CartPole-v1_DuelingDQN_20230517-223843/models/10000 new file mode 100644 index 0000000000000000000000000000000000000000..0bed7eb6ce5f628bc782c24a4950b9b2a99f7dd8 Binary files /dev/null and b/CartPole-v1/Train_single_CartPole-v1_DuelingDQN_20230517-223843/models/10000 differ diff --git a/CartPole-v1/Train_single_CartPole-v1_DuelingDQN_20230517-223843/models/10500 b/CartPole-v1/Train_single_CartPole-v1_DuelingDQN_20230517-223843/models/10500 new file mode 100644 index 0000000000000000000000000000000000000000..a48e789c0a921609b744c9ad8af4f89a86331f5e Binary files /dev/null and b/CartPole-v1/Train_single_CartPole-v1_DuelingDQN_20230517-223843/models/10500 differ diff --git a/CartPole-v1/Train_single_CartPole-v1_DuelingDQN_20230517-223843/models/11000 b/CartPole-v1/Train_single_CartPole-v1_DuelingDQN_20230517-223843/models/11000 new file mode 100644 index 0000000000000000000000000000000000000000..093809c058fe2d94035d14fc3ed0af8fb4fc0f2e Binary files /dev/null and b/CartPole-v1/Train_single_CartPole-v1_DuelingDQN_20230517-223843/models/11000 differ diff --git a/CartPole-v1/Train_single_CartPole-v1_DuelingDQN_20230517-223843/models/11500 b/CartPole-v1/Train_single_CartPole-v1_DuelingDQN_20230517-223843/models/11500 new file mode 100644 index 0000000000000000000000000000000000000000..85a1ea724fb21ac95dd0e3e63488c4566186cee9 Binary files /dev/null and b/CartPole-v1/Train_single_CartPole-v1_DuelingDQN_20230517-223843/models/11500 differ diff --git a/CartPole-v1/Train_single_CartPole-v1_DuelingDQN_20230517-223843/models/12000 b/CartPole-v1/Train_single_CartPole-v1_DuelingDQN_20230517-223843/models/12000 new file mode 100644 index 0000000000000000000000000000000000000000..761cb30eca6b89559f93b2bb82cf2055a1f3aefb Binary files /dev/null and b/CartPole-v1/Train_single_CartPole-v1_DuelingDQN_20230517-223843/models/12000 differ diff --git a/CartPole-v1/Train_single_CartPole-v1_DuelingDQN_20230517-223843/models/1500 b/CartPole-v1/Train_single_CartPole-v1_DuelingDQN_20230517-223843/models/1500 new file mode 100644 index 0000000000000000000000000000000000000000..d153981ff07400b1259fde874b00b1a2645704c3 Binary files /dev/null and b/CartPole-v1/Train_single_CartPole-v1_DuelingDQN_20230517-223843/models/1500 differ diff --git a/CartPole-v1/Train_single_CartPole-v1_DuelingDQN_20230517-223843/models/2000 b/CartPole-v1/Train_single_CartPole-v1_DuelingDQN_20230517-223843/models/2000 new file mode 100644 index 0000000000000000000000000000000000000000..6281022c0a450df782a362b51827a0520ad4e6a5 Binary files /dev/null and b/CartPole-v1/Train_single_CartPole-v1_DuelingDQN_20230517-223843/models/2000 differ diff --git a/CartPole-v1/Train_single_CartPole-v1_DuelingDQN_20230517-223843/models/2500 b/CartPole-v1/Train_single_CartPole-v1_DuelingDQN_20230517-223843/models/2500 new file mode 100644 index 0000000000000000000000000000000000000000..f9f478c7c75e17273d194e35f33f3d26742e682b Binary files /dev/null and b/CartPole-v1/Train_single_CartPole-v1_DuelingDQN_20230517-223843/models/2500 differ diff --git a/CartPole-v1/Train_single_CartPole-v1_DuelingDQN_20230517-223843/models/3000 b/CartPole-v1/Train_single_CartPole-v1_DuelingDQN_20230517-223843/models/3000 new file mode 100644 index 0000000000000000000000000000000000000000..3fd92f6ced8d31878ec3e64d6cc8c33c33e66ac5 Binary files /dev/null and b/CartPole-v1/Train_single_CartPole-v1_DuelingDQN_20230517-223843/models/3000 differ diff --git a/CartPole-v1/Train_single_CartPole-v1_DuelingDQN_20230517-223843/models/3500 b/CartPole-v1/Train_single_CartPole-v1_DuelingDQN_20230517-223843/models/3500 new file mode 100644 index 0000000000000000000000000000000000000000..ef8d2641fd40c7549a24e6d9e074d62a06241cb5 Binary files /dev/null and b/CartPole-v1/Train_single_CartPole-v1_DuelingDQN_20230517-223843/models/3500 differ diff --git a/CartPole-v1/Train_single_CartPole-v1_DuelingDQN_20230517-223843/models/4000 b/CartPole-v1/Train_single_CartPole-v1_DuelingDQN_20230517-223843/models/4000 new file mode 100644 index 0000000000000000000000000000000000000000..740a1e693a9a27bac202492d2485bfd4043bad0c Binary files /dev/null and b/CartPole-v1/Train_single_CartPole-v1_DuelingDQN_20230517-223843/models/4000 differ diff --git a/CartPole-v1/Train_single_CartPole-v1_DuelingDQN_20230517-223843/models/4500 b/CartPole-v1/Train_single_CartPole-v1_DuelingDQN_20230517-223843/models/4500 new file mode 100644 index 0000000000000000000000000000000000000000..e1bd86ef93ae1fca062e5d6ed3dee37b2558947e Binary files /dev/null and b/CartPole-v1/Train_single_CartPole-v1_DuelingDQN_20230517-223843/models/4500 differ diff --git a/CartPole-v1/Train_single_CartPole-v1_DuelingDQN_20230517-223843/models/500 b/CartPole-v1/Train_single_CartPole-v1_DuelingDQN_20230517-223843/models/500 new file mode 100644 index 0000000000000000000000000000000000000000..89c9b14402f1fae05fe12ba0814d5dc8506b868f Binary files /dev/null and b/CartPole-v1/Train_single_CartPole-v1_DuelingDQN_20230517-223843/models/500 differ diff --git a/CartPole-v1/Train_single_CartPole-v1_DuelingDQN_20230517-223843/models/5000 b/CartPole-v1/Train_single_CartPole-v1_DuelingDQN_20230517-223843/models/5000 new file mode 100644 index 0000000000000000000000000000000000000000..e0b6af9f53c5950da60d0a286d1da9e74ca278c0 Binary files /dev/null and b/CartPole-v1/Train_single_CartPole-v1_DuelingDQN_20230517-223843/models/5000 differ diff --git a/CartPole-v1/Train_single_CartPole-v1_DuelingDQN_20230517-223843/models/5500 b/CartPole-v1/Train_single_CartPole-v1_DuelingDQN_20230517-223843/models/5500 new file mode 100644 index 0000000000000000000000000000000000000000..696cab4d60acf09813d6f3338f7c08a12f758684 Binary files /dev/null and b/CartPole-v1/Train_single_CartPole-v1_DuelingDQN_20230517-223843/models/5500 differ diff --git a/CartPole-v1/Train_single_CartPole-v1_DuelingDQN_20230517-223843/models/6000 b/CartPole-v1/Train_single_CartPole-v1_DuelingDQN_20230517-223843/models/6000 new file mode 100644 index 0000000000000000000000000000000000000000..75d3cdb8a2b3b531144789e39bee75e2b6af91e0 Binary files /dev/null and b/CartPole-v1/Train_single_CartPole-v1_DuelingDQN_20230517-223843/models/6000 differ diff --git a/CartPole-v1/Train_single_CartPole-v1_DuelingDQN_20230517-223843/models/6500 b/CartPole-v1/Train_single_CartPole-v1_DuelingDQN_20230517-223843/models/6500 new file mode 100644 index 0000000000000000000000000000000000000000..2502198dbe83f446e89f3988ab789cb54d872f2e Binary files /dev/null and b/CartPole-v1/Train_single_CartPole-v1_DuelingDQN_20230517-223843/models/6500 differ diff --git a/CartPole-v1/Train_single_CartPole-v1_DuelingDQN_20230517-223843/models/7000 b/CartPole-v1/Train_single_CartPole-v1_DuelingDQN_20230517-223843/models/7000 new file mode 100644 index 0000000000000000000000000000000000000000..922f8b938b70d301aecaf19072f416c93eec60ec Binary files /dev/null and b/CartPole-v1/Train_single_CartPole-v1_DuelingDQN_20230517-223843/models/7000 differ diff --git a/CartPole-v1/Train_single_CartPole-v1_DuelingDQN_20230517-223843/models/7500 b/CartPole-v1/Train_single_CartPole-v1_DuelingDQN_20230517-223843/models/7500 new file mode 100644 index 0000000000000000000000000000000000000000..a812949325f0506a7e575b45d6ef6fc8b9c2d100 Binary files /dev/null and b/CartPole-v1/Train_single_CartPole-v1_DuelingDQN_20230517-223843/models/7500 differ diff --git a/CartPole-v1/Train_single_CartPole-v1_DuelingDQN_20230517-223843/models/8000 b/CartPole-v1/Train_single_CartPole-v1_DuelingDQN_20230517-223843/models/8000 new file mode 100644 index 0000000000000000000000000000000000000000..77f3df7c1e27836ed286113dd9a648733ecdb768 Binary files /dev/null and b/CartPole-v1/Train_single_CartPole-v1_DuelingDQN_20230517-223843/models/8000 differ diff --git a/CartPole-v1/Train_single_CartPole-v1_DuelingDQN_20230517-223843/models/8500 b/CartPole-v1/Train_single_CartPole-v1_DuelingDQN_20230517-223843/models/8500 new file mode 100644 index 0000000000000000000000000000000000000000..3e70a1fcba267c642d86d5ab66d13be3f25faa62 Binary files /dev/null and b/CartPole-v1/Train_single_CartPole-v1_DuelingDQN_20230517-223843/models/8500 differ diff --git a/CartPole-v1/Train_single_CartPole-v1_DuelingDQN_20230517-223843/models/9000 b/CartPole-v1/Train_single_CartPole-v1_DuelingDQN_20230517-223843/models/9000 new file mode 100644 index 0000000000000000000000000000000000000000..f11d60bf987f0b15fdc7ce8be6ba7392913dc75f Binary files /dev/null and b/CartPole-v1/Train_single_CartPole-v1_DuelingDQN_20230517-223843/models/9000 differ diff --git a/CartPole-v1/Train_single_CartPole-v1_DuelingDQN_20230517-223843/models/9500 b/CartPole-v1/Train_single_CartPole-v1_DuelingDQN_20230517-223843/models/9500 new file mode 100644 index 0000000000000000000000000000000000000000..474ebafb615b4e3f8b98711b4fce0d61b33667a5 Binary files /dev/null and b/CartPole-v1/Train_single_CartPole-v1_DuelingDQN_20230517-223843/models/9500 differ diff --git a/CartPole-v1/Train_single_CartPole-v1_DuelingDQN_20230517-223843/models/best b/CartPole-v1/Train_single_CartPole-v1_DuelingDQN_20230517-223843/models/best new file mode 100644 index 0000000000000000000000000000000000000000..6281022c0a450df782a362b51827a0520ad4e6a5 Binary files /dev/null and b/CartPole-v1/Train_single_CartPole-v1_DuelingDQN_20230517-223843/models/best differ diff --git a/CartPole-v1/Train_single_CartPole-v1_DuelingDQN_20230517-223843/tb_logs/interact/events.out.tfevents.1684334323.DESKTOP-H34HQIQ.30484.0 b/CartPole-v1/Train_single_CartPole-v1_DuelingDQN_20230517-223843/tb_logs/interact/events.out.tfevents.1684334323.DESKTOP-H34HQIQ.30484.0 new file mode 100644 index 0000000000000000000000000000000000000000..2e8899b7b2f3f7a88a8e3cc14397214c8229a6d7 --- /dev/null +++ b/CartPole-v1/Train_single_CartPole-v1_DuelingDQN_20230517-223843/tb_logs/interact/events.out.tfevents.1684334323.DESKTOP-H34HQIQ.30484.0 @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:fa1caa6e526f308946db961e2fc4735b6e26eb12584b222b5250d20217905f0f +size 10436 diff --git a/CartPole-v1/Train_single_CartPole-v1_DuelingDQN_20230517-223843/tb_logs/model/events.out.tfevents.1684334323.DESKTOP-H34HQIQ.30484.1 b/CartPole-v1/Train_single_CartPole-v1_DuelingDQN_20230517-223843/tb_logs/model/events.out.tfevents.1684334323.DESKTOP-H34HQIQ.30484.1 new file mode 100644 index 0000000000000000000000000000000000000000..2c6dd676c852f6e1168ccf74bf4f9473d9408900 --- /dev/null +++ b/CartPole-v1/Train_single_CartPole-v1_DuelingDQN_20230517-223843/tb_logs/model/events.out.tfevents.1684334323.DESKTOP-H34HQIQ.30484.1 @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:467410bc502ff48858b3003f77303d779925a2f505c21b5901f849383ad693fd +size 602907