gomoku / DI-engine /dizoo /bitflip /config /bitflip_her_dqn_config.py
zjowowen's picture
init space
079c32c
raw
history blame
2.62 kB
from easydict import EasyDict
n_bits = 5 # 15 or 20 n_bits can show differences between pure DQN and HER DQN, 5 n_bits for unittest
bitflip_her_dqn_config = dict(
exp_name='bitflip_{}bit_herdqn_seed0'.format(n_bits),
env=dict(
collector_env_num=8,
evaluator_env_num=16,
n_bits=n_bits,
n_evaluator_episode=16,
stop_value=0.9,
),
policy=dict(
cuda=False,
model=dict(
obs_shape=2 * n_bits,
action_shape=n_bits,
encoder_hidden_size_list=[128, 128, 64],
dueling=True,
),
# == Different from most DQN algorithms ==
# If discount_factor(gamma) > 0.9, it would be very difficult to converge
discount_factor=0.8,
learn=dict(
update_per_collect=10,
# batch_size = episode_size * sample_per_episode
# You can refer to cfg.other.her to learn about `episode_size` and `sample_per_episode`
batch_size=128,
learning_rate=0.0005,
target_update_freq=500,
),
collect=dict(
n_episode=8,
unroll_len=1,
),
eval=dict(evaluator=dict(eval_freq=1000)),
other=dict(
# == Different from most DQN algorithms ==
# Fix epsilon to 0.2 leads to easier convergence, proposed in the paper.
eps=dict(
type='exp',
start=0.2, # 0.8
end=0.2, # original0.1, paper0.15~0.2
decay=100, # 10000
),
replay_buffer=dict(replay_buffer_size=4000, ),
her=dict(
her_strategy='future',
# her_replay_k=2, # `her_replay_k` is not used in episodic HER
# Sample how many episodes in each train iteration.
episode_size=32,
# Generate how many samples from one episode.
sample_per_episode=4,
),
),
),
)
bitflip_her_dqn_config = EasyDict(bitflip_her_dqn_config)
main_config = bitflip_her_dqn_config
bitflip_her_dqn_create_config = dict(
env=dict(
type='bitflip',
import_names=['dizoo.bitflip.envs.bitflip_env'],
),
env_manager=dict(type='base'),
policy=dict(type='dqn'),
replay_buffer=dict(type='episode'),
collector=dict(type='episode'),
)
bitflip_her_dqn_create_config = EasyDict(bitflip_her_dqn_create_config)
create_config = bitflip_her_dqn_create_config
if __name__ == '__main__':
from dizoo.bitflip.entry.bitflip_dqn_main import main
main(main_config, seed=0)