File size: 2,616 Bytes
079c32c
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
from easydict import EasyDict

# Number of bits in the bit-flip task. 15 or 20 bits can show differences between
# pure DQN and HER DQN; 5 bits is the setting used for the unittest.
n_bits = 5

# Main hyper-parameter config for HER + DQN on bit-flip. The nested mapping is
# wrapped in an EasyDict and bound to both `bitflip_her_dqn_config` and `main_config`.
main_config = bitflip_her_dqn_config = EasyDict({
    'exp_name': f'bitflip_{n_bits}bit_herdqn_seed0',
    'env': {
        'collector_env_num': 8,
        'evaluator_env_num': 16,
        'n_bits': n_bits,
        'n_evaluator_episode': 16,
        'stop_value': 0.9,
    },
    'policy': {
        'cuda': False,
        'model': {
            # Observation size is twice the bit count; one action per bit.
            'obs_shape': 2 * n_bits,
            'action_shape': n_bits,
            'encoder_hidden_size_list': [128, 128, 64],
            'dueling': True,
        },
        # == Differs from most DQN configs ==
        # With discount_factor (gamma) > 0.9 convergence becomes very difficult.
        'discount_factor': 0.8,
        'learn': {
            'update_per_collect': 10,
            # batch_size = episode_size * sample_per_episode (here 32 * 4);
            # both factors are set under `other.her` in this config.
            'batch_size': 128,
            'learning_rate': 0.0005,
            'target_update_freq': 500,
        },
        'collect': {
            'n_episode': 8,
            'unroll_len': 1,
        },
        'eval': {'evaluator': {'eval_freq': 1000}},
        'other': {
            # == Differs from most DQN configs ==
            # Epsilon is pinned at 0.2 (start == end); per the paper this
            # makes convergence easier.
            'eps': {
                'type': 'exp',
                'start': 0.2,  # noted alternative: 0.8
                'end': 0.2,  # notes: original 0.1, paper 0.15~0.2
                'decay': 100,  # noted alternative: 10000
            },
            'replay_buffer': {'replay_buffer_size': 4000},
            'her': {
                'her_strategy': 'future',
                # NOTE: `her_replay_k` is not used in episodic HER, so it is not set.
                # How many episodes are sampled in each train iteration.
                'episode_size': 32,
                # How many samples are generated from one episode.
                'sample_per_episode': 4,
            },
        },
    },
})

# Creation config: type names for the env, env manager, policy, replay buffer
# and collector, plus the module path listed under `import_names` for the env.
# Wrapped in an EasyDict and bound to both `bitflip_her_dqn_create_config`
# and `create_config`.
create_config = bitflip_her_dqn_create_config = EasyDict({
    'env': {
        'type': 'bitflip',
        'import_names': ['dizoo.bitflip.envs.bitflip_env'],
    },
    'env_manager': {'type': 'base'},
    'policy': {'type': 'dqn'},
    'replay_buffer': {'type': 'episode'},
    'collector': {'type': 'episode'},
})

if __name__ == '__main__':
    # Script entry: hand the main config to the bit-flip DQN entry point, seed 0.
    # The import is kept local so it only happens when run as a script.
    from dizoo.bitflip.entry.bitflip_dqn_main import main as run_bitflip_dqn
    run_bitflip_dqn(main_config, seed=0)