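# PPO-PG (episode-based policy-gradient variant of PPO) config for BipedalWalker-v3,
# using the small custom continuous-action model defined below.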
from easydict import EasyDict
import torch.nn as nn
from ding.model.common import ReparameterizationHead

bipedalwalker_ppo_config = dict(
    exp_name='bipedalwalker_ppopg',
    env=dict(
        env_id='BipedalWalker-v3',
        # (int) Number of parallel environments for collection and evaluation.
        collector_env_num=8,
        evaluator_env_num=5,
        # (bool) Scale output action into legal range.
        act_scale=True,
        n_evaluator_episode=5,
        # (int) Evaluation return at which training is considered converged and stops.
        stop_value=500,
        # (bool) Clip the raw environment reward.
        rew_clip=True,
    ),
    policy=dict(
        cuda=True,
        action_space='continuous',
        model=dict(
            obs_shape=24,
            action_shape=4,
        ),
        learn=dict(
            # (int) Number of optimization epochs over each collected batch.
            epoch_per_collect=10,
            batch_size=64,
            learning_rate=3e-4,
            # (float) Weight of the entropy regularization bonus.
            entropy_weight=0.0001,
            # (float) PPO clipping ratio (epsilon) for the surrogate objective.
            clip_ratio=0.2,
            # (bool) Normalize advantage estimates before the policy update.
            adv_norm=True,
        ),
        collect=dict(
            # (int) Number of complete episodes to collect per training iteration.
            n_episode=16,
            discount_factor=0.99,
            # Post-process whole episodes into training samples after collection.
            collector=dict(get_train_sample=True),
        ),
    ),
)
bipedalwalker_ppo_config = EasyDict(bipedalwalker_ppo_config)
main_config = bipedalwalker_ppo_config
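# Create config: which registered DI-engine components (env, env manager, policy, collector)
# to instantiate for this experiment.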
bipedalwalker_ppo_create_config = dict(
    env=dict(
        type='bipedalwalker',
        import_names=['dizoo.box2d.bipedalwalker.envs.bipedalwalker_env'],
    ),
    env_manager=dict(type='subprocess'),
    policy=dict(type='ppo_pg'),
    collector=dict(type='episode'),
)
bipedalwalker_ppo_create_config = EasyDict(bipedalwalker_ppo_create_config)
create_config = bipedalwalker_ppo_create_config


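# Minimal custom model for continuous-action PPO-PG: a single-hidden-layer encoder followed
# by a ReparameterizationHead that parameterizes a Gaussian policy (mu, sigma).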
class PPOPGContinuousModel(nn.Module):

    def __init__(self, obs_shape, action_shape):
        super(PPOPGContinuousModel, self).__init__()
        # Encode the observation into a 64-dim feature with a single Tanh layer.
        self.encoder = nn.Sequential(nn.Linear(obs_shape, 64), nn.Tanh())
        # Gaussian policy head: predicts the action mean and a state-conditioned sigma.
        self.head = ReparameterizationHead(
            hidden_size=64,
            output_size=action_shape,
            layer_num=2,
            sigma_type='conditioned',
            activation=nn.Tanh(),
        )

    def forward(self, inputs):
        x = self.encoder(inputs)
        x = self.head(x)
        # The head returns a dict with 'mu' and 'sigma'; the PPO policy consumes
        # these Gaussian distribution parameters under the 'logit' key.
        return {'logit': x}


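# Entry point: run the serial on-policy training pipeline for three seeds, building a fresh
# model and a per-seed experiment name for each run.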
if __name__ == "__main__":
    # or you can enter `ding -m serial_onpolicy -c bipedalwalker_ppo_config.py -s 0`
    from ding.entry import serial_pipeline_onpolicy
    from copy import deepcopy
    for seed in [1, 2, 3]:
        new_main_config = deepcopy(main_config)
        new_main_config.exp_name += "_seed{}".format(seed)
        model = PPOPGContinuousModel(
            new_main_config.policy.model.obs_shape, new_main_config.policy.model.action_shape
        )
        serial_pipeline_onpolicy(
            [new_main_config, deepcopy(create_config)], seed=seed, max_env_step=int(5e6), model=model
        )