File size: 6,532 Bytes
05c9ac2 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 |
import yaml
import pytest
from mlagents.trainers.upgrade_config import convert_behaviors, remove_nones, convert
from mlagents.trainers.settings import RewardSignalType
from mlagents.trainers.ppo.trainer import PPOSettings, TRAINER_NAME as PPO_TRAINER_NAME
from mlagents.trainers.sac.trainer import SACSettings, TRAINER_NAME as SAC_TRAINER_NAME
# Behavior name used as the per-behavior key in the legacy PPO/SAC configs
# below and looked up again in the converted output by the tests.
BRAIN_NAME = "testbehavior"
# Check one per category: these values are interpolated into the
# per-behavior section of the legacy configs. BATCH_SIZE is asserted through
# `hyperparameters`, HIDDEN_UNITS through `network_settings`; SUMMARY_FREQ is
# only interpolated into the configs by the code in this file.
BATCH_SIZE = 256
HIDDEN_UNITS = 32
SUMMARY_FREQ = 500
# Legacy (pre-upgrade) PPO trainer config: a `default` section plus one
# section for BRAIN_NAME. The nesting is significant: the tests index
# `old_config[BRAIN_NAME]["use_recurrent"]`, so every setting must be a child
# of its section key and the reward-signal options children of their signal.
PPO_CONFIG = f"""
default:
    trainer: ppo
    batch_size: 1024
    beta: 5.0e-3
    buffer_size: 10240
    epsilon: 0.2
    hidden_units: 128
    lambd: 0.95
    learning_rate: 3.0e-4
    learning_rate_schedule: linear
    beta_schedule: constant
    epsilon_schedule: linear
    max_steps: 5.0e5
    memory_size: 256
    normalize: false
    num_epoch: 3
    num_layers: 2
    time_horizon: 64
    sequence_length: 64
    summary_freq: 10000
    use_recurrent: false
    vis_encode_type: simple
    reward_signals:
        extrinsic:
            strength: 1.0
            gamma: 0.99
{BRAIN_NAME}:
    trainer: ppo
    batch_size: {BATCH_SIZE}
    beta: 5.0e-3
    buffer_size: 64
    epsilon: 0.2
    hidden_units: {HIDDEN_UNITS}
    lambd: 0.95
    learning_rate: 5.0e-3
    max_steps: 2500
    memory_size: 256
    normalize: false
    num_epoch: 3
    num_layers: 2
    time_horizon: 64
    sequence_length: 64
    summary_freq: {SUMMARY_FREQ}
    use_recurrent: false
    reward_signals:
        curiosity:
            strength: 1.0
            gamma: 0.99
            encoding_size: 128
"""
# Legacy (pre-upgrade) SAC trainer config: a `default` section plus one
# section for BRAIN_NAME. The nesting is significant: the tests index
# `old_config[BRAIN_NAME]["use_recurrent"]`, so every setting must be a child
# of its section key and the reward-signal options children of their signal.
SAC_CONFIG = f"""
default:
    trainer: sac
    batch_size: 128
    buffer_size: 50000
    buffer_init_steps: 0
    hidden_units: 128
    init_entcoef: 1.0
    learning_rate: 3.0e-4
    learning_rate_schedule: constant
    max_steps: 5.0e5
    memory_size: 256
    normalize: false
    num_update: 1
    train_interval: 1
    num_layers: 2
    time_horizon: 64
    sequence_length: 64
    summary_freq: 10000
    tau: 0.005
    use_recurrent: false
    vis_encode_type: simple
    reward_signals:
        extrinsic:
            strength: 1.0
            gamma: 0.99
{BRAIN_NAME}:
    trainer: sac
    batch_size: {BATCH_SIZE}
    buffer_size: 64
    buffer_init_steps: 100
    hidden_units: {HIDDEN_UNITS}
    init_entcoef: 0.01
    learning_rate: 3.0e-4
    max_steps: 1000
    memory_size: 256
    normalize: false
    num_update: 1
    train_interval: 1
    num_layers: 1
    time_horizon: 64
    sequence_length: 64
    summary_freq: {SUMMARY_FREQ}
    tau: 0.005
    use_recurrent: false
    curiosity_enc_size: 128
    demo_path: None
    vis_encode_type: simple
    reward_signals:
        curiosity:
            strength: 1.0
            gamma: 0.99
            encoding_size: 128
"""
# Legacy curriculum config keyed by behavior name. Each behavior carries its
# completion settings and a `parameters` mapping of environment parameter
# name -> per-lesson values. The nesting is significant: test_convert expects
# `big_wall_min_height` to be converted with "BigWallJump" as its behavior.
CURRICULUM = """
BigWallJump:
    measure: progress
    thresholds: [0.1, 0.3, 0.5]
    min_lesson_length: 200
    signal_smoothing: true
    parameters:
        big_wall_min_height: [0.0, 4.0, 6.0, 8.0]
        big_wall_max_height: [4.0, 7.0, 8.0, 8.0]
SmallWallJump:
    measure: progress
    thresholds: [0.1, 0.3, 0.5]
    min_lesson_length: 100
    signal_smoothing: true
    parameters:
        small_wall_height: [1.5, 2.0, 2.5, 4.0]
"""
# Legacy parameter-randomization (sampler) config: a top-level
# `resampling-interval` plus one mapping per environment parameter. The
# sampler options must be nested under their parameter; otherwise the
# repeated `min_value`/`max_value` keys collide at the top level and
# `gravity` has no settings for test_convert to check.
RANDOMIZATION = """
resampling-interval: 5000
mass:
    sampler-type: uniform
    min_value: 0.5
    max_value: 10
gravity:
    sampler-type: uniform
    min_value: 7
    max_value: 12
scale:
    sampler-type: uniform
    min_value: 0.75
    max_value: 3
"""
@pytest.mark.parametrize("use_recurrent", [True, False])
@pytest.mark.parametrize("trainer_type", [PPO_TRAINER_NAME, SAC_TRAINER_NAME])
def test_convert_behaviors(trainer_type, use_recurrent):
    """Convert a legacy PPO or SAC config and check the BRAIN_NAME settings.

    Runs for each combination of trainer type and `use_recurrent`; the
    recurrent flag is overridden in the loaded legacy config before
    conversion.
    """
    # Select the legacy YAML and the hyperparameter class expected for it.
    if trainer_type != PPO_TRAINER_NAME:
        legacy_yaml, expected_hyperparams_cls = SAC_CONFIG, SACSettings
    else:
        legacy_yaml, expected_hyperparams_cls = PPO_CONFIG, PPOSettings

    legacy = yaml.safe_load(legacy_yaml)
    legacy[BRAIN_NAME]["use_recurrent"] = use_recurrent

    # The conversion itself must complete without raising.
    settings = convert_behaviors(legacy)[BRAIN_NAME]

    # The converted entry carries the BRAIN_NAME-specific values.
    assert settings.trainer_type == trainer_type
    hyperparams = settings.hyperparameters
    assert isinstance(hyperparams, expected_hyperparams_cls)
    assert hyperparams.batch_size == BATCH_SIZE
    assert settings.network_settings.hidden_units == HIDDEN_UNITS
    assert RewardSignalType.CURIOSITY in settings.reward_signals
def test_convert():
    """Run the full conversion and check behaviors, curriculum and sampler output."""
    legacy_behaviors = yaml.safe_load(PPO_CONFIG)
    legacy_curriculum = yaml.safe_load(CURRICULUM)
    legacy_sampler = yaml.safe_load(RANDOMIZATION)
    config = convert(legacy_behaviors, legacy_curriculum, legacy_sampler)

    assert BRAIN_NAME in config["behaviors"]

    # Curriculum: one lesson per legacy value of `big_wall_min_height`.
    assert "big_wall_min_height" in config["environment_parameters"]
    lessons = config["environment_parameters"]["big_wall_min_height"]["curriculum"]
    assert len(lessons) == 4
    lesson_values = [0.0, 4.0, 6.0, 8.0]
    for idx, (lesson, wanted) in enumerate(zip(lessons, lesson_values)):
        assert lesson[f"Lesson{idx}"]["value"] == wanted

    # Only the first three lessons are checked for completion criteria,
    # one per legacy threshold.
    for idx, wanted_threshold in enumerate([0.1, 0.3, 0.5]):
        criteria = lessons[idx][f"Lesson{idx}"]["completion_criteria"]
        assert criteria["threshold"] == wanted_threshold
        assert criteria["behavior"] == "BigWallJump"
        assert criteria["signal_smoothing"]
        assert criteria["min_lesson_length"] == 200
        assert criteria["measure"] == "progress"

    # Sampler: `gravity` keeps its type and bounds under the new key names.
    assert "gravity" in config["environment_parameters"]
    gravity_cfg = config["environment_parameters"]["gravity"]
    assert gravity_cfg["sampler_type"] == "uniform"
    bounds = gravity_cfg["sampler_parameters"]
    assert bounds["min_value"] == 7
    assert bounds["max_value"] == 12
def test_remove_nones():
    """Check that `remove_nones` drops None-valued keys at top and nested level."""
    with_nones = {"hello": {"hello2": 2, "hello3": None}, "hello4": None}
    expected = {"hello": {"hello2": 2}}
    assert remove_nones(with_nones) == expected
|